diff --git a/bench.py b/bench.py
index c220a3bc..60e99fa0 100755
--- a/bench.py
+++ b/bench.py
@@ -20,133 +20,46 @@ async def main(args: argparse.Namespace) -> int:
   url = f'http://localhost:3000/v1/{endpoint}'
   # len=572
   prompts = [
-      "Translate the following English text to French: 'Hello, how are you?'",
-      "Summarize the plot of the book 'To Kill a Mockingbird.'",
-      'Generate a list of 10 random numbers between 1 and 100.',
-      'What is the capital of France?',
-      'Write a poem about nature.',
-      'Convert 25 degrees Celsius to Fahrenheit.',
-      'Describe the process of photosynthesis.',
-      'Tell me a joke.',
-      'List five famous scientists and their contributions to science.',
-      'Write a short story about a detective solving a mystery.',
-      'Explain the theory of relativity.',
-      'Provide a brief history of the Roman Empire.',
-      'Create a shopping list for a BBQ party.',
-      "Write a movie review for the film 'Inception.'",
-      'Explain the concept of artificial intelligence.',
-      'Write a letter to your future self.',
-      'Describe the life cycle of a butterfly.',
-      'List the top 10 tourist destinations in Europe.',
-      'Explain the principles of supply and demand.',
-      'Create a menu for a vegetarian restaurant.',
-      'Write a haiku about the ocean.',
-      'Explain the importance of renewable energy sources.',
-      'List the ingredients for making chocolate chip cookies.',
-      'Write a persuasive essay on the benefits of exercise.',
-      'Describe the cultural significance of the Taj Mahal.',
-      'Explain the process of DNA replication.',
-      'Write a speech about the importance of education.',
-      'List the steps to start a small business.',
-      'Explain the concept of biodiversity.',
-      'Create a playlist for a road trip.',
-      'Write a short biography of Albert Einstein.',
-      'Describe the impact of social media on society.',
-      'Explain the principles of good nutrition.',
-      'List the 10 tallest mountains in the world.',
-      'Write a product review for a smartphone.',
-      'Create a workout routine for building muscle.',
-      'Explain the concept of climate change.',
-      'Describe the life and achievements of Marie Curie.',
-      'List the ingredients for making a classic margarita.',
-      'Write a blog post about time management.',
-      'Explain the process of cellular respiration.',
-      'Create a budget for a family vacation.',
-      "Write a book summary for 'The Great Gatsby.'",
-      'Describe the history of the Internet.',
-      'Explain the principles of effective communication.',
-      'List the top 10 historical landmarks in the world.',
-      'Write a love letter to someone special.',
-      'Explain the concept of human rights.',
-      'Create a recipe for homemade pizza.',
-      'Write a movie script for a short film.',
-      'Describe the structure of the atom.',
-      'List the 10 most influential artists of the 20th century.',
-      'Explain the process of mitosis.',
-      'Create a travel itinerary for a trip to Japan.',
-      'Write a poem about the beauty of nature.',
-      'Explain the importance of environmental conservation.',
-      'List the essential items for a hiking trip.',
-      'Write a short story set in a post-apocalyptic world.',
-      'Describe the history of the Olympic Games.',
-      'Explain the principles of democracy.',
-      'Create a business plan for a tech startup.',
-      'Write a letter of recommendation for a colleague.',
-      'List the ingredients for a classic Caesar salad.',
-      'Explain the concept of artificial neural networks.',
-      'Describe the life and work of Leonardo da Vinci.',
-      'List the 10 most popular tourist attractions in the United States.',
-      'Write a persuasive speech on the dangers of smoking.',
-      'Explain the process of natural selection.',
-      'Create a menu for a fine dining restaurant.',
-      'Write a poem about the beauty of the night sky.',
-      'Explain the importance of renewable energy.',
-      'List the necessary equipment for a camping trip.',
-      'Write a short biography of William Shakespeare.',
-      'Describe the impact of social media on business marketing.',
-      'Explain the principles of project management.',
-      'Create a playlist for a relaxing evening at home.',
-      'Write a blog post about the history of space exploration.',
-      'Explain the process of protein synthesis.',
-      'List the 10 most famous landmarks in Europe.',
-      'Write a book review for a classic novel.',
-      'Describe the history of ancient Egypt.',
-      'Explain the concept of cultural diversity.',
-      'Create a recipe for a gourmet sandwich.',
-      'Write a screenplay for a science fiction movie.',
-      "Describe the structure of the Earth's atmosphere.",
-      'List the 10 greatest inventions of all time.',
-      'Explain the process of meiosis.',
-      'Create a travel guide for a visit to Paris.',
-      'Write a poem about the changing seasons.',
-      'Explain the importance of clean energy sources.',
-      'List the essential camping gear for a wilderness adventure.',
-      'Write a short story about a time-traveling adventure.',
-      'Describe the history of the Renaissance.',
-      'Explain the principles of economics.',
-      'Create a business proposal for a new restaurant.',
-      'Write a letter to your future self 10 years from now.',
-      'List the ingredients for a classic lasagna.',
-      'Explain the concept of machine learning.',
-      'Describe the life and contributions of Martin Luther King Jr.',
-      'List the 10 most famous museums in the world.',
-      'Write a persuasive essay on the importance of environmental conservation.',
-      'Explain the process of geological erosion.',
-      'Create a menu for a vegan cafe.',
-      'Write a poem about the power of imagination.',
-      'Explain the significance of the Industrial Revolution.',
-      'List the items needed for a beach vacation.',
-      'Write a short biography of Charles Darwin.',
-      'Describe the impact of globalization on cultures.',
-      'Explain the principles of time management.',
-      'Create a playlist for a high-energy workout.',
-      'Write a blog post about the future of artificial intelligence.',
-      'Explain the process of DNA transcription.',
-      'List the 10 most iconic landmarks in Asia.',
-      'Write a book summary for a popular self-help book.',
-      'Describe the history of the ancient Greeks.',
-      'Explain the concept of social justice.',
-      'Create a recipe for a gourmet salad.',
-      'Write a screenplay for a romantic comedy movie.',
-      "Describe the layers of the Earth's atmosphere.",
-      'List the 10 most influential inventors in history.',
-      'Explain the process of plate tectonics.',
-      'Create a travel itinerary for a road trip across the USA.',
-      'Write a poem about the wonders of the natural world.',
-      'Explain the importance of sustainable agriculture.',
-      'List the essential hiking gear for a mountain expedition.',
-      'Write a short story about a futuristic dystopia.',
-      'Describe the history of the Middle Ages.',
+      "Translate the following English text to French: 'Hello, how are you?'", "Summarize the plot of the book 'To Kill a Mockingbird.'",
+      'Generate a list of 10 random numbers between 1 and 100.', 'What is the capital of France?', 'Write a poem about nature.', 'Convert 25 degrees Celsius to Fahrenheit.',
+      'Describe the process of photosynthesis.', 'Tell me a joke.', 'List five famous scientists and their contributions to science.',
+      'Write a short story about a detective solving a mystery.', 'Explain the theory of relativity.', 'Provide a brief history of the Roman Empire.',
+      'Create a shopping list for a BBQ party.', "Write a movie review for the film 'Inception.'", 'Explain the concept of artificial intelligence.', 'Write a letter to your future self.',
+      'Describe the life cycle of a butterfly.', 'List the top 10 tourist destinations in Europe.', 'Explain the principles of supply and demand.',
+      'Create a menu for a vegetarian restaurant.', 'Write a haiku about the ocean.', 'Explain the importance of renewable energy sources.',
+      'List the ingredients for making chocolate chip cookies.', 'Write a persuasive essay on the benefits of exercise.', 'Describe the cultural significance of the Taj Mahal.',
+      'Explain the process of DNA replication.', 'Write a speech about the importance of education.', 'List the steps to start a small business.', 'Explain the concept of biodiversity.',
+      'Create a playlist for a road trip.', 'Write a short biography of Albert Einstein.', 'Describe the impact of social media on society.', 'Explain the principles of good nutrition.',
+      'List the 10 tallest mountains in the world.', 'Write a product review for a smartphone.', 'Create a workout routine for building muscle.', 'Explain the concept of climate change.',
+      'Describe the life and achievements of Marie Curie.', 'List the ingredients for making a classic margarita.', 'Write a blog post about time management.',
+      'Explain the process of cellular respiration.', 'Create a budget for a family vacation.', "Write a book summary for 'The Great Gatsby.'", 'Describe the history of the Internet.',
+      'Explain the principles of effective communication.', 'List the top 10 historical landmarks in the world.', 'Write a love letter to someone special.',
+      'Explain the concept of human rights.', 'Create a recipe for homemade pizza.', 'Write a movie script for a short film.', 'Describe the structure of the atom.',
+      'List the 10 most influential artists of the 20th century.', 'Explain the process of mitosis.', 'Create a travel itinerary for a trip to Japan.',
+      'Write a poem about the beauty of nature.', 'Explain the importance of environmental conservation.', 'List the essential items for a hiking trip.',
+      'Write a short story set in a post-apocalyptic world.', 'Describe the history of the Olympic Games.', 'Explain the principles of democracy.',
+      'Create a business plan for a tech startup.', 'Write a letter of recommendation for a colleague.', 'List the ingredients for a classic Caesar salad.',
+      'Explain the concept of artificial neural networks.', 'Describe the life and work of Leonardo da Vinci.', 'List the 10 most popular tourist attractions in the United States.',
+      'Write a persuasive speech on the dangers of smoking.', 'Explain the process of natural selection.', 'Create a menu for a fine dining restaurant.',
+      'Write a poem about the beauty of the night sky.', 'Explain the importance of renewable energy.', 'List the necessary equipment for a camping trip.',
+      'Write a short biography of William Shakespeare.', 'Describe the impact of social media on business marketing.', 'Explain the principles of project management.',
+      'Create a playlist for a relaxing evening at home.', 'Write a blog post about the history of space exploration.', 'Explain the process of protein synthesis.',
+      'List the 10 most famous landmarks in Europe.', 'Write a book review for a classic novel.', 'Describe the history of ancient Egypt.', 'Explain the concept of cultural diversity.',
+      'Create a recipe for a gourmet sandwich.', 'Write a screenplay for a science fiction movie.', "Describe the structure of the Earth's atmosphere.",
+      'List the 10 greatest inventions of all time.', 'Explain the process of meiosis.', 'Create a travel guide for a visit to Paris.', 'Write a poem about the changing seasons.',
+      'Explain the importance of clean energy sources.', 'List the essential camping gear for a wilderness adventure.', 'Write a short story about a time-traveling adventure.',
+      'Describe the history of the Renaissance.', 'Explain the principles of economics.', 'Create a business proposal for a new restaurant.',
+      'Write a letter to your future self 10 years from now.', 'List the ingredients for a classic lasagna.', 'Explain the concept of machine learning.',
+      'Describe the life and contributions of Martin Luther King Jr.', 'List the 10 most famous museums in the world.',
+      'Write a persuasive essay on the importance of environmental conservation.', 'Explain the process of geological erosion.', 'Create a menu for a vegan cafe.',
+      'Write a poem about the power of imagination.', 'Explain the significance of the Industrial Revolution.', 'List the items needed for a beach vacation.',
+      'Write a short biography of Charles Darwin.', 'Describe the impact of globalization on cultures.', 'Explain the principles of time management.',
+      'Create a playlist for a high-energy workout.', 'Write a blog post about the future of artificial intelligence.', 'Explain the process of DNA transcription.',
+      'List the 10 most iconic landmarks in Asia.', 'Write a book summary for a popular self-help book.', 'Describe the history of the ancient Greeks.',
+      'Explain the concept of social justice.', 'Create a recipe for a gourmet salad.', 'Write a screenplay for a romantic comedy movie.', "Describe the layers of the Earth's atmosphere.",
+      'List the 10 most influential inventors in history.', 'Explain the process of plate tectonics.', 'Create a travel itinerary for a road trip across the USA.',
+      'Write a poem about the wonders of the natural world.', 'Explain the importance of sustainable agriculture.', 'List the essential hiking gear for a mountain expedition.',
+      'Write a short story about a futuristic dystopia.', 'Describe the history of the Middle Ages.',
       'Write a letter to your future self, offering reflections on personal growth, achievements, and aspirations, as well as words of encouragement and guidance for your future journey.',
       'List the ingredients for a classic chicken pot pie recipe, a beloved comfort food that combines tender chicken, vegetables, and a flaky pastry crust in a savory filling.',
       'Explain the concept of artificial neural networks and their pivotal role in machine learning and artificial intelligence applications, from image recognition to natural language processing.',
diff --git a/examples/openai_client.py b/examples/openai_client.py
index 86098807..a8c61c66 100644
--- a/examples/openai_client.py
+++ b/examples/openai_client.py
@@ -7,33 +7,14 @@ response = openai.Completion.create(model="gpt-3.5-turbo-instruct", prompt="Writ
 
 print(response)
 
-for chunk in openai.Completion.create(
-    model="gpt-3.5-turbo-instruct",
-    prompt="Say this is a test",
-    max_tokens=7,
-    temperature=0,
-    stream=True
-):
+for chunk in openai.Completion.create(model="gpt-3.5-turbo-instruct", prompt="Say this is a test", max_tokens=7, temperature=0, stream=True):
   print(chunk)
 
-
-completion = openai.ChatCompletion.create(
-    model="gpt-3.5-turbo",
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "Hello!"}
-    ]
-)
+completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=[{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}])
 
 print(completion)
 
-completion = openai.ChatCompletion.create(
-    model="gpt-3.5-turbo",
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "Hello!"}
-    ],
-    stream=True
-)
+completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=[{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], stream=True)
 
-for chunk in completion: print(chunk)
+for chunk in completion:
+  print(chunk)
diff --git a/openllm-client/src/openllm_client/benmin/_grpc.py b/openllm-client/src/openllm_client/benmin/_grpc.py
index dc62d154..9d381d4a 100644
--- a/openllm-client/src/openllm_client/benmin/_grpc.py
+++ b/openllm-client/src/openllm_client/benmin/_grpc.py
@@ -154,10 +154,12 @@ class GrpcClient(Client):
     try:
       reflection.apis[api.name] = InferenceAPI[t.Any](None,
                                                       bentoml.io.from_spec({
-                                                          'id': api.input.descriptor_id, 'args': json_format.MessageToDict(api.input.attributes).get('args', None)
+                                                          'id': api.input.descriptor_id,
+                                                          'args': json_format.MessageToDict(api.input.attributes).get('args', None)
                                                       }),
                                                       bentoml.io.from_spec({
-                                                          'id': api.output.descriptor_id, 'args': json_format.MessageToDict(api.output.attributes).get('args', None)
+                                                          'id': api.output.descriptor_id,
+                                                          'args': json_format.MessageToDict(api.output.attributes).get('args', None)
                                                       }),
                                                       name=api.name,
                                                       doc=api.docs)
@@ -256,10 +258,12 @@ class AsyncGrpcClient(AsyncClient):
     try:
      reflection.apis[api.name] = InferenceAPI[t.Any](None,
                                                      bentoml.io.from_spec({
-                                                         'id': api.input.descriptor_id, 'args': json_format.MessageToDict(api.input.attributes).get('args', None)
+                                                         'id': api.input.descriptor_id,
+                                                         'args': json_format.MessageToDict(api.input.attributes).get('args', None)
                                                      }),
                                                      bentoml.io.from_spec({
-                                                         'id': api.output.descriptor_id, 'args': json_format.MessageToDict(api.output.attributes).get('args', None)
+                                                         'id': api.output.descriptor_id,
+                                                         'args': json_format.MessageToDict(api.output.attributes).get('args', None)
                                                      }),
                                                      name=api.name,
                                                      doc=api.docs)
diff --git a/openllm-core/src/openllm_core/_configuration.py b/openllm-core/src/openllm_core/_configuration.py
index 126b0309..61c5d14a 100644
--- a/openllm-core/src/openllm_core/_configuration.py
+++ b/openllm-core/src/openllm_core/_configuration.py
@@ -355,8 +355,7 @@ class GenerationConfig(ReprMixin):
     return {i.name for i in attr.fields(self.__class__)}
 
 bentoml_cattr.register_unstructure_hook_factory(
-    lambda cls: attr.has(cls) and lenient_issubclass(cls, GenerationConfig),
-    lambda cls: make_dict_unstructure_fn(
+    lambda cls: attr.has(cls) and lenient_issubclass(cls, GenerationConfig), lambda cls: make_dict_unstructure_fn(
         cls, bentoml_cattr, _cattrs_omit_if_default=False, _cattrs_use_linecache=True, **{
             k: override(omit=True) for k, v in attr.fields_dict(cls).items() if v.default in (None, attr.NOTHING)
         }))
@@ -436,8 +435,7 @@ class SamplingParams(ReprMixin):
     return cls(_internal=True, temperature=temperature, top_k=top_k, top_p=top_p, max_tokens=max_tokens, **attrs)
 
 bentoml_cattr.register_unstructure_hook_factory(
-    lambda cls: attr.has(cls) and lenient_issubclass(cls, SamplingParams),
-    lambda cls: make_dict_unstructure_fn(
+    lambda cls: attr.has(cls) and lenient_issubclass(cls, SamplingParams), lambda cls: make_dict_unstructure_fn(
         cls, bentoml_cattr, _cattrs_omit_if_default=False, _cattrs_use_linecache=True, **{
             k: override(omit=True) for k, v in attr.fields_dict(cls).items() if v.default in (None, attr.NOTHING)
         }))
@@ -494,8 +492,7 @@ _transformed_type: DictStrAny = {'fine_tune_strategies': t.Dict[AdapterType, Fin
 
 @attr.define(frozen=False,
              slots=True,
-             field_transformer=lambda _,
-             __: [
+             field_transformer=lambda _, __: [
                  attr.Attribute.from_counting_attr(
                      k,
                      dantic.Field(kw_only=False if t.get_origin(ann) is not Required else True,
@@ -503,8 +500,7 @@ _transformed_type: DictStrAny = {'fine_tune_strategies': t.Dict[AdapterType, Fin
                                   use_default_converter=False,
                                   type=_transformed_type.get(k, ann),
                                   metadata={'target': f'__openllm_{k}__'},
-                                  description=f'ModelSettings field for {k}.')) for k,
-                                  ann in t.get_type_hints(ModelSettings).items()
+                                  description=f'ModelSettings field for {k}.')) for k, ann in t.get_type_hints(ModelSettings).items()
 ])
 class _ModelSettingsAttr:
   '''Internal attrs representation of ModelSettings.'''
@@ -521,7 +517,8 @@ class _ModelSettingsAttr:
       model_ids=['__default__'],
      architecture='PreTrainedModel',
      default_backend={
-          'cpu': 'pt', 'nvidia.com/gpu': 'pt'
+          'cpu': 'pt',
+          'nvidia.com/gpu': 'pt'
      },
      serialisation='legacy',
      name_type='dasherize',
@@ -1013,8 +1010,7 @@ class LLMConfig(_ConfigAttr):
             cls.__openllm_model_name__,
             suffix=suffix_env,
             globs=globs,
-            default_callback=lambda field_name,
-            field_default: getattr(getattr(cls, class_attr), field_name, field_default)
+            default_callback=lambda field_name, field_default: getattr(getattr(cls, class_attr), field_name, field_default)
             if codegen.has_own_attribute(cls, class_attr) else field_default))
     # For pickling to work, the __module__ variable needs to be set to the
     # frame where the class is created. This respect the module that is created from cls
@@ -1329,9 +1325,9 @@ class LLMConfig(_ConfigAttr):
     _new_cfg = {k: v for k, v in attrs.items() if k in attr.fields_dict(_ModelSettingsAttr)}
     attrs = {k: v for k, v in attrs.items() if k not in _new_cfg}
     new_cls = types.new_class(
-        name or f"{cls.__name__.replace('Config', '')}DerivateConfig", (cls,), {},
-        lambda ns: ns.update({
-            '__config__': config_merger.merge(copy.deepcopy(cls.__dict__['__config__']), _new_cfg), '__base_config__': cls,  # keep a reference for easy access
+        name or f"{cls.__name__.replace('Config', '')}DerivateConfig", (cls,), {}, lambda ns: ns.update({
+            '__config__': config_merger.merge(copy.deepcopy(cls.__dict__['__config__']), _new_cfg),
+            '__base_config__': cls,  # keep a reference for easy access
         }))
 
     # For pickling to work, the __module__ variable needs to be set to the
diff --git a/openllm-core/src/openllm_core/_strategies.py b/openllm-core/src/openllm_core/_strategies.py
index 078a93d1..07777275 100644
--- a/openllm-core/src/openllm_core/_strategies.py
+++ b/openllm-core/src/openllm_core/_strategies.py
@@ -217,8 +217,7 @@ def _validate(cls: type[DynResource], val: list[t.Any]) -> None:
 
 def _make_resource_class(name: str, resource_kind: str, docstring: str) -> type[DynResource]:
   return types.new_class(
-      name, (bentoml.Resource[t.List[str]], ReprMixin), {'resource_id': resource_kind},
-      lambda ns: ns.update({
+      name, (bentoml.Resource[t.List[str]], ReprMixin), {'resource_id': resource_kind}, lambda ns: ns.update({
           'resource_id': resource_kind,
          'from_spec': classmethod(_from_spec),
          'from_system': classmethod(_from_system),
@@ -235,16 +234,12 @@ _NVIDIA_GPU_RESOURCE: t.Literal['nvidia.com/gpu'] = 'nvidia.com/gpu'
 _CPU_RESOURCE: t.Literal['cpu'] = 'cpu'
 
 NvidiaGpuResource = _make_resource_class(
-    'NvidiaGpuResource',
-    _NVIDIA_GPU_RESOURCE,
-    '''NVIDIA GPU resource.
+    'NvidiaGpuResource', _NVIDIA_GPU_RESOURCE, '''NVIDIA GPU resource.
 
   This is a modified version of internal's BentoML's NvidiaGpuResource
   where it respects and parse CUDA_VISIBLE_DEVICES correctly.''')
 AmdGpuResource = _make_resource_class(
-    'AmdGpuResource',
-    _AMD_GPU_RESOURCE,
-    '''AMD GPU resource.
+    'AmdGpuResource', _AMD_GPU_RESOURCE, '''AMD GPU resource.
 
   Since ROCm will respect CUDA_VISIBLE_DEVICES, the behaviour of from_spec, from_system are similar to ``NvidiaGpuResource``.
   Currently ``validate`` is not yet supported.''')
diff --git a/openllm-core/src/openllm_core/config/configuration_baichuan.py b/openllm-core/src/openllm_core/config/configuration_baichuan.py
index 0522c54d..ba3be4e8 100644
--- a/openllm-core/src/openllm_core/config/configuration_baichuan.py
+++ b/openllm-core/src/openllm_core/config/configuration_baichuan.py
@@ -46,11 +46,7 @@ class BaichuanConfig(openllm_core.LLMConfig):
       'architecture': 'BaiChuanForCausalLM',
       'default_id': 'baichuan-inc/baichuan-7b',
       'model_ids': [
-          'baichuan-inc/baichuan-7b',
-          'baichuan-inc/baichuan-13b-base',
-          'baichuan-inc/baichuan-13b-chat',
-          'fireballoon/baichuan-vicuna-chinese-7b',
-          'fireballoon/baichuan-vicuna-7b',
+          'baichuan-inc/baichuan-7b', 'baichuan-inc/baichuan-13b-base', 'baichuan-inc/baichuan-13b-chat', 'fireballoon/baichuan-vicuna-chinese-7b', 'fireballoon/baichuan-vicuna-7b',
           'hiyouga/baichuan-7b-sft'
       ]
   }
diff --git a/openllm-core/src/openllm_core/config/configuration_dolly_v2.py b/openllm-core/src/openllm_core/config/configuration_dolly_v2.py
index dbdf4a9b..cd916101 100644
--- a/openllm-core/src/openllm_core/config/configuration_dolly_v2.py
+++ b/openllm-core/src/openllm_core/config/configuration_dolly_v2.py
@@ -101,7 +101,11 @@ class DollyV2Config(openllm_core.LLMConfig):
                      use_default_prompt_template: bool = True,
                      **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
     return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
-        'max_new_tokens': max_new_tokens, 'top_k': top_k, 'top_p': top_p, 'temperature': temperature, **attrs
+        'max_new_tokens': max_new_tokens,
+        'top_k': top_k,
+        'top_p': top_p,
+        'temperature': temperature,
+        **attrs
     }, {}
 
   def postprocess_generate(self, prompt: str, generation_result: list[dict[t.Literal['generated_text'], str]], **_: t.Any) -> str:
diff --git a/openllm-core/src/openllm_core/config/configuration_falcon.py b/openllm-core/src/openllm_core/config/configuration_falcon.py
index 06c0b2a5..9de349a1 100644
--- a/openllm-core/src/openllm_core/config/configuration_falcon.py
+++ b/openllm-core/src/openllm_core/config/configuration_falcon.py
@@ -49,7 +49,12 @@ class FalconConfig(openllm_core.LLMConfig):
       'default_id': 'tiiuae/falcon-7b',
       'model_ids': ['tiiuae/falcon-7b', 'tiiuae/falcon-40b', 'tiiuae/falcon-7b-instruct', 'tiiuae/falcon-40b-instruct'],
       'fine_tune_strategies': ({
-          'adapter_type': 'lora', 'r': 64, 'lora_alpha': 16, 'lora_dropout': 0.1, 'bias': 'none', 'target_modules': ['query_key_value', 'dense', 'dense_h_to_4h', 'dense_4h_to_h']
+          'adapter_type': 'lora',
+          'r': 64,
+          'lora_alpha': 16,
+          'lora_dropout': 0.1,
+          'bias': 'none',
+          'target_modules': ['query_key_value', 'dense', 'dense_h_to_4h', 'dense_4h_to_h']
       },)
   }
 
@@ -71,7 +76,11 @@ class FalconConfig(openllm_core.LLMConfig):
                      use_default_prompt_template: bool = False,
                      **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
     return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
-        'max_new_tokens': max_new_tokens, 'top_k': top_k, 'num_return_sequences': num_return_sequences, 'eos_token_id': eos_token_id, **attrs
+        'max_new_tokens': max_new_tokens,
+        'top_k': top_k,
+        'num_return_sequences': num_return_sequences,
+        'eos_token_id': eos_token_id,
+        **attrs
     }, {}
 
   def postprocess_generate(self, prompt: str, generation_result: t.Sequence[str], **_: t.Any) -> str:
diff --git a/openllm-core/src/openllm_core/config/configuration_flan_t5.py b/openllm-core/src/openllm_core/config/configuration_flan_t5.py
index 37964fdc..0ede6ad1 100644
--- a/openllm-core/src/openllm_core/config/configuration_flan_t5.py
+++ b/openllm-core/src/openllm_core/config/configuration_flan_t5.py
@@ -67,7 +67,11 @@ class FlanT5Config(openllm_core.LLMConfig):
                      use_default_prompt_template: bool = True,
                      **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
     return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
-        'max_new_tokens': max_new_tokens, 'temperature': temperature, 'top_k': top_k, 'top_p': top_p, 'repetition_penalty': repetition_penalty
+        'max_new_tokens': max_new_tokens,
+        'temperature': temperature,
+        'top_k': top_k,
+        'top_p': top_p,
+        'repetition_penalty': repetition_penalty
     }, {}
 
   def postprocess_generate(self, prompt: str, generation_result: t.Sequence[str], **_: t.Any) -> str:
diff --git a/openllm-core/src/openllm_core/config/configuration_llama.py b/openllm-core/src/openllm_core/config/configuration_llama.py
index 464605e8..66c580dd 100644
--- a/openllm-core/src/openllm_core/config/configuration_llama.py
+++ b/openllm-core/src/openllm_core/config/configuration_llama.py
@@ -74,7 +74,8 @@ class LlamaConfig(openllm_core.LLMConfig):
       'name_type': 'lowercase',
       'url': 'https://github.com/facebookresearch/llama',
       'default_backend': {
-          'cpu': 'pt', 'nvidia.com/gpu': 'pt'
+          'cpu': 'pt',
+          'nvidia.com/gpu': 'pt'
       },
       'architecture': 'LlamaForCausalLM',
       'requirements': ['fairscale', 'sentencepiece', 'scipy'],
@@ -82,21 +83,16 @@ class LlamaConfig(openllm_core.LLMConfig):
       'default_id': 'NousResearch/llama-2-7b-hf',
       'serialisation': 'safetensors',
       'model_ids': [
-          'meta-llama/Llama-2-70b-chat-hf',
-          'meta-llama/Llama-2-13b-chat-hf',
-          'meta-llama/Llama-2-7b-chat-hf',
-          'meta-llama/Llama-2-70b-hf',
-          'meta-llama/Llama-2-13b-hf',
-          'meta-llama/Llama-2-7b-hf',
-          'NousResearch/llama-2-70b-chat-hf',
-          'NousResearch/llama-2-13b-chat-hf',
-          'NousResearch/llama-2-7b-chat-hf',
-          'NousResearch/llama-2-70b-hf',
-          'NousResearch/llama-2-13b-hf',
-          'NousResearch/llama-2-7b-hf',
+          'meta-llama/Llama-2-70b-chat-hf', 'meta-llama/Llama-2-13b-chat-hf', 'meta-llama/Llama-2-7b-chat-hf', 'meta-llama/Llama-2-70b-hf', 'meta-llama/Llama-2-13b-hf',
+          'meta-llama/Llama-2-7b-hf', 'NousResearch/llama-2-70b-chat-hf', 'NousResearch/llama-2-13b-chat-hf', 'NousResearch/llama-2-7b-chat-hf', 'NousResearch/llama-2-70b-hf',
+          'NousResearch/llama-2-13b-hf', 'NousResearch/llama-2-7b-hf',
       ],
       'fine_tune_strategies': ({
-          'adapter_type': 'lora', 'r': 64, 'lora_alpha': 16, 'lora_dropout': 0.1, 'bias': 'none'
+          'adapter_type': 'lora',
+          'r': 64,
+          'lora_alpha': 16,
+          'lora_dropout': 0.1,
+          'bias': 'none'
       },)
   }
 
@@ -124,7 +120,10 @@ class LlamaConfig(openllm_core.LLMConfig):
     if prompt_template is None: prompt_template = DEFAULT_PROMPT_TEMPLATE('v2' if use_llama2_prompt else 'v1')
     elif isinstance(prompt_template, str): prompt_template = PromptTemplate(template=prompt_template)
     return prompt_template.with_options(system_message=system_message).format(instruction=prompt), {
-        'max_new_tokens': max_new_tokens, 'temperature': temperature, 'top_p': top_p, 'top_k': top_k
+        'max_new_tokens': max_new_tokens,
+        'temperature': temperature,
+        'top_p': top_p,
+        'top_k': top_k
     }, {}
 
   def postprocess_generate(self, prompt: str, generation_result: list[str], **_: t.Any) -> str:
diff --git a/openllm-core/src/openllm_core/config/configuration_opt.py b/openllm-core/src/openllm_core/config/configuration_opt.py
index b57f3739..cee9083d 100644
--- a/openllm-core/src/openllm_core/config/configuration_opt.py
+++ b/openllm-core/src/openllm_core/config/configuration_opt.py
@@ -54,7 +54,12 @@ class OPTConfig(openllm_core.LLMConfig):
       'architecture': 'OPTForCausalLM',
       'model_ids': ['facebook/opt-125m', 'facebook/opt-350m', 'facebook/opt-1.3b', 'facebook/opt-2.7b', 'facebook/opt-6.7b', 'facebook/opt-66b'],
       'fine_tune_strategies': ({
-          'adapter_type': 'lora', 'r': 16, 'lora_alpha': 32, 'target_modules': ['q_proj', 'v_proj'], 'lora_dropout': 0.05, 'bias': 'none'
+          'adapter_type': 'lora',
+          'r': 16,
+          'lora_alpha': 32,
+          'target_modules': ['q_proj', 'v_proj'],
+          'lora_dropout': 0.05,
+          'bias': 'none'
       },)
   }
   format_outputs: bool = dantic.Field(False, description='''Whether to format the outputs. This can be used when num_return_sequences > 1.''')
@@ -76,7 +81,10 @@ class OPTConfig(openllm_core.LLMConfig):
                      use_default_prompt_template: bool = False,
                      **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
     return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
-        'max_new_tokens': max_new_tokens, 'temperature': temperature, 'top_k': top_k, 'num_return_sequences': num_return_sequences
+        'max_new_tokens': max_new_tokens,
+        'temperature': temperature,
+        'top_k': top_k,
+        'num_return_sequences': num_return_sequences
     }, {}
 
   def postprocess_generate(self, prompt: str, generation_result: t.Sequence[str], **attrs: t.Any) -> str:
diff --git a/openllm-core/src/openllm_core/utils/__init__.py b/openllm-core/src/openllm_core/utils/__init__.py
index dfe76ea6..092a39fa 100644
--- a/openllm-core/src/openllm_core/utils/__init__.py
+++ b/openllm-core/src/openllm_core/utils/__init__.py
@@ -152,24 +152,32 @@ _LOGGING_CONFIG: dict[str, t.Any] = {
     'filters': {
         'excfilter': {
             '()': 'openllm_core.utils.ExceptionFilter'
-        }, 'infofilter': {
+        },
+        'infofilter': {
             '()': 'openllm_core.utils.InfoFilter'
         }
     },
     'handlers': {
         'bentomlhandler': {
-            'class': 'logging.StreamHandler', 'filters': ['excfilter', 'infofilter'], 'stream': 'ext://sys.stdout'
+            'class': 'logging.StreamHandler',
+            'filters': ['excfilter', 'infofilter'],
+            'stream': 'ext://sys.stdout'
         },
         'defaulthandler': {
-            'class': 'logging.StreamHandler', 'level': logging.WARNING
+            'class': 'logging.StreamHandler',
+            'level': logging.WARNING
         }
     },
     'loggers': {
         'bentoml': {
-            'handlers': ['bentomlhandler', 'defaulthandler'], 'level': logging.INFO, 'propagate': False
+            'handlers': ['bentomlhandler', 'defaulthandler'],
+            'level': logging.INFO,
+            'propagate': False
         },
         'openllm': {
-            'handlers': ['bentomlhandler', 'defaulthandler'], 'level': logging.INFO, 'propagate': False
+            'handlers': ['bentomlhandler', 'defaulthandler'],
+            'level': logging.INFO,
+            'propagate': False
         }
     },
     'root': {
@@ -319,31 +327,10 @@ _import_structure: dict[str, list[str]] = {
     'lazy': [],
     'representation': ['ReprMixin'],
     'import_utils': [
-        'OPTIONAL_DEPENDENCIES',
-        'DummyMetaclass',
-        'EnvVarMixin',
-        'require_backends',
-        'is_cpm_kernels_available',
-        'is_einops_available',
-        'is_flax_available',
-        'is_tf_available',
-        'is_vllm_available',
-        'is_torch_available',
-        'is_bitsandbytes_available',
-        'is_peft_available',
-        'is_datasets_available',
-        'is_jupyter_available',
-        'is_jupytext_available',
-        'is_notebook_available',
-        'is_triton_available',
-        'is_autogptq_available',
-        'is_sentencepiece_available',
-        'is_xformers_available',
-        'is_fairscale_available',
-        'is_grpc_available',
-        'is_grpc_health_available',
-        'is_transformers_available',
-        'is_optimum_supports_gptq',
+        'OPTIONAL_DEPENDENCIES', 'DummyMetaclass', 'EnvVarMixin', 'require_backends', 'is_cpm_kernels_available', 'is_einops_available', 'is_flax_available', 'is_tf_available',
+        'is_vllm_available', 'is_torch_available', 'is_bitsandbytes_available', 'is_peft_available', 'is_datasets_available', 'is_jupyter_available', 'is_jupytext_available',
+        'is_notebook_available', 'is_triton_available', 'is_autogptq_available', 'is_sentencepiece_available', 'is_xformers_available', 'is_fairscale_available', 'is_grpc_available',
+        'is_grpc_health_available', 'is_transformers_available', 'is_optimum_supports_gptq',
     ]
 }
 
diff --git a/openllm-core/src/openllm_core/utils/codegen.py b/openllm-core/src/openllm_core/utils/codegen.py
index 83e7fb1d..aa710184 100644
--- a/openllm-core/src/openllm_core/utils/codegen.py
+++ b/openllm-core/src/openllm_core/utils/codegen.py
@@ -163,8 +163,7 @@ def gen_sdk(func: _T, name: str | None = None, **attrs: t.Any) -> _T:
               '__doc__': inspect.cleandoc(doc),
               '__module__': 'openllm'
           }),
-      )(func, **attrs),
-      func,
+      )(func, **attrs), func,
   ))
 
 __all__ = ['gen_sdk', 'make_attr_tuple_class', 'make_env_transformer', 'generate_unique_filename', 'generate_function']
diff --git a/openllm-core/src/openllm_core/utils/dantic.py b/openllm-core/src/openllm_core/utils/dantic.py
index a6fae514..13c5d3a7 100644
--- a/openllm-core/src/openllm_core/utils/dantic.py
+++ b/openllm-core/src/openllm_core/utils/dantic.py
@@ -25,23 +25,8 @@ AnyCallable = t.Callable[..., t.Any]
 FC = t.TypeVar('FC', bound=t.Union[AnyCallable, click.Command])
 
 __all__ = [
-    'FC',
-    'attrs_to_options',
-    'Field',
-    'parse_type',
-    'is_typing',
-    'is_literal',
-    'ModuleType',
-    'EnumChoice',
-    'LiteralChoice',
-    'allows_multiple',
-    'is_mapping',
-    'is_container',
-    'parse_container_args',
-    'parse_single_arg',
-    'CUDA',
-    'JsonType',
-    'BytesType'
+    'FC', 'attrs_to_options', 'Field', 'parse_type', 'is_typing', 'is_literal', 'ModuleType', 'EnumChoice', 'LiteralChoice', 'allows_multiple', 'is_mapping', 'is_container',
+    'parse_container_args', 'parse_single_arg', 'CUDA', 'JsonType', 'BytesType'
 ]
 
 def __dir__() -> list[str]:
diff --git a/openllm-core/src/openllm_core/utils/import_utils.py b/openllm-core/src/openllm_core/utils/import_utils.py
index e0a4aef2..988a2e30 100644
--- a/openllm-core/src/openllm_core/utils/import_utils.py
+++ b/openllm-core/src/openllm_core/utils/import_utils.py
@@ -139,17 +139,8 @@ def is_tf_available() -> bool:
 _tf_version = None
 if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VALUES:
   if _tf_available:
-    candidates = ('tensorflow',
-                  'tensorflow-cpu',
-                  'tensorflow-gpu',
-                  'tf-nightly',
-                  'tf-nightly-cpu',
-                  'tf-nightly-gpu',
-                  'intel-tensorflow',
-                  'intel-tensorflow-avx512',
-                  'tensorflow-rocm',
-                  'tensorflow-macos',
-                  'tensorflow-aarch64',
+    candidates = ('tensorflow', 'tensorflow-cpu', 'tensorflow-gpu', 'tf-nightly', 'tf-nightly-cpu', 'tf-nightly-gpu', 'intel-tensorflow', 'intel-tensorflow-avx512', 'tensorflow-rocm',
+                  'tensorflow-macos', 'tensorflow-aarch64',
                  )
     _tf_version = None
     # For the metadata, we have to look for both tensorflow and tensorflow-cpu
diff --git a/openllm-python/src/openllm/_embeddings.py b/openllm-python/src/openllm/_embeddings.py
index 84be1890..9f5f3257 100644
--- a/openllm-python/src/openllm/_embeddings.py
+++ b/openllm-python/src/openllm/_embeddings.py
@@ -34,7 +34,8 @@ def get_or_download(ids: str = _BENTOMODEL_ID) -> bentoml.Model:
                            options=ModelOptions(),
                            context=openllm.utils.generate_context(framework_name='transformers'),
                            labels={
-                               'runtime': 'pt', 'framework': 'openllm'
+                               'runtime': 'pt',
+                               'framework': 'openllm'
                            },
                            signatures=model_signatures) as bentomodel:
     snapshot_download(_GENERIC_EMBEDDING_ID,
diff --git a/openllm-python/src/openllm/_llm.py b/openllm-python/src/openllm/_llm.py
index 8dcf8b1a..61952026 100644
--- a/openllm-python/src/openllm/_llm.py
+++ b/openllm-python/src/openllm/_llm.py
@@ -281,22 +281,10 @@ class LLM(LLMInterface[M, T], ReprMixin):
   if t.TYPE_CHECKING:
     __name__: str
   if t.TYPE_CHECKING and not MYPY:
-    def __attrs_init__(self,
-                       config: LLMConfig,
-                       quantization_config: t.Optional[t.Union[transformers.BitsAndBytesConfig, transformers.GPTQConfig]],
-                       quantize: t.Optional[LiteralQuantise],
-                       model_id: str,
-                       model_decls: TupleAny,
-                       model_attrs: DictStrAny,
-                       tokenizer_attrs: DictStrAny,
-                       tag: bentoml.Tag,
-                       adapters_mapping: t.Optional[AdaptersMapping],
-                       model_version: t.Optional[str],
-                       serialisation: LiteralSerialisation,
-                       _local: bool,
-                       prompt_template: PromptTemplate | None,
-                       system_message: str | None,
-                       **attrs: t.Any) -> None:
+    def __attrs_init__(self, config: LLMConfig, quantization_config: t.Optional[t.Union[transformers.BitsAndBytesConfig,
+                                                                                        transformers.GPTQConfig]], quantize: t.Optional[LiteralQuantise], model_id: str, model_decls: TupleAny,
+                       model_attrs: DictStrAny, tokenizer_attrs: DictStrAny, tag: bentoml.Tag, adapters_mapping: t.Optional[AdaptersMapping], model_version: t.Optional[str],
+                       serialisation: LiteralSerialisation, _local: bool, prompt_template: PromptTemplate | None, system_message: str | None, **attrs: t.Any) -> None:
       '''Generated __attrs_init__ for openllm.LLM.'''
 
   config: LLMConfig
@@ -540,20 +528,9 @@ class LLM(LLMInterface[M, T], ReprMixin):
   def generate_tag(cls, *param_decls: t.Any, **attrs: t.Any) -> bentoml.Tag:
     return bentoml.Tag.from_taglike(cls._generate_tag_str(*param_decls, **attrs))
 
-  def __init__(self,
-               *args: t.Any,
-               model_id: str,
-               llm_config: LLMConfig,
-               quantization_config: transformers.BitsAndBytesConfig | transformers.GPTQConfig | None,
-               _quantize: LiteralQuantise | None,
-               _model_version: str,
-               _tag: bentoml.Tag,
-               _serialisation: LiteralSerialisation,
-               _local: bool,
-               _prompt_template: PromptTemplate | None,
-               _system_message: str | None,
-               _adapters_mapping: AdaptersMapping | None,
-               **attrs: t.Any,
+  def __init__(self, *args: t.Any, model_id: str, llm_config: LLMConfig, quantization_config: transformers.BitsAndBytesConfig | transformers.GPTQConfig | None,
+               _quantize: LiteralQuantise | None, _model_version: str, _tag: bentoml.Tag, _serialisation: LiteralSerialisation, _local: bool, _prompt_template: PromptTemplate | None,
+               _system_message: str | None, _adapters_mapping: AdaptersMapping | None, **attrs: t.Any,
   ):
     '''Initialize the LLM with given pretrained model.
 
@@ -651,22 +628,13 @@ class LLM(LLMInterface[M, T], ReprMixin):
     # parsing tokenizer and model kwargs, as the hierachy is param pass > default
     normalized_model_kwds, normalized_tokenizer_kwds = normalize_attrs_to_model_tokenizer_pair(**attrs)
     # NOTE: Save the args and kwargs for latter load
-    self.__attrs_init__(llm_config,
-                        quantization_config,
-                        _quantize,
-                        model_id,
-                        args, {
-                            **model_kwds, **normalized_model_kwds
-                        }, {
-                            **tokenizer_kwds, **normalized_tokenizer_kwds
-                        },
-                        _tag,
-                        _adapters_mapping,
-                        _model_version,
-                        _serialisation,
-                        _local,
-                        _prompt_template,
-                        _system_message)
+    self.__attrs_init__(llm_config, quantization_config, _quantize, model_id, args, {
+        **model_kwds,
+        **normalized_model_kwds
+    }, {
+        **tokenizer_kwds,
+        **normalized_tokenizer_kwds
+    }, _tag, _adapters_mapping, _model_version, _serialisation, _local, _prompt_template, _system_message)
 
     self.llm_post_init()
 
@@ -1306,10 +1274,11 @@ def llm_runnable_class(self: LLM[M, T], embeddings_sig: ModelSignature, generate
         pre = now
       yield ' '.join(output_text[pre:]) + ' '
 
-  return types.new_class(self.__class__.__name__ + 'Runnable', (_Runnable,), {},
-                         lambda ns: ns.update({
-                             'SUPPORTED_RESOURCES': ('nvidia.com/gpu', 'amd.com/gpu', 'cpu'), '__module__': self.__module__, '__doc__': self.config['env'].start_docstring
-                         }))
+  return types.new_class(self.__class__.__name__ + 'Runnable', (_Runnable,), {}, lambda ns: ns.update({
+      'SUPPORTED_RESOURCES': ('nvidia.com/gpu', 'amd.com/gpu', 'cpu'),
+      '__module__': self.__module__,
+      '__doc__': self.config['env'].start_docstring
+  }))
 
 def llm_runner_class(self: LLM[M, T]) -> type[LLMRunner[M, T]]:
   def available_adapters(_: LLMRunner[M, T]) -> PeftAdapterOutput:
diff --git a/openllm-python/src/openllm/_service.py b/openllm-python/src/openllm/_service.py
index 94d04fa9..37e2201b 100644
--- a/openllm-python/src/openllm/_service.py
+++ b/openllm-python/src/openllm/_service.py
@@ -120,8 +120,13 @@ async def completion_v1(input_dict: dict[str, t.Any], ctx: bentoml.Context) -> s
 @svc.api(route='/v1/chat/completions',
          input=bentoml.io.JSON.from_sample(
              openllm.utils.bentoml_cattr.unstructure(
-                 openllm.openai.ChatCompletionRequest(messages=[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Hello!'}],
-                                                      model=runner.llm_type))),
+                 openllm.openai.ChatCompletionRequest(messages=[{
+                     'role': 'system',
+                     'content': 'You are a helpful assistant.'
+                 }, {
+                     'role': 'user',
+                     'content': 'Hello!'
+                 }], model=runner.llm_type))),
         output=bentoml.io.Text())
 async def chat_completion_v1(input_dict: dict[str, t.Any], ctx: bentoml.Context) -> str | t.AsyncGenerator[str, None]:
   prompt = openllm.openai.messages_to_prompt(input_dict['messages'])
@@ -194,32 +199,10 @@ def metadata_v1(_: str) -> openllm.MetadataOutput:
          input=bentoml.io.JSON.from_sample(['Hey Jude, welcome to the jungle!', 'What is the meaning of life?']),
          output=bentoml.io.JSON.from_sample({
             'embeddings': [
-                 0.007917795330286026,
-                 -0.014421648345887661,
-                 0.00481307040899992,
-                 0.007331526838243008,
-                 -0.0066398633643984795,
-                 0.00945580005645752,
-                 0.0087016262114048,
-                 -0.010709521360695362,
-                 0.012635177001357079,
-                 0.010541186667978764,
-                 -0.00730888033285737,
-                 -0.001783102168701589,
-                 0.02339819073677063,
-                 -0.010825827717781067,
-                 -0.015888236463069916,
-                 0.01876218430697918,
-                 0.0076906150206923485,
-                 0.0009032754460349679,
-                 -0.010024012066423893,
-                 0.01090280432254076,
-                 -0.008668390102684498,
-                 0.02070549875497818,
-                 0.0014594447566196322,
-                 -0.018775740638375282,
-                 -0.014814382418990135,
-                 0.01796768605709076
+                 0.007917795330286026, -0.014421648345887661, 0.00481307040899992, 0.007331526838243008, -0.0066398633643984795, 0.00945580005645752, 0.0087016262114048, -0.010709521360695362,
+                 0.012635177001357079, 0.010541186667978764, -0.00730888033285737, -0.001783102168701589, 0.02339819073677063, -0.010825827717781067, -0.015888236463069916,
+                 0.01876218430697918, 0.0076906150206923485, 0.0009032754460349679, -0.010024012066423893, 0.01090280432254076, -0.008668390102684498, 0.02070549875497818,
+                 0.0014594447566196322, -0.018775740638375282, -0.014814382418990135, 0.01796768605709076
             ],
             'num_tokens': 20
         }))
diff --git a/openllm-python/src/openllm/bundle/_package.py b/openllm-python/src/openllm/bundle/_package.py
index f0172012..ae59052b 100644
--- a/openllm-python/src/openllm/bundle/_package.py
+++ b/openllm-python/src/openllm/bundle/_package.py
@@ -87,15 +87,7 @@ def construct_python_options(llm: openllm.LLM[t.Any, t.Any], llm_fs: FS, extra_d
   elif backend_envvar == 'tf':
     if not openllm_core.utils.is_tf_available(): raise ValueError(f"TensorFlow is not available, while {env.backend} is set to 'tf'")
-    candidates = ('tensorflow',
-                  'tensorflow-cpu',
-                  'tensorflow-gpu',
-                  'tf-nightly',
-                  'tf-nightly-cpu',
-                  'tf-nightly-gpu',
-                  'intel-tensorflow',
-                  'intel-tensorflow-avx512',
-                  'tensorflow-rocm',
+    candidates = ('tensorflow', 'tensorflow-cpu', 'tensorflow-gpu', 'tf-nightly', 'tf-nightly-cpu', 'tf-nightly-gpu', 'intel-tensorflow', 'intel-tensorflow-avx512', 'tensorflow-rocm',
                   'tensorflow-macos',
                  )
     # For the metadata, we have to look for both tensorflow and tensorflow-cpu
@@ -123,14 +115,8 @@ def construct_python_options(llm: openllm.LLM[t.Any, t.Any], llm_fs: FS, extra_d
                        lock_packages=False,
                        extra_index_url=['https://download.pytorch.org/whl/cu118', 'https://huggingface.github.io/autogptq-index/whl/cu118/'])
 
-def construct_docker_options(llm: openllm.LLM[t.Any, t.Any],
-                             _: FS,
-                             workers_per_resource: float,
-                             quantize: LiteralString | None,
-                             adapter_map: dict[str, str | None] | None,
-                             dockerfile_template: str | None,
-                             serialisation: LiteralSerialisation,
-                             container_registry: LiteralContainerRegistry,
+def construct_docker_options(llm: openllm.LLM[t.Any, t.Any], _: FS, workers_per_resource: float, quantize: LiteralString | None, adapter_map: dict[str, str | None] | None,
+                             dockerfile_template: str | None, serialisation: LiteralSerialisation, container_registry: LiteralContainerRegistry,
                              container_version_strategy: LiteralContainerVersionStrategy) -> DockerOptions:
   from openllm.cli._factory import parse_config_options
   environ = parse_config_options(llm.config, llm.config['timeout'], workers_per_resource, None, True, os.environ.copy())
@@ -217,7 +203,11 @@ def create_bento(bento_tag: bentoml.Tag,
   _serialisation: LiteralSerialisation = openllm_core.utils.first_not_none(serialisation, default=llm.config['serialisation'])
   labels = dict(llm.identifying_params)
   labels.update({
-      '_type': llm.llm_type, '_framework': llm.config['env']['backend_value'], 'start_name': llm.config['start_name'], 'base_name_or_path': llm.model_id, 'bundler': 'openllm.bundle'
+      '_type': llm.llm_type,
+      '_framework': llm.config['env']['backend_value'],
+      'start_name': llm.config['start_name'],
+      'base_name_or_path': llm.model_id,
+      'bundler': 'openllm.bundle'
   })
   if adapter_map: labels.update(adapter_map)
   if isinstance(workers_per_resource, str):
@@ -244,14 +234,7 @@ def create_bento(bento_tag: bentoml.Tag,
       exclude=['/venv', '/.venv', '__pycache__/', '*.py[cod]', '*$py.class'],
       python=construct_python_options(llm, llm_fs, extra_dependencies, adapter_map),
       models=[llm_spec],
-      docker=construct_docker_options(llm,
-                                      llm_fs,
-                                      workers_per_resource,
-                                      quantize,
-                                      adapter_map,
-                                      dockerfile_template,
-                                      _serialisation,
-                                      container_registry,
+      docker=construct_docker_options(llm, llm_fs, workers_per_resource, quantize, adapter_map, dockerfile_template, _serialisation, container_registry,
                                      container_version_strategy))
 
   bento = bentoml.Bento.create(build_config=build_config, version=bento_tag.version, build_ctx=llm_fs.getsyspath('/'))
diff --git a/openllm-python/src/openllm/cli/_factory.py b/openllm-python/src/openllm/cli/_factory.py
index 2ff736ca..cbd57584 100644
--- a/openllm-python/src/openllm/cli/_factory.py
+++ b/openllm-python/src/openllm/cli/_factory.py
@@ -54,9 +54,7 @@ def parse_config_options(config: LLMConfig, server_timeout: int, workers_per_res
   # TODO: Support amd.com/gpu on k8s
   _bentoml_config_options_env = environ.pop('BENTOML_CONFIG_OPTIONS', '')
   _bentoml_config_options_opts = [
-      'tracing.sample_rate=1.0',
-      f'api_server.traffic.timeout={server_timeout}',
-      f'runners."llm-{config["start_name"]}-runner".traffic.timeout={config["timeout"]}',
+      'tracing.sample_rate=1.0', f'api_server.traffic.timeout={server_timeout}', f'runners."llm-{config["start_name"]}-runner".traffic.timeout={config["timeout"]}',
      f'runners."llm-{config["start_name"]}-runner".workers_per_resource={workers_per_resource}'
   ]
   if device:
@@ -118,22 +116,9 @@ Available official model_id(s): [default: {llm_config['default_id']}]
   @group.command(**command_attrs)
   @start_decorator(llm_config, serve_grpc=_serve_grpc)
   @click.pass_context
-  def start_cmd(ctx: click.Context,
-                /,
-                server_timeout: int,
-                model_id: str | None,
-                model_version: str | None,
-                system_message: str | None,
-                prompt_template_file: t.IO[t.Any] | None,
-                workers_per_resource: t.Literal['conserved', 'round_robin'] | LiteralString,
-                device: t.Tuple[str, ...],
-                quantize: LiteralQuantise | None,
-                backend: LiteralBackend,
-                serialisation: LiteralSerialisation | None,
-                cors: bool,
-                adapter_id: str | None,
-                return_process: bool,
-                **attrs: t.Any,
+  def start_cmd(ctx: click.Context, /, server_timeout: int, model_id: str | None, model_version: str | None, system_message: str | None, prompt_template_file: t.IO[t.Any] | None,
+                workers_per_resource: t.Literal['conserved', 'round_robin'] | LiteralString, device: t.Tuple[str, ...], quantize: LiteralQuantise | None, backend: LiteralBackend,
+                serialisation: LiteralSerialisation | None, cors: bool, adapter_id: str | None, return_process: bool, **attrs: t.Any,
   ) -> LLMConfig | subprocess.Popen[bytes]:
     _serialisation = openllm_core.utils.first_not_none(serialisation, default=llm_config['serialisation'])
     if _serialisation == 'safetensors' and quantize is not None and openllm_core.utils.check_bool_env('OPENLLM_SERIALIZATION_WARNING'):
@@ -235,16 +220,10 @@ Available official model_id(s): [default: {llm_config['default_id']}]
 def start_decorator(llm_config: LLMConfig, serve_grpc: bool = False) -> t.Callable[[FC], t.Callable[[FC], FC]]:
   def wrapper(fn: FC) -> t.Callable[[FC], FC]:
     composed = openllm.utils.compose(
-        llm_config.to_click_options,
-        _http_server_args if not serve_grpc else _grpc_server_args,
-        cog.optgroup.group('General LLM Options', help=f"The following options are related to running '{llm_config['start_name']}' LLM Server."),
-        model_id_option(factory=cog.optgroup),
-        model_version_option(factory=cog.optgroup),
-        system_message_option(factory=cog.optgroup),
-        prompt_template_file_option(factory=cog.optgroup),
-        cog.optgroup.option('--server-timeout', type=int, default=None, help='Server timeout in seconds'),
-        workers_per_resource_option(factory=cog.optgroup),
-        cors_option(factory=cog.optgroup),
+        llm_config.to_click_options, _http_server_args if not serve_grpc else _grpc_server_args,
+        cog.optgroup.group('General LLM Options', help=f"The following options are related to running '{llm_config['start_name']}' LLM Server."), model_id_option(factory=cog.optgroup),
+        model_version_option(factory=cog.optgroup), system_message_option(factory=cog.optgroup), prompt_template_file_option(factory=cog.optgroup),
+        cog.optgroup.option('--server-timeout', type=int, default=None, help='Server timeout in seconds'), workers_per_resource_option(factory=cog.optgroup), cors_option(factory=cog.optgroup),
         backend_option(factory=cog.optgroup),
         cog.optgroup.group('LLM Optimization Options',
                            help='''Optimization related options.
@@ -255,9 +234,7 @@ def start_decorator(llm_config: LLMConfig, serve_grpc: bool = False) -> t.Callab
         - DeepSpeed Inference: [link](https://www.deepspeed.ai/inference/)
 
         - GGML: Fast inference on [bare metal](https://github.com/ggerganov/ggml)
-        '''),
-        quantize_option(factory=cog.optgroup),
-        serialisation_option(factory=cog.optgroup),
+        '''), quantize_option(factory=cog.optgroup), serialisation_option(factory=cog.optgroup),
         cog.optgroup.option('--device',
                             type=openllm.utils.dantic.CUDA,
                             multiple=True,
@@ -286,8 +263,8 @@ def start_decorator(llm_config: LLMConfig, serve_grpc: bool = False) -> t.Callab
                             help='Optional name or path for given LoRA adapter' + f" to wrap '{llm_config['model_name']}'",
                             multiple=True,
                             callback=_id_callback,
-                            metavar='[PATH | [remote/][adapter_name:]adapter_id][, ...]'),
-        click.option('--return-process', is_flag=True, default=False, help='Internal use only.', hidden=True),
+                            metavar='[PATH | [remote/][adapter_name:]adapter_id][, ...]'), click.option('--return-process', is_flag=True, default=False, help='Internal use only.',
+                                                                                                        hidden=True),
     )
     return composed(fn)
 
diff --git a/openllm-python/src/openllm/cli/extension/list_bentos.py b/openllm-python/src/openllm/cli/extension/list_bentos.py
index 3b13f38b..90004c6f 100644
--- a/openllm-python/src/openllm/cli/extension/list_bentos.py
+++ b/openllm-python/src/openllm/cli/extension/list_bentos.py
@@ -22,7 +22,8 @@ def cli(ctx: click.Context, output: LiteralOutput) -> None:
           'tag': str(b.tag),
           'size': human_readable_size(openllm.utils.calc_dir_size(b.path)),
           'models': [{
-              'tag': str(m.tag), 'size': human_readable_size(openllm.utils.calc_dir_size(m.path))
+              'tag': str(m.tag),
+              'size': human_readable_size(openllm.utils.calc_dir_size(m.path))
          } for m in (bentoml.models.get(_.tag) for _ in b.info.models)]
      } for b in tuple(i for i in bentoml.list() if all(
          k in i.info.labels for k in {'start_name', 'bundler'})) if b.info.labels['start_name'] == k] for k in tuple(inflection.dasherize(key) for key in openllm.CONFIG_MAPPING.keys())
diff --git a/openllm-python/src/openllm/models/auto/__init__.py b/openllm-python/src/openllm/models/auto/__init__.py
index bf61359b..1b0f9a1c 100644
--- a/openllm-python/src/openllm/models/auto/__init__.py
+++ b/openllm-python/src/openllm/models/auto/__init__.py
@@ -57,7 +57,9 @@ __lazy = LazyModule(__name__,
                     os.path.abspath('__file__'),
                     _import_structure,
                     extra_objects={
-                        'CONFIG_MAPPING': CONFIG_MAPPING, 'CONFIG_MAPPING_NAMES': CONFIG_MAPPING_NAMES, 'AutoConfig': AutoConfig,
+                        'CONFIG_MAPPING': CONFIG_MAPPING,
+                        'CONFIG_MAPPING_NAMES': CONFIG_MAPPING_NAMES,
+                        'AutoConfig': AutoConfig,
                     })
 __all__ = __lazy.__all__
 __dir__ = __lazy.__dir__
diff --git a/openllm-python/src/openllm/models/auto/factory.py b/openllm-python/src/openllm/models/auto/factory.py
index d309f423..9b083954 100644
--- a/openllm-python/src/openllm/models/auto/factory.py
+++ b/openllm-python/src/openllm/models/auto/factory.py
@@ -160,10 +160,9 @@ class _LazyAutoMapping(OrderedDict, ReprMixin):
         [self._load_attr_from_module(key, name) for key, name in self._model_mapping.items() if key in self._config_mapping.keys()] + list(self._extra_content.values()))
 
   def items(self) -> ConfigModelItemsView:
-    return t.cast('ConfigModelItemsView',
-                  [(self._load_attr_from_module(key, self._config_mapping[key]), self._load_attr_from_module(key, self._model_mapping[key]))
-                   for key in self._model_mapping.keys()
-                   if key in self._config_mapping.keys()] + list(self._extra_content.items()))
+    return t.cast('ConfigModelItemsView', [(self._load_attr_from_module(key, self._config_mapping[key]), self._load_attr_from_module(key, self._model_mapping[key]))
+                                           for key in self._model_mapping.keys()
+                                           if key in self._config_mapping.keys()] + list(self._extra_content.items()))
 
   def __iter__(self) -> t.Iterator[type[openllm.LLMConfig]]:
     return iter(t.cast('SupportsIter[t.Iterator[type[openllm.LLMConfig]]]', self.keys()))
diff --git a/openllm-python/src/openllm/models/chatglm/__init__.py b/openllm-python/src/openllm/models/chatglm/__init__.py
index 9dcafb02..612083f6 100644
--- a/openllm-python/src/openllm/models/chatglm/__init__.py
+++ b/openllm-python/src/openllm/models/chatglm/__init__.py
@@ -23,5 +23,7 @@ sys.modules[__name__] = LazyModule(__name__,
                                    globals()['__file__'],
                                    _import_structure,
                                    extra_objects={
-                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_CHATGLM_COMMAND_DOCSTRING': START_CHATGLM_COMMAND_DOCSTRING, 'ChatGLMConfig': ChatGLMConfig
+                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
+                                       'START_CHATGLM_COMMAND_DOCSTRING': START_CHATGLM_COMMAND_DOCSTRING,
+                                       'ChatGLMConfig': ChatGLMConfig
                                    })
diff --git a/openllm-python/src/openllm/models/dolly_v2/__init__.py b/openllm-python/src/openllm/models/dolly_v2/__init__.py
index c48243d3..5cdf2e97 100644
--- a/openllm-python/src/openllm/models/dolly_v2/__init__.py
+++ b/openllm-python/src/openllm/models/dolly_v2/__init__.py
@@ -30,5 +30,7 @@ sys.modules[__name__] = LazyModule(__name__,
                                    globals()['__file__'],
                                    _import_structure,
                                    extra_objects={
-                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_DOLLY_V2_COMMAND_DOCSTRING': START_DOLLY_V2_COMMAND_DOCSTRING, 'DollyV2Config': DollyV2Config
+                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
+                                       'START_DOLLY_V2_COMMAND_DOCSTRING': START_DOLLY_V2_COMMAND_DOCSTRING,
+                                       'DollyV2Config': DollyV2Config
                                    })
diff --git a/openllm-python/src/openllm/models/falcon/__init__.py b/openllm-python/src/openllm/models/falcon/__init__.py
index d553c4a7..909447b6 100644
--- a/openllm-python/src/openllm/models/falcon/__init__.py
+++ b/openllm-python/src/openllm/models/falcon/__init__.py
@@ -30,5 +30,7 @@ sys.modules[__name__] = LazyModule(__name__,
                                    globals()['__file__'],
                                    _import_structure,
                                    extra_objects={
-                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_FALCON_COMMAND_DOCSTRING': START_FALCON_COMMAND_DOCSTRING, 'FalconConfig': FalconConfig
+                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
+                                       'START_FALCON_COMMAND_DOCSTRING': START_FALCON_COMMAND_DOCSTRING,
+                                       'FalconConfig': FalconConfig
                                    })
diff --git a/openllm-python/src/openllm/models/gpt_neox/__init__.py b/openllm-python/src/openllm/models/gpt_neox/__init__.py
index 437645e1..1aef40fe 100644
--- a/openllm-python/src/openllm/models/gpt_neox/__init__.py
+++ b/openllm-python/src/openllm/models/gpt_neox/__init__.py
@@ -30,5 +30,7 @@ sys.modules[__name__] = LazyModule(__name__,
                                    globals()['__file__'],
                                    _import_structure,
                                    extra_objects={
-                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_GPT_NEOX_COMMAND_DOCSTRING': START_GPT_NEOX_COMMAND_DOCSTRING, 'GPTNeoXConfig': GPTNeoXConfig
+                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
+                                       'START_GPT_NEOX_COMMAND_DOCSTRING': START_GPT_NEOX_COMMAND_DOCSTRING,
+                                       'GPTNeoXConfig': GPTNeoXConfig
                                    })
diff --git a/openllm-python/src/openllm/models/opt/__init__.py b/openllm-python/src/openllm/models/opt/__init__.py
index 2f818789..0ae7ec97 100644
--- a/openllm-python/src/openllm/models/opt/__init__.py
+++ b/openllm-python/src/openllm/models/opt/__init__.py
@@ -46,5 +46,7 @@ sys.modules[__name__] = LazyModule(__name__,
                                    globals()['__file__'],
                                    _import_structure,
                                    extra_objects={
-                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE, 'START_OPT_COMMAND_DOCSTRING': START_OPT_COMMAND_DOCSTRING, 'OPTConfig': OPTConfig,
+                                       'DEFAULT_PROMPT_TEMPLATE': DEFAULT_PROMPT_TEMPLATE,
+                                       'START_OPT_COMMAND_DOCSTRING': START_OPT_COMMAND_DOCSTRING,
+                                       'OPTConfig': OPTConfig,
                                    })
diff --git a/openllm-python/src/openllm/models/opt/modeling_flax_opt.py b/openllm-python/src/openllm/models/opt/modeling_flax_opt.py
index 7b2bc981..934971e8 100644
--- a/openllm-python/src/openllm/models/opt/modeling_flax_opt.py
+++ b/openllm-python/src/openllm/models/opt/modeling_flax_opt.py
@@ -33,7 +33,11 @@ class FlaxOPT(openllm.LLM['transformers.TFOPTForCausalLM', 'transformers.GPT2Tok
                      use_default_prompt_template: bool = False,
                      **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
     return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
-        'max_new_tokens': max_new_tokens, 'temperature': temperature, 'top_k': top_k, 'num_return_sequences': num_return_sequences, 'repetition_penalty': repetition_penalty
+        'max_new_tokens': max_new_tokens,
+        'temperature': temperature,
+        'top_k': top_k,
+        'num_return_sequences': num_return_sequences,
+        'repetition_penalty': repetition_penalty
     }, {}
 
   def generate(self, prompt: str, **attrs: t.Any) -> list[str]:
diff --git a/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py b/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py
index 6c677ac3..04408893 100644
--- a/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py
+++ b/openllm-python/src/openllm/models/opt/modeling_vllm_opt.py
@@ -19,5 +19,8 @@ class VLLMOPT(openllm.LLM['vllm.LLMEngine', 'transformers.GPT2Tokenizer']):
                      use_default_prompt_template: bool = True,
                      **attrs: t.Any) -> tuple[str, dict[str, t.Any], dict[str, t.Any]]:
     return process_prompt(prompt, DEFAULT_PROMPT_TEMPLATE, use_default_prompt_template, **attrs), {
-        'max_new_tokens': max_new_tokens, 'temperature': temperature, 'top_k': top_k, 'num_return_sequences': num_return_sequences
+        'max_new_tokens': max_new_tokens,
+        'temperature': temperature,
+        'top_k': top_k,
+        'num_return_sequences': num_return_sequences
     }, {}
diff --git a/openllm-python/tests/configuration_test.py b/openllm-python/tests/configuration_test.py
index a0262a6e..1d913c3d 100644
--- a/openllm-python/tests/configuration_test.py
+++ b/openllm-python/tests/configuration_test.py
@@ -34,11 +34,13 @@ def test_missing_default():
     make_llm_config('MissingArchitecture', {'default_id': 'huggingface/t5-tiny-testing', 'model_ids': ['huggingface/t5-tiny-testing'], 'requirements': ['bentoml'],},)
 
 def test_forbidden_access():
-  cl_ = make_llm_config(
-      'ForbiddenAccess', {
-          'default_id': 'huggingface/t5-tiny-testing', 'model_ids': ['huggingface/t5-tiny-testing', 'bentoml/t5-tiny-testing'], 'architecture': 'PreTrainedModel', 'requirements': ['bentoml'],
-      },
-  )
+  cl_ = make_llm_config('ForbiddenAccess', {
+      'default_id': 'huggingface/t5-tiny-testing',
+      'model_ids': ['huggingface/t5-tiny-testing', 'bentoml/t5-tiny-testing'],
+      'architecture': 'PreTrainedModel',
+      'requirements': ['bentoml'],
+  },
+  )
 
   assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), '__config__',)
   assert pytest.raises(openllm.exceptions.ForbiddenAttributeError, cl_.__getattribute__, cl_(), 'GenerationConfig',)
@@ -128,7 +130,9 @@ def test_struct_envvar_with_overwrite_provided_env(monkeypatch: pytest.MonkeyPat
     mk.setenv(field_env_key('field1'), str(4.0))
     mk.setenv(field_env_key('temperature', suffix='generation'), str(0.2))
     sent = make_llm_config('OverwriteWithEnvAvailable', {
-        'default_id': 'asdfasdf', 'model_ids': ['asdf', 'asdfasdfads'], 'architecture': 'PreTrainedModel'
+        'default_id': 'asdfasdf',
+        'model_ids': ['asdf', 'asdfasdfads'],
+        'architecture': 'PreTrainedModel'
     },
                           fields=(('field1', 'float', 3.0),),
                           ).model_construct_env(field1=20.0, temperature=0.4)
diff --git a/openllm-python/tests/models/conftest.py b/openllm-python/tests/models/conftest.py
index 24b29eb6..0e2a42ce 100644
--- a/openllm-python/tests/models/conftest.py
+++ b/openllm-python/tests/models/conftest.py
@@ -196,7 +196,8 @@ def _container_handle(model: str, model_id: str, image_tag: str, deployment_mode
                                      detach=True,
                                      device_requests=devs,
                                      ports={
-                                         '3000/tcp': port, '3001/tcp': prom_port
+                                         '3000/tcp': port,
+                                         '3001/tcp': prom_port
                                      },
                                      )
 
diff --git a/pyproject.toml b/pyproject.toml
index 8a6b57f7..2db7a2b0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -273,17 +273,6 @@ ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT = true
 ALLOW_MULTILINE_DICTIONARY_KEYS = false
 ALLOW_SPLIT_BEFORE_DICT_VALUE = false
 COALESCE_BRACKETS = true
-NO_SPACES_AROUND_SELECTED_BINARY_OPERATORS = true
-SPACES_AROUND_SUBSCRIPT_COLON = false
-SPACES_AROUND_DICT_DELIMITERS = false
-SPACES_AROUND_LIST_DELIMITERS = false
-SPACES_AROUND_POWER_OPERATOR = false
-SPACES_AROUND_TUPLE_DELIMITERS = false
-SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET = false
-SPACE_INSIDE_BRACKETS = false
-SPLIT_ALL_COMMA_SEPARATED_VALUES = false
-SPLIT_ALL_TOP_LEVEL_COMMA_SEPARATED_VALUES = true
-SPLIT_BEFORE_DOT = true
 
 [tool.pytest.ini_options]
 addopts = ["-rfEX", "-pno:warnings", "--snapshot-warn-unused"]
diff --git a/tools/dependencies.py b/tools/dependencies.py
index 37d6f873..c7b25751 100755
--- a/tools/dependencies.py
+++ b/tools/dependencies.py
@@ -168,8 +168,7 @@ def create_classifiers() -> Array:
       Classifier.create_classifier('audience', 'Developers'),
       Classifier.create_classifier('audience', 'Science/Research'),
       Classifier.create_classifier('audience', 'System Administrators'),
-      Classifier.create_classifier('typing', 'Typed'),
-      *Classifier.create_python_classifier(),
+      Classifier.create_classifier('typing', 'Typed'), *Classifier.create_python_classifier(),
   ])
   return arr.multiline(True)
 
@@ -216,23 +215,8 @@ def authors() -> Array:
 def keywords() -> Array:
   arr = correct_style(tomlkit.array())
   arr.extend([
-      'MLOps',
-      'AI',
-      'BentoML',
-      'Model Serving',
-      'Model Deployment',
-      'LLMOps',
-      'Falcon',
-      'Vicuna',
-      'Llama 2',
-      'Fine tuning',
-      'Serverless',
-      'Large Language Model',
-      'Generative AI',
-      'StableLM',
-      'Alpaca',
-      'PyTorch',
-      'Transformers'
+      'MLOps', 'AI', 'BentoML', 'Model Serving', 'Model Deployment', 'LLMOps', 'Falcon', 'Vicuna', 'Llama 2', 'Fine tuning', 'Serverless', 'Large Language Model', 'Generative AI', 'StableLM',
+      'Alpaca', 'PyTorch', 'Transformers'
   ])
   return arr.multiline(True)
 
diff --git a/tools/update-brew-tap.py b/tools/update-brew-tap.py
index de11e05b..892500d6 100755
--- a/tools/update-brew-tap.py
+++ b/tools/update-brew-tap.py
@@ -16,7 +16,9 @@ _OWNER = 'bentoml'
 _REPO = 'openllm'
 
 _gz_strategies: dict[t.Literal['macos_arm', 'macos_intel', 'linux_intel'], str] = {
-    'macos_arm': 'aarch64-apple-darwin', 'macos_intel': 'x86_64-apple-darwin', 'linux_intel': 'x86_64-unknown-linux-musl'
+    'macos_arm': 'aarch64-apple-darwin',
+    'macos_intel': 'x86_64-apple-darwin',
+    'linux_intel': 'x86_64-unknown-linux-musl'
 }
 
 def determine_release_url(svn_url: str, tag: str, target:
t.Literal['macos_arm', 'macos_intel', 'linux_intel', 'archive']) -> str: diff --git a/tools/update-config-stubs.py b/tools/update-config-stubs.py index 616823f9..b97da4ed 100755 --- a/tools/update-config-stubs.py +++ b/tools/update-config-stubs.py @@ -103,11 +103,8 @@ def main() -> int: lines.append(' ' * 2 + '# NOTE: generation_class, sampling_class and extras arguments\n') lines.extend([ ' ' * 2 + line for line in [ - '@overload\n', - "def __getitem__(self, item: t.Literal['generation_class']) -> t.Type[openllm_core.GenerationConfig]: ...\n", - '@overload\n', - "def __getitem__(self, item: t.Literal['sampling_class']) -> t.Type[openllm_core.SamplingParams]: ...\n", - '@overload\n', + '@overload\n', "def __getitem__(self, item: t.Literal['generation_class']) -> t.Type[openllm_core.GenerationConfig]: ...\n", '@overload\n', + "def __getitem__(self, item: t.Literal['sampling_class']) -> t.Type[openllm_core.SamplingParams]: ...\n", '@overload\n', "def __getitem__(self, item: t.Literal['extras']) -> t.Dict[str, t.Any]: ...\n", ] ]) diff --git a/tools/update-dummy.py b/tools/update-dummy.py index 28fbdb99..e1a405d3 100755 --- a/tools/update-dummy.py +++ b/tools/update-dummy.py @@ -33,18 +33,14 @@ def make_class_stub(model_name: str, backend: LiteralBackend, indentation: int = if auto: cl_ = _auto[backend] else: cl_ = get_mapping(backend)[model_name] lines = [ - f'class {cl_}(metaclass=_DummyMetaclass):', - ' ' * indentation + f"_backends=[{','.join(_dep_list)}]", + f'class {cl_}(metaclass=_DummyMetaclass):', ' ' * indentation + f"_backends=[{','.join(_dep_list)}]", ' ' * indentation + f"def __init__(self,*param_decls:_t.Any,**attrs: _t.Any):_require_backends(self,[{','.join(_dep_list)}])" ] return lines def write_stub(backend: LiteralBackend, _path: str) -> list[str]: base = [ - f'# This file is generated by {_path}. DO NOT EDIT MANUALLY!', - f'# To update this, run ./{_path}', - 'from __future__ import annotations', - 'import typing as _t', + f'# This file is generated by {_path}. DO NOT EDIT MANUALLY!', f'# To update this, run ./{_path}', 'from __future__ import annotations', 'import typing as _t', 'from openllm_core.utils import DummyMetaclass as _DummyMetaclass, require_backends as _require_backends', ] base.extend([v for it in [make_class_stub(k, backend) for k in get_mapping(backend)] for v in it]) diff --git a/tools/update-models-import.py b/tools/update-models-import.py index 709b6f5c..dcc0426b 100755 --- a/tools/update-models-import.py +++ b/tools/update-models-import.py @@ -12,23 +12,16 @@ def create_module_import() -> str: def create_stubs_import() -> list[str]: return [ 'if t.TYPE_CHECKING:from . import ' + ','.join([f'{p.name} as {p.name}' for p in sorted(_TARGET_FILE.parent.glob('*/')) if p.name not in {'__pycache__', '__init__.py', '.DS_Store'}]), - '__lazy=LazyModule(__name__, globals()["__file__"], {k: [] for k in _MODELS})', - '__all__=__lazy.__all__', - '__dir__=__lazy.__dir__', - '__getattr__=__lazy.__getattr__\n' + '__lazy=LazyModule(__name__, globals()["__file__"], {k: [] for k in _MODELS})', '__all__=__lazy.__all__', '__dir__=__lazy.__dir__', '__getattr__=__lazy.__getattr__\n' ] def main() -> int: _path = os.path.join(os.path.basename(os.path.dirname(__file__)), os.path.basename(__file__)) with _TARGET_FILE.open('w') as f: f.writelines('\n'.join([ - f'# This file is generated by {_path}. DO NOT EDIT MANUALLY!', - f'# To update this, run ./{_path}', - 'from __future__ import annotations', - 'import typing as t', + f'# This file is generated by {_path}. 
DO NOT EDIT MANUALLY!', f'# To update this, run ./{_path}', 'from __future__ import annotations', 'import typing as t', 'from openllm_core.utils import LazyModule', - create_module_import(), - *create_stubs_import(), + create_module_import(), *create_stubs_import(), ])) return 0
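
For reference, the hunk above implies the generated `openllm/models/__init__.py` looks roughly like the sketch below. This is a reconstruction from the template strings visible in `create_stubs_import()` and `main()`, not the script's actual output: the `_MODELS` definition is assumed to be what `create_module_import()` emits (its body is not part of this diff), and the model names shown are an illustrative subset drawn from the packages touched elsewhere in this patch.

```python
# This file is generated by tools/update-models-import.py. DO NOT EDIT MANUALLY!
# To update this, run ./tools/update-models-import.py
from __future__ import annotations
import typing as t
from openllm_core.utils import LazyModule
# Assumed shape of create_module_import()'s output; the real list covers every model package.
_MODELS = ['chatglm', 'dolly_v2', 'falcon', 'gpt_neox', 'opt']
if t.TYPE_CHECKING:from . import chatglm as chatglm,dolly_v2 as dolly_v2,falcon as falcon,gpt_neox as gpt_neox,opt as opt
__lazy=LazyModule(__name__, globals()["__file__"], {k: [] for k in _MODELS})
__all__=__lazy.__all__
__dir__=__lazy.__dir__
__getattr__=__lazy.__getattr__
```

The `name as name` imports under `t.TYPE_CHECKING` mark explicit re-exports for type checkers, while `LazyModule` defers the real submodule imports until first attribute access at runtime.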