import functools import inspect import types import typing as t import attr from starlette.routing import Host, Mount, Route from starlette.schemas import EndpointInfo, SchemaGenerator from openllm_core.utils import first_not_none OPENAPI_VERSION, API_VERSION = '3.0.2', '1.0' # NOTE: OpenAI schema LIST_MODELS_SCHEMA = '''\ --- consumes: - application/json description: > List and describe the various models available in the API. You can refer to the available supported models with `openllm models` for more information. operationId: openai__list_models produces: - application/json summary: Describes a model offering that can be used with the API. tags: - OpenAI x-bentoml-name: list_models responses: 200: description: The Model object content: application/json: example: object: 'list' data: - id: __model_id__ object: model created: 1686935002 owned_by: 'na' schema: $ref: '#/components/schemas/ModelList' ''' CHAT_COMPLETIONS_SCHEMA = '''\ --- consumes: - application/json description: >- Given a list of messages comprising a conversation, the model will return a response. operationId: openai__chat_completions produces: - application/json tags: - OpenAI x-bentoml-name: create_chat_completions summary: Creates a model response for the given chat conversation. requestBody: required: true content: application/json: examples: one-shot: summary: One-shot input example value: messages: __chat_messages__ model: __model_id__ max_tokens: 256 temperature: 0.7 top_p: 0.43 n: 1 stream: false chat_template: __chat_template__ add_generation_prompt: __add_generation_prompt__ echo: false streaming: summary: Streaming input example value: messages: - role: system content: You are a helpful assistant. - role: user content: Hello, I'm looking for a chatbot that can help me with my work. model: __model_id__ max_tokens: 256 temperature: 0.7 top_p: 0.43 n: 1 stream: true stop: - "\\n" - "<|endoftext|>" chat_template: __chat_template__ add_generation_prompt: __add_generation_prompt__ echo: false schema: $ref: '#/components/schemas/ChatCompletionRequest' responses: 200: description: OK content: application/json: schema: $ref: '#/components/schemas/ChatCompletionResponse' examples: streaming: summary: Streaming output example value: > {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-3.5-turbo-0613","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]} one-shot: summary: One-shot output example value: > {"id": "chatcmpl-123", "object": "chat.completion", "created": 1677652288, "model": "gpt-3.5-turbo-0613", "choices": [{"index": 0, "message": {"role": "assistant", "content": "Hello there, how may I assist you today?"}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 9, "completion_tokens": 12, "total_tokens": 21}} 404: content: application/json: schema: $ref: '#/components/schemas/ErrorResponse' examples: wrong-model: summary: Wrong model value: > { "error": { "message": "Model 'meta-llama--Llama-2-13b-chat-hf' does not exists. Try 'GET /v1/models' to see available models.\\nTip: If you are migrating from OpenAI, make sure to update your 'model' parameters in the request.", "type": "invalid_request_error", "object": "error", "param": null, "code": 404 } } description: NotFound 500: content: application/json: schema: $ref: '#/components/schemas/ErrorResponse' examples: invalid-parameters: summary: Invalid parameters value: > { "error": { "message": "`top_p` has to be a float > 0 and < 1, but is 4.0", "type": "invalid_request_error", "object": "error", "param": null, "code": 500 } } description: Internal Server Error 400: content: application/json: schema: $ref: '#/components/schemas/ErrorResponse' examples: invalid-json: summary: Invalid JSON sent value: > { "error": { "message": "Invalid JSON input received (Check server log).", "type": "invalid_request_error", "object": "error", "param": null, "code": 400 } } invalid-prompt: summary: Invalid prompt value: > { "error": { "message": "Please provide a prompt.", "type": "invalid_request_error", "object": "error", "param": null, "code": 400 } } description: Bad Request ''' COMPLETIONS_SCHEMA = '''\ --- consumes: - application/json description: >- Given a prompt, the model will return one or more predicted completions, and can also return the probabilities of alternative tokens at each position. We recommend most users use our Chat completions API. operationId: openai__completions produces: - application/json tags: - OpenAI x-bentoml-name: create_completions summary: Creates a completion for the provided prompt and parameters. requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/CompletionRequest' examples: one-shot: summary: One-shot input example value: prompt: This is a test model: __model_id__ max_tokens: 256 temperature: 0.7 logprobs: 1 top_p: 0.43 n: 1 stream: false streaming: summary: Streaming input example value: prompt: This is a test model: __model_id__ max_tokens: 256 temperature: 0.7 top_p: 0.43 logprobs: 1 n: 1 stream: true stop: - "\\n" - "<|endoftext|>" responses: 200: description: OK content: application/json: schema: $ref: '#/components/schemas/CompletionResponse' examples: one-shot: summary: One-shot output example value: id: cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7 object: text_completion created: 1589478378 model: VAR_model_id choices: - text: This is indeed a test index: 0 logprobs: null finish_reason: length usage: prompt_tokens: 5 completion_tokens: 7 total_tokens: 12 streaming: summary: Streaming output example value: id: cmpl-7iA7iJjj8V2zOkCGvWF2hAkDWBQZe object: text_completion created: 1690759702 choices: - text: This index: 0 logprobs: null finish_reason: null model: gpt-3.5-turbo-instruct 404: content: application/json: schema: $ref: '#/components/schemas/ErrorResponse' examples: wrong-model: summary: Wrong model value: > { "error": { "message": "Model 'meta-llama--Llama-2-13b-chat-hf' does not exists. Try 'GET /v1/models' to see available models.\\nTip: If you are migrating from OpenAI, make sure to update your 'model' parameters in the request.", "type": "invalid_request_error", "object": "error", "param": null, "code": 404 } } description: NotFound 500: content: application/json: schema: $ref: '#/components/schemas/ErrorResponse' examples: invalid-parameters: summary: Invalid parameters value: > { "error": { "message": "`top_p` has to be a float > 0 and < 1, but is 4.0", "type": "invalid_request_error", "object": "error", "param": null, "code": 500 } } description: Internal Server Error 400: content: application/json: schema: $ref: '#/components/schemas/ErrorResponse' examples: invalid-json: summary: Invalid JSON sent value: > { "error": { "message": "Invalid JSON input received (Check server log).", "type": "invalid_request_error", "object": "error", "param": null, "code": 400 } } invalid-prompt: summary: Invalid prompt value: > { "error": { "message": "Please provide a prompt.", "type": "invalid_request_error", "object": "error", "param": null, "code": 400 } } description: Bad Request ''' HF_AGENT_SCHEMA = '''\ --- consumes: - application/json description: Generate instruction for given HF Agent chain for all OpenLLM supported models. operationId: hf__agent summary: Generate instruction for given HF Agent. tags: - HF x-bentoml-name: hf_agent produces: - application/json requestBody: content: application/json: schema: $ref: '#/components/schemas/AgentRequest' example: inputs: "Is the following `text` positive or negative?" parameters: text: "This is a positive text." stop: ["\n"] required: true responses: 200: description: Successfull generated instruction. content: application/json: example: - generated_text: "This is a generated instruction." schema: $ref: '#/components/schemas/AgentResponse' 400: content: application/json: schema: $ref: '#/components/schemas/HFErrorResponse' description: Bad Request 500: content: application/json: schema: $ref: '#/components/schemas/HFErrorResponse' description: Not Found ''' HF_ADAPTERS_SCHEMA = '''\ --- consumes: - application/json description: Return current list of adapters for given LLM. operationId: hf__adapters_map produces: - application/json summary: Describes a model offering that can be used with the API. tags: - HF x-bentoml-name: hf_adapters responses: 200: description: Return list of LoRA adapters. content: application/json: example: aarnphm/opt-6-7b-quotes: adapter_name: default adapter_type: LORA aarnphm/opt-6-7b-dolly: adapter_name: dolly adapter_type: LORA 500: content: application/json: schema: $ref: '#/components/schemas/HFErrorResponse' description: Not Found ''' COHERE_GENERATE_SCHEMA = '''\ --- consumes: - application/json description: >- Given a prompt, the model will return one or more predicted completions, and can also return the probabilities of alternative tokens at each position. operationId: cohere__generate produces: - application/json tags: - Cohere x-bentoml-name: cohere_generate summary: Creates a completion for the provided prompt and parameters. requestBody: required: true content: application/json: schema: $ref: '#/components/schemas/CohereGenerateRequest' examples: one-shot: summary: One-shot input example value: prompt: This is a test max_tokens: 256 temperature: 0.7 p: 0.43 k: 12 num_generations: 2 stream: false streaming: summary: Streaming input example value: prompt: This is a test max_tokens: 256 temperature: 0.7 p: 0.43 k: 12 num_generations: 2 stream: true stop_sequences: - "\\n" - "<|endoftext|>" ''' COHERE_CHAT_SCHEMA = '''\ --- consumes: - application/json description: >- Given a list of messages comprising a conversation, the model will return a response. operationId: cohere__chat produces: - application/json tags: - Cohere x-bentoml-name: cohere_chat summary: Creates a model response for the given chat conversation. ''' _SCHEMAS = {k[:-7].lower(): v for k, v in locals().items() if k.endswith('_SCHEMA')} def apply_schema(func, **attrs): for k, v in attrs.items(): func.__doc__ = func.__doc__.replace(k, v) return func def add_schema_definitions(func): append_str = _SCHEMAS.get(func.__name__.lower(), '') if not append_str: return func if func.__doc__ is None: func.__doc__ = '' func.__doc__ = func.__doc__.strip() + '\n\n' + append_str.strip() return func class OpenLLMSchemaGenerator(SchemaGenerator): def get_endpoints(self, routes): endpoints_info = [] for route in routes: if isinstance(route, (Mount, Host)): routes = route.routes or [] path = self._remove_converter(route.path) if isinstance(route, Mount) else '' sub_endpoints = [ EndpointInfo(path=f'{path}{sub_endpoint.path}', http_method=sub_endpoint.http_method, func=sub_endpoint.func) for sub_endpoint in self.get_endpoints(routes) ] endpoints_info.extend(sub_endpoints) elif not isinstance(route, Route) or not route.include_in_schema: continue elif ( inspect.isfunction(route.endpoint) or inspect.ismethod(route.endpoint) or isinstance(route.endpoint, functools.partial) ): endpoint = route.endpoint.func if isinstance(route.endpoint, functools.partial) else route.endpoint path = self._remove_converter(route.path) for method in route.methods or ['GET']: if method == 'HEAD': continue endpoints_info.append(EndpointInfo(path, method.lower(), endpoint)) else: path = self._remove_converter(route.path) for method in ['get', 'post', 'put', 'patch', 'delete', 'options']: if not hasattr(route.endpoint, method): continue func = getattr(route.endpoint, method) endpoints_info.append(EndpointInfo(path, method.lower(), func)) return endpoints_info def get_schema(self, routes, mount_path=None): schema = dict(self.base_schema) schema.setdefault('paths', {}) endpoints_info = self.get_endpoints(routes) if mount_path: mount_path = f'/{mount_path}' if not mount_path.startswith('/') else mount_path for endpoint in endpoints_info: parsed = self.parse_docstring(endpoint.func) if not parsed: continue path = endpoint.path if mount_path is None else mount_path + endpoint.path if path not in schema['paths']: schema['paths'][path] = {} schema['paths'][path][endpoint.http_method] = parsed return schema def get_generator(title, components=None, tags=None, inject=True): base_schema = {'info': {'title': title, 'version': API_VERSION}, 'version': OPENAPI_VERSION} if components and inject: base_schema['components'] = {'schemas': {c.__name__: component_schema_generator(c) for c in components}} if tags is not None and tags and inject: base_schema['tags'] = tags return OpenLLMSchemaGenerator(base_schema) def component_schema_generator(attr_cls, description=None): schema = {'type': 'object', 'required': [], 'properties': {}, 'title': attr_cls.__name__} schema['description'] = first_not_none( getattr(attr_cls, '__doc__', None), description, default=f'Generated components for {attr_cls.__name__}' ) for field in attr.fields(attr.resolve_types(attr_cls)): attr_type = field.type origin_type = t.get_origin(attr_type) args_type = t.get_args(attr_type) # Map Python types to OpenAPI schema types if attr_type == str: schema_type = 'string' elif attr_type == int: schema_type = 'integer' elif attr_type == float: schema_type = 'number' elif attr_type == bool: schema_type = 'boolean' elif origin_type is list or origin_type is tuple: schema_type = 'array' elif origin_type is dict: schema_type = 'object' # Assuming string keys for simplicity, and handling Any type for values prop_schema = {'type': 'object', 'additionalProperties': True if args_type[1] is t.Any else {'type': 'string'}} elif attr_type == t.Optional[str]: schema_type = 'string' elif origin_type is t.Union and t.Any in args_type: schema_type = 'object' prop_schema = {'type': 'object', 'additionalProperties': True} else: schema_type = 'string' if 'prop_schema' not in locals(): prop_schema = {'type': schema_type} if field.default is not attr.NOTHING and not isinstance(field.default, attr.Factory): prop_schema['default'] = field.default if field.default is attr.NOTHING and not isinstance(attr_type, type(t.Optional)): schema['required'].append(field.name) schema['properties'][field.name] = prop_schema locals().pop('prop_schema', None) return schema _SimpleSchema = types.new_class( '_SimpleSchema', (object,), {}, lambda ns: ns.update({'__init__': lambda self, it: setattr(self, 'it', it), 'asdict': lambda self: self.it}), ) def append_schemas(svc, generated_schema, tags_order='prepend', inject=True): # HACK: Dirty hack to append schemas to existing service. We def need to support mounting Starlette app OpenAPI spec. from bentoml._internal.service.openapi.specification import OpenAPISpecification if not inject: return svc svc_schema = svc.openapi_spec if isinstance(svc_schema, (OpenAPISpecification, _SimpleSchema)): svc_schema = svc_schema.asdict() if 'tags' in generated_schema: if tags_order == 'prepend': svc_schema['tags'] = generated_schema['tags'] + svc_schema['tags'] elif tags_order == 'append': svc_schema['tags'].extend(generated_schema['tags']) else: raise ValueError(f'Invalid tags_order: {tags_order}') if 'components' in generated_schema: svc_schema['components']['schemas'].update(generated_schema['components']['schemas']) svc_schema['paths'].update(generated_schema['paths']) # HACK: mk this attribute until we have a better way to add starlette schemas. from bentoml._internal.service import openapi def _generate_spec(svc, openapi_version=OPENAPI_VERSION): return _SimpleSchema(svc_schema) def asdict(self): return svc_schema openapi.generate_spec = _generate_spec OpenAPISpecification.asdict = asdict return svc