experimental: Cohere compatible endpoints. (#644)

* feat: add generate endpoint

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: update generation

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* fix(cohere): generate endpoints

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: --wip--

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* feat: update testing clients and chat implementation

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>

* chore: disable schemas for easter eggs

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>

---------

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
Signed-off-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron Pham
2023-11-14 01:07:43 -05:00
committed by GitHub
parent 0bf6ec7537
commit b0ab8ccdf6
8 changed files with 638 additions and 16 deletions

View File

@@ -396,7 +396,7 @@ produces:
summary: Describes a model offering that can be used with the API.
tags:
- HF
x-bentoml-name: adapters_map
x-bentoml-name: hf_adapters
responses:
200:
description: Return list of LoRA adapters.
@@ -416,6 +416,65 @@ responses:
$ref: '#/components/schemas/HFErrorResponse'
description: Not Found
"""
# OpenAPI operation description (YAML) for the Cohere-compatible generate
# endpoint. Nested keys must stay indented: the folded `description` scalar and
# the `requestBody` example tree are only valid YAML when their children sit
# deeper than their parent key.
COHERE_GENERATE_SCHEMA = """\
---
consumes:
  - application/json
description: >-
  Given a prompt, the model will return one or more predicted completions, and
  can also return the probabilities of alternative tokens at each position.
operationId: cohere__generate
produces:
  - application/json
tags:
  - Cohere
x-bentoml-name: cohere_generate
summary: Creates a completion for the provided prompt and parameters.
requestBody:
  required: true
  content:
    application/json:
      schema:
        $ref: '#/components/schemas/CohereGenerateRequest'
      examples:
        one-shot:
          summary: One-shot input example
          value:
            prompt: This is a test
            max_tokens: 256
            temperature: 0.7
            p: 0.43
            k: 12
            num_generations: 2
            stream: false
        streaming:
          summary: Streaming input example
          value:
            prompt: This is a test
            max_tokens: 256
            temperature: 0.7
            p: 0.43
            k: 12
            num_generations: 2
            stream: true
            stop_sequences:
              - "\\n"
              - "<|endoftext|>"
"""
# OpenAPI operation description (YAML) for the Cohere-compatible chat endpoint.
# The folded `description` scalar and the list items are indented under their
# parent keys so the document parses as a single YAML mapping.
COHERE_CHAT_SCHEMA = """\
---
consumes:
  - application/json
description: >-
  Given a list of messages comprising a conversation, the model will return a response.
operationId: cohere__chat
produces:
  - application/json
tags:
  - Cohere
x-bentoml-name: cohere_chat
summary: Creates a model response for the given chat conversation.
"""
# Lookup table built from every `*_SCHEMA` name visible in the current scope:
# the `_SCHEMA` suffix is dropped and the remainder lower-cased to form the key
# (e.g. `COHERE_CHAT_SCHEMA` is stored under `cohere_chat`).
_SCHEMAS = {
  name[: -len('_SCHEMA')].lower(): spec
  for name, spec in locals().items()
  if name.endswith('_SCHEMA')
}
@@ -485,12 +544,15 @@ class OpenLLMSchemaGenerator(SchemaGenerator):
def get_generator(
  title: str,
  components: list[type[AttrsInstance]] | None = None,
  tags: list[dict[str, t.Any]] | None = None,
  inject: bool = True,
) -> OpenLLMSchemaGenerator:
  """Create an ``OpenLLMSchemaGenerator`` seeded with a base schema document.

  Args:
    title: Value stored under ``info.title`` of the base schema.
    components: Optional classes to describe under ``components.schemas``.
      Each one is keyed by its ``__name__`` and rendered with
      ``component_schema_generator``.
    tags: Optional tag entries stored under the ``tags`` key.
    inject: When ``False``, neither ``components`` nor ``tags`` is added to
      the base schema, even if they were supplied. Defaults to ``True``.

  Returns:
    A generator wrapping the assembled base schema.
  """
  base_schema: dict[str, t.Any] = dict(info={'title': title, 'version': API_VERSION}, version=OPENAPI_VERSION)
  if components and inject:
    base_schema['components'] = {'schemas': {c.__name__: component_schema_generator(c) for c in components}}
  # Truthiness already rules out both None and an empty list.
  if tags and inject:
    base_schema['tags'] = tags
  return OpenLLMSchemaGenerator(base_schema)