From 1539c3f7dc35bf60c2431316a64f11ac7aba9988 Mon Sep 17 00:00:00 2001 From: Aaron Pham <29749331+aarnphm@users.noreply.github.com> Date: Thu, 12 Oct 2023 17:21:54 -0400 Subject: [PATCH] feat(client): simple implementation and streaming (#256) --- .editorconfig | 2 + .gitattributes | 4 + cz.py | 9 +- hatch.toml | 1 + openllm-client/README.md | 7 +- openllm-client/dev.Dockerfile | 98 +++ openllm-client/generate-grpc-stubs | 66 ++ openllm-client/protos/service.proto | 297 +++++++++ openllm-client/pyproject.toml | 12 +- openllm-client/src/openllm_client/__init__.py | 12 +- openllm-client/src/openllm_client/_base.py | 298 --------- openllm-client/src/openllm_client/_http.py | 137 ++++ openllm-client/src/openllm_client/_schemas.py | 24 + .../src/openllm_client/benmin/__init__.py | 124 ---- .../src/openllm_client/benmin/_grpc.py | 291 -------- .../src/openllm_client/benmin/_http.py | 201 ------ openllm-client/src/openllm_client/client.py | 36 - .../src/openllm_client/pb/__init__.py | 2 + .../{_adapters.py => pb/v1/__init__.py} | 0 .../pb/v1/_generated_pb3/__init__.py | 0 .../pb/v1/_generated_pb3/service_pb2.py | 205 ++++++ .../pb/v1/_generated_pb3/service_pb2.pyi | 611 +++++++++++++++++ .../pb/v1/_generated_pb3/service_pb2_grpc.py | 104 +++ .../pb/v1/_generated_pb3/service_pb2_grpc.pyi | 42 ++ .../pb/v1/_generated_pb4/__init__.py | 0 .../pb/v1/_generated_pb4/service_pb2.py | 84 +++ .../pb/v1/_generated_pb4/service_pb2.pyi | 624 ++++++++++++++++++ .../pb/v1/_generated_pb4/service_pb2_grpc.py | 104 +++ .../pb/v1/_generated_pb4/service_pb2_grpc.pyi | 67 ++ .../src/openllm_client/pb/v1/service_pb2.py | 6 + .../src/openllm_client/pb/v1/service_pb2.pyi | 6 + .../openllm_client/pb/v1/service_pb2_grpc.py | 6 + .../openllm_client/pb/v1/service_pb2_grpc.pyi | 6 + openllm-core/src/openllm_core/_schema.py | 2 + .../src/openllm_core/_typing_compat.py | 2 + .../config/configuration_baichuan.py | 8 + .../config/configuration_llama.py | 10 +- openllm-python/src/openllm/_llm.py | 12 +- openllm-python/src/openllm/_service.py | 19 +- openllm-python/src/openllm/cli/entrypoint.py | 32 +- openllm-python/src/openllm/client.py | 6 +- pyproject.toml | 1 + 42 files changed, 2581 insertions(+), 997 deletions(-) create mode 100644 openllm-client/dev.Dockerfile create mode 100755 openllm-client/generate-grpc-stubs create mode 100644 openllm-client/protos/service.proto delete mode 100644 openllm-client/src/openllm_client/_base.py create mode 100644 openllm-client/src/openllm_client/_http.py create mode 100644 openllm-client/src/openllm_client/_schemas.py delete mode 100644 openllm-client/src/openllm_client/benmin/__init__.py delete mode 100644 openllm-client/src/openllm_client/benmin/_grpc.py delete mode 100644 openllm-client/src/openllm_client/benmin/_http.py delete mode 100644 openllm-client/src/openllm_client/client.py create mode 100644 openllm-client/src/openllm_client/pb/__init__.py rename openllm-client/src/openllm_client/{_adapters.py => pb/v1/__init__.py} (100%) create mode 100644 openllm-client/src/openllm_client/pb/v1/_generated_pb3/__init__.py create mode 100644 openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2.py create mode 100644 openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2.pyi create mode 100644 openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2_grpc.py create mode 100644 openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2_grpc.pyi create mode 100644 openllm-client/src/openllm_client/pb/v1/_generated_pb4/__init__.py create mode 100644 openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2.py create mode 100644 openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2.pyi create mode 100644 openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2_grpc.py create mode 100644 openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2_grpc.pyi create mode 100644 openllm-client/src/openllm_client/pb/v1/service_pb2.py create mode 100644 openllm-client/src/openllm_client/pb/v1/service_pb2.pyi create mode 100644 openllm-client/src/openllm_client/pb/v1/service_pb2_grpc.py create mode 100644 openllm-client/src/openllm_client/pb/v1/service_pb2_grpc.pyi diff --git a/.editorconfig b/.editorconfig index 3fcb4be2..0d53e419 100644 --- a/.editorconfig +++ b/.editorconfig @@ -11,3 +11,5 @@ indent_size = 2 [openllm-python/src/openllm/cli/entrypoint.py] indent_size = unset +[openllm-client/src/openllm_client/pb/v1/*] +indent_size = unset diff --git a/.gitattributes b/.gitattributes index a6e8db87..f25ee99a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,6 +2,10 @@ contrib/clojure/pnpm-lock.yaml linguist-generated=true contrib/clojure/src/generated/** linguist-generated=true +# Python Client +*_pb2*.py linguist-generated=true +*_pb2*.pyi linguist-generated=true + # Python sdk openllm-python/tests/models/__snapshots__/* linguist-generated=true openllm-python/src/openllm/utils/dummy_*.py linguist-generated=true diff --git a/cz.py b/cz.py index a1fd3cb4..ac604cfb 100755 --- a/cz.py +++ b/cz.py @@ -20,10 +20,13 @@ def run_cz(dir: str, package: str): tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST] token_count, line_count = len(tokens), len(set([t.start[0] for t in tokens])) table.append([filepath.replace(os.path.join(dir, 'src'), ''), line_count, token_count / line_count if line_count != 0 else 0]) + print(f'\n{"=" * 80}\n') print(tabulate([headers, *sorted(table, key=lambda x: -x[1])], headers='firstrow', floatfmt='.1f') + '\n') - for dir_name, group in itertools.groupby(sorted([(x[0].rsplit('/', 1)[0], x[1]) for x in table]), key=lambda x: x[0]): - print(f'{dir_name:35s} : {sum([x[1] for x in group]):6d}') - print(f'\ntotal line count: {sum([x[1] for x in table])}') + print( + tabulate([(dir_name, sum([x[1] for x in group])) for dir_name, group in itertools.groupby(sorted([(x[0].rsplit('/', 1)[0], x[1]) for x in table]), key=lambda x: x[0])], + headers=['Directory', 'LOC'], + floatfmt='.1f')) + print(f'total line count for {package}: {sum([x[1] for x in table])}\n') def main() -> int: run_cz('openllm-python', 'openllm') diff --git a/hatch.toml b/hatch.toml index 87bdaf16..10162e77 100644 --- a/hatch.toml +++ b/hatch.toml @@ -95,6 +95,7 @@ clojure = ["bash openllm-contrib/clojure/run-clojure-ui.sh"] [envs.ci] detached = true [envs.ci.scripts] +client-stubs = "bash openllm-client/generate-grpc-stubs" compile = "bash ./compile.sh {args}" recompile = ["bash ./clean.sh", "compile"] edi = "bash local.sh" diff --git a/openllm-client/README.md b/openllm-client/README.md index 417dc2e7..c20d4db7 100644 --- a/openllm-client/README.md +++ b/openllm-client/README.md @@ -45,12 +45,13 @@ This package holds the underlying client implementation for OpenLLM. If you are coming from OpenLLM, the client can be accessed via `openllm.client`. It provides somewhat of a "similar" APIs to [`bentoml.Client`](https://docs.bentoml.com/en/latest/guides/client.html) -(via `openllm_client.benmin`) for interacting with OpenLLM server. This can also be extended to use with general +(via `openllm_client.min`) for interacting with OpenLLM server. This can also be extended to use with general BentoML server as well. > [!NOTE] -> The component of interop with generic BentoML server will be considered as experimental that will/can be merged back to BentoML. -> If you are just using this package for interacting with OpenLLM server, nothing should change from `openllm.client` namespace. +> The component of interop with generic BentoML server will be considered as _EXPERIMENTAL_ and +> will be refactored to new client implementation soon! +> If you are just using this package for interacting with OpenLLM server, The API should be the same as `openllm.client` namespace. ```python import openllm diff --git a/openllm-client/dev.Dockerfile b/openllm-client/dev.Dockerfile new file mode 100644 index 00000000..211e1fc0 --- /dev/null +++ b/openllm-client/dev.Dockerfile @@ -0,0 +1,98 @@ +# syntax=docker/dockerfile-upstream:master + +FROM python:3.10-slim as base + +ENV DEBIAN_FRONTEND=noninteractive + +WORKDIR /workspace + +RUN --mount=type=cache,target=/var/lib/apt \ + --mount=type=cache,target=/var/cache/apt \ + apt-get update && \ + apt-get install -q -y --no-install-recommends --allow-remove-essential \ + bash build-essential ca-certificates git tree + +FROM base as protobuf-3 + +COPY <<-EOT requirements.txt + protobuf>=3.5.0,<4.0dev + grpcio-tools + mypy-protobuf +EOT + +RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements.txt + +FROM base as protobuf-4 + +COPY <<-EOT requirements.txt + protobuf>=4.0,<5.0dev + grpcio-tools + mypy-protobuf +EOT + +RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements.txt + +############################################ + +# BentoML gRPC protobuf 3 generation + +FROM protobuf-3 as run-grpcio-tools-3 + +ARG PROTOCOL_VERSION +ARG GENERATED_PB3_DIR + +RUN mkdir -p /result/${GENERATED_PB3_DIR} + +RUN --mount=type=bind,target=.,rw < /dev/null 2>&1; then + set -x + docker buildx "$@" $progress + elif buildx version > /dev/null 2>&1; then + buildx "$@" $progress + else + echo "Make sure to have Docker Buildx installed." + exit 1 + fi +} + +GIT_ROOT=$(git rev-parse --show-toplevel) + +cd "$GIT_ROOT/openllm-client" || exit 1 + +main() { + VERSION="${1:-v1}" + echo "Generating gRPC stubs for $VERSION..." + + mkdir -p "src/openllm_client/pb/${VERSION}" + + GENERATED_PB3_DIR="src/openllm_client/pb/${VERSION}/_generated_pb3" + \rm -rf "$GENERATED_PB3_DIR" + buildxCmd build --build-arg PROTOCOL_VERSION="$VERSION" \ + --build-arg BUILDKIT_CONTEXT_KEEP_GIT_DIR=1 \ + --build-arg GENERATED_PB3_DIR="${GENERATED_PB3_DIR}" \ + --target "protobuf-3-output" --output "type=local,dest=${GENERATED_PB3_DIR}" --file "dev.Dockerfile" . + + GENERATED_PB4_DIR="src/openllm_client/pb/${VERSION}/_generated_pb4" + \rm -rf "$GENERATED_PB4_DIR" + buildxCmd build --build-arg PROTOCOL_VERSION="$VERSION" \ + --build-arg BUILDKIT_CONTEXT_KEEP_GIT_DIR=1 \ + --build-arg GENERATED_PB4_DIR="${GENERATED_PB4_DIR}" \ + --target "protobuf-4-output" --output "type=local,dest=${GENERATED_PB4_DIR}" --file "dev.Dockerfile" . + touch "src/openllm_client/pb/${VERSION}/__init__.py" + files=("service_pb2.py" "service_pb2.pyi" "service_pb2_grpc.py" "service_pb2_grpc.pyi") + for file in "${files[@]}";do + module="${file%.*}" + cat < "src/openllm_client/pb/${VERSION}/${file}" +from __future__ import annotations +from google.protobuf import __version__ +if __version__.startswith("4"): + from ._generated_pb4.$module import * +else: + from ._generated_pb3.$module import * +EOF + done +} + +if [ "${#}" -gt 1 ]; then + echo "$0 takes one optional argument. Usage: $0 [v1]" + exit 1 +fi +main "$@" diff --git a/openllm-client/protos/service.proto b/openllm-client/protos/service.proto new file mode 100644 index 00000000..be24c8ec --- /dev/null +++ b/openllm-client/protos/service.proto @@ -0,0 +1,297 @@ +// Vendorred from: https://github.com/bentoml/BentoML/blob/main/src/bentoml/grpc/v1/service.proto +syntax = "proto3"; + +package bentoml.grpc.v1; + +import "google/protobuf/struct.proto"; +import "google/protobuf/wrappers.proto"; + +// cc_enable_arenas pre-allocate memory for given message to improve speed. (C++ only) +option cc_enable_arenas = true; +option go_package = "github.com/bentoml/bentoml/grpc/v1;service"; +option java_multiple_files = true; +option java_outer_classname = "ServiceProto"; +option java_package = "com.bentoml.grpc.v1"; +option objc_class_prefix = "SVC"; +option py_generic_services = true; + +// a gRPC BentoServer. +service BentoService { + // Call handles methodcaller of given API entrypoint. + rpc Call(Request) returns (Response) {} + // ServiceMetadata returns metadata of bentoml.Service. + rpc ServiceMetadata(ServiceMetadataRequest) returns (ServiceMetadataResponse) {} +} + +// ServiceMetadataRequest message doesn't take any arguments. +message ServiceMetadataRequest {} + +// ServiceMetadataResponse returns metadata of bentoml.Service. +// Currently it includes name, version, apis, and docs. +message ServiceMetadataResponse { + // DescriptorMetadata is a metadata of any given IODescriptor. + message DescriptorMetadata { + // descriptor_id describes the given ID of the descriptor, which matches with our OpenAPI definition. + optional string descriptor_id = 1; + + // attributes is the kwargs of the given descriptor. + google.protobuf.Struct attributes = 2; + } + // InferenceAPI is bentoml._internal.service.inferece_api.InferenceAPI + // that is exposed to gRPC client. + // There is no way for reflection to get information of given @svc.api. + message InferenceAPI { + // name is the name of the API. + string name = 1; + // input is the input descriptor of the API. + optional DescriptorMetadata input = 2; + // output is the output descriptor of the API. + optional DescriptorMetadata output = 3; + // docs is the optional documentation of the API. + optional string docs = 4; + } + // name is the service name. + string name = 1; + // apis holds a list of InferenceAPI of the service. + repeated InferenceAPI apis = 2; + // docs is the documentation of the service. + string docs = 3; +} + +// Request message for incoming Call. +message Request { + // api_name defines the API entrypoint to call. + // api_name is the name of the function defined in bentoml.Service. + // Example: + // + // @svc.api(input=NumpyNdarray(), output=File()) + // def predict(input: NDArray[float]) -> bytes: + // ... + // + // api_name is "predict" in this case. + string api_name = 1; + + oneof content { + // NDArray represents a n-dimensional array of arbitrary type. + NDArray ndarray = 3; + + // DataFrame represents any tabular data type. We are using + // DataFrame as a trivial representation for tabular type. + DataFrame dataframe = 5; + + // Series portrays a series of values. This can be used for + // representing Series types in tabular data. + Series series = 6; + + // File represents for any arbitrary file type. This can be + // plaintext, image, video, audio, etc. + File file = 7; + + // Text represents a string inputs. + google.protobuf.StringValue text = 8; + + // JSON is represented by using google.protobuf.Value. + // see https://github.com/protocolbuffers/protobuf/blob/main/src/google/protobuf/struct.proto + google.protobuf.Value json = 9; + + // Multipart represents a multipart message. + // It comprises of a mapping from given type name to a subset of aforementioned types. + Multipart multipart = 10; + + // serialized_bytes is for data serialized in BentoML's internal serialization format. + bytes serialized_bytes = 2; + } + + // Tensor is similiar to ndarray but with a name + // We are reserving it for now for future use. + // repeated Tensor tensors = 4; + reserved 4, 11 to 13; +} + +// Request message for incoming Call. +message Response { + oneof content { + // NDArray represents a n-dimensional array of arbitrary type. + NDArray ndarray = 1; + + // DataFrame represents any tabular data type. We are using + // DataFrame as a trivial representation for tabular type. + DataFrame dataframe = 3; + + // Series portrays a series of values. This can be used for + // representing Series types in tabular data. + Series series = 5; + + // File represents for any arbitrary file type. This can be + // plaintext, image, video, audio, etc. + File file = 6; + + // Text represents a string inputs. + google.protobuf.StringValue text = 7; + + // JSON is represented by using google.protobuf.Value. + // see https://github.com/protocolbuffers/protobuf/blob/main/src/google/protobuf/struct.proto + google.protobuf.Value json = 8; + + // Multipart represents a multipart message. + // It comprises of a mapping from given type name to a subset of aforementioned types. + Multipart multipart = 9; + + // serialized_bytes is for data serialized in BentoML's internal serialization format. + bytes serialized_bytes = 2; + } + // Tensor is similiar to ndarray but with a name + // We are reserving it for now for future use. + // repeated Tensor tensors = 4; + reserved 4, 10 to 13; +} + +// Part represents possible value types for multipart message. +// These are the same as the types in Request message. +message Part { + oneof representation { + // NDArray represents a n-dimensional array of arbitrary type. + NDArray ndarray = 1; + + // DataFrame represents any tabular data type. We are using + // DataFrame as a trivial representation for tabular type. + DataFrame dataframe = 3; + + // Series portrays a series of values. This can be used for + // representing Series types in tabular data. + Series series = 5; + + // File represents for any arbitrary file type. This can be + // plaintext, image, video, audio, etc. + File file = 6; + + // Text represents a string inputs. + google.protobuf.StringValue text = 7; + + // JSON is represented by using google.protobuf.Value. + // see https://github.com/protocolbuffers/protobuf/blob/main/src/google/protobuf/struct.proto + google.protobuf.Value json = 8; + + // serialized_bytes is for data serialized in BentoML's internal serialization format. + bytes serialized_bytes = 4; + } + + // Tensor is similiar to ndarray but with a name + // We are reserving it for now for future use. + // Tensor tensors = 4; + reserved 2, 9 to 13; +} + +// Multipart represents a multipart message. +// It comprises of a mapping from given type name to a subset of aforementioned types. +message Multipart { + map fields = 1; +} + +// File represents for any arbitrary file type. This can be +// plaintext, image, video, audio, etc. +message File { + // optional file type, let it be csv, text, parquet, etc. + // v1alpha1 uses 1 as FileType enum. + optional string kind = 3; + // contents of file as bytes. + bytes content = 2; +} + +// DataFrame represents any tabular data type. We are using +// DataFrame as a trivial representation for tabular type. +// This message carries given implementation of tabular data based on given orientation. +// TODO: support index, records, etc. +message DataFrame { + // columns name + repeated string column_names = 1; + + // columns orient. + // { column ↠ { index ↠ value } } + repeated Series columns = 2; +} + +// Series portrays a series of values. This can be used for +// representing Series types in tabular data. +message Series { + // A bool parameter value + repeated bool bool_values = 1 [packed = true]; + + // A float parameter value + repeated float float_values = 2 [packed = true]; + + // A int32 parameter value + repeated int32 int32_values = 3 [packed = true]; + + // A int64 parameter value + repeated int64 int64_values = 6 [packed = true]; + + // A string parameter value + repeated string string_values = 5; + + // represents a double parameter value. + repeated double double_values = 4 [packed = true]; +} + +// NDArray represents a n-dimensional array of arbitrary type. +message NDArray { + // Represents data type of a given array. + enum DType { + // Represents a None type. + DTYPE_UNSPECIFIED = 0; + + // Represents an float type. + DTYPE_FLOAT = 1; + + // Represents an double type. + DTYPE_DOUBLE = 2; + + // Represents a bool type. + DTYPE_BOOL = 3; + + // Represents an int32 type. + DTYPE_INT32 = 4; + + // Represents an int64 type. + DTYPE_INT64 = 5; + + // Represents a uint32 type. + DTYPE_UINT32 = 6; + + // Represents a uint64 type. + DTYPE_UINT64 = 7; + + // Represents a string type. + DTYPE_STRING = 8; + } + + // DTYPE is the data type of given array + DType dtype = 1; + + // shape is the shape of given array. + repeated int32 shape = 2; + + // represents a string parameter value. + repeated string string_values = 5; + + // represents a float parameter value. + repeated float float_values = 3 [packed = true]; + + // represents a double parameter value. + repeated double double_values = 4 [packed = true]; + + // represents a bool parameter value. + repeated bool bool_values = 6 [packed = true]; + + // represents a int32 parameter value. + repeated int32 int32_values = 7 [packed = true]; + + // represents a int64 parameter value. + repeated int64 int64_values = 8 [packed = true]; + + // represents a uint32 parameter value. + repeated uint32 uint32_values = 9 [packed = true]; + + // represents a uint64 parameter value. + repeated uint64 uint64_values = 10 [packed = true]; +} diff --git a/openllm-client/pyproject.toml b/openllm-client/pyproject.toml index 573f73cf..988573ec 100644 --- a/openllm-client/pyproject.toml +++ b/openllm-client/pyproject.toml @@ -57,7 +57,7 @@ keywords = [ "PyTorch", "Transformers", ] -dependencies = ["openllm-core", "httpx"] +dependencies = ["orjson", "httpx", "attrs>=23.1.0", "cattrs>=23.1.0"] license = "Apache-2.0" name = "openllm-client" requires-python = ">=3.8" @@ -72,7 +72,7 @@ Tracker = "https://github.com/bentoml/OpenLLM/issues" Twitter = "https://twitter.com/bentomlai" [project.optional-dependencies] full = ["openllm-client[grpc,agents]"] -grpc = ["bentoml[grpc]>=1.0.25"] +grpc = ["bentoml[grpc]>=1.1.6"] agents = ["transformers[agents]>=4.30", "diffusers", "soundfile"] [tool.hatch.version] @@ -97,7 +97,13 @@ allow-direct-references = true only-include = ["src/openllm_client"] sources = ["src"] [tool.hatch.build.targets.sdist] -exclude = ["/.git_archival.txt", "tests", "/.python-version-default"] +exclude = [ + "/.git_archival.txt", + "tests", + "/.python-version-default", + "/generate-grpc-stubs", + "/dev.Dockerfile", +] [tool.hatch.build.targets.wheel.hooks.mypyc] dependencies = [ "hatch-mypyc==0.16.0", diff --git a/openllm-client/src/openllm_client/__init__.py b/openllm-client/src/openllm_client/__init__.py index 1bb51e3d..1147e6e7 100644 --- a/openllm-client/src/openllm_client/__init__.py +++ b/openllm-client/src/openllm_client/__init__.py @@ -1,9 +1,7 @@ from __future__ import annotations -from . import benmin as benmin -from ._base import BaseAsyncClient as BaseAsyncClient -from ._base import BaseClient as BaseClient -from .client import AsyncGrpcClient as AsyncGrpcClient -from .client import AsyncHTTPClient as AsyncHTTPClient -from .client import GrpcClient as GrpcClient -from .client import HTTPClient as HTTPClient +from ._http import AsyncHTTPClient as AsyncHTTPClient +from ._http import HTTPClient as HTTPClient + +# from ._grpc import GrpcClient as GrpcClient +# from ._grpc import AsyncGrpcClient as AsyncGrpcClient diff --git a/openllm-client/src/openllm_client/_base.py b/openllm-client/src/openllm_client/_base.py deleted file mode 100644 index 268c0176..00000000 --- a/openllm-client/src/openllm_client/_base.py +++ /dev/null @@ -1,298 +0,0 @@ -# mypy: disable-error-code="override,no-redef" -from __future__ import annotations -import abc -import functools -import logging -import typing as t - -from http import HTTPStatus -from urllib.parse import urljoin - -import attr -import httpx -import orjson - -import openllm_core - -from openllm_core._typing_compat import LiteralString -from openllm_core._typing_compat import overload -from openllm_core.utils import bentoml_cattr -from openllm_core.utils import ensure_exec_coro -from openllm_core.utils import is_transformers_available - -from .benmin import AsyncClient as AsyncBentoClient -from .benmin import Client as BentoClient - -if t.TYPE_CHECKING: - import transformers - - from openllm_core._typing_compat import DictStrAny - from openllm_core._typing_compat import LiteralBackend - -logger = logging.getLogger(__name__) - -@attr.define(slots=False, init=False) -class _ClientAttr: - _address: str - _timeout: float = attr.field(default=30) - _api_version: str = attr.field(default='v1') - - def __init__(self, address: str, timeout: float = 30, api_version: str = 'v1'): - self.__attrs_init__(address, timeout, api_version) - - @abc.abstractmethod - def call(self, api_name: str, *args: t.Any, **attrs: t.Any) -> t.Any: - raise NotImplementedError - - @abc.abstractmethod - def _run_hf_agent(self, *args: t.Any, **kwargs: t.Any) -> t.Any: - raise NotImplementedError - - @overload - @abc.abstractmethod - def query(self, prompt: str, *, return_response: t.Literal['processed'], **attrs: t.Any) -> str: - ... - - @overload - @abc.abstractmethod - def query(self, prompt: str, *, return_response: t.Literal['raw'], **attrs: t.Any) -> DictStrAny: - ... - - @overload - @abc.abstractmethod - def query(self, prompt: str, *, return_response: t.Literal['attrs'], **attrs: t.Any) -> openllm_core.GenerationOutput: - ... - - @abc.abstractmethod - def query(self, prompt: str, return_response: t.Literal['attrs', 'raw', 'processed'] = 'processed', **attrs: t.Any) -> t.Any: - raise NotImplementedError - - # NOTE: Scikit interface - @overload - @abc.abstractmethod - def predict(self, prompt: str, *, return_response: t.Literal['processed'], **attrs: t.Any) -> str: - ... - - @overload - @abc.abstractmethod - def predict(self, prompt: str, *, return_response: t.Literal['raw'], **attrs: t.Any) -> DictStrAny: - ... - - @overload - @abc.abstractmethod - def predict(self, prompt: str, *, return_response: t.Literal['attrs'], **attrs: t.Any) -> openllm_core.GenerationOutput: - ... - - @abc.abstractmethod - def predict(self, prompt: str, **attrs: t.Any) -> t.Any: - raise NotImplementedError - - @functools.cached_property - def _hf_agent(self) -> transformers.HfAgent: - if not is_transformers_available(): - raise RuntimeError("transformers is required to use HF agent. Install with 'pip install \"openllm-client[agents]\"'.") - if not self.supports_hf_agent: - raise RuntimeError(f'{self.model_name} ({self.backend}) does not support running HF agent.') - import transformers - return transformers.HfAgent(urljoin(self._address, '/hf/agent')) - - @property - def _metadata(self) -> t.Any: - return self.call('metadata') - - @property - def model_name(self) -> str: - try: - return self._metadata['model_name'] - except KeyError: - raise RuntimeError('Malformed service endpoint. (Possible malicious)') from None - - @property - def model_id(self) -> str: - try: - return self._metadata['model_id'] - except KeyError: - raise RuntimeError('Malformed service endpoint. (Possible malicious)') from None - - @property - def backend(self) -> LiteralBackend: - try: - return self._metadata['backend'] - except KeyError: - raise RuntimeError('Malformed service endpoint. (Possible malicious)') from None - - @property - def timeout(self) -> int: - try: - return self._metadata['timeout'] - except KeyError: - raise RuntimeError('Malformed service endpoint. (Possible malicious)') from None - - @property - def configuration(self) -> dict[str, t.Any]: - try: - return orjson.loads(self._metadata['configuration']) - except KeyError: - raise RuntimeError('Malformed service endpoint. (Possible malicious)') from None - - @property - def supports_embeddings(self) -> bool: - try: - return self._metadata.get('supports_embeddings', False) - except KeyError: - raise RuntimeError('Malformed service endpoint. (Possible malicious)') from None - - @property - def supports_hf_agent(self) -> bool: - try: - return self._metadata.get('supports_hf_agent', False) - except KeyError: - raise RuntimeError('Malformed service endpoint. (Possible malicious)') from None - - @property - def config(self) -> openllm_core.LLMConfig: - return openllm_core.AutoConfig.for_model(self.model_name).model_construct_env(**self.configuration) - - @functools.cached_property - def inner(self) -> t.Any: - raise NotImplementedError("'inner' client is not implemented.") - -class _Client(_ClientAttr): - _host: str - _port: str - - def call(self, api_name: str, *args: t.Any, **attrs: t.Any) -> t.Any: - return self.inner.call(f'{api_name}_{self._api_version}', *args, **attrs) - - def health(self) -> t.Any: - return self.inner.health() - - @functools.cached_property - def inner(self) -> BentoClient: - BentoClient.wait_until_server_ready(self._address, timeout=self._timeout) - return BentoClient.from_url(self._address) - - # Agent integration - def ask_agent(self, task: str, *, return_code: bool = False, remote: bool = False, agent_type: LiteralString = 'hf', **attrs: t.Any) -> t.Any: - if agent_type == 'hf': return self._run_hf_agent(task, return_code=return_code, remote=remote, **attrs) - else: raise RuntimeError(f"Unknown 'agent_type={agent_type}'") - - def _run_hf_agent(self, *args: t.Any, **kwargs: t.Any) -> t.Any: - if len(args) > 1: raise ValueError("'args' should only take one positional argument.") - task = kwargs.pop('task', args[0]) - return_code = kwargs.pop('return_code', False) - remote = kwargs.pop('remote', False) - try: - return self._hf_agent.run(task, return_code=return_code, remote=remote, **kwargs) - except Exception as err: - logger.error('Exception caught while sending instruction to HF agent: %s', err, exc_info=err) - logger.info("Tip: LLMServer at '%s' might not support 'generate_one'.", self._address) - -class _AsyncClient(_ClientAttr): - _host: str - _port: str - - def __init__(self, address: str, timeout: float = 30): - self._address, self._timeout = address, timeout - - async def call(self, api_name: str, *args: t.Any, **attrs: t.Any) -> t.Any: - return await self.inner.call(f'{api_name}_{self._api_version}', *args, **attrs) - - async def health(self) -> t.Any: - return await self.inner.health() - - @functools.cached_property - def inner(self) -> AsyncBentoClient: - ensure_exec_coro(AsyncBentoClient.wait_until_server_ready(self._address, timeout=self._timeout)) - return ensure_exec_coro(AsyncBentoClient.from_url(self._address)) - - # Agent integration - async def ask_agent(self, task: str, *, return_code: bool = False, remote: bool = False, agent_type: LiteralString = 'hf', **attrs: t.Any) -> t.Any: - if agent_type == 'hf': return await self._run_hf_agent(task, return_code=return_code, remote=remote, **attrs) - else: raise RuntimeError(f"Unknown 'agent_type={agent_type}'") - - async def _run_hf_agent(self, *args: t.Any, **kwargs: t.Any) -> t.Any: - if len(args) > 1: raise ValueError("'args' should only take one positional argument.") - from transformers.tools.agents import clean_code_for_run - from transformers.tools.agents import get_tool_creation_code - from transformers.tools.agents import resolve_tools - from transformers.tools.python_interpreter import evaluate - - task = kwargs.pop('task', args[0]) - return_code = kwargs.pop('return_code', False) - remote = kwargs.pop('remote', False) - stop = ['Task:'] - prompt = t.cast(str, self._hf_agent.format_prompt(task)) - async with httpx.AsyncClient(timeout=httpx.Timeout(self.timeout)) as client: - response = await client.post(self._hf_agent.url_endpoint, json={'inputs': prompt, 'parameters': {'max_new_tokens': 200, 'return_full_text': False, 'stop': stop}}) - if response.status_code != HTTPStatus.OK: raise ValueError(f'Error {response.status_code}: {response.json()}') - - result = response.json()[0]['generated_text'] - # Inference API returns the stop sequence - for stop_seq in stop: - if result.endswith(stop_seq): - result = result[:-len(stop_seq)] - break - # the below have the same logic as agent.run API - explanation, code = clean_code_for_run(result) - self._hf_agent.log(f'==Explanation from the agent==\n{explanation}') - self._hf_agent.log(f'\n\n==Code generated by the agent==\n{code}') - if not return_code: - self._hf_agent.log('\n\n==Result==') - self._hf_agent.cached_tools = resolve_tools(code, self._hf_agent.toolbox, remote=remote, cached_tools=self._hf_agent.cached_tools) - return evaluate(code, self._hf_agent.cached_tools, state=kwargs.copy()) - else: - tool_code = get_tool_creation_code(code, self._hf_agent.toolbox, remote=remote) - return f'{tool_code}\n{code}' - -class BaseClient(_Client): - def chat(self, prompt: str, history: list[str], **attrs: t.Any) -> str: - raise NotImplementedError - - def embed(self, prompt: t.Sequence[str] | str) -> openllm_core.EmbeddingsOutput: - return openllm_core.EmbeddingsOutput(**self.call('embeddings', list([prompt] if isinstance(prompt, str) else prompt))) - - def predict(self, prompt: str, **attrs: t.Any) -> openllm_core.GenerationOutput | DictStrAny | str: - return self.query(prompt, **attrs) - - def query(self, prompt: str, return_response: t.Literal['attrs', 'raw', 'processed'] = 'processed', **attrs: t.Any) -> t.Any: - return_raw_response = attrs.pop('return_raw_response', None) - if return_raw_response is not None: - logger.warning("'return_raw_response' is now deprecated. Please use 'return_response=\"raw\"' instead.") - if return_raw_response is True: return_response = 'raw' - return_attrs = attrs.pop('return_attrs', None) - if return_attrs is not None: - logger.warning("'return_attrs' is now deprecated. Please use 'return_response=\"attrs\"' instead.") - if return_attrs is True: return_response = 'attrs' - use_default_prompt_template = attrs.pop('use_default_prompt_template', False) - prompt, generate_kwargs, postprocess_kwargs = self.config.sanitize_parameters(prompt, use_default_prompt_template=use_default_prompt_template, **attrs) - r = openllm_core.GenerationOutput(**self.call('generate', openllm_core.GenerationInput(prompt=prompt, llm_config=self.config.model_construct_env(**generate_kwargs)).model_dump())) - if return_response == 'attrs': return r - elif return_response == 'raw': return bentoml_cattr.unstructure(r) - else: return self.config.postprocess_generate(prompt, r.responses, **postprocess_kwargs) - -class BaseAsyncClient(_AsyncClient): - async def chat(self, prompt: str, history: list[str], **attrs: t.Any) -> str: - raise NotImplementedError - - async def embed(self, prompt: t.Sequence[str] | str) -> openllm_core.EmbeddingsOutput: - return openllm_core.EmbeddingsOutput(**(await self.call('embeddings', list([prompt] if isinstance(prompt, str) else prompt)))) - - async def predict(self, prompt: str, **attrs: t.Any) -> t.Any: - return await self.query(prompt, **attrs) - - async def query(self, prompt: str, return_response: t.Literal['attrs', 'raw', 'processed'] = 'processed', **attrs: t.Any) -> t.Any: - return_raw_response = attrs.pop('return_raw_response', None) - if return_raw_response is not None: - logger.warning("'return_raw_response' is now deprecated. Please use 'return_response=\"raw\"' instead.") - if return_raw_response is True: return_response = 'raw' - return_attrs = attrs.pop('return_attrs', None) - if return_attrs is not None: - logger.warning("'return_attrs' is now deprecated. Please use 'return_response=\"attrs\"' instead.") - if return_attrs is True: return_response = 'attrs' - use_default_prompt_template = attrs.pop('use_default_prompt_template', False) - prompt, generate_kwargs, postprocess_kwargs = self.config.sanitize_parameters(prompt, use_default_prompt_template=use_default_prompt_template, **attrs) - r = openllm_core.GenerationOutput(**(await self.call('generate', openllm_core.GenerationInput(prompt=prompt, llm_config=self.config.model_construct_env(**generate_kwargs)).model_dump()))) - if return_response == 'attrs': return r - elif return_response == 'raw': return bentoml_cattr.unstructure(r) - else: return self.config.postprocess_generate(prompt, r.responses, **postprocess_kwargs) diff --git a/openllm-client/src/openllm_client/_http.py b/openllm-client/src/openllm_client/_http.py new file mode 100644 index 00000000..4e6f425a --- /dev/null +++ b/openllm-client/src/openllm_client/_http.py @@ -0,0 +1,137 @@ +from __future__ import annotations +import typing as t + +from urllib.parse import urlparse + +import attr +import httpx +import orjson + +from ._schemas import Request +from ._schemas import Response +from ._schemas import StreamResponse + +def _address_validator(_: t.Any, attr: attr.Attribute[t.Any], value: str) -> None: + if not isinstance(value, str): raise TypeError(f'{attr.name} must be a string') + if not urlparse(value).netloc: raise ValueError(f'{attr.name} must be a valid URL') + +@attr.define +class HTTPClient: + address: str = attr.field(validator=_address_validator, converter=lambda addr: addr if '://' in addr else 'http://' + addr) + api_version: str = 'v1' + timeout: int = 30 + client_args: t.Dict[str, t.Any] = attr.field(factory=dict) + __metadata: dict[str, t.Any] = attr.field(default=None) + __config: dict[str, t.Any] = attr.field(default=None) + _inner: httpx.Client = attr.field(init=False, repr=False) + + def __attrs_post_init__(self) -> None: + self._inner = httpx.Client(base_url=self.address, timeout=self.timeout, **self.client_args) + + def _metadata(self) -> dict[str, t.Any]: + if self.__metadata is None: self.__metadata = self._inner.post(self._build_endpoint('metadata')).json() + return self.__metadata + + def _config(self) -> dict[str, t.Any]: + if self.__config is None: + config = orjson.loads(self._metadata()['configuration']) + generation_config = config.pop('generation_config') + self.__config = {**config, **generation_config} + return self.__config + + def health(self): + return self._inner.get('/readyz') + + def _build_endpoint(self, endpoint: str): + return '/' + f'{self.api_version}/{endpoint}' + + def query(self, prompt: str, **attrs: t.Any) -> Response: + req = Request(prompt=self._metadata()['prompt_template'].format(system_message=self._metadata()['system_message'], instruction=prompt), llm_config={**self._config(), **attrs}) + r = self._inner.post(self._build_endpoint('generate'), json=req.json(), **self.client_args) + payload = r.json() + if r.status_code != 200: raise ValueError("Failed to get generation from '/v1/generate'. Check server logs for more details.") + return Response(**payload) + + def generate(self, prompt: str, **attrs: t.Any) -> Response: + return self.query(prompt, **attrs) + + def generate_stream(self, prompt: str, **attrs: t.Any) -> t.Iterator[StreamResponse]: + req = Request(prompt=self._metadata()['prompt_template'].format(system_message=self._metadata()['system_message'], instruction=prompt), llm_config={**self._config(), **attrs}) + with self._inner.stream('POST', self._build_endpoint('generate_stream'), json=req.json(), **self.client_args) as r: + for payload in r.iter_bytes(): + # Skip line + payload = payload.decode('utf-8') + yield StreamResponse(text=payload) + # TODO: make it SSE correct for streaming + # if payload == b"\n": continue + # payload = payload.decode("utf-8") + # if payload.startswith("data:"): + # json_payload = orjson.loads(payload.lstrip('data:').rstrip("\n")) + # print(json_payload) + # try: resp = StreamResponse(text=json_payload) + # except Exception as e: print(e) + # yield resp + + def __del__(self) -> None: + self._inner.close() + +@attr.define +class AsyncHTTPClient: + address: str = attr.field(validator=_address_validator, converter=lambda addr: addr if '://' in addr else 'http://' + addr) + api_version: str = 'v1' + timeout: int = 30 + client_args: t.Dict[str, t.Any] = attr.field(factory=dict) + __metadata: dict[str, t.Any] = attr.field(default=None) + __config: dict[str, t.Any] = attr.field(default=None) + _inner: httpx.AsyncClient = attr.field(init=False, repr=False) + + def __attrs_post_init__(self) -> None: + self._inner = httpx.AsyncClient(base_url=self.address, timeout=self.timeout, **self.client_args) + + async def _metadata(self) -> dict[str, t.Any]: + if self.__metadata is None: self.__metadata = (await self._inner.post(self._build_endpoint('metadata'))).json() + return self.__metadata + + async def _config(self) -> dict[str, t.Any]: + if self.__config is None: + config = orjson.loads((await self._metadata())['configuration']) + generation_config = config.pop('generation_config') + self.__config = {**config, **generation_config} + return self.__config + + async def health(self): + return await self._inner.get('/readyz') + + def _build_endpoint(self, endpoint: str): + return '/' + f'{self.api_version}/{endpoint}' + + async def query(self, prompt: str, **attrs: t.Any) -> Response: + _meta, _config = await self._metadata(), await self._config() + client = httpx.AsyncClient(base_url=self.address, timeout=self.timeout, **self.client_args) + req = Request(prompt=_meta['prompt_template'].format(system_message=_meta['system_message'], instruction=prompt), llm_config={**_config, **attrs}) + r = await client.post(self._build_endpoint('generate'), json=req.json(), **self.client_args) + payload = r.json() + if r.status_code != 200: raise ValueError("Failed to get generation from '/v1/generate'. Check server logs for more details.") + return Response(**payload) + + async def generate(self, prompt: str, **attrs: t.Any) -> Response: + return await self.query(prompt, **attrs) + + async def generate_stream(self, prompt: str, **attrs: t.Any) -> t.AsyncGenerator[StreamResponse, t.Any]: + _meta, _config = await self._metadata(), await self._config() + client = httpx.AsyncClient(base_url=self.address, timeout=self.timeout, **self.client_args) + req = Request(prompt=_meta['prompt_template'].format(system_message=_meta['system_message'], instruction=prompt), llm_config={**_config, **attrs}) + async with client.stream('POST', self._build_endpoint('generate_stream'), json=req.json(), **self.client_args) as r: + async for payload in r.aiter_bytes(): + # Skip line + payload = payload.decode('utf-8') + yield StreamResponse(text=payload) + # TODO: make it SSE correct for streaming + # if payload == b"\n": continue + # payload = payload.decode("utf-8") + # if payload.startswith("data:"): + # json_payload = orjson.loads(payload.lstrip('data:').rstrip("\n")) + # print(json_payload) + # try: resp = StreamResponse(text=json_payload) + # except Exception as e: print(e) + # yield resp diff --git a/openllm-client/src/openllm_client/_schemas.py b/openllm-client/src/openllm_client/_schemas.py new file mode 100644 index 00000000..32594956 --- /dev/null +++ b/openllm-client/src/openllm_client/_schemas.py @@ -0,0 +1,24 @@ +from __future__ import annotations +import typing as t + +import attr +import cattr + +class _Mixin: + def json(self) -> dict[str, t.Any]: + if not attr.has(self.__class__): raise TypeError(f'Class {self.__class__} must be attr class') + return cattr.unstructure(self) + +@attr.define +class Request(_Mixin): + prompt: str + llm_config: t.Dict[str, t.Any] + +@attr.define +class Response(_Mixin): + responses: t.List[str] + configuration: t.Dict[str, t.Any] + +@attr.define +class StreamResponse(_Mixin): + text: str diff --git a/openllm-client/src/openllm_client/benmin/__init__.py b/openllm-client/src/openllm_client/benmin/__init__.py deleted file mode 100644 index a731696f..00000000 --- a/openllm-client/src/openllm_client/benmin/__init__.py +++ /dev/null @@ -1,124 +0,0 @@ -"""This holds a simple client implementation, somewhat similar to `bentoml.client`. - -This module is subjected to change and to be merged upstream to BentoML. - -```python -import openllm_client - -client = openllm_client.benmin.Client.from_url("http://localhost:3000") -``` - -The client implementation won't include a dynamic assignment of the service endpoints, rather this should be called -via `client.call` or `await client.call`. -""" -from __future__ import annotations -import typing as t - -from abc import abstractmethod - -import attr -import httpx - -import bentoml - -if t.TYPE_CHECKING: - from bentoml._internal.service.inference_api import InferenceAPI - -__all__ = ['Client', 'AsyncClient'] - -@attr.define(init=False) -class Client: - server_url: str - endpoints: t.List[str] - svc: bentoml.Service - timeout: int = attr.field(default=30) - - def __init__(self, server_url: str, svc: bentoml.Service, **kwargs: t.Any) -> None: - if len(svc.apis) == 0: raise bentoml.exceptions.BentoMLException('No APIs was found while constructing clients.') - self.__attrs_init__(server_url=server_url, endpoints=list(svc.apis), svc=svc) - for it, val in kwargs.items(): - object.__setattr__(self, it, val) - - def call(self, bentoml_api_name: str, data: t.Any = None, **kwargs: t.Any) -> t.Any: - return self._call(data, _inference_api=self.svc.apis[bentoml_api_name], **kwargs) - - @abstractmethod - def _call(self, data: t.Any, /, *, _inference_api: InferenceAPI[t.Any], **kwargs: t.Any) -> t.Any: - raise NotImplementedError - - @abstractmethod - def health(self) -> t.Any: - raise NotImplementedError - - @classmethod - def from_url(cls, url: str, **kwargs: t.Any) -> Client: - try: - from ._http import HttpClient - return HttpClient.from_url(url, **kwargs) - except httpx.RemoteProtocolError: - from ._grpc import GrpcClient - return GrpcClient.from_url(url, **kwargs) - except Exception as err: - raise bentoml.exceptions.BentoMLException('Failed to create client from url: %s' % url) from err - - @staticmethod - def wait_until_server_ready(server: str, port: int | None = None, timeout: float = 30, **kwargs: t.Any) -> None: - try: - from ._http import HttpClient - return HttpClient.wait_until_server_ready(server, port, timeout, **kwargs) - except httpx.RemoteProtocolError: - if port is None: - raise - from ._grpc import GrpcClient - return GrpcClient.wait_until_server_ready(server, port, timeout, **kwargs) - except Exception as err: - if port is not None: - raise bentoml.exceptions.BentoMLException('Failed to wait until server ready: %s:%d' % (server, port)) from err - else: - raise bentoml.exceptions.BentoMLException('Failed to wait until server ready: %s' % (server)) from err - -@attr.define(init=False) -class AsyncClient: - server_url: str - endpoints: t.List[str] - svc: bentoml.Service - timeout: int = attr.field(default=30) - - def __init__(self, server_url: str, svc: bentoml.Service, **kwargs: t.Any) -> None: - if len(svc.apis) == 0: raise bentoml.exceptions.BentoMLException('No APIs was found while constructing clients.') - self.__attrs_init__(server_url=server_url, endpoints=list(svc.apis), svc=svc) - for it, val in kwargs.items(): - object.__setattr__(self, it, val) - - async def call(self, bentoml_api_name: str, data: t.Any = None, **kwargs: t.Any) -> t.Any: - return await self._call(data, _inference_api=self.svc.apis[bentoml_api_name], **kwargs) - - @abstractmethod - async def _call(self, data: t.Any, /, *, _inference_api: InferenceAPI[t.Any], **kwargs: t.Any) -> t.Any: - raise NotImplementedError - - @abstractmethod - async def health(self) -> t.Any: - raise NotImplementedError - - @classmethod - async def from_url(cls, url: str, **kwargs: t.Any) -> AsyncClient: - try: - from ._http import AsyncHttpClient - return await AsyncHttpClient.from_url(url, **kwargs) - except httpx.RemoteProtocolError: - from ._grpc import AsyncGrpcClient - return await AsyncGrpcClient.from_url(url, **kwargs) - except Exception as err: - raise bentoml.exceptions.BentoMLException('Failed to create client from url: %s' % url) from err - - @staticmethod - async def wait_until_server_ready(host: str, port: int, timeout: float = 30, **kwargs: t.Any) -> None: - try: - from ._http import AsyncHttpClient - await AsyncHttpClient.wait_until_server_ready(host, port, timeout, **kwargs) - except httpx.RemoteProtocolError: - from ._grpc import AsyncGrpcClient - await AsyncGrpcClient.wait_until_server_ready(host, port, timeout, **kwargs) - except Exception as err: - raise bentoml.exceptions.BentoMLException('Failed to wait until server ready: %s:%d' % (host, port)) from err diff --git a/openllm-client/src/openllm_client/benmin/_grpc.py b/openllm-client/src/openllm_client/benmin/_grpc.py deleted file mode 100644 index 9d381d4a..00000000 --- a/openllm-client/src/openllm_client/benmin/_grpc.py +++ /dev/null @@ -1,291 +0,0 @@ -# mypy: disable-error-code="no-redef" -from __future__ import annotations -import functools -import logging -import time -import typing as t - -import bentoml - -from bentoml._internal.service.inference_api import InferenceAPI -from bentoml.grpc.utils import import_generated_stubs -from bentoml.grpc.utils import load_from_file -from openllm_client.benmin import AsyncClient -from openllm_client.benmin import Client -from openllm_core._typing_compat import NotRequired -from openllm_core._typing_compat import overload -from openllm_core.utils import ensure_exec_coro -from openllm_core.utils import is_grpc_available -from openllm_core.utils import is_grpc_health_available - -if not is_grpc_available() or not is_grpc_health_available(): - raise ImportError("gRPC is required to use gRPC client. Install with 'pip install \"openllm-client[grpc]\"'.") -import grpc -import grpc_health.v1.health_pb2 as pb_health -import grpc_health.v1.health_pb2_grpc as services_health - -from google.protobuf import json_format -from grpc import aio - -pb, services = import_generated_stubs('v1') - -if t.TYPE_CHECKING: - from bentoml.grpc.v1.service_pb2 import ServiceMetadataResponse - -logger = logging.getLogger(__name__) - -class ClientCredentials(t.TypedDict): - root_certificates: NotRequired[t.Union[bytes, str]] - private_key: NotRequired[t.Union[bytes, str]] - certificate_chain: NotRequired[t.Union[bytes, str]] - -@overload -def dispatch_channel(server_url: str, - typ: t.Literal['async'], - ssl: bool = ..., - ssl_client_credentials: ClientCredentials | None = ..., - options: t.Any | None = ..., - compression: grpc.Compression | None = ..., - interceptors: t.Sequence[aio.ClientInterceptor] | None = ...) -> aio.Channel: - ... - -@overload -def dispatch_channel(server_url: str, - typ: t.Literal['sync'], - ssl: bool = ..., - ssl_client_credentials: ClientCredentials | None = ..., - options: t.Any | None = ..., - compression: grpc.Compression | None = ..., - interceptors: t.Sequence[aio.ClientInterceptor] | None = None) -> grpc.Channel: - ... - -def dispatch_channel(server_url: str, - typ: t.Literal['async', 'sync'] = 'sync', - ssl: bool = False, - ssl_client_credentials: ClientCredentials | None = None, - options: t.Any | None = None, - compression: grpc.Compression | None = None, - interceptors: t.Sequence[aio.ClientInterceptor] | None = None) -> aio.Channel | grpc.Channel: - credentials = None - if ssl: - if ssl_client_credentials is None: raise RuntimeError("'ssl=True' requires 'ssl_client_credentials'") - credentials = grpc.ssl_channel_credentials(**{k: load_from_file(v) if isinstance(v, str) else v for k, v in ssl_client_credentials.items()}) - - if typ == 'async' and ssl: - return aio.secure_channel(server_url, credentials=credentials, options=options, compression=compression, interceptors=interceptors) - elif typ == 'async': - return aio.insecure_channel(server_url, options=options, compression=compression, interceptors=interceptors) - elif typ == 'sync' and ssl: - return grpc.secure_channel(server_url, credentials=credentials, options=options, compression=compression) - elif typ == 'sync': - return grpc.insecure_channel(server_url, options=options, compression=compression) - else: - raise ValueError(f'Unknown type: {typ}') - -class GrpcClient(Client): - ssl: bool - ssl_client_credentials: t.Optional[ClientCredentials] - options: t.Any - compression: t.Optional[grpc.Compression] - - def __init__(self, - server_url: str, - svc: bentoml.Service, # gRPC specific options - ssl: bool = False, - options: t.Any | None = None, - compression: grpc.Compression | None = None, - ssl_client_credentials: ClientCredentials | None = None, - **kwargs: t.Any) -> None: - self.ssl, self.ssl_client_credentials, self.options, self.compression = ssl, ssl_client_credentials, options, compression - super().__init__(server_url, svc, **kwargs) - - @functools.cached_property - def inner(self) -> grpc.Channel: - if self.ssl: - if self.ssl_client_credentials is None: raise RuntimeError("'ssl=True' requires 'ssl_client_credentials'") - credentials = grpc.ssl_channel_credentials(**{k: load_from_file(v) if isinstance(v, str) else v for k, v in self.ssl_client_credentials.items()}) - return grpc.secure_channel(self.server_url, credentials=credentials, options=self.options, compression=self.compression) - return grpc.insecure_channel(self.server_url, options=self.options, compression=self.compression) - - @staticmethod - def wait_until_server_ready(host: str, port: int, timeout: float = 30, check_interval: int = 1, **kwargs: t.Any) -> None: - with dispatch_channel(f"{host.replace(r'localhost', '0.0.0.0')}:{port}", - typ='sync', - options=kwargs.get('options', None), - compression=kwargs.get('compression', None), - ssl=kwargs.get('ssl', False), - ssl_client_credentials=kwargs.get('ssl_client_credentials', None)) as channel: - req = pb_health.HealthCheckRequest() - req.service = 'bentoml.grpc.v1.BentoService' - health_stub = services_health.HealthStub(channel) - start_time = time.time() - while time.time() - start_time < timeout: - try: - resp = health_stub.Check(req) - if resp.status == pb_health.HealthCheckResponse.SERVING: break - else: time.sleep(check_interval) - except grpc.RpcError: - logger.debug('Waiting for server to be ready...') - time.sleep(check_interval) - try: - resp = health_stub.Check(req) - if resp.status != pb_health.HealthCheckResponse.SERVING: - raise TimeoutError(f"Timed out waiting {timeout} seconds for server at '{host}:{port}' to be ready.") - except grpc.RpcError as err: - logger.error('Caught RpcError while connecting to %s:%s:\n', host, port) - logger.error(err) - raise - - @classmethod - def from_url(cls, url: str, **kwargs: t.Any) -> GrpcClient: - with dispatch_channel(url.replace(r'localhost', '0.0.0.0'), - typ='sync', - options=kwargs.get('options', None), - compression=kwargs.get('compression', None), - ssl=kwargs.get('ssl', False), - ssl_client_credentials=kwargs.get('ssl_client_credentials', None)) as channel: - metadata = t.cast( - 'ServiceMetadataResponse', - channel.unary_unary('/bentoml.grpc.v1.BentoService/ServiceMetadata', - request_serializer=pb.ServiceMetadataRequest.SerializeToString, - response_deserializer=pb.ServiceMetadataResponse.FromString)(pb.ServiceMetadataRequest())) - reflection = bentoml.Service(metadata.name) - for api in metadata.apis: - try: - reflection.apis[api.name] = InferenceAPI[t.Any](None, - bentoml.io.from_spec({ - 'id': api.input.descriptor_id, - 'args': json_format.MessageToDict(api.input.attributes).get('args', None) - }), - bentoml.io.from_spec({ - 'id': api.output.descriptor_id, - 'args': json_format.MessageToDict(api.output.attributes).get('args', None) - }), - name=api.name, - doc=api.docs) - except Exception as e: - logger.error('Failed to instantiate client for API %s: ', api.name, e) - return cls(url, reflection, **kwargs) - - def health(self) -> t.Any: - return services_health.HealthStub(self.inner).Check(pb_health.HealthCheckRequest(service='')) - - def _call(self, data: t.Any, /, *, _inference_api: InferenceAPI[t.Any], **kwargs: t.Any) -> t.Any: - channel_kwargs = {k: kwargs.pop(f'_grpc_channel_{k}', None) for k in {'timeout', 'metadata', 'credentials', 'wait_for_ready', 'compression'}} - if _inference_api.multi_input: - if data is not None: - raise ValueError(f"'{_inference_api.name}' takes multiple inputs, and thus required to pass as keyword arguments.") - fake_resp = ensure_exec_coro(_inference_api.input.to_proto(kwargs)) - else: - fake_resp = ensure_exec_coro(_inference_api.input.to_proto(data)) - api_fn = {v: k for k, v in self.svc.apis.items()} - stubs = services.BentoServiceStub(self.inner) - proto = stubs.Call(pb.Request(**{'api_name': api_fn[_inference_api], _inference_api.input.proto_fields[0]: fake_resp}), **channel_kwargs) - return ensure_exec_coro(_inference_api.output.from_proto(getattr(proto, proto.WhichOneof('content')))) - -class AsyncGrpcClient(AsyncClient): - ssl: bool - ssl_client_credentials: t.Optional[ClientCredentials] - options: aio.ChannelArgumentType - interceptors: t.Optional[t.Sequence[aio.ClientInterceptor]] - compression: t.Optional[grpc.Compression] - - def __init__(self, - server_url: str, - svc: bentoml.Service, # gRPC specific options - ssl: bool = False, - options: aio.ChannelArgumentType | None = None, - interceptors: t.Sequence[aio.ClientInterceptor] | None = None, - compression: grpc.Compression | None = None, - ssl_client_credentials: ClientCredentials | None = None, - **kwargs: t.Any) -> None: - self.ssl, self.ssl_client_credentials, self.options, self.interceptors, self.compression = ssl, ssl_client_credentials, options, interceptors, compression - super().__init__(server_url, svc, **kwargs) - - @functools.cached_property - def inner(self) -> aio.Channel: - if self.ssl: - if self.ssl_client_credentials is None: raise RuntimeError("'ssl=True' requires 'ssl_client_credentials'") - credentials = grpc.ssl_channel_credentials(**{k: load_from_file(v) if isinstance(v, str) else v for k, v in self.ssl_client_credentials.items()}) - return aio.secure_channel(self.server_url, credentials=credentials, options=self.options, compression=self.compression, interceptors=self.interceptors) - return aio.insecure_channel(self.server_url, options=self.options, compression=self.compression, interceptors=self.interceptors) - - @staticmethod - async def wait_until_server_ready(host: str, port: int, timeout: float = 30, check_interval: int = 1, **kwargs: t.Any) -> None: - async with dispatch_channel(f"{host.replace(r'localhost', '0.0.0.0')}:{port}", - typ='async', - options=kwargs.get('options', None), - compression=kwargs.get('compression', None), - ssl=kwargs.get('ssl', False), - ssl_client_credentials=kwargs.get('ssl_client_credentials', None)) as channel: - req = pb_health.HealthCheckRequest() - req.service = 'bentoml.grpc.v1.BentoService' - health_stub = services_health.HealthStub(channel) - start_time = time.time() - while time.time() - start_time < timeout: - try: - resp = health_stub.Check(req) - if resp.status == pb_health.HealthCheckResponse.SERVING: break - else: time.sleep(check_interval) - except grpc.RpcError: - logger.debug('Waiting for server to be ready...') - time.sleep(check_interval) - try: - resp = health_stub.Check(req) - if resp.status != pb_health.HealthCheckResponse.SERVING: - raise TimeoutError(f"Timed out waiting {timeout} seconds for server at '{host}:{port}' to be ready.") - except grpc.RpcError as err: - logger.error('Caught RpcError while connecting to %s:%s:\n', host, port) - logger.error(err) - raise - - @classmethod - async def from_url(cls, url: str, **kwargs: t.Any) -> AsyncGrpcClient: - async with dispatch_channel(url.replace(r'localhost', '0.0.0.0'), - typ='async', - options=kwargs.get('options', None), - compression=kwargs.get('compression', None), - ssl=kwargs.get('ssl', False), - ssl_client_credentials=kwargs.get('ssl_client_credentials', None), - interceptors=kwargs.get('interceptors', None)) as channel: - metadata = t.cast( - 'ServiceMetadataResponse', - channel.unary_unary('/bentoml.grpc.v1.BentoService/ServiceMetadata', - request_serializer=pb.ServiceMetadataRequest.SerializeToString, - response_deserializer=pb.ServiceMetadataResponse.FromString)(pb.ServiceMetadataRequest())) - reflection = bentoml.Service(metadata.name) - for api in metadata.apis: - try: - reflection.apis[api.name] = InferenceAPI[t.Any](None, - bentoml.io.from_spec({ - 'id': api.input.descriptor_id, - 'args': json_format.MessageToDict(api.input.attributes).get('args', None) - }), - bentoml.io.from_spec({ - 'id': api.output.descriptor_id, - 'args': json_format.MessageToDict(api.output.attributes).get('args', None) - }), - name=api.name, - doc=api.docs) - except Exception as e: - logger.error('Failed to instantiate client for API %s: ', api.name, e) - return cls(url, reflection, **kwargs) - - async def health(self) -> t.Any: - return await services_health.HealthStub(self.inner).Check(pb_health.HealthCheckRequest(service='')) - - async def _call(self, data: t.Any, /, *, _inference_api: InferenceAPI[t.Any], **kwargs: t.Any) -> t.Any: - channel_kwargs = {k: kwargs.pop(f'_grpc_channel_{k}', None) for k in {'timeout', 'metadata', 'credentials', 'wait_for_ready', 'compression'}} - state = self.inner.get_state(try_to_connect=True) - if state != grpc.ChannelConnectivity.READY: await self.inner.channel_ready() - if _inference_api.multi_input: - if data is not None: - raise ValueError(f"'{_inference_api.name}' takes multiple inputs, and thus required to pass as keyword arguments.") - fake_resp = await _inference_api.input.to_proto(kwargs) - else: - fake_resp = await _inference_api.input.to_proto(data) - api_fn = {v: k for k, v in self.svc.apis.items()} - async with self.inner: - stubs = services.BentoServiceStub(self.inner) - proto = await stubs.Call(pb.Request(**{'api_name': api_fn[_inference_api], _inference_api.input.proto_fields[0]: fake_resp}), **channel_kwargs) - return await _inference_api.output.from_proto(getattr(proto, proto.WhichOneof('content'))) diff --git a/openllm-client/src/openllm_client/benmin/_http.py b/openllm-client/src/openllm_client/benmin/_http.py deleted file mode 100644 index 86477e5a..00000000 --- a/openllm-client/src/openllm_client/benmin/_http.py +++ /dev/null @@ -1,201 +0,0 @@ -from __future__ import annotations -import asyncio -import functools -import logging -import time -import typing as t -import urllib.error - -from urllib.parse import urlparse - -import httpx -import orjson -import starlette.datastructures -import starlette.requests -import starlette.responses - -import bentoml - -from bentoml._internal.service.inference_api import InferenceAPI -from openllm_client.benmin import AsyncClient -from openllm_client.benmin import Client -from openllm_core.utils import ensure_exec_coro - -logger = logging.getLogger(__name__) - -class HttpClient(Client): - @functools.cached_property - def inner(self) -> httpx.Client: - if not urlparse(self.server_url).netloc: raise ValueError(f'Invalid server url: {self.server_url}') - return httpx.Client(base_url=self.server_url) - - @staticmethod - def wait_until_server_ready(host: str, port: int | None = None, timeout: float = 30, check_interval: int = 1, **kwargs: t.Any) -> None: - host = host if '://' in host else 'http://' + host - server = host if port is None else f'{host}:{port}' - logger.debug("Waiting for server @ '%s' to be ready...", server) - start = time.time() - while time.time() - start < timeout: - try: - with httpx.Client(base_url=server) as sess: - status = sess.get('/readyz').status_code - if status == 200: break - else: time.sleep(check_interval) - except (httpx.ConnectError, urllib.error.URLError, ConnectionError): - logger.debug('Server is not ready yet, retrying in %d seconds...', check_interval) - time.sleep(check_interval) - # Try once more and raise for exception - try: - with httpx.Client(base_url=server) as sess: - status = sess.get('/readyz').status_code - except httpx.HTTPStatusError as err: - logger.error('Failed to wait until server ready: %s', server) - logger.error(err) - raise - - def health(self) -> httpx.Response: - return self.inner.get('/readyz') - - @classmethod - def from_url(cls, url: str, **kwargs: t.Any) -> HttpClient: - url = url if '://' in url else 'http://' + url - with httpx.Client(base_url=url) as sess: - resp = sess.get('/docs.json') - if resp.status_code != 200: - raise ValueError(f'Failed to get OpenAPI schema from the server: {resp.status_code} {resp.reason_phrase}:\n{resp.content.decode()}') - _spec = orjson.loads(resp.content) - - reflection = bentoml.Service(_spec['info']['title']) - - for route, spec in _spec['paths'].items(): - for meth_spec in spec.values(): - if 'tags' in meth_spec and 'Service APIs' in meth_spec['tags']: - if 'x-bentoml-io-descriptor' not in meth_spec['requestBody']: - raise ValueError(f'Malformed BentoML spec received from BentoML server {url}') - if 'x-bentoml-io-descriptor' not in meth_spec['responses']['200']: - raise ValueError(f'Malformed BentoML spec received from BentoML server {url}') - if 'x-bentoml-name' not in meth_spec: - raise ValueError(f'Malformed BentoML spec received from BentoML server {url}') - try: - reflection.apis[meth_spec['x-bentoml-name']] = InferenceAPI[t.Any](None, - bentoml.io.from_spec(meth_spec['requestBody']['x-bentoml-io-descriptor']), - bentoml.io.from_spec(meth_spec['responses']['200']['x-bentoml-io-descriptor']), - name=meth_spec['x-bentoml-name'], - doc=meth_spec['description'], - route=route.lstrip('/')) - except Exception as e: - logger.error('Failed to instantiate client for API %s: ', meth_spec['x-bentoml-name'], e) - return cls(url, reflection) - - def _call(self, data: t.Any, /, *, _inference_api: InferenceAPI[t.Any], **kwargs: t.Any) -> t.Any: - # All gRPC kwargs should be popped out. - kwargs = {k: v for k, v in kwargs.items() if not k.startswith('_grpc_')} - if _inference_api.multi_input: - if data is not None: - raise ValueError(f"'{_inference_api.name}' takes multiple inputs, and thus required to pass as keyword arguments.") - fake_resp = ensure_exec_coro(_inference_api.input.to_http_response(kwargs, None)) - else: - fake_resp = ensure_exec_coro(_inference_api.input.to_http_response(data, None)) - - # XXX: hack around StreamingResponse, since now we only have Text, for metadata so it is fine to do this. - if isinstance(fake_resp, starlette.responses.StreamingResponse): body = None - else: body = fake_resp.body - - resp = self.inner.post('/' + _inference_api.route if not _inference_api.route.startswith('/') else _inference_api.route, - data=body, - headers={'content-type': fake_resp.headers['content-type']}, - timeout=self.timeout) - if resp.status_code != 200: raise ValueError(f'Error while making request: {resp.status_code}: {resp.content!s}') - fake_req = starlette.requests.Request(scope={'type': 'http'}) - headers = starlette.datastructures.Headers(headers=resp.headers) - fake_req._body = resp.content - # Request.headers sets a _headers variable. We will need to set this value to our fake request object. - fake_req._headers = headers - return ensure_exec_coro(_inference_api.output.from_http_request(fake_req)) - -class AsyncHttpClient(AsyncClient): - @functools.cached_property - def inner(self) -> httpx.AsyncClient: - if not urlparse(self.server_url).netloc: raise ValueError(f'Invalid server url: {self.server_url}') - return httpx.AsyncClient(base_url=self.server_url) - - @staticmethod - async def wait_until_server_ready(host: str, port: int, timeout: float = 30, check_interval: int = 1, **kwargs: t.Any) -> None: - host = host if '://' in host else 'http://' + host - server = host if port is None else f'{host}:{port}' - logger.debug("Waiting for server @ '%s' to be ready...", server) - start = time.time() - while time.time() - start < timeout: - try: - async with httpx.AsyncClient(base_url=server) as sess: - resp = await sess.get('/readyz') - if resp.status_code == 200: break - else: await asyncio.sleep(check_interval) - except (httpx.ConnectError, urllib.error.URLError, ConnectionError): - logger.debug('Server is not ready yet, retrying in %d seconds...', check_interval) - await asyncio.sleep(check_interval) - # Try once more and raise for exception - async with httpx.AsyncClient(base_url=server) as sess: - resp = await sess.get('/readyz') - if resp.status_code != 200: - raise TimeoutError(f"Timeout while waiting for server @ '{server}' to be ready: {resp.status_code}: {resp.content!s}") - - async def health(self) -> httpx.Response: - return await self.inner.get('/readyz') - - @classmethod - async def from_url(cls, url: str, **kwargs: t.Any) -> AsyncHttpClient: - url = url if '://' in url else 'http://' + url - async with httpx.AsyncClient(base_url=url) as sess: - resp = await sess.get('/docs.json') - if resp.status_code != 200: - raise ValueError(f'Failed to get OpenAPI schema from the server: {resp.status_code} {resp.reason_phrase}:\n{(await resp.aread()).decode()}') - _spec = orjson.loads(await resp.aread()) - - reflection = bentoml.Service(_spec['info']['title']) - - for route, spec in _spec['paths'].items(): - for meth_spec in spec.values(): - if 'tags' in meth_spec and 'Service APIs' in meth_spec['tags']: - if 'x-bentoml-io-descriptor' not in meth_spec['requestBody']: - raise ValueError(f'Malformed BentoML spec received from BentoML server {url}') - if 'x-bentoml-io-descriptor' not in meth_spec['responses']['200']: - raise ValueError(f'Malformed BentoML spec received from BentoML server {url}') - if 'x-bentoml-name' not in meth_spec: - raise ValueError(f'Malformed BentoML spec received from BentoML server {url}') - try: - reflection.apis[meth_spec['x-bentoml-name']] = InferenceAPI[t.Any](None, - bentoml.io.from_spec(meth_spec['requestBody']['x-bentoml-io-descriptor']), - bentoml.io.from_spec(meth_spec['responses']['200']['x-bentoml-io-descriptor']), - name=meth_spec['x-bentoml-name'], - doc=meth_spec['description'], - route=route.lstrip('/')) - except ValueError as e: - logger.error('Failed to instantiate client for API %s: ', meth_spec['x-bentoml-name'], e) - return cls(url, reflection) - - async def _call(self, data: t.Any, /, *, _inference_api: InferenceAPI[t.Any], **kwargs: t.Any) -> t.Any: - # All gRPC kwargs should be popped out. - kwargs = {k: v for k, v in kwargs.items() if not k.startswith('_grpc_')} - if _inference_api.multi_input: - if data is not None: - raise ValueError(f"'{_inference_api.name}' takes multiple inputs, and thus required to pass as keyword arguments.") - fake_resp = await _inference_api.input.to_http_response(kwargs, None) - else: - fake_resp = await _inference_api.input.to_http_response(data, None) - - # XXX: hack around StreamingResponse, since now we only have Text, for metadata so it is fine to do this. - if isinstance(fake_resp, starlette.responses.StreamingResponse): body = None - else: body = t.cast(t.Any, fake_resp.body) - - resp = await self.inner.post('/' + _inference_api.route if not _inference_api.route.startswith('/') else _inference_api.route, - data=body, - headers={'content-type': fake_resp.headers['content-type']}, - timeout=self.timeout) - if resp.status_code != 200: raise ValueError(f'Error making request: {resp.status_code}: {(await resp.aread())!s}') - fake_req = starlette.requests.Request(scope={'type': 'http'}) - headers = starlette.datastructures.Headers(headers=resp.headers) - fake_req._body = resp.content - # Request.headers sets a _headers variable. We will need to set this value to our fake request object. - fake_req._headers = headers - return await _inference_api.output.from_http_request(fake_req) diff --git a/openllm-client/src/openllm_client/client.py b/openllm-client/src/openllm_client/client.py deleted file mode 100644 index 4bee4248..00000000 --- a/openllm-client/src/openllm_client/client.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import annotations -import logging - -from urllib.parse import urlparse - -from ._base import BaseAsyncClient -from ._base import BaseClient - -logger = logging.getLogger(__name__) - -def process_http_address(self: AsyncHTTPClient | HTTPClient, address: str) -> None: - address = address if '://' in address else 'http://' + address - parsed = urlparse(address) - self._host, *_port = parsed.netloc.split(':') - if len(_port) == 0: self._port = '80' if parsed.scheme == 'http' else '443' - else: self._port = next(iter(_port)) - -class HTTPClient(BaseClient): - def __init__(self, address: str, timeout: int = 30): - process_http_address(self, address) - super().__init__(address, timeout) - -class AsyncHTTPClient(BaseAsyncClient): - def __init__(self, address: str, timeout: int = 30): - process_http_address(self, address) - super().__init__(address, timeout) - -class GrpcClient(BaseClient): - def __init__(self, address: str, timeout: int = 30): - self._host, self._port = address.split(':') - super().__init__(address, timeout) - -class AsyncGrpcClient(BaseAsyncClient): - def __init__(self, address: str, timeout: int = 30): - self._host, self._port = address.split(':') - super().__init__(address, timeout) diff --git a/openllm-client/src/openllm_client/pb/__init__.py b/openllm-client/src/openllm_client/pb/__init__.py new file mode 100644 index 00000000..0e8df59b --- /dev/null +++ b/openllm-client/src/openllm_client/pb/__init__.py @@ -0,0 +1,2 @@ +# NOTE: This whole module is generated with `generate-grpc-stubs` +# and shouldn't be modified manually. diff --git a/openllm-client/src/openllm_client/_adapters.py b/openllm-client/src/openllm_client/pb/v1/__init__.py similarity index 100% rename from openllm-client/src/openllm_client/_adapters.py rename to openllm-client/src/openllm_client/pb/v1/__init__.py diff --git a/openllm-client/src/openllm_client/pb/v1/_generated_pb3/__init__.py b/openllm-client/src/openllm_client/pb/v1/_generated_pb3/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2.py b/openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2.py new file mode 100644 index 00000000..4c97b9e2 --- /dev/null +++ b/openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2.py @@ -0,0 +1,205 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: service.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import service as _service +from google.protobuf import service_reflection +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2 +from google.protobuf import wrappers_pb2 as google_dot_protobuf_dot_wrappers__pb2 + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rservice.proto\x12\x0f\x62\x65ntoml.grpc.v1\x1a\x1cgoogle/protobuf/struct.proto\x1a\x1egoogle/protobuf/wrappers.proto\"\x18\n\x16ServiceMetadataRequest\"\xde\x03\n\x17ServiceMetadataResponse\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x43\n\x04\x61pis\x18\x02 \x03(\x0b\x32\x35.bentoml.grpc.v1.ServiceMetadataResponse.InferenceAPI\x12\x0c\n\x04\x64ocs\x18\x03 \x01(\t\x1ao\n\x12\x44\x65scriptorMetadata\x12\x1a\n\rdescriptor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12+\n\nattributes\x18\x02 \x01(\x0b\x32\x17.google.protobuf.StructB\x10\n\x0e_descriptor_id\x1a\xf0\x01\n\x0cInferenceAPI\x12\x0c\n\x04name\x18\x01 \x01(\t\x12O\n\x05input\x18\x02 \x01(\x0b\x32;.bentoml.grpc.v1.ServiceMetadataResponse.DescriptorMetadataH\x00\x88\x01\x01\x12P\n\x06output\x18\x03 \x01(\x0b\x32;.bentoml.grpc.v1.ServiceMetadataResponse.DescriptorMetadataH\x01\x88\x01\x01\x12\x11\n\x04\x64ocs\x18\x04 \x01(\tH\x02\x88\x01\x01\x42\x08\n\x06_inputB\t\n\x07_outputB\x07\n\x05_docs\"\x85\x03\n\x07Request\x12\x10\n\x08\x61pi_name\x18\x01 \x01(\t\x12+\n\x07ndarray\x18\x03 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x05 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x06 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x07 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\t \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12/\n\tmultipart\x18\n \x01(\x0b\x32\x1a.bentoml.grpc.v1.MultipartH\x00\x12\x1a\n\x10serialized_bytes\x18\x02 \x01(\x0cH\x00\x42\t\n\x07\x63ontentJ\x04\x08\x04\x10\x05J\x04\x08\x0b\x10\x0e\"\xf4\x02\n\x08Response\x12+\n\x07ndarray\x18\x01 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x03 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x05 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x06 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\x08 \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12/\n\tmultipart\x18\t \x01(\x0b\x32\x1a.bentoml.grpc.v1.MultipartH\x00\x12\x1a\n\x10serialized_bytes\x18\x02 \x01(\x0cH\x00\x42\t\n\x07\x63ontentJ\x04\x08\x04\x10\x05J\x04\x08\n\x10\x0e\"\xc6\x02\n\x04Part\x12+\n\x07ndarray\x18\x01 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x03 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x05 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x06 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\x08 \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12\x1a\n\x10serialized_bytes\x18\x04 \x01(\x0cH\x00\x42\x10\n\x0erepresentationJ\x04\x08\x02\x10\x03J\x04\x08\t\x10\x0e\"\x89\x01\n\tMultipart\x12\x36\n\x06\x66ields\x18\x01 \x03(\x0b\x32&.bentoml.grpc.v1.Multipart.FieldsEntry\x1a\x44\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12$\n\x05value\x18\x02 \x01(\x0b\x32\x15.bentoml.grpc.v1.Part:\x02\x38\x01\"3\n\x04\x46ile\x12\x11\n\x04kind\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\x0c\x42\x07\n\x05_kind\"K\n\tDataFrame\x12\x14\n\x0c\x63olumn_names\x18\x01 \x03(\t\x12(\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x17.bentoml.grpc.v1.Series\"\xa1\x01\n\x06Series\x12\x17\n\x0b\x62ool_values\x18\x01 \x03(\x08\x42\x02\x10\x01\x12\x18\n\x0c\x66loat_values\x18\x02 \x03(\x02\x42\x02\x10\x01\x12\x18\n\x0cint32_values\x18\x03 \x03(\x05\x42\x02\x10\x01\x12\x18\n\x0cint64_values\x18\x06 \x03(\x03\x42\x02\x10\x01\x12\x15\n\rstring_values\x18\x05 \x03(\t\x12\x19\n\rdouble_values\x18\x04 \x03(\x01\x42\x02\x10\x01\"\xc2\x03\n\x07NDArray\x12-\n\x05\x64type\x18\x01 \x01(\x0e\x32\x1e.bentoml.grpc.v1.NDArray.DType\x12\r\n\x05shape\x18\x02 \x03(\x05\x12\x15\n\rstring_values\x18\x05 \x03(\t\x12\x18\n\x0c\x66loat_values\x18\x03 \x03(\x02\x42\x02\x10\x01\x12\x19\n\rdouble_values\x18\x04 \x03(\x01\x42\x02\x10\x01\x12\x17\n\x0b\x62ool_values\x18\x06 \x03(\x08\x42\x02\x10\x01\x12\x18\n\x0cint32_values\x18\x07 \x03(\x05\x42\x02\x10\x01\x12\x18\n\x0cint64_values\x18\x08 \x03(\x03\x42\x02\x10\x01\x12\x19\n\ruint32_values\x18\t \x03(\rB\x02\x10\x01\x12\x19\n\ruint64_values\x18\n \x03(\x04\x42\x02\x10\x01\"\xa9\x01\n\x05\x44Type\x12\x15\n\x11\x44TYPE_UNSPECIFIED\x10\x00\x12\x0f\n\x0b\x44TYPE_FLOAT\x10\x01\x12\x10\n\x0c\x44TYPE_DOUBLE\x10\x02\x12\x0e\n\nDTYPE_BOOL\x10\x03\x12\x0f\n\x0b\x44TYPE_INT32\x10\x04\x12\x0f\n\x0b\x44TYPE_INT64\x10\x05\x12\x10\n\x0c\x44TYPE_UINT32\x10\x06\x12\x10\n\x0c\x44TYPE_UINT64\x10\x07\x12\x10\n\x0c\x44TYPE_STRING\x10\x08\x32\xb5\x01\n\x0c\x42\x65ntoService\x12=\n\x04\x43\x61ll\x12\x18.bentoml.grpc.v1.Request\x1a\x19.bentoml.grpc.v1.Response\"\x00\x12\x66\n\x0fServiceMetadata\x12\'.bentoml.grpc.v1.ServiceMetadataRequest\x1a(.bentoml.grpc.v1.ServiceMetadataResponse\"\x00\x42]\n\x13\x63om.bentoml.grpc.v1B\x0cServiceProtoP\x01Z*github.com/bentoml/bentoml/grpc/v1;service\x90\x01\x01\xf8\x01\x01\xa2\x02\x03SVCb\x06proto3') + + + +_SERVICEMETADATAREQUEST = DESCRIPTOR.message_types_by_name['ServiceMetadataRequest'] +_SERVICEMETADATARESPONSE = DESCRIPTOR.message_types_by_name['ServiceMetadataResponse'] +_SERVICEMETADATARESPONSE_DESCRIPTORMETADATA = _SERVICEMETADATARESPONSE.nested_types_by_name['DescriptorMetadata'] +_SERVICEMETADATARESPONSE_INFERENCEAPI = _SERVICEMETADATARESPONSE.nested_types_by_name['InferenceAPI'] +_REQUEST = DESCRIPTOR.message_types_by_name['Request'] +_RESPONSE = DESCRIPTOR.message_types_by_name['Response'] +_PART = DESCRIPTOR.message_types_by_name['Part'] +_MULTIPART = DESCRIPTOR.message_types_by_name['Multipart'] +_MULTIPART_FIELDSENTRY = _MULTIPART.nested_types_by_name['FieldsEntry'] +_FILE = DESCRIPTOR.message_types_by_name['File'] +_DATAFRAME = DESCRIPTOR.message_types_by_name['DataFrame'] +_SERIES = DESCRIPTOR.message_types_by_name['Series'] +_NDARRAY = DESCRIPTOR.message_types_by_name['NDArray'] +_NDARRAY_DTYPE = _NDARRAY.enum_types_by_name['DType'] +ServiceMetadataRequest = _reflection.GeneratedProtocolMessageType('ServiceMetadataRequest', (_message.Message,), { + 'DESCRIPTOR' : _SERVICEMETADATAREQUEST, + '__module__' : 'service_pb2' + # @@protoc_insertion_point(class_scope:bentoml.grpc.v1.ServiceMetadataRequest) + }) +_sym_db.RegisterMessage(ServiceMetadataRequest) + +ServiceMetadataResponse = _reflection.GeneratedProtocolMessageType('ServiceMetadataResponse', (_message.Message,), { + + 'DescriptorMetadata' : _reflection.GeneratedProtocolMessageType('DescriptorMetadata', (_message.Message,), { + 'DESCRIPTOR' : _SERVICEMETADATARESPONSE_DESCRIPTORMETADATA, + '__module__' : 'service_pb2' + # @@protoc_insertion_point(class_scope:bentoml.grpc.v1.ServiceMetadataResponse.DescriptorMetadata) + }) + , + + 'InferenceAPI' : _reflection.GeneratedProtocolMessageType('InferenceAPI', (_message.Message,), { + 'DESCRIPTOR' : _SERVICEMETADATARESPONSE_INFERENCEAPI, + '__module__' : 'service_pb2' + # @@protoc_insertion_point(class_scope:bentoml.grpc.v1.ServiceMetadataResponse.InferenceAPI) + }) + , + 'DESCRIPTOR' : _SERVICEMETADATARESPONSE, + '__module__' : 'service_pb2' + # @@protoc_insertion_point(class_scope:bentoml.grpc.v1.ServiceMetadataResponse) + }) +_sym_db.RegisterMessage(ServiceMetadataResponse) +_sym_db.RegisterMessage(ServiceMetadataResponse.DescriptorMetadata) +_sym_db.RegisterMessage(ServiceMetadataResponse.InferenceAPI) + +Request = _reflection.GeneratedProtocolMessageType('Request', (_message.Message,), { + 'DESCRIPTOR' : _REQUEST, + '__module__' : 'service_pb2' + # @@protoc_insertion_point(class_scope:bentoml.grpc.v1.Request) + }) +_sym_db.RegisterMessage(Request) + +Response = _reflection.GeneratedProtocolMessageType('Response', (_message.Message,), { + 'DESCRIPTOR' : _RESPONSE, + '__module__' : 'service_pb2' + # @@protoc_insertion_point(class_scope:bentoml.grpc.v1.Response) + }) +_sym_db.RegisterMessage(Response) + +Part = _reflection.GeneratedProtocolMessageType('Part', (_message.Message,), { + 'DESCRIPTOR' : _PART, + '__module__' : 'service_pb2' + # @@protoc_insertion_point(class_scope:bentoml.grpc.v1.Part) + }) +_sym_db.RegisterMessage(Part) + +Multipart = _reflection.GeneratedProtocolMessageType('Multipart', (_message.Message,), { + + 'FieldsEntry' : _reflection.GeneratedProtocolMessageType('FieldsEntry', (_message.Message,), { + 'DESCRIPTOR' : _MULTIPART_FIELDSENTRY, + '__module__' : 'service_pb2' + # @@protoc_insertion_point(class_scope:bentoml.grpc.v1.Multipart.FieldsEntry) + }) + , + 'DESCRIPTOR' : _MULTIPART, + '__module__' : 'service_pb2' + # @@protoc_insertion_point(class_scope:bentoml.grpc.v1.Multipart) + }) +_sym_db.RegisterMessage(Multipart) +_sym_db.RegisterMessage(Multipart.FieldsEntry) + +File = _reflection.GeneratedProtocolMessageType('File', (_message.Message,), { + 'DESCRIPTOR' : _FILE, + '__module__' : 'service_pb2' + # @@protoc_insertion_point(class_scope:bentoml.grpc.v1.File) + }) +_sym_db.RegisterMessage(File) + +DataFrame = _reflection.GeneratedProtocolMessageType('DataFrame', (_message.Message,), { + 'DESCRIPTOR' : _DATAFRAME, + '__module__' : 'service_pb2' + # @@protoc_insertion_point(class_scope:bentoml.grpc.v1.DataFrame) + }) +_sym_db.RegisterMessage(DataFrame) + +Series = _reflection.GeneratedProtocolMessageType('Series', (_message.Message,), { + 'DESCRIPTOR' : _SERIES, + '__module__' : 'service_pb2' + # @@protoc_insertion_point(class_scope:bentoml.grpc.v1.Series) + }) +_sym_db.RegisterMessage(Series) + +NDArray = _reflection.GeneratedProtocolMessageType('NDArray', (_message.Message,), { + 'DESCRIPTOR' : _NDARRAY, + '__module__' : 'service_pb2' + # @@protoc_insertion_point(class_scope:bentoml.grpc.v1.NDArray) + }) +_sym_db.RegisterMessage(NDArray) + +_BENTOSERVICE = DESCRIPTOR.services_by_name['BentoService'] +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\023com.bentoml.grpc.v1B\014ServiceProtoP\001Z*github.com/bentoml/bentoml/grpc/v1;service\220\001\001\370\001\001\242\002\003SVC' + _MULTIPART_FIELDSENTRY._options = None + _MULTIPART_FIELDSENTRY._serialized_options = b'8\001' + _SERIES.fields_by_name['bool_values']._options = None + _SERIES.fields_by_name['bool_values']._serialized_options = b'\020\001' + _SERIES.fields_by_name['float_values']._options = None + _SERIES.fields_by_name['float_values']._serialized_options = b'\020\001' + _SERIES.fields_by_name['int32_values']._options = None + _SERIES.fields_by_name['int32_values']._serialized_options = b'\020\001' + _SERIES.fields_by_name['int64_values']._options = None + _SERIES.fields_by_name['int64_values']._serialized_options = b'\020\001' + _SERIES.fields_by_name['double_values']._options = None + _SERIES.fields_by_name['double_values']._serialized_options = b'\020\001' + _NDARRAY.fields_by_name['float_values']._options = None + _NDARRAY.fields_by_name['float_values']._serialized_options = b'\020\001' + _NDARRAY.fields_by_name['double_values']._options = None + _NDARRAY.fields_by_name['double_values']._serialized_options = b'\020\001' + _NDARRAY.fields_by_name['bool_values']._options = None + _NDARRAY.fields_by_name['bool_values']._serialized_options = b'\020\001' + _NDARRAY.fields_by_name['int32_values']._options = None + _NDARRAY.fields_by_name['int32_values']._serialized_options = b'\020\001' + _NDARRAY.fields_by_name['int64_values']._options = None + _NDARRAY.fields_by_name['int64_values']._serialized_options = b'\020\001' + _NDARRAY.fields_by_name['uint32_values']._options = None + _NDARRAY.fields_by_name['uint32_values']._serialized_options = b'\020\001' + _NDARRAY.fields_by_name['uint64_values']._options = None + _NDARRAY.fields_by_name['uint64_values']._serialized_options = b'\020\001' + _SERVICEMETADATAREQUEST._serialized_start=96 + _SERVICEMETADATAREQUEST._serialized_end=120 + _SERVICEMETADATARESPONSE._serialized_start=123 + _SERVICEMETADATARESPONSE._serialized_end=601 + _SERVICEMETADATARESPONSE_DESCRIPTORMETADATA._serialized_start=247 + _SERVICEMETADATARESPONSE_DESCRIPTORMETADATA._serialized_end=358 + _SERVICEMETADATARESPONSE_INFERENCEAPI._serialized_start=361 + _SERVICEMETADATARESPONSE_INFERENCEAPI._serialized_end=601 + _REQUEST._serialized_start=604 + _REQUEST._serialized_end=993 + _RESPONSE._serialized_start=996 + _RESPONSE._serialized_end=1368 + _PART._serialized_start=1371 + _PART._serialized_end=1697 + _MULTIPART._serialized_start=1700 + _MULTIPART._serialized_end=1837 + _MULTIPART_FIELDSENTRY._serialized_start=1769 + _MULTIPART_FIELDSENTRY._serialized_end=1837 + _FILE._serialized_start=1839 + _FILE._serialized_end=1890 + _DATAFRAME._serialized_start=1892 + _DATAFRAME._serialized_end=1967 + _SERIES._serialized_start=1970 + _SERIES._serialized_end=2131 + _NDARRAY._serialized_start=2134 + _NDARRAY._serialized_end=2584 + _NDARRAY_DTYPE._serialized_start=2415 + _NDARRAY_DTYPE._serialized_end=2584 + _BENTOSERVICE._serialized_start=2587 + _BENTOSERVICE._serialized_end=2768 +BentoService = service_reflection.GeneratedServiceType('BentoService', (_service.Service,), dict( + DESCRIPTOR = _BENTOSERVICE, + __module__ = 'service_pb2' + )) + +BentoService_Stub = service_reflection.GeneratedServiceStubType('BentoService_Stub', (BentoService,), dict( + DESCRIPTOR = _BENTOSERVICE, + __module__ = 'service_pb2' + )) + + +# @@protoc_insertion_point(module_scope) diff --git a/openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2.pyi b/openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2.pyi new file mode 100644 index 00000000..ea497f31 --- /dev/null +++ b/openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2.pyi @@ -0,0 +1,611 @@ +""" +@generated by mypy-protobuf. Do not edit manually! +isort:skip_file +Vendorred from: https://github.com/bentoml/BentoML/blob/main/src/bentoml/grpc/v1/service.proto""" +import abc +import builtins +import collections.abc +import concurrent.futures +import google.protobuf.descriptor +import google.protobuf.internal.containers +import google.protobuf.internal.enum_type_wrapper +import google.protobuf.message +import google.protobuf.service +import google.protobuf.struct_pb2 +import google.protobuf.wrappers_pb2 +import sys +import typing + +if sys.version_info >= (3, 10): + import typing as typing_extensions +else: + import typing_extensions + +DESCRIPTOR: google.protobuf.descriptor.FileDescriptor + +class ServiceMetadataRequest(google.protobuf.message.Message): + """ServiceMetadataRequest message doesn't take any arguments.""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... + +global___ServiceMetadataRequest = ServiceMetadataRequest + +class ServiceMetadataResponse(google.protobuf.message.Message): + """ServiceMetadataResponse returns metadata of bentoml.Service. + Currently it includes name, version, apis, and docs. + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + class DescriptorMetadata(google.protobuf.message.Message): + """DescriptorMetadata is a metadata of any given IODescriptor.""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + DESCRIPTOR_ID_FIELD_NUMBER: builtins.int + ATTRIBUTES_FIELD_NUMBER: builtins.int + descriptor_id: builtins.str + """descriptor_id describes the given ID of the descriptor, which matches with our OpenAPI definition.""" + @property + def attributes(self) -> google.protobuf.struct_pb2.Struct: + """attributes is the kwargs of the given descriptor.""" + def __init__( + self, + *, + descriptor_id: builtins.str | None = ..., + attributes: google.protobuf.struct_pb2.Struct | None = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_descriptor_id", b"_descriptor_id", "attributes", b"attributes", "descriptor_id", b"descriptor_id"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_descriptor_id", b"_descriptor_id", "attributes", b"attributes", "descriptor_id", b"descriptor_id"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_descriptor_id", b"_descriptor_id"]) -> typing_extensions.Literal["descriptor_id"] | None: ... + + class InferenceAPI(google.protobuf.message.Message): + """InferenceAPI is bentoml._internal.service.inferece_api.InferenceAPI + that is exposed to gRPC client. + There is no way for reflection to get information of given @svc.api. + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + NAME_FIELD_NUMBER: builtins.int + INPUT_FIELD_NUMBER: builtins.int + OUTPUT_FIELD_NUMBER: builtins.int + DOCS_FIELD_NUMBER: builtins.int + name: builtins.str + """name is the name of the API.""" + @property + def input(self) -> global___ServiceMetadataResponse.DescriptorMetadata: + """input is the input descriptor of the API.""" + @property + def output(self) -> global___ServiceMetadataResponse.DescriptorMetadata: + """output is the output descriptor of the API.""" + docs: builtins.str + """docs is the optional documentation of the API.""" + def __init__( + self, + *, + name: builtins.str = ..., + input: global___ServiceMetadataResponse.DescriptorMetadata | None = ..., + output: global___ServiceMetadataResponse.DescriptorMetadata | None = ..., + docs: builtins.str | None = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_docs", b"_docs", "_input", b"_input", "_output", b"_output", "docs", b"docs", "input", b"input", "output", b"output"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_docs", b"_docs", "_input", b"_input", "_output", b"_output", "docs", b"docs", "input", b"input", "name", b"name", "output", b"output"]) -> None: ... + @typing.overload + def WhichOneof(self, oneof_group: typing_extensions.Literal["_docs", b"_docs"]) -> typing_extensions.Literal["docs"] | None: ... + @typing.overload + def WhichOneof(self, oneof_group: typing_extensions.Literal["_input", b"_input"]) -> typing_extensions.Literal["input"] | None: ... + @typing.overload + def WhichOneof(self, oneof_group: typing_extensions.Literal["_output", b"_output"]) -> typing_extensions.Literal["output"] | None: ... + + NAME_FIELD_NUMBER: builtins.int + APIS_FIELD_NUMBER: builtins.int + DOCS_FIELD_NUMBER: builtins.int + name: builtins.str + """name is the service name.""" + @property + def apis(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___ServiceMetadataResponse.InferenceAPI]: + """apis holds a list of InferenceAPI of the service.""" + docs: builtins.str + """docs is the documentation of the service.""" + def __init__( + self, + *, + name: builtins.str = ..., + apis: collections.abc.Iterable[global___ServiceMetadataResponse.InferenceAPI] | None = ..., + docs: builtins.str = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["apis", b"apis", "docs", b"docs", "name", b"name"]) -> None: ... + +global___ServiceMetadataResponse = ServiceMetadataResponse + +class Request(google.protobuf.message.Message): + """Request message for incoming Call.""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + API_NAME_FIELD_NUMBER: builtins.int + NDARRAY_FIELD_NUMBER: builtins.int + DATAFRAME_FIELD_NUMBER: builtins.int + SERIES_FIELD_NUMBER: builtins.int + FILE_FIELD_NUMBER: builtins.int + TEXT_FIELD_NUMBER: builtins.int + JSON_FIELD_NUMBER: builtins.int + MULTIPART_FIELD_NUMBER: builtins.int + SERIALIZED_BYTES_FIELD_NUMBER: builtins.int + api_name: builtins.str + """api_name defines the API entrypoint to call. + api_name is the name of the function defined in bentoml.Service. + Example: + + @svc.api(input=NumpyNdarray(), output=File()) + def predict(input: NDArray[float]) -> bytes: + ... + + api_name is "predict" in this case. + """ + @property + def ndarray(self) -> global___NDArray: + """NDArray represents a n-dimensional array of arbitrary type.""" + @property + def dataframe(self) -> global___DataFrame: + """DataFrame represents any tabular data type. We are using + DataFrame as a trivial representation for tabular type. + """ + @property + def series(self) -> global___Series: + """Series portrays a series of values. This can be used for + representing Series types in tabular data. + """ + @property + def file(self) -> global___File: + """File represents for any arbitrary file type. This can be + plaintext, image, video, audio, etc. + """ + @property + def text(self) -> google.protobuf.wrappers_pb2.StringValue: + """Text represents a string inputs.""" + @property + def json(self) -> google.protobuf.struct_pb2.Value: + """JSON is represented by using google.protobuf.Value. + see https://github.com/protocolbuffers/protobuf/blob/main/src/google/protobuf/struct.proto + """ + @property + def multipart(self) -> global___Multipart: + """Multipart represents a multipart message. + It comprises of a mapping from given type name to a subset of aforementioned types. + """ + serialized_bytes: builtins.bytes + """serialized_bytes is for data serialized in BentoML's internal serialization format.""" + def __init__( + self, + *, + api_name: builtins.str = ..., + ndarray: global___NDArray | None = ..., + dataframe: global___DataFrame | None = ..., + series: global___Series | None = ..., + file: global___File | None = ..., + text: google.protobuf.wrappers_pb2.StringValue | None = ..., + json: google.protobuf.struct_pb2.Value | None = ..., + multipart: global___Multipart | None = ..., + serialized_bytes: builtins.bytes = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["content", b"content", "dataframe", b"dataframe", "file", b"file", "json", b"json", "multipart", b"multipart", "ndarray", b"ndarray", "serialized_bytes", b"serialized_bytes", "series", b"series", "text", b"text"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["api_name", b"api_name", "content", b"content", "dataframe", b"dataframe", "file", b"file", "json", b"json", "multipart", b"multipart", "ndarray", b"ndarray", "serialized_bytes", b"serialized_bytes", "series", b"series", "text", b"text"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["content", b"content"]) -> typing_extensions.Literal["ndarray", "dataframe", "series", "file", "text", "json", "multipart", "serialized_bytes"] | None: ... + +global___Request = Request + +class Response(google.protobuf.message.Message): + """Request message for incoming Call.""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + NDARRAY_FIELD_NUMBER: builtins.int + DATAFRAME_FIELD_NUMBER: builtins.int + SERIES_FIELD_NUMBER: builtins.int + FILE_FIELD_NUMBER: builtins.int + TEXT_FIELD_NUMBER: builtins.int + JSON_FIELD_NUMBER: builtins.int + MULTIPART_FIELD_NUMBER: builtins.int + SERIALIZED_BYTES_FIELD_NUMBER: builtins.int + @property + def ndarray(self) -> global___NDArray: + """NDArray represents a n-dimensional array of arbitrary type.""" + @property + def dataframe(self) -> global___DataFrame: + """DataFrame represents any tabular data type. We are using + DataFrame as a trivial representation for tabular type. + """ + @property + def series(self) -> global___Series: + """Series portrays a series of values. This can be used for + representing Series types in tabular data. + """ + @property + def file(self) -> global___File: + """File represents for any arbitrary file type. This can be + plaintext, image, video, audio, etc. + """ + @property + def text(self) -> google.protobuf.wrappers_pb2.StringValue: + """Text represents a string inputs.""" + @property + def json(self) -> google.protobuf.struct_pb2.Value: + """JSON is represented by using google.protobuf.Value. + see https://github.com/protocolbuffers/protobuf/blob/main/src/google/protobuf/struct.proto + """ + @property + def multipart(self) -> global___Multipart: + """Multipart represents a multipart message. + It comprises of a mapping from given type name to a subset of aforementioned types. + """ + serialized_bytes: builtins.bytes + """serialized_bytes is for data serialized in BentoML's internal serialization format.""" + def __init__( + self, + *, + ndarray: global___NDArray | None = ..., + dataframe: global___DataFrame | None = ..., + series: global___Series | None = ..., + file: global___File | None = ..., + text: google.protobuf.wrappers_pb2.StringValue | None = ..., + json: google.protobuf.struct_pb2.Value | None = ..., + multipart: global___Multipart | None = ..., + serialized_bytes: builtins.bytes = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["content", b"content", "dataframe", b"dataframe", "file", b"file", "json", b"json", "multipart", b"multipart", "ndarray", b"ndarray", "serialized_bytes", b"serialized_bytes", "series", b"series", "text", b"text"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["content", b"content", "dataframe", b"dataframe", "file", b"file", "json", b"json", "multipart", b"multipart", "ndarray", b"ndarray", "serialized_bytes", b"serialized_bytes", "series", b"series", "text", b"text"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["content", b"content"]) -> typing_extensions.Literal["ndarray", "dataframe", "series", "file", "text", "json", "multipart", "serialized_bytes"] | None: ... + +global___Response = Response + +class Part(google.protobuf.message.Message): + """Part represents possible value types for multipart message. + These are the same as the types in Request message. + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + NDARRAY_FIELD_NUMBER: builtins.int + DATAFRAME_FIELD_NUMBER: builtins.int + SERIES_FIELD_NUMBER: builtins.int + FILE_FIELD_NUMBER: builtins.int + TEXT_FIELD_NUMBER: builtins.int + JSON_FIELD_NUMBER: builtins.int + SERIALIZED_BYTES_FIELD_NUMBER: builtins.int + @property + def ndarray(self) -> global___NDArray: + """NDArray represents a n-dimensional array of arbitrary type.""" + @property + def dataframe(self) -> global___DataFrame: + """DataFrame represents any tabular data type. We are using + DataFrame as a trivial representation for tabular type. + """ + @property + def series(self) -> global___Series: + """Series portrays a series of values. This can be used for + representing Series types in tabular data. + """ + @property + def file(self) -> global___File: + """File represents for any arbitrary file type. This can be + plaintext, image, video, audio, etc. + """ + @property + def text(self) -> google.protobuf.wrappers_pb2.StringValue: + """Text represents a string inputs.""" + @property + def json(self) -> google.protobuf.struct_pb2.Value: + """JSON is represented by using google.protobuf.Value. + see https://github.com/protocolbuffers/protobuf/blob/main/src/google/protobuf/struct.proto + """ + serialized_bytes: builtins.bytes + """serialized_bytes is for data serialized in BentoML's internal serialization format.""" + def __init__( + self, + *, + ndarray: global___NDArray | None = ..., + dataframe: global___DataFrame | None = ..., + series: global___Series | None = ..., + file: global___File | None = ..., + text: google.protobuf.wrappers_pb2.StringValue | None = ..., + json: google.protobuf.struct_pb2.Value | None = ..., + serialized_bytes: builtins.bytes = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["dataframe", b"dataframe", "file", b"file", "json", b"json", "ndarray", b"ndarray", "representation", b"representation", "serialized_bytes", b"serialized_bytes", "series", b"series", "text", b"text"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["dataframe", b"dataframe", "file", b"file", "json", b"json", "ndarray", b"ndarray", "representation", b"representation", "serialized_bytes", b"serialized_bytes", "series", b"series", "text", b"text"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["representation", b"representation"]) -> typing_extensions.Literal["ndarray", "dataframe", "series", "file", "text", "json", "serialized_bytes"] | None: ... + +global___Part = Part + +class Multipart(google.protobuf.message.Message): + """Multipart represents a multipart message. + It comprises of a mapping from given type name to a subset of aforementioned types. + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + class FieldsEntry(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + KEY_FIELD_NUMBER: builtins.int + VALUE_FIELD_NUMBER: builtins.int + key: builtins.str + @property + def value(self) -> global___Part: ... + def __init__( + self, + *, + key: builtins.str = ..., + value: global___Part | None = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["value", b"value"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"]) -> None: ... + + FIELDS_FIELD_NUMBER: builtins.int + @property + def fields(self) -> google.protobuf.internal.containers.MessageMap[builtins.str, global___Part]: ... + def __init__( + self, + *, + fields: collections.abc.Mapping[builtins.str, global___Part] | None = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["fields", b"fields"]) -> None: ... + +global___Multipart = Multipart + +class File(google.protobuf.message.Message): + """File represents for any arbitrary file type. This can be + plaintext, image, video, audio, etc. + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + KIND_FIELD_NUMBER: builtins.int + CONTENT_FIELD_NUMBER: builtins.int + kind: builtins.str + """optional file type, let it be csv, text, parquet, etc. + v1alpha1 uses 1 as FileType enum. + """ + content: builtins.bytes + """contents of file as bytes.""" + def __init__( + self, + *, + kind: builtins.str | None = ..., + content: builtins.bytes = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_kind", b"_kind", "kind", b"kind"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_kind", b"_kind", "content", b"content", "kind", b"kind"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_kind", b"_kind"]) -> typing_extensions.Literal["kind"] | None: ... + +global___File = File + +class DataFrame(google.protobuf.message.Message): + """DataFrame represents any tabular data type. We are using + DataFrame as a trivial representation for tabular type. + This message carries given implementation of tabular data based on given orientation. + TODO: support index, records, etc. + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + COLUMN_NAMES_FIELD_NUMBER: builtins.int + COLUMNS_FIELD_NUMBER: builtins.int + @property + def column_names(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: + """columns name""" + @property + def columns(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___Series]: + """columns orient. + { column ↠ { index ↠ value } } + """ + def __init__( + self, + *, + column_names: collections.abc.Iterable[builtins.str] | None = ..., + columns: collections.abc.Iterable[global___Series] | None = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["column_names", b"column_names", "columns", b"columns"]) -> None: ... + +global___DataFrame = DataFrame + +class Series(google.protobuf.message.Message): + """Series portrays a series of values. This can be used for + representing Series types in tabular data. + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + BOOL_VALUES_FIELD_NUMBER: builtins.int + FLOAT_VALUES_FIELD_NUMBER: builtins.int + INT32_VALUES_FIELD_NUMBER: builtins.int + INT64_VALUES_FIELD_NUMBER: builtins.int + STRING_VALUES_FIELD_NUMBER: builtins.int + DOUBLE_VALUES_FIELD_NUMBER: builtins.int + @property + def bool_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.bool]: + """A bool parameter value""" + @property + def float_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: + """A float parameter value""" + @property + def int32_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: + """A int32 parameter value""" + @property + def int64_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: + """A int64 parameter value""" + @property + def string_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: + """A string parameter value""" + @property + def double_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: + """represents a double parameter value.""" + def __init__( + self, + *, + bool_values: collections.abc.Iterable[builtins.bool] | None = ..., + float_values: collections.abc.Iterable[builtins.float] | None = ..., + int32_values: collections.abc.Iterable[builtins.int] | None = ..., + int64_values: collections.abc.Iterable[builtins.int] | None = ..., + string_values: collections.abc.Iterable[builtins.str] | None = ..., + double_values: collections.abc.Iterable[builtins.float] | None = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["bool_values", b"bool_values", "double_values", b"double_values", "float_values", b"float_values", "int32_values", b"int32_values", "int64_values", b"int64_values", "string_values", b"string_values"]) -> None: ... + +global___Series = Series + +class NDArray(google.protobuf.message.Message): + """NDArray represents a n-dimensional array of arbitrary type.""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + class _DType: + ValueType = typing.NewType("ValueType", builtins.int) + V: typing_extensions.TypeAlias = ValueType + + class _DTypeEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[NDArray._DType.ValueType], builtins.type): # noqa: F821 + DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor + DTYPE_UNSPECIFIED: NDArray._DType.ValueType # 0 + """Represents a None type.""" + DTYPE_FLOAT: NDArray._DType.ValueType # 1 + """Represents an float type.""" + DTYPE_DOUBLE: NDArray._DType.ValueType # 2 + """Represents an double type.""" + DTYPE_BOOL: NDArray._DType.ValueType # 3 + """Represents a bool type.""" + DTYPE_INT32: NDArray._DType.ValueType # 4 + """Represents an int32 type.""" + DTYPE_INT64: NDArray._DType.ValueType # 5 + """Represents an int64 type.""" + DTYPE_UINT32: NDArray._DType.ValueType # 6 + """Represents a uint32 type.""" + DTYPE_UINT64: NDArray._DType.ValueType # 7 + """Represents a uint64 type.""" + DTYPE_STRING: NDArray._DType.ValueType # 8 + """Represents a string type.""" + + class DType(_DType, metaclass=_DTypeEnumTypeWrapper): + """Represents data type of a given array.""" + + DTYPE_UNSPECIFIED: NDArray.DType.ValueType # 0 + """Represents a None type.""" + DTYPE_FLOAT: NDArray.DType.ValueType # 1 + """Represents an float type.""" + DTYPE_DOUBLE: NDArray.DType.ValueType # 2 + """Represents an double type.""" + DTYPE_BOOL: NDArray.DType.ValueType # 3 + """Represents a bool type.""" + DTYPE_INT32: NDArray.DType.ValueType # 4 + """Represents an int32 type.""" + DTYPE_INT64: NDArray.DType.ValueType # 5 + """Represents an int64 type.""" + DTYPE_UINT32: NDArray.DType.ValueType # 6 + """Represents a uint32 type.""" + DTYPE_UINT64: NDArray.DType.ValueType # 7 + """Represents a uint64 type.""" + DTYPE_STRING: NDArray.DType.ValueType # 8 + """Represents a string type.""" + + DTYPE_FIELD_NUMBER: builtins.int + SHAPE_FIELD_NUMBER: builtins.int + STRING_VALUES_FIELD_NUMBER: builtins.int + FLOAT_VALUES_FIELD_NUMBER: builtins.int + DOUBLE_VALUES_FIELD_NUMBER: builtins.int + BOOL_VALUES_FIELD_NUMBER: builtins.int + INT32_VALUES_FIELD_NUMBER: builtins.int + INT64_VALUES_FIELD_NUMBER: builtins.int + UINT32_VALUES_FIELD_NUMBER: builtins.int + UINT64_VALUES_FIELD_NUMBER: builtins.int + dtype: global___NDArray.DType.ValueType + """DTYPE is the data type of given array""" + @property + def shape(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: + """shape is the shape of given array.""" + @property + def string_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: + """represents a string parameter value.""" + @property + def float_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: + """represents a float parameter value.""" + @property + def double_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: + """represents a double parameter value.""" + @property + def bool_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.bool]: + """represents a bool parameter value.""" + @property + def int32_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: + """represents a int32 parameter value.""" + @property + def int64_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: + """represents a int64 parameter value.""" + @property + def uint32_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: + """represents a uint32 parameter value.""" + @property + def uint64_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: + """represents a uint64 parameter value.""" + def __init__( + self, + *, + dtype: global___NDArray.DType.ValueType = ..., + shape: collections.abc.Iterable[builtins.int] | None = ..., + string_values: collections.abc.Iterable[builtins.str] | None = ..., + float_values: collections.abc.Iterable[builtins.float] | None = ..., + double_values: collections.abc.Iterable[builtins.float] | None = ..., + bool_values: collections.abc.Iterable[builtins.bool] | None = ..., + int32_values: collections.abc.Iterable[builtins.int] | None = ..., + int64_values: collections.abc.Iterable[builtins.int] | None = ..., + uint32_values: collections.abc.Iterable[builtins.int] | None = ..., + uint64_values: collections.abc.Iterable[builtins.int] | None = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["bool_values", b"bool_values", "double_values", b"double_values", "dtype", b"dtype", "float_values", b"float_values", "int32_values", b"int32_values", "int64_values", b"int64_values", "shape", b"shape", "string_values", b"string_values", "uint32_values", b"uint32_values", "uint64_values", b"uint64_values"]) -> None: ... + +global___NDArray = NDArray + +class BentoService(google.protobuf.service.Service, metaclass=abc.ABCMeta): + """a gRPC BentoServer.""" + + DESCRIPTOR: google.protobuf.descriptor.ServiceDescriptor + @abc.abstractmethod + def Call( + inst: BentoService, + rpc_controller: google.protobuf.service.RpcController, + request: global___Request, + callback: collections.abc.Callable[[global___Response], None] | None, + ) -> concurrent.futures.Future[global___Response]: + """Call handles methodcaller of given API entrypoint.""" + @abc.abstractmethod + def ServiceMetadata( + inst: BentoService, + rpc_controller: google.protobuf.service.RpcController, + request: global___ServiceMetadataRequest, + callback: collections.abc.Callable[[global___ServiceMetadataResponse], None] | None, + ) -> concurrent.futures.Future[global___ServiceMetadataResponse]: + """ServiceMetadata returns metadata of bentoml.Service.""" + +class BentoService_Stub(BentoService): + """a gRPC BentoServer.""" + + def __init__(self, rpc_channel: google.protobuf.service.RpcChannel) -> None: ... + DESCRIPTOR: google.protobuf.descriptor.ServiceDescriptor + def Call( + inst: BentoService_Stub, + rpc_controller: google.protobuf.service.RpcController, + request: global___Request, + callback: collections.abc.Callable[[global___Response], None] | None = ..., + ) -> concurrent.futures.Future[global___Response]: + """Call handles methodcaller of given API entrypoint.""" + def ServiceMetadata( + inst: BentoService_Stub, + rpc_controller: google.protobuf.service.RpcController, + request: global___ServiceMetadataRequest, + callback: collections.abc.Callable[[global___ServiceMetadataResponse], None] | None = ..., + ) -> concurrent.futures.Future[global___ServiceMetadataResponse]: + """ServiceMetadata returns metadata of bentoml.Service.""" diff --git a/openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2_grpc.py b/openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2_grpc.py new file mode 100644 index 00000000..cff4c957 --- /dev/null +++ b/openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2_grpc.py @@ -0,0 +1,104 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc + +import service_pb2 as service__pb2 + + +class BentoServiceStub(object): + """a gRPC BentoServer. + """ + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + self.Call = channel.unary_unary( + '/bentoml.grpc.v1.BentoService/Call', + request_serializer=service__pb2.Request.SerializeToString, + response_deserializer=service__pb2.Response.FromString, + ) + self.ServiceMetadata = channel.unary_unary( + '/bentoml.grpc.v1.BentoService/ServiceMetadata', + request_serializer=service__pb2.ServiceMetadataRequest.SerializeToString, + response_deserializer=service__pb2.ServiceMetadataResponse.FromString, + ) + + +class BentoServiceServicer(object): + """a gRPC BentoServer. + """ + + def Call(self, request, context): + """Call handles methodcaller of given API entrypoint. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def ServiceMetadata(self, request, context): + """ServiceMetadata returns metadata of bentoml.Service. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_BentoServiceServicer_to_server(servicer, server): + rpc_method_handlers = { + 'Call': grpc.unary_unary_rpc_method_handler( + servicer.Call, + request_deserializer=service__pb2.Request.FromString, + response_serializer=service__pb2.Response.SerializeToString, + ), + 'ServiceMetadata': grpc.unary_unary_rpc_method_handler( + servicer.ServiceMetadata, + request_deserializer=service__pb2.ServiceMetadataRequest.FromString, + response_serializer=service__pb2.ServiceMetadataResponse.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'bentoml.grpc.v1.BentoService', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + + + # This class is part of an EXPERIMENTAL API. +class BentoService(object): + """a gRPC BentoServer. + """ + + @staticmethod + def Call(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/bentoml.grpc.v1.BentoService/Call', + service__pb2.Request.SerializeToString, + service__pb2.Response.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def ServiceMetadata(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/bentoml.grpc.v1.BentoService/ServiceMetadata', + service__pb2.ServiceMetadataRequest.SerializeToString, + service__pb2.ServiceMetadataResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2_grpc.pyi b/openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2_grpc.pyi new file mode 100644 index 00000000..423ed190 --- /dev/null +++ b/openllm-client/src/openllm_client/pb/v1/_generated_pb3/service_pb2_grpc.pyi @@ -0,0 +1,42 @@ +""" +@generated by mypy-protobuf. Do not edit manually! +isort:skip_file +Vendorred from: https://github.com/bentoml/BentoML/blob/main/src/bentoml/grpc/v1/service.proto""" +import abc +import grpc +import service_pb2 + +class BentoServiceStub: + """a gRPC BentoServer.""" + + def __init__(self, channel: grpc.Channel) -> None: ... + Call: grpc.UnaryUnaryMultiCallable[ + service_pb2.Request, + service_pb2.Response, + ] + """Call handles methodcaller of given API entrypoint.""" + ServiceMetadata: grpc.UnaryUnaryMultiCallable[ + service_pb2.ServiceMetadataRequest, + service_pb2.ServiceMetadataResponse, + ] + """ServiceMetadata returns metadata of bentoml.Service.""" + +class BentoServiceServicer(metaclass=abc.ABCMeta): + """a gRPC BentoServer.""" + + @abc.abstractmethod + def Call( + self, + request: service_pb2.Request, + context: grpc.ServicerContext, + ) -> service_pb2.Response: + """Call handles methodcaller of given API entrypoint.""" + @abc.abstractmethod + def ServiceMetadata( + self, + request: service_pb2.ServiceMetadataRequest, + context: grpc.ServicerContext, + ) -> service_pb2.ServiceMetadataResponse: + """ServiceMetadata returns metadata of bentoml.Service.""" + +def add_BentoServiceServicer_to_server(servicer: BentoServiceServicer, server: grpc.Server) -> None: ... diff --git a/openllm-client/src/openllm_client/pb/v1/_generated_pb4/__init__.py b/openllm-client/src/openllm_client/pb/v1/_generated_pb4/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2.py b/openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2.py new file mode 100644 index 00000000..21fd2d2f --- /dev/null +++ b/openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: service.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2 +from google.protobuf import wrappers_pb2 as google_dot_protobuf_dot_wrappers__pb2 + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rservice.proto\x12\x0f\x62\x65ntoml.grpc.v1\x1a\x1cgoogle/protobuf/struct.proto\x1a\x1egoogle/protobuf/wrappers.proto\"\x18\n\x16ServiceMetadataRequest\"\xde\x03\n\x17ServiceMetadataResponse\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x43\n\x04\x61pis\x18\x02 \x03(\x0b\x32\x35.bentoml.grpc.v1.ServiceMetadataResponse.InferenceAPI\x12\x0c\n\x04\x64ocs\x18\x03 \x01(\t\x1ao\n\x12\x44\x65scriptorMetadata\x12\x1a\n\rdescriptor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12+\n\nattributes\x18\x02 \x01(\x0b\x32\x17.google.protobuf.StructB\x10\n\x0e_descriptor_id\x1a\xf0\x01\n\x0cInferenceAPI\x12\x0c\n\x04name\x18\x01 \x01(\t\x12O\n\x05input\x18\x02 \x01(\x0b\x32;.bentoml.grpc.v1.ServiceMetadataResponse.DescriptorMetadataH\x00\x88\x01\x01\x12P\n\x06output\x18\x03 \x01(\x0b\x32;.bentoml.grpc.v1.ServiceMetadataResponse.DescriptorMetadataH\x01\x88\x01\x01\x12\x11\n\x04\x64ocs\x18\x04 \x01(\tH\x02\x88\x01\x01\x42\x08\n\x06_inputB\t\n\x07_outputB\x07\n\x05_docs\"\x85\x03\n\x07Request\x12\x10\n\x08\x61pi_name\x18\x01 \x01(\t\x12+\n\x07ndarray\x18\x03 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x05 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x06 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x07 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\t \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12/\n\tmultipart\x18\n \x01(\x0b\x32\x1a.bentoml.grpc.v1.MultipartH\x00\x12\x1a\n\x10serialized_bytes\x18\x02 \x01(\x0cH\x00\x42\t\n\x07\x63ontentJ\x04\x08\x04\x10\x05J\x04\x08\x0b\x10\x0e\"\xf4\x02\n\x08Response\x12+\n\x07ndarray\x18\x01 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x03 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x05 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x06 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\x08 \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12/\n\tmultipart\x18\t \x01(\x0b\x32\x1a.bentoml.grpc.v1.MultipartH\x00\x12\x1a\n\x10serialized_bytes\x18\x02 \x01(\x0cH\x00\x42\t\n\x07\x63ontentJ\x04\x08\x04\x10\x05J\x04\x08\n\x10\x0e\"\xc6\x02\n\x04Part\x12+\n\x07ndarray\x18\x01 \x01(\x0b\x32\x18.bentoml.grpc.v1.NDArrayH\x00\x12/\n\tdataframe\x18\x03 \x01(\x0b\x32\x1a.bentoml.grpc.v1.DataFrameH\x00\x12)\n\x06series\x18\x05 \x01(\x0b\x32\x17.bentoml.grpc.v1.SeriesH\x00\x12%\n\x04\x66ile\x18\x06 \x01(\x0b\x32\x15.bentoml.grpc.v1.FileH\x00\x12,\n\x04text\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.StringValueH\x00\x12&\n\x04json\x18\x08 \x01(\x0b\x32\x16.google.protobuf.ValueH\x00\x12\x1a\n\x10serialized_bytes\x18\x04 \x01(\x0cH\x00\x42\x10\n\x0erepresentationJ\x04\x08\x02\x10\x03J\x04\x08\t\x10\x0e\"\x89\x01\n\tMultipart\x12\x36\n\x06\x66ields\x18\x01 \x03(\x0b\x32&.bentoml.grpc.v1.Multipart.FieldsEntry\x1a\x44\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12$\n\x05value\x18\x02 \x01(\x0b\x32\x15.bentoml.grpc.v1.Part:\x02\x38\x01\"3\n\x04\x46ile\x12\x11\n\x04kind\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\x0c\x42\x07\n\x05_kind\"K\n\tDataFrame\x12\x14\n\x0c\x63olumn_names\x18\x01 \x03(\t\x12(\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x17.bentoml.grpc.v1.Series\"\xa1\x01\n\x06Series\x12\x17\n\x0b\x62ool_values\x18\x01 \x03(\x08\x42\x02\x10\x01\x12\x18\n\x0c\x66loat_values\x18\x02 \x03(\x02\x42\x02\x10\x01\x12\x18\n\x0cint32_values\x18\x03 \x03(\x05\x42\x02\x10\x01\x12\x18\n\x0cint64_values\x18\x06 \x03(\x03\x42\x02\x10\x01\x12\x15\n\rstring_values\x18\x05 \x03(\t\x12\x19\n\rdouble_values\x18\x04 \x03(\x01\x42\x02\x10\x01\"\xc2\x03\n\x07NDArray\x12-\n\x05\x64type\x18\x01 \x01(\x0e\x32\x1e.bentoml.grpc.v1.NDArray.DType\x12\r\n\x05shape\x18\x02 \x03(\x05\x12\x15\n\rstring_values\x18\x05 \x03(\t\x12\x18\n\x0c\x66loat_values\x18\x03 \x03(\x02\x42\x02\x10\x01\x12\x19\n\rdouble_values\x18\x04 \x03(\x01\x42\x02\x10\x01\x12\x17\n\x0b\x62ool_values\x18\x06 \x03(\x08\x42\x02\x10\x01\x12\x18\n\x0cint32_values\x18\x07 \x03(\x05\x42\x02\x10\x01\x12\x18\n\x0cint64_values\x18\x08 \x03(\x03\x42\x02\x10\x01\x12\x19\n\ruint32_values\x18\t \x03(\rB\x02\x10\x01\x12\x19\n\ruint64_values\x18\n \x03(\x04\x42\x02\x10\x01\"\xa9\x01\n\x05\x44Type\x12\x15\n\x11\x44TYPE_UNSPECIFIED\x10\x00\x12\x0f\n\x0b\x44TYPE_FLOAT\x10\x01\x12\x10\n\x0c\x44TYPE_DOUBLE\x10\x02\x12\x0e\n\nDTYPE_BOOL\x10\x03\x12\x0f\n\x0b\x44TYPE_INT32\x10\x04\x12\x0f\n\x0b\x44TYPE_INT64\x10\x05\x12\x10\n\x0c\x44TYPE_UINT32\x10\x06\x12\x10\n\x0c\x44TYPE_UINT64\x10\x07\x12\x10\n\x0c\x44TYPE_STRING\x10\x08\x32\xb5\x01\n\x0c\x42\x65ntoService\x12=\n\x04\x43\x61ll\x12\x18.bentoml.grpc.v1.Request\x1a\x19.bentoml.grpc.v1.Response\"\x00\x12\x66\n\x0fServiceMetadata\x12\'.bentoml.grpc.v1.ServiceMetadataRequest\x1a(.bentoml.grpc.v1.ServiceMetadataResponse\"\x00\x42]\n\x13\x63om.bentoml.grpc.v1B\x0cServiceProtoP\x01Z*github.com/bentoml/bentoml/grpc/v1;service\x90\x01\x01\xf8\x01\x01\xa2\x02\x03SVCb\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'service_pb2', _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\023com.bentoml.grpc.v1B\014ServiceProtoP\001Z*github.com/bentoml/bentoml/grpc/v1;service\220\001\001\370\001\001\242\002\003SVC' + _MULTIPART_FIELDSENTRY._options = None + _MULTIPART_FIELDSENTRY._serialized_options = b'8\001' + _SERIES.fields_by_name['bool_values']._options = None + _SERIES.fields_by_name['bool_values']._serialized_options = b'\020\001' + _SERIES.fields_by_name['float_values']._options = None + _SERIES.fields_by_name['float_values']._serialized_options = b'\020\001' + _SERIES.fields_by_name['int32_values']._options = None + _SERIES.fields_by_name['int32_values']._serialized_options = b'\020\001' + _SERIES.fields_by_name['int64_values']._options = None + _SERIES.fields_by_name['int64_values']._serialized_options = b'\020\001' + _SERIES.fields_by_name['double_values']._options = None + _SERIES.fields_by_name['double_values']._serialized_options = b'\020\001' + _NDARRAY.fields_by_name['float_values']._options = None + _NDARRAY.fields_by_name['float_values']._serialized_options = b'\020\001' + _NDARRAY.fields_by_name['double_values']._options = None + _NDARRAY.fields_by_name['double_values']._serialized_options = b'\020\001' + _NDARRAY.fields_by_name['bool_values']._options = None + _NDARRAY.fields_by_name['bool_values']._serialized_options = b'\020\001' + _NDARRAY.fields_by_name['int32_values']._options = None + _NDARRAY.fields_by_name['int32_values']._serialized_options = b'\020\001' + _NDARRAY.fields_by_name['int64_values']._options = None + _NDARRAY.fields_by_name['int64_values']._serialized_options = b'\020\001' + _NDARRAY.fields_by_name['uint32_values']._options = None + _NDARRAY.fields_by_name['uint32_values']._serialized_options = b'\020\001' + _NDARRAY.fields_by_name['uint64_values']._options = None + _NDARRAY.fields_by_name['uint64_values']._serialized_options = b'\020\001' + _globals['_SERVICEMETADATAREQUEST']._serialized_start=96 + _globals['_SERVICEMETADATAREQUEST']._serialized_end=120 + _globals['_SERVICEMETADATARESPONSE']._serialized_start=123 + _globals['_SERVICEMETADATARESPONSE']._serialized_end=601 + _globals['_SERVICEMETADATARESPONSE_DESCRIPTORMETADATA']._serialized_start=247 + _globals['_SERVICEMETADATARESPONSE_DESCRIPTORMETADATA']._serialized_end=358 + _globals['_SERVICEMETADATARESPONSE_INFERENCEAPI']._serialized_start=361 + _globals['_SERVICEMETADATARESPONSE_INFERENCEAPI']._serialized_end=601 + _globals['_REQUEST']._serialized_start=604 + _globals['_REQUEST']._serialized_end=993 + _globals['_RESPONSE']._serialized_start=996 + _globals['_RESPONSE']._serialized_end=1368 + _globals['_PART']._serialized_start=1371 + _globals['_PART']._serialized_end=1697 + _globals['_MULTIPART']._serialized_start=1700 + _globals['_MULTIPART']._serialized_end=1837 + _globals['_MULTIPART_FIELDSENTRY']._serialized_start=1769 + _globals['_MULTIPART_FIELDSENTRY']._serialized_end=1837 + _globals['_FILE']._serialized_start=1839 + _globals['_FILE']._serialized_end=1890 + _globals['_DATAFRAME']._serialized_start=1892 + _globals['_DATAFRAME']._serialized_end=1967 + _globals['_SERIES']._serialized_start=1970 + _globals['_SERIES']._serialized_end=2131 + _globals['_NDARRAY']._serialized_start=2134 + _globals['_NDARRAY']._serialized_end=2584 + _globals['_NDARRAY_DTYPE']._serialized_start=2415 + _globals['_NDARRAY_DTYPE']._serialized_end=2584 + _globals['_BENTOSERVICE']._serialized_start=2587 + _globals['_BENTOSERVICE']._serialized_end=2768 +_builder.BuildServices(DESCRIPTOR, 'service_pb2', _globals) +# @@protoc_insertion_point(module_scope) diff --git a/openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2.pyi b/openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2.pyi new file mode 100644 index 00000000..3290dc12 --- /dev/null +++ b/openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2.pyi @@ -0,0 +1,624 @@ +""" +@generated by mypy-protobuf. Do not edit manually! +isort:skip_file +Vendorred from: https://github.com/bentoml/BentoML/blob/main/src/bentoml/grpc/v1/service.proto""" +import abc +import builtins +import collections.abc +import concurrent.futures +import google.protobuf.descriptor +import google.protobuf.internal.containers +import google.protobuf.internal.enum_type_wrapper +import google.protobuf.message +import google.protobuf.service +import google.protobuf.struct_pb2 +import google.protobuf.wrappers_pb2 +import sys +import typing + +if sys.version_info >= (3, 10): + import typing as typing_extensions +else: + import typing_extensions + +DESCRIPTOR: google.protobuf.descriptor.FileDescriptor + +@typing_extensions.final +class ServiceMetadataRequest(google.protobuf.message.Message): + """ServiceMetadataRequest message doesn't take any arguments.""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... + +global___ServiceMetadataRequest = ServiceMetadataRequest + +@typing_extensions.final +class ServiceMetadataResponse(google.protobuf.message.Message): + """ServiceMetadataResponse returns metadata of bentoml.Service. + Currently it includes name, version, apis, and docs. + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + @typing_extensions.final + class DescriptorMetadata(google.protobuf.message.Message): + """DescriptorMetadata is a metadata of any given IODescriptor.""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + DESCRIPTOR_ID_FIELD_NUMBER: builtins.int + ATTRIBUTES_FIELD_NUMBER: builtins.int + descriptor_id: builtins.str + """descriptor_id describes the given ID of the descriptor, which matches with our OpenAPI definition.""" + @property + def attributes(self) -> google.protobuf.struct_pb2.Struct: + """attributes is the kwargs of the given descriptor.""" + def __init__( + self, + *, + descriptor_id: builtins.str | None = ..., + attributes: google.protobuf.struct_pb2.Struct | None = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_descriptor_id", b"_descriptor_id", "attributes", b"attributes", "descriptor_id", b"descriptor_id"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_descriptor_id", b"_descriptor_id", "attributes", b"attributes", "descriptor_id", b"descriptor_id"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_descriptor_id", b"_descriptor_id"]) -> typing_extensions.Literal["descriptor_id"] | None: ... + + @typing_extensions.final + class InferenceAPI(google.protobuf.message.Message): + """InferenceAPI is bentoml._internal.service.inferece_api.InferenceAPI + that is exposed to gRPC client. + There is no way for reflection to get information of given @svc.api. + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + NAME_FIELD_NUMBER: builtins.int + INPUT_FIELD_NUMBER: builtins.int + OUTPUT_FIELD_NUMBER: builtins.int + DOCS_FIELD_NUMBER: builtins.int + name: builtins.str + """name is the name of the API.""" + @property + def input(self) -> global___ServiceMetadataResponse.DescriptorMetadata: + """input is the input descriptor of the API.""" + @property + def output(self) -> global___ServiceMetadataResponse.DescriptorMetadata: + """output is the output descriptor of the API.""" + docs: builtins.str + """docs is the optional documentation of the API.""" + def __init__( + self, + *, + name: builtins.str = ..., + input: global___ServiceMetadataResponse.DescriptorMetadata | None = ..., + output: global___ServiceMetadataResponse.DescriptorMetadata | None = ..., + docs: builtins.str | None = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_docs", b"_docs", "_input", b"_input", "_output", b"_output", "docs", b"docs", "input", b"input", "output", b"output"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_docs", b"_docs", "_input", b"_input", "_output", b"_output", "docs", b"docs", "input", b"input", "name", b"name", "output", b"output"]) -> None: ... + @typing.overload + def WhichOneof(self, oneof_group: typing_extensions.Literal["_docs", b"_docs"]) -> typing_extensions.Literal["docs"] | None: ... + @typing.overload + def WhichOneof(self, oneof_group: typing_extensions.Literal["_input", b"_input"]) -> typing_extensions.Literal["input"] | None: ... + @typing.overload + def WhichOneof(self, oneof_group: typing_extensions.Literal["_output", b"_output"]) -> typing_extensions.Literal["output"] | None: ... + + NAME_FIELD_NUMBER: builtins.int + APIS_FIELD_NUMBER: builtins.int + DOCS_FIELD_NUMBER: builtins.int + name: builtins.str + """name is the service name.""" + @property + def apis(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___ServiceMetadataResponse.InferenceAPI]: + """apis holds a list of InferenceAPI of the service.""" + docs: builtins.str + """docs is the documentation of the service.""" + def __init__( + self, + *, + name: builtins.str = ..., + apis: collections.abc.Iterable[global___ServiceMetadataResponse.InferenceAPI] | None = ..., + docs: builtins.str = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["apis", b"apis", "docs", b"docs", "name", b"name"]) -> None: ... + +global___ServiceMetadataResponse = ServiceMetadataResponse + +@typing_extensions.final +class Request(google.protobuf.message.Message): + """Request message for incoming Call.""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + API_NAME_FIELD_NUMBER: builtins.int + NDARRAY_FIELD_NUMBER: builtins.int + DATAFRAME_FIELD_NUMBER: builtins.int + SERIES_FIELD_NUMBER: builtins.int + FILE_FIELD_NUMBER: builtins.int + TEXT_FIELD_NUMBER: builtins.int + JSON_FIELD_NUMBER: builtins.int + MULTIPART_FIELD_NUMBER: builtins.int + SERIALIZED_BYTES_FIELD_NUMBER: builtins.int + api_name: builtins.str + """api_name defines the API entrypoint to call. + api_name is the name of the function defined in bentoml.Service. + Example: + + @svc.api(input=NumpyNdarray(), output=File()) + def predict(input: NDArray[float]) -> bytes: + ... + + api_name is "predict" in this case. + """ + @property + def ndarray(self) -> global___NDArray: + """NDArray represents a n-dimensional array of arbitrary type.""" + @property + def dataframe(self) -> global___DataFrame: + """DataFrame represents any tabular data type. We are using + DataFrame as a trivial representation for tabular type. + """ + @property + def series(self) -> global___Series: + """Series portrays a series of values. This can be used for + representing Series types in tabular data. + """ + @property + def file(self) -> global___File: + """File represents for any arbitrary file type. This can be + plaintext, image, video, audio, etc. + """ + @property + def text(self) -> google.protobuf.wrappers_pb2.StringValue: + """Text represents a string inputs.""" + @property + def json(self) -> google.protobuf.struct_pb2.Value: + """JSON is represented by using google.protobuf.Value. + see https://github.com/protocolbuffers/protobuf/blob/main/src/google/protobuf/struct.proto + """ + @property + def multipart(self) -> global___Multipart: + """Multipart represents a multipart message. + It comprises of a mapping from given type name to a subset of aforementioned types. + """ + serialized_bytes: builtins.bytes + """serialized_bytes is for data serialized in BentoML's internal serialization format.""" + def __init__( + self, + *, + api_name: builtins.str = ..., + ndarray: global___NDArray | None = ..., + dataframe: global___DataFrame | None = ..., + series: global___Series | None = ..., + file: global___File | None = ..., + text: google.protobuf.wrappers_pb2.StringValue | None = ..., + json: google.protobuf.struct_pb2.Value | None = ..., + multipart: global___Multipart | None = ..., + serialized_bytes: builtins.bytes = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["content", b"content", "dataframe", b"dataframe", "file", b"file", "json", b"json", "multipart", b"multipart", "ndarray", b"ndarray", "serialized_bytes", b"serialized_bytes", "series", b"series", "text", b"text"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["api_name", b"api_name", "content", b"content", "dataframe", b"dataframe", "file", b"file", "json", b"json", "multipart", b"multipart", "ndarray", b"ndarray", "serialized_bytes", b"serialized_bytes", "series", b"series", "text", b"text"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["content", b"content"]) -> typing_extensions.Literal["ndarray", "dataframe", "series", "file", "text", "json", "multipart", "serialized_bytes"] | None: ... + +global___Request = Request + +@typing_extensions.final +class Response(google.protobuf.message.Message): + """Request message for incoming Call.""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + NDARRAY_FIELD_NUMBER: builtins.int + DATAFRAME_FIELD_NUMBER: builtins.int + SERIES_FIELD_NUMBER: builtins.int + FILE_FIELD_NUMBER: builtins.int + TEXT_FIELD_NUMBER: builtins.int + JSON_FIELD_NUMBER: builtins.int + MULTIPART_FIELD_NUMBER: builtins.int + SERIALIZED_BYTES_FIELD_NUMBER: builtins.int + @property + def ndarray(self) -> global___NDArray: + """NDArray represents a n-dimensional array of arbitrary type.""" + @property + def dataframe(self) -> global___DataFrame: + """DataFrame represents any tabular data type. We are using + DataFrame as a trivial representation for tabular type. + """ + @property + def series(self) -> global___Series: + """Series portrays a series of values. This can be used for + representing Series types in tabular data. + """ + @property + def file(self) -> global___File: + """File represents for any arbitrary file type. This can be + plaintext, image, video, audio, etc. + """ + @property + def text(self) -> google.protobuf.wrappers_pb2.StringValue: + """Text represents a string inputs.""" + @property + def json(self) -> google.protobuf.struct_pb2.Value: + """JSON is represented by using google.protobuf.Value. + see https://github.com/protocolbuffers/protobuf/blob/main/src/google/protobuf/struct.proto + """ + @property + def multipart(self) -> global___Multipart: + """Multipart represents a multipart message. + It comprises of a mapping from given type name to a subset of aforementioned types. + """ + serialized_bytes: builtins.bytes + """serialized_bytes is for data serialized in BentoML's internal serialization format.""" + def __init__( + self, + *, + ndarray: global___NDArray | None = ..., + dataframe: global___DataFrame | None = ..., + series: global___Series | None = ..., + file: global___File | None = ..., + text: google.protobuf.wrappers_pb2.StringValue | None = ..., + json: google.protobuf.struct_pb2.Value | None = ..., + multipart: global___Multipart | None = ..., + serialized_bytes: builtins.bytes = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["content", b"content", "dataframe", b"dataframe", "file", b"file", "json", b"json", "multipart", b"multipart", "ndarray", b"ndarray", "serialized_bytes", b"serialized_bytes", "series", b"series", "text", b"text"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["content", b"content", "dataframe", b"dataframe", "file", b"file", "json", b"json", "multipart", b"multipart", "ndarray", b"ndarray", "serialized_bytes", b"serialized_bytes", "series", b"series", "text", b"text"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["content", b"content"]) -> typing_extensions.Literal["ndarray", "dataframe", "series", "file", "text", "json", "multipart", "serialized_bytes"] | None: ... + +global___Response = Response + +@typing_extensions.final +class Part(google.protobuf.message.Message): + """Part represents possible value types for multipart message. + These are the same as the types in Request message. + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + NDARRAY_FIELD_NUMBER: builtins.int + DATAFRAME_FIELD_NUMBER: builtins.int + SERIES_FIELD_NUMBER: builtins.int + FILE_FIELD_NUMBER: builtins.int + TEXT_FIELD_NUMBER: builtins.int + JSON_FIELD_NUMBER: builtins.int + SERIALIZED_BYTES_FIELD_NUMBER: builtins.int + @property + def ndarray(self) -> global___NDArray: + """NDArray represents a n-dimensional array of arbitrary type.""" + @property + def dataframe(self) -> global___DataFrame: + """DataFrame represents any tabular data type. We are using + DataFrame as a trivial representation for tabular type. + """ + @property + def series(self) -> global___Series: + """Series portrays a series of values. This can be used for + representing Series types in tabular data. + """ + @property + def file(self) -> global___File: + """File represents for any arbitrary file type. This can be + plaintext, image, video, audio, etc. + """ + @property + def text(self) -> google.protobuf.wrappers_pb2.StringValue: + """Text represents a string inputs.""" + @property + def json(self) -> google.protobuf.struct_pb2.Value: + """JSON is represented by using google.protobuf.Value. + see https://github.com/protocolbuffers/protobuf/blob/main/src/google/protobuf/struct.proto + """ + serialized_bytes: builtins.bytes + """serialized_bytes is for data serialized in BentoML's internal serialization format.""" + def __init__( + self, + *, + ndarray: global___NDArray | None = ..., + dataframe: global___DataFrame | None = ..., + series: global___Series | None = ..., + file: global___File | None = ..., + text: google.protobuf.wrappers_pb2.StringValue | None = ..., + json: google.protobuf.struct_pb2.Value | None = ..., + serialized_bytes: builtins.bytes = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["dataframe", b"dataframe", "file", b"file", "json", b"json", "ndarray", b"ndarray", "representation", b"representation", "serialized_bytes", b"serialized_bytes", "series", b"series", "text", b"text"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["dataframe", b"dataframe", "file", b"file", "json", b"json", "ndarray", b"ndarray", "representation", b"representation", "serialized_bytes", b"serialized_bytes", "series", b"series", "text", b"text"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["representation", b"representation"]) -> typing_extensions.Literal["ndarray", "dataframe", "series", "file", "text", "json", "serialized_bytes"] | None: ... + +global___Part = Part + +@typing_extensions.final +class Multipart(google.protobuf.message.Message): + """Multipart represents a multipart message. + It comprises of a mapping from given type name to a subset of aforementioned types. + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + @typing_extensions.final + class FieldsEntry(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + KEY_FIELD_NUMBER: builtins.int + VALUE_FIELD_NUMBER: builtins.int + key: builtins.str + @property + def value(self) -> global___Part: ... + def __init__( + self, + *, + key: builtins.str = ..., + value: global___Part | None = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["value", b"value"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"]) -> None: ... + + FIELDS_FIELD_NUMBER: builtins.int + @property + def fields(self) -> google.protobuf.internal.containers.MessageMap[builtins.str, global___Part]: ... + def __init__( + self, + *, + fields: collections.abc.Mapping[builtins.str, global___Part] | None = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["fields", b"fields"]) -> None: ... + +global___Multipart = Multipart + +@typing_extensions.final +class File(google.protobuf.message.Message): + """File represents for any arbitrary file type. This can be + plaintext, image, video, audio, etc. + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + KIND_FIELD_NUMBER: builtins.int + CONTENT_FIELD_NUMBER: builtins.int + kind: builtins.str + """optional file type, let it be csv, text, parquet, etc. + v1alpha1 uses 1 as FileType enum. + """ + content: builtins.bytes + """contents of file as bytes.""" + def __init__( + self, + *, + kind: builtins.str | None = ..., + content: builtins.bytes = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_kind", b"_kind", "kind", b"kind"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_kind", b"_kind", "content", b"content", "kind", b"kind"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_kind", b"_kind"]) -> typing_extensions.Literal["kind"] | None: ... + +global___File = File + +@typing_extensions.final +class DataFrame(google.protobuf.message.Message): + """DataFrame represents any tabular data type. We are using + DataFrame as a trivial representation for tabular type. + This message carries given implementation of tabular data based on given orientation. + TODO: support index, records, etc. + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + COLUMN_NAMES_FIELD_NUMBER: builtins.int + COLUMNS_FIELD_NUMBER: builtins.int + @property + def column_names(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: + """columns name""" + @property + def columns(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___Series]: + """columns orient. + { column ↠ { index ↠ value } } + """ + def __init__( + self, + *, + column_names: collections.abc.Iterable[builtins.str] | None = ..., + columns: collections.abc.Iterable[global___Series] | None = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["column_names", b"column_names", "columns", b"columns"]) -> None: ... + +global___DataFrame = DataFrame + +@typing_extensions.final +class Series(google.protobuf.message.Message): + """Series portrays a series of values. This can be used for + representing Series types in tabular data. + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + BOOL_VALUES_FIELD_NUMBER: builtins.int + FLOAT_VALUES_FIELD_NUMBER: builtins.int + INT32_VALUES_FIELD_NUMBER: builtins.int + INT64_VALUES_FIELD_NUMBER: builtins.int + STRING_VALUES_FIELD_NUMBER: builtins.int + DOUBLE_VALUES_FIELD_NUMBER: builtins.int + @property + def bool_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.bool]: + """A bool parameter value""" + @property + def float_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: + """A float parameter value""" + @property + def int32_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: + """A int32 parameter value""" + @property + def int64_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: + """A int64 parameter value""" + @property + def string_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: + """A string parameter value""" + @property + def double_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: + """represents a double parameter value.""" + def __init__( + self, + *, + bool_values: collections.abc.Iterable[builtins.bool] | None = ..., + float_values: collections.abc.Iterable[builtins.float] | None = ..., + int32_values: collections.abc.Iterable[builtins.int] | None = ..., + int64_values: collections.abc.Iterable[builtins.int] | None = ..., + string_values: collections.abc.Iterable[builtins.str] | None = ..., + double_values: collections.abc.Iterable[builtins.float] | None = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["bool_values", b"bool_values", "double_values", b"double_values", "float_values", b"float_values", "int32_values", b"int32_values", "int64_values", b"int64_values", "string_values", b"string_values"]) -> None: ... + +global___Series = Series + +@typing_extensions.final +class NDArray(google.protobuf.message.Message): + """NDArray represents a n-dimensional array of arbitrary type.""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + class _DType: + ValueType = typing.NewType("ValueType", builtins.int) + V: typing_extensions.TypeAlias = ValueType + + class _DTypeEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[NDArray._DType.ValueType], builtins.type): + DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor + DTYPE_UNSPECIFIED: NDArray._DType.ValueType # 0 + """Represents a None type.""" + DTYPE_FLOAT: NDArray._DType.ValueType # 1 + """Represents an float type.""" + DTYPE_DOUBLE: NDArray._DType.ValueType # 2 + """Represents an double type.""" + DTYPE_BOOL: NDArray._DType.ValueType # 3 + """Represents a bool type.""" + DTYPE_INT32: NDArray._DType.ValueType # 4 + """Represents an int32 type.""" + DTYPE_INT64: NDArray._DType.ValueType # 5 + """Represents an int64 type.""" + DTYPE_UINT32: NDArray._DType.ValueType # 6 + """Represents a uint32 type.""" + DTYPE_UINT64: NDArray._DType.ValueType # 7 + """Represents a uint64 type.""" + DTYPE_STRING: NDArray._DType.ValueType # 8 + """Represents a string type.""" + + class DType(_DType, metaclass=_DTypeEnumTypeWrapper): + """Represents data type of a given array.""" + + DTYPE_UNSPECIFIED: NDArray.DType.ValueType # 0 + """Represents a None type.""" + DTYPE_FLOAT: NDArray.DType.ValueType # 1 + """Represents an float type.""" + DTYPE_DOUBLE: NDArray.DType.ValueType # 2 + """Represents an double type.""" + DTYPE_BOOL: NDArray.DType.ValueType # 3 + """Represents a bool type.""" + DTYPE_INT32: NDArray.DType.ValueType # 4 + """Represents an int32 type.""" + DTYPE_INT64: NDArray.DType.ValueType # 5 + """Represents an int64 type.""" + DTYPE_UINT32: NDArray.DType.ValueType # 6 + """Represents a uint32 type.""" + DTYPE_UINT64: NDArray.DType.ValueType # 7 + """Represents a uint64 type.""" + DTYPE_STRING: NDArray.DType.ValueType # 8 + """Represents a string type.""" + + DTYPE_FIELD_NUMBER: builtins.int + SHAPE_FIELD_NUMBER: builtins.int + STRING_VALUES_FIELD_NUMBER: builtins.int + FLOAT_VALUES_FIELD_NUMBER: builtins.int + DOUBLE_VALUES_FIELD_NUMBER: builtins.int + BOOL_VALUES_FIELD_NUMBER: builtins.int + INT32_VALUES_FIELD_NUMBER: builtins.int + INT64_VALUES_FIELD_NUMBER: builtins.int + UINT32_VALUES_FIELD_NUMBER: builtins.int + UINT64_VALUES_FIELD_NUMBER: builtins.int + dtype: global___NDArray.DType.ValueType + """DTYPE is the data type of given array""" + @property + def shape(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: + """shape is the shape of given array.""" + @property + def string_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: + """represents a string parameter value.""" + @property + def float_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: + """represents a float parameter value.""" + @property + def double_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: + """represents a double parameter value.""" + @property + def bool_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.bool]: + """represents a bool parameter value.""" + @property + def int32_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: + """represents a int32 parameter value.""" + @property + def int64_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: + """represents a int64 parameter value.""" + @property + def uint32_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: + """represents a uint32 parameter value.""" + @property + def uint64_values(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: + """represents a uint64 parameter value.""" + def __init__( + self, + *, + dtype: global___NDArray.DType.ValueType = ..., + shape: collections.abc.Iterable[builtins.int] | None = ..., + string_values: collections.abc.Iterable[builtins.str] | None = ..., + float_values: collections.abc.Iterable[builtins.float] | None = ..., + double_values: collections.abc.Iterable[builtins.float] | None = ..., + bool_values: collections.abc.Iterable[builtins.bool] | None = ..., + int32_values: collections.abc.Iterable[builtins.int] | None = ..., + int64_values: collections.abc.Iterable[builtins.int] | None = ..., + uint32_values: collections.abc.Iterable[builtins.int] | None = ..., + uint64_values: collections.abc.Iterable[builtins.int] | None = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["bool_values", b"bool_values", "double_values", b"double_values", "dtype", b"dtype", "float_values", b"float_values", "int32_values", b"int32_values", "int64_values", b"int64_values", "shape", b"shape", "string_values", b"string_values", "uint32_values", b"uint32_values", "uint64_values", b"uint64_values"]) -> None: ... + +global___NDArray = NDArray + +class BentoService(google.protobuf.service.Service, metaclass=abc.ABCMeta): + """a gRPC BentoServer.""" + + DESCRIPTOR: google.protobuf.descriptor.ServiceDescriptor + @abc.abstractmethod + def Call( + inst: BentoService, # pyright: ignore[reportSelfClsParameterName] + rpc_controller: google.protobuf.service.RpcController, + request: global___Request, + callback: collections.abc.Callable[[global___Response], None] | None, + ) -> concurrent.futures.Future[global___Response]: + """Call handles methodcaller of given API entrypoint.""" + @abc.abstractmethod + def ServiceMetadata( + inst: BentoService, # pyright: ignore[reportSelfClsParameterName] + rpc_controller: google.protobuf.service.RpcController, + request: global___ServiceMetadataRequest, + callback: collections.abc.Callable[[global___ServiceMetadataResponse], None] | None, + ) -> concurrent.futures.Future[global___ServiceMetadataResponse]: + """ServiceMetadata returns metadata of bentoml.Service.""" + +class BentoService_Stub(BentoService): + """a gRPC BentoServer.""" + + def __init__(self, rpc_channel: google.protobuf.service.RpcChannel) -> None: ... + DESCRIPTOR: google.protobuf.descriptor.ServiceDescriptor + def Call( + inst: BentoService_Stub, # pyright: ignore[reportSelfClsParameterName] + rpc_controller: google.protobuf.service.RpcController, + request: global___Request, + callback: collections.abc.Callable[[global___Response], None] | None = ..., + ) -> concurrent.futures.Future[global___Response]: + """Call handles methodcaller of given API entrypoint.""" + def ServiceMetadata( + inst: BentoService_Stub, # pyright: ignore[reportSelfClsParameterName] + rpc_controller: google.protobuf.service.RpcController, + request: global___ServiceMetadataRequest, + callback: collections.abc.Callable[[global___ServiceMetadataResponse], None] | None = ..., + ) -> concurrent.futures.Future[global___ServiceMetadataResponse]: + """ServiceMetadata returns metadata of bentoml.Service.""" diff --git a/openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2_grpc.py b/openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2_grpc.py new file mode 100644 index 00000000..cff4c957 --- /dev/null +++ b/openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2_grpc.py @@ -0,0 +1,104 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc + +import service_pb2 as service__pb2 + + +class BentoServiceStub(object): + """a gRPC BentoServer. + """ + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + self.Call = channel.unary_unary( + '/bentoml.grpc.v1.BentoService/Call', + request_serializer=service__pb2.Request.SerializeToString, + response_deserializer=service__pb2.Response.FromString, + ) + self.ServiceMetadata = channel.unary_unary( + '/bentoml.grpc.v1.BentoService/ServiceMetadata', + request_serializer=service__pb2.ServiceMetadataRequest.SerializeToString, + response_deserializer=service__pb2.ServiceMetadataResponse.FromString, + ) + + +class BentoServiceServicer(object): + """a gRPC BentoServer. + """ + + def Call(self, request, context): + """Call handles methodcaller of given API entrypoint. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def ServiceMetadata(self, request, context): + """ServiceMetadata returns metadata of bentoml.Service. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_BentoServiceServicer_to_server(servicer, server): + rpc_method_handlers = { + 'Call': grpc.unary_unary_rpc_method_handler( + servicer.Call, + request_deserializer=service__pb2.Request.FromString, + response_serializer=service__pb2.Response.SerializeToString, + ), + 'ServiceMetadata': grpc.unary_unary_rpc_method_handler( + servicer.ServiceMetadata, + request_deserializer=service__pb2.ServiceMetadataRequest.FromString, + response_serializer=service__pb2.ServiceMetadataResponse.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'bentoml.grpc.v1.BentoService', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + + + # This class is part of an EXPERIMENTAL API. +class BentoService(object): + """a gRPC BentoServer. + """ + + @staticmethod + def Call(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/bentoml.grpc.v1.BentoService/Call', + service__pb2.Request.SerializeToString, + service__pb2.Response.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def ServiceMetadata(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/bentoml.grpc.v1.BentoService/ServiceMetadata', + service__pb2.ServiceMetadataRequest.SerializeToString, + service__pb2.ServiceMetadataResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2_grpc.pyi b/openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2_grpc.pyi new file mode 100644 index 00000000..11f7b401 --- /dev/null +++ b/openllm-client/src/openllm_client/pb/v1/_generated_pb4/service_pb2_grpc.pyi @@ -0,0 +1,67 @@ +""" +@generated by mypy-protobuf. Do not edit manually! +isort:skip_file +Vendorred from: https://github.com/bentoml/BentoML/blob/main/src/bentoml/grpc/v1/service.proto""" +import abc +import collections.abc +import grpc +import grpc.aio +import service_pb2 +import typing + +_T = typing.TypeVar('_T') + +class _MaybeAsyncIterator(collections.abc.AsyncIterator[_T], collections.abc.Iterator[_T], metaclass=abc.ABCMeta): + ... + +class _ServicerContext(grpc.ServicerContext, grpc.aio.ServicerContext): # type: ignore + ... + +class BentoServiceStub: + """a gRPC BentoServer.""" + + def __init__(self, channel: typing.Union[grpc.Channel, grpc.aio.Channel]) -> None: ... + Call: grpc.UnaryUnaryMultiCallable[ + service_pb2.Request, + service_pb2.Response, + ] + """Call handles methodcaller of given API entrypoint.""" + ServiceMetadata: grpc.UnaryUnaryMultiCallable[ + service_pb2.ServiceMetadataRequest, + service_pb2.ServiceMetadataResponse, + ] + """ServiceMetadata returns metadata of bentoml.Service.""" + +class BentoServiceAsyncStub: + """a gRPC BentoServer.""" + + Call: grpc.aio.UnaryUnaryMultiCallable[ + service_pb2.Request, + service_pb2.Response, + ] + """Call handles methodcaller of given API entrypoint.""" + ServiceMetadata: grpc.aio.UnaryUnaryMultiCallable[ + service_pb2.ServiceMetadataRequest, + service_pb2.ServiceMetadataResponse, + ] + """ServiceMetadata returns metadata of bentoml.Service.""" + +class BentoServiceServicer(metaclass=abc.ABCMeta): + """a gRPC BentoServer.""" + + @abc.abstractmethod + def Call( + self, + request: service_pb2.Request, + context: _ServicerContext, + ) -> typing.Union[service_pb2.Response, collections.abc.Awaitable[service_pb2.Response]]: + """Call handles methodcaller of given API entrypoint.""" + @abc.abstractmethod + def ServiceMetadata( + self, + request: service_pb2.ServiceMetadataRequest, + context: _ServicerContext, + ) -> typing.Union[service_pb2.ServiceMetadataResponse, collections.abc.Awaitable[service_pb2.ServiceMetadataResponse]]: + """ServiceMetadata returns metadata of bentoml.Service.""" + +def add_BentoServiceServicer_to_server(servicer: BentoServiceServicer, server: typing.Union[grpc.Server, grpc.aio.Server]) -> None: ... diff --git a/openllm-client/src/openllm_client/pb/v1/service_pb2.py b/openllm-client/src/openllm_client/pb/v1/service_pb2.py new file mode 100644 index 00000000..61998b01 --- /dev/null +++ b/openllm-client/src/openllm_client/pb/v1/service_pb2.py @@ -0,0 +1,6 @@ +from __future__ import annotations +from google.protobuf import __version__ +if __version__.startswith("4"): + from ._generated_pb4.service_pb2 import * +else: + from ._generated_pb3.service_pb2 import * diff --git a/openllm-client/src/openllm_client/pb/v1/service_pb2.pyi b/openllm-client/src/openllm_client/pb/v1/service_pb2.pyi new file mode 100644 index 00000000..61998b01 --- /dev/null +++ b/openllm-client/src/openllm_client/pb/v1/service_pb2.pyi @@ -0,0 +1,6 @@ +from __future__ import annotations +from google.protobuf import __version__ +if __version__.startswith("4"): + from ._generated_pb4.service_pb2 import * +else: + from ._generated_pb3.service_pb2 import * diff --git a/openllm-client/src/openllm_client/pb/v1/service_pb2_grpc.py b/openllm-client/src/openllm_client/pb/v1/service_pb2_grpc.py new file mode 100644 index 00000000..8b9707fd --- /dev/null +++ b/openllm-client/src/openllm_client/pb/v1/service_pb2_grpc.py @@ -0,0 +1,6 @@ +from __future__ import annotations +from google.protobuf import __version__ +if __version__.startswith("4"): + from ._generated_pb4.service_pb2_grpc import * +else: + from ._generated_pb3.service_pb2_grpc import * diff --git a/openllm-client/src/openllm_client/pb/v1/service_pb2_grpc.pyi b/openllm-client/src/openllm_client/pb/v1/service_pb2_grpc.pyi new file mode 100644 index 00000000..8b9707fd --- /dev/null +++ b/openllm-client/src/openllm_client/pb/v1/service_pb2_grpc.pyi @@ -0,0 +1,6 @@ +from __future__ import annotations +from google.protobuf import __version__ +if __version__.startswith("4"): + from ._generated_pb4.service_pb2_grpc import * +else: + from ._generated_pb3.service_pb2_grpc import * diff --git a/openllm-core/src/openllm_core/_schema.py b/openllm-core/src/openllm_core/_schema.py index 424c30ea..418b3668 100644 --- a/openllm-core/src/openllm_core/_schema.py +++ b/openllm-core/src/openllm_core/_schema.py @@ -71,6 +71,8 @@ class MetadataOutput: configuration: str supports_embeddings: bool supports_hf_agent: bool + prompt_template: str + system_message: str @attr.frozen(slots=True) class EmbeddingsOutput: diff --git a/openllm-core/src/openllm_core/_typing_compat.py b/openllm-core/src/openllm_core/_typing_compat.py index 6c25e1f1..e20064fd 100644 --- a/openllm-core/src/openllm_core/_typing_compat.py +++ b/openllm-core/src/openllm_core/_typing_compat.py @@ -111,6 +111,8 @@ class LLMRunner(bentoml.Runner, t.Generic[M, T]): supports_embeddings: bool supports_hf_agent: bool has_adapters: bool + system_message: str | None + prompt_template: str | None embeddings: RunnerMethod[LLMRunnable[M, T], [list[str]], t.Sequence[EmbeddingsOutput]] generate: RunnerMethod[LLMRunnable[M, T], [str], list[t.Any]] generate_one: RunnerMethod[LLMRunnable[M, T], [str, list[str]], t.Sequence[dict[t.Literal['generated_text'], str]]] diff --git a/openllm-core/src/openllm_core/config/configuration_baichuan.py b/openllm-core/src/openllm_core/config/configuration_baichuan.py index ba3be4e8..06a36fd8 100644 --- a/openllm-core/src/openllm_core/config/configuration_baichuan.py +++ b/openllm-core/src/openllm_core/config/configuration_baichuan.py @@ -56,6 +56,14 @@ class BaichuanConfig(openllm_core.LLMConfig): top_p: float = 0.7 temperature: float = 0.95 + @property + def default_prompt_template(self) -> str: + return DEFAULT_PROMPT_TEMPLATE.to_string() + + @property + def default_system_message(self) -> str: + return DEFAULT_SYSTEM_MESSAGE + def sanitize_parameters(self, prompt: str, prompt_template: PromptTemplate | str | None = None, diff --git a/openllm-core/src/openllm_core/config/configuration_llama.py b/openllm-core/src/openllm_core/config/configuration_llama.py index 66c580dd..cd13bf21 100644 --- a/openllm-core/src/openllm_core/config/configuration_llama.py +++ b/openllm-core/src/openllm_core/config/configuration_llama.py @@ -69,7 +69,7 @@ class LlamaConfig(openllm_core.LLMConfig): Refer to [Llama's model card](https://huggingface.co/docs/transformers/main/model_doc/llama) for more information. """ - use_llama2_prompt: bool = dantic.Field(False, description='Whether to use the prompt format for Llama 2. Disable this when working with Llama 1.') + use_llama2_prompt: bool = dantic.Field(True, description='Whether to use the prompt format for Llama 2. Disable this when working with Llama 1.') __config__ = { 'name_type': 'lowercase', 'url': 'https://github.com/facebookresearch/llama', @@ -106,6 +106,14 @@ class LlamaConfig(openllm_core.LLMConfig): best_of: int = 1 presence_penalty: float = 0.5 + @property + def default_prompt_template(self, use_llama2_prompt: bool = True) -> str: + return DEFAULT_PROMPT_TEMPLATE('v2' if use_llama2_prompt else 'v1').to_string() + + @property + def default_system_message(self) -> str: + return DEFAULT_SYSTEM_MESSAGE + def sanitize_parameters(self, prompt: str, prompt_template: PromptTemplate | str | None = None, diff --git a/openllm-python/src/openllm/_llm.py b/openllm-python/src/openllm/_llm.py index 4e16f9c1..3f303c91 100644 --- a/openllm-python/src/openllm/_llm.py +++ b/openllm-python/src/openllm/_llm.py @@ -39,7 +39,6 @@ from openllm_core._typing_compat import T from openllm_core._typing_compat import TupleAny from openllm_core._typing_compat import overload from openllm_core.prompts import PromptTemplate -from openllm_core.prompts import process_prompt from openllm_core.utils import DEBUG from openllm_core.utils import MYPY from openllm_core.utils import EnvVarMixin @@ -620,7 +619,7 @@ class LLM(LLMInterface[M, T], ReprMixin): # set default tokenizer kwargs tokenizer_kwds.update({'padding_side': 'left', 'truncation_side': 'left'}) - # parsing tokenizer and model kwargs, as the hierachy is param pass > default + # parsing tokenizer and model kwargs, as the hierarchy is param pass > default normalized_model_kwds, normalized_tokenizer_kwds = normalize_attrs_to_model_tokenizer_pair(**attrs) # NOTE: Save the args and kwargs for latter load self.__attrs_init__(llm_config, quantization_config, _quantize, model_id, args, { @@ -1211,6 +1210,8 @@ def llm_runnable_class(self: LLM[M, T], embeddings_sig: ModelSignature, generate if adapter_name is not None: __self.set_adapter(adapter_name) request_id: str | None = attrs.pop('request_id', None) if request_id is None: raise ValueError('request_id must not be None.') + prompt, *_ = self.sanitize_parameters(prompt, **attrs) + if openllm_core.utils.DEBUG: logger.debug('Prompt:\n%s', prompt) if stop_token_ids is None: stop_token_ids = [] stop_token_ids.append(self.tokenizer.eos_token_id) @@ -1237,7 +1238,6 @@ def llm_runnable_class(self: LLM[M, T], embeddings_sig: ModelSignature, generate async def vllm_generate_iterator(__self: _Runnable, prompt: str, **attrs: t.Any) -> t.AsyncGenerator[str, None]: # TODO: System prompt support pre = 0 - prompt = process_prompt(prompt, None, False) echo = attrs.pop('echo', False) stop: str | t.Iterable[str] | None = attrs.pop('stop', None) stop_token_ids: list[int] | None = attrs.pop('stop_token_ids', None) @@ -1247,6 +1247,8 @@ def llm_runnable_class(self: LLM[M, T], embeddings_sig: ModelSignature, generate if adapter_name is not None: __self.set_adapter(adapter_name) request_id: str | None = attrs.pop('request_id', None) if request_id is None: raise ValueError('request_id must not be None.') + prompt, *_ = self.sanitize_parameters(prompt, **attrs) + if openllm_core.utils.DEBUG: logger.debug('Prompt:\n%s', prompt) if stop_token_ids is None: stop_token_ids = [] stop_token_ids.append(self.tokenizer.eos_token_id) @@ -1342,7 +1344,9 @@ def llm_runner_class(self: LLM[M, T]) -> type[LLMRunner[M, T]]: '__repr_args__': _wrapped_repr_args, 'supports_embeddings': self['supports_embeddings'], 'supports_hf_agent': self['supports_generate_one'], - 'has_adapters': self._adapters_mapping is not None + 'has_adapters': self._adapters_mapping is not None, + 'prompt_template': self._prompt_template.to_string() if self._prompt_template else self.config.default_prompt_template, + 'system_message': self._system_message if self._system_message else self.config.default_system_message, })) __all__ = ['LLMRunner', 'LLMRunnable', 'Runner', 'LLM', 'llm_runner_class', 'llm_runnable_class', 'EmbeddingsOutput'] diff --git a/openllm-python/src/openllm/_service.py b/openllm-python/src/openllm/_service.py index e5aeea27..47084aa9 100644 --- a/openllm-python/src/openllm/_service.py +++ b/openllm-python/src/openllm/_service.py @@ -32,13 +32,7 @@ model = svars.model model_id = svars.model_id adapter_map = svars.adapter_map llm_config = openllm.AutoConfig.for_model(model) -runner = openllm.Runner( - model, - llm_config=llm_config, - model_id=model_id, - ensure_available=False, - adapter_map=orjson.loads(adapter_map) -) +runner = openllm.Runner(model, llm_config=llm_config, model_id=model_id, ensure_available=False, adapter_map=orjson.loads(adapter_map)) generic_embedding_runner = bentoml.Runner(openllm.GenericEmbeddingRunnable, # XXX: remove arg-type once bentoml.Runner is correct set with type name='llm-generic-embedding', scheduling_strategy=openllm_core.CascadingResourceStrategy, @@ -189,9 +183,11 @@ async def chat_completion_v1(input_dict: dict[str, t.Any], ctx: bentoml.Context) 'timeout': 3600, 'model_name': llm_config['model_name'], 'backend': runner.backend, - 'configuration': '', + 'configuration': llm_config.model_dump(flatten=True), 'supports_embeddings': runner.supports_embeddings, - 'supports_hf_agent': runner.supports_hf_agent + 'supports_hf_agent': runner.supports_hf_agent, + 'prompt_template': runner.prompt_template, + 'system_message': runner.system_message, })) def metadata_v1(_: str) -> openllm.MetadataOutput: return openllm.MetadataOutput(timeout=llm_config['timeout'], @@ -200,7 +196,10 @@ def metadata_v1(_: str) -> openllm.MetadataOutput: model_id=runner.llm.model_id, configuration=llm_config.model_dump_json().decode(), supports_embeddings=runner.supports_embeddings, - supports_hf_agent=runner.supports_hf_agent) + supports_hf_agent=runner.supports_hf_agent, + prompt_template=runner.prompt_template, + system_message=runner.system_message, + ) @svc.api(route='/v1/embeddings', input=bentoml.io.JSON.from_sample(['Hey Jude, welcome to the jungle!', 'What is the meaning of life?']), diff --git a/openllm-python/src/openllm/cli/entrypoint.py b/openllm-python/src/openllm/cli/entrypoint.py index 2367e364..00b47bdd 100644 --- a/openllm-python/src/openllm/cli/entrypoint.py +++ b/openllm-python/src/openllm/cli/entrypoint.py @@ -27,7 +27,6 @@ import itertools import logging import os import platform -import re import subprocess import sys import time @@ -795,6 +794,7 @@ def instruct_command(endpoint: str, timeout: int, agent: LiteralString, output: --text "¡Este es un API muy agradable!" ``` ''' + raise click.ClickException("'instruct' is currently disabled") client = openllm.client.HTTPClient(endpoint, timeout=timeout) try: @@ -844,15 +844,16 @@ def embed_command( termui.echo(gen_embed.embeddings, fg='white') ctx.exit(0) @cli.command() -@shared_client_options +@shared_client_options(output_value='porcelain') @click.option('--server-type', type=click.Choice(['grpc', 'http']), help='Server type', default='http', show_default=True) +@click.option('--stream/--no-stream', type=click.BOOL, is_flag=True, default=True, help='Whether to stream the response.') @click.argument('prompt', type=click.STRING) @click.option( '--sampling-params', help='Define query options. (format: ``--opt temperature=0.8 --opt=top_k:12)', required=False, multiple=True, callback=opt_callback, metavar='ARG=VALUE[,ARG=VALUE]' ) @click.pass_context def query_command( - ctx: click.Context, /, prompt: str, endpoint: str, timeout: int, server_type: t.Literal['http', 'grpc'], output: LiteralOutput, _memoized: DictStrAny, **attrs: t.Any + ctx: click.Context, /, prompt: str, endpoint: str, timeout: int, stream: bool, server_type: t.Literal['http', 'grpc'], output: LiteralOutput, _memoized: DictStrAny, **attrs: t.Any ) -> None: '''Ask a LLM interactively, from a terminal. @@ -862,23 +863,30 @@ def query_command( ``` ''' _memoized = {k: orjson.loads(v[0]) for k, v in _memoized.items() if v} - if server_type == 'grpc': endpoint = re.sub(r'http://', '', endpoint) - client = openllm.client.HTTPClient(endpoint, timeout=timeout) if server_type == 'http' else openllm.client.GrpcClient(endpoint, timeout=timeout) + if server_type == 'grpc': raise click.ClickException("'grpc' is currently disabled.") + # TODO: grpc support + client = openllm.client.HTTPClient(address=endpoint, timeout=timeout) input_fg, generated_fg = 'magenta', 'cyan' if output != 'porcelain': termui.echo('==Input==\n', fg='white') termui.echo(f'{prompt}', fg=input_fg) - res = client.query(prompt, return_response='raw', **{**client.configuration, **_memoized}) + fn = client.generate_stream if stream else client.generate + res = fn(prompt, **{**client._config(), **_memoized}) if output == 'pretty': - response = client.config.postprocess_generate(prompt, res['responses']) - if isinstance(response, dict) and 'text' in response: response = response['text'] termui.echo('\n\n==Responses==\n', fg='white') - termui.echo(response, fg=generated_fg) + if stream: + for it in res: termui.echo(it.text, fg=generated_fg, nl=False) + else: termui.echo(res.responses[0], fg=generated_fg) elif output == 'json': - termui.echo(orjson.dumps(res, option=orjson.OPT_INDENT_2).decode(), fg='white') - else: - termui.echo(res['responses'], fg='white') + if stream: + for it in res: termui.echo(orjson.dumps(bentoml_cattr.unstructure(it), option=orjson.OPT_INDENT_2).decode(), fg='white') + else: termui.echo(orjson.dumps(bentoml_cattr.unstructure(res), option=orjson.OPT_INDENT_2).decode(), fg='white') + else: # noqa: PLR5501 + if stream: + for it in res: termui.echo(it.text, fg=generated_fg, nl=False) + else: termui.echo(res.responses, fg='white') ctx.exit(0) + @cli.group(cls=Extensions, hidden=True, name='extension') def extension_command() -> None: '''Extension for OpenLLM CLI.''' diff --git a/openllm-python/src/openllm/client.py b/openllm-python/src/openllm/client.py index 9c2553b3..6a90bd60 100644 --- a/openllm-python/src/openllm/client.py +++ b/openllm-python/src/openllm/client.py @@ -16,12 +16,10 @@ import typing as t import openllm_client if t.TYPE_CHECKING: - from openllm_client import AsyncGrpcClient as AsyncGrpcClient from openllm_client import AsyncHTTPClient as AsyncHTTPClient - from openllm_client import BaseAsyncClient as BaseAsyncClient - from openllm_client import BaseClient as BaseClient - from openllm_client import GrpcClient as GrpcClient from openllm_client import HTTPClient as HTTPClient + # from openllm_client import AsyncGrpcClient as AsyncGrpcClient + # from openllm_client import GrpcClient as GrpcClient def __dir__() -> t.Sequence[str]: return sorted(dir(openllm_client)) diff --git a/pyproject.toml b/pyproject.toml index b680d202..eafdc972 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -156,6 +156,7 @@ whitelist-regex = ["test_.*"] toplevel = ["openllm"] [tool.ruff] +tab-size = 2 extend-exclude = [ "tools", "examples",