diff --git a/pyproject.toml b/pyproject.toml index 8f10f88a..60e2095f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,29 +1,8 @@ [build-system] -requires = ["hatchling"] build-backend = "hatchling.build" +requires = ["hatchling"] [project] -name = "openllm" -dynamic = ["version"] -description = 'OpenLLM: REST/gRPC API server for running any open Large-Language Model - StableLM, Llama, Alpaca, Dolly, Flan-T5, Custom' -readme = "README.md" -requires-python = ">=3.8" -license = "Apache-2.0" -keywords = [ - "MLOps", - "AI", - "BentoML", - "Model Serving", - "Model Deployment", - "LLMOps", - "Large Language Model", - "Generative AI", - "Stable Diffusion", - "StableLM", - "Alpaca", - "PyTorch", - "Transformers", -] authors = [ { name = "Aaron Pham", email = "aarnphm@bentoml.com" }, { name = "BentoML Team", email = "contact@bentoml.com" }, @@ -57,7 +36,7 @@ dependencies = [ "grpcio-reflection", "httpx[http2]", # transformers[torch] includes torch and transformers - "transformers[torch,accelerate,tokenizers,onnxruntime,onnx]>=4.29.0", + "transformers[torch,accelerate,tokenizers,onnxruntime,onnx,optimum]>=4.29.0", # Super fast JSON serialization "orjson", "inflection", @@ -66,18 +45,33 @@ dependencies = [ # black for generating service file. "black[jupyter]==23.3.0", ] +description = 'OpenLLM: REST/gRPC API server for running any open Large-Language Model - StableLM, Llama, Alpaca, Dolly, Flan-T5, Custom' +dynamic = ["version"] +keywords = [ + "MLOps", + "AI", + "BentoML", + "Model Serving", + "Model Deployment", + "LLMOps", + "Large Language Model", + "Generative AI", + "Stable Diffusion", + "StableLM", + "Alpaca", + "PyTorch", + "Transformers", +] +license = "Apache-2.0" +name = "openllm" +readme = "README.md" +requires-python = ">=3.8" [project.optional-dependencies] -all = [ - 'openllm[fine-tune]', - 'openllm[chatglm]', - 'openllm[falcon]', - 'openllm[flan-t5]', - 'openllm[starcoder]', -] -fine-tune = ["peft", "bitsandbytes", "datasets"] +all = ['openllm[fine-tune]', 'openllm[chatglm]', 'openllm[falcon]', 'openllm[flan-t5]', 'openllm[starcoder]'] chatglm = ['cpm_kernels', 'sentencepiece'] falcon = ['einops'] +fine-tune = ["peft", "bitsandbytes", "datasets"] flan-t5 = ['flax', 'jax', 'jaxlib', 'tensorflow'] starcoder = ['bitsandbytes'] @@ -108,29 +102,24 @@ dependencies = [ "pre-commit", ] [tool.hatch.envs.default.scripts] +cov = ["test-cov", "cov-report"] +cov-report = ["- coverage combine", "coverage report"] setup = "pre-commit install" test = "pytest {args:tests}" test-cov = "coverage run -m pytest {args:tests}" -cov-report = ["- coverage combine", "coverage report"] -cov = ["test-cov", "cov-report"] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11"] [tool.hatch.envs.dev] -detached = true dependencies = ["ruff>=0.0.243", "pyright", "hatch"] +detached = true [tool.hatch.envs.dev.scripts] -typing = "pyright {args:src/openllm tests}" -style = ["ruff {args:.}", "black --check --diff {args:.}"] -fmt = [ - "black {args:.}", - "black --pyi {args:typings/}", - "ruff --fix {args:.}", - "style", -] all = ["fmt", "typing"] +fmt = ["black {args:.}", "black --pyi {args:typings/}", "ruff --fix {args:.}", "style"] +style = ["ruff {args:.}", "black --check --diff {args:.}"] +typing = "pyright {args:src/openllm tests}" [tool.pytest.ini_options] addopts = ["-rfEX", "-pno:warnings"] @@ -138,8 +127,6 @@ python_files = ["test_*.py", "*_test.py"] testpaths = ["tests"] [tool.black] -target-version = ["py311"] -line-length = 120 exclude = ''' ( /( @@ -158,10 +145,10 @@ exclude = ''' | src/openllm/__about__.py ) ''' +line-length = 120 +target-version = ["py311"] [tool.ruff] -target-version = "py311" -line-length = 120 ignore = [ # Allow non-abstract empty methods in abstract base classes "B027", @@ -178,6 +165,8 @@ ignore = [ "PLR0913", "PLR0915", ] +line-length = 120 +target-version = "py311" unfixable = [ "F401", # Don't touch unused imports, just warn about it. ] @@ -186,8 +175,8 @@ unfixable = [ convention = "google" [tool.ruff.isort] -lines-after-imports = 2 known-first-party = ["openllm", "bentoml", 'transformers'] +lines-after-imports = 2 [tool.ruff.flake8-quotes] inline-quotes = "single" @@ -197,31 +186,31 @@ ban-relative-imports = "all" [tool.ruff.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252"] "__init__.py" = ["E402", "F401", "F403", "F811"] +"tests/**/*" = ["PLR2004", "S101", "TID252"] [tool.pyright] -pythonVersion = "3.11" -include = ["src/", "tests/"] analysis.useLibraryCodeForTypes = true -typeCheckingMode = "strict" -strictListInference = true -strictDictionaryInference = true -strictSetInference = true -strictParameterNoneValue = true enableTypeIgnoreComments = true +include = ["src/", "tests/"] +pythonVersion = "3.11" reportMissingImports = "none" -reportMissingTypeStubs = "warning" reportMissingModuleSource = "warning" -reportUnknownVariableType = "warning" +reportMissingTypeStubs = "warning" reportUnknownMemberType = "warning" +reportUnknownVariableType = "warning" +strictDictionaryInference = true +strictListInference = true +strictParameterNoneValue = true +strictSetInference = true +typeCheckingMode = "strict" [tool.coverage.run] -source_pkgs = ["openllm", "tests"] branch = true -parallel = true omit = ["src/openllm/__about__.py"] +parallel = true +source_pkgs = ["openllm", "tests"] [tool.coverage.paths] openllm = ["src/openllm", "*/openllm/src/openllm"] diff --git a/src/openllm/_configuration.py b/src/openllm/_configuration.py index 994b102e..95c827fc 100644 --- a/src/openllm/_configuration.py +++ b/src/openllm/_configuration.py @@ -127,7 +127,7 @@ class GenerationConfig(pydantic.BaseModel): """Generation config provides the configuration to then be parsed to ``transformers.GenerationConfig``, with some additional validation and environment constructor. - Note that we always set `do_sample=True` and `return_dict_in_generate=False` + Note that we always set `do_sample=True` """ # NOTE: parameters for controlling the length of the output @@ -146,12 +146,10 @@ class GenerationConfig(pydantic.BaseModel): early_stopping: bool = pydantic.Field( False, description="""Controls the stopping condition for beam-based methods, like beam-search. It accepts the - following values: - - `True`, where the generation stops as soon as there are `num_beams` complete candidates; - - `False`, where an heuristic is applied and the generation stops when is it very unlikely to find - better candidates; - - `"never"`, where the beam search procedure only stops when there cannot be better candidates - (canonical beam search algorithm) + following values: `True`, where the generation stops as soon as there are `num_beams` complete candidates; + `False`, where an heuristic is applied and the generation stops when is it very unlikely to find + better candidates; `"never"`, where the beam search procedure only stops when there + cannot be better candidates (canonical beam search algorithm) """, ) max_time: float = pydantic.Field( diff --git a/src/openllm/cli.py b/src/openllm/cli.py index 76018c74..804a85d3 100644 --- a/src/openllm/cli.py +++ b/src/openllm/cli.py @@ -497,7 +497,7 @@ def cli(): @cli.command(name="version") @output_decorator def version(output: t.Literal["json", "pretty", "porcelain"]): - """Return current OpenLLM version.""" + """🚀 OpenLLM version.""" if output == "pretty": _console.print(f"OpenLLM version: {openllm.__version__}") elif output == "json": diff --git a/taplo.toml b/taplo.toml new file mode 100644 index 00000000..5b930a52 --- /dev/null +++ b/taplo.toml @@ -0,0 +1,7 @@ +include = ['*.toml'] + +[formatting] +align_entries = false +column_width = 120 +indent_string = " " +reorder_keys = true