diff --git a/.cbfmt.toml b/.cbfmt.toml
new file mode 100644
index 00000000..4b26e763
--- /dev/null
+++ b/.cbfmt.toml
@@ -0,0 +1,2 @@
+[languages]
+python = ["ruff format -"]
diff --git a/README.md b/README.md
index feed15cd..1b79c6b9 100644
--- a/README.md
+++ b/README.md
@@ -138,6 +138,7 @@ To interact with the server, you can visit the web UI at [http://0.0.0.0:3000/]
 
 ```python
 import openllm
+
 client = openllm.client.HTTPClient('http://localhost:3000')
 client.query('Explain to me the difference between "further" and "farther"')
 ```
@@ -1364,7 +1365,8 @@ llm = openllm.LLM('facebook/opt-2.7b')
 The main inference API is the streaming `generate_iterator` method:
 
 ```python
-async for generation in llm.generate_iterator('What is the meaning of life?'): print(generation.outputs[0].text)
+async for generation in llm.generate_iterator('What is the meaning of life?'):
+  print(generation.outputs[0].text)
 ```
 
 > [!NOTE]
@@ -1405,7 +1407,10 @@ specify the base_url to `llm-endpoint/v1` and you are good to go:
 
 ```python
 import openai
-client = openai.OpenAI(base_url='http://localhost:3000/v1', api_key='na') # Here the server is running on localhost:3000
+
+client = openai.OpenAI(
+  base_url='http://localhost:3000/v1', api_key='na'
+) # Here the server is running on localhost:3000
 completions = client.completions.create(
   prompt='Write me a tag line for an ice cream shop.', model=model, max_tokens=64, stream=stream
 )
@@ -1431,7 +1436,8 @@ import openllm
 
 llm = openllm.LLM('facebook/opt-2.7b')
 
-svc = bentoml.Service(name="llm-opt-service", runners=[llm.runner])
+svc = bentoml.Service(name='llm-opt-service', runners=[llm.runner])
+
 
 @svc.api(input=bentoml.io.Text(), output=bentoml.io.Text())
 async def prompt(input_text: str) -> str:
@@ -1449,12 +1455,15 @@ from llama_index.llms.openllm import OpenLLM
 
 llm = OpenLLM('HuggingFaceH4/zephyr-7b-alpha')
 
-llm.complete("The meaning of life is")
+llm.complete('The meaning of life is')
+
 
 async def main(prompt, **kwargs):
-  async for it in llm.astream_chat(prompt, **kwargs): print(it)
+  async for it in llm.astream_chat(prompt, **kwargs):
+    print(it)
 
-asyncio.run(main("The time at San Francisco is"))
+
+asyncio.run(main('The time at San Francisco is'))
 ```
 
 If there is a remote LLM Server running elsewhere, then you can use `llama_index.llms.openllm.OpenLLMAPI`:
@@ -1473,9 +1482,9 @@ To quickly start a local LLM with `langchain`, simply do the following:
 
 ```python
 from langchain.llms import OpenLLM
-llm = OpenLLM(model_name="llama", model_id='meta-llama/Llama-2-7b-hf')
+llm = OpenLLM(model_name='llama', model_id='meta-llama/Llama-2-7b-hf')
 
-llm("What is the difference between a duck and a goose? And why there are so many Goose in Canada?")
+llm('What is the difference between a duck and a goose? And why there are so many Goose in Canada?')
 ```
 
 > [!IMPORTANT]
@@ -1491,25 +1500,21 @@ it by specifying its URL:
 from langchain.llms import OpenLLM
 
 llm = OpenLLM(server_url='http://44.23.123.1:3000', server_type='grpc')
-llm("What is the difference between a duck and a goose? And why there are so many Goose in Canada?")
+llm('What is the difference between a duck and a goose? And why there are so many Goose in Canada?')
 ```
 
 To integrate a LangChain agent with BentoML, you can do the following:
 
 ```python
-llm = OpenLLM(
-    model_id='google/flan-t5-large',
-    embedded=False,
-    serialisation="legacy"
-)
-tools = load_tools(["serpapi", "llm-math"], llm=llm)
-agent = initialize_agent(
-    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION
-)
-svc = bentoml.Service("langchain-openllm", runners=[llm.runner])
+llm = OpenLLM(model_id='google/flan-t5-large', embedded=False, serialisation='legacy')
+tools = load_tools(['serpapi', 'llm-math'], llm=llm)
+agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)
+svc = bentoml.Service('langchain-openllm', runners=[llm.runner])
+
+
 @svc.api(input=Text(), output=Text())
 def chat(input_text: str):
-    return agent.run(input_text)
+  return agent.run(input_text)
 ```
 
 > [!NOTE]
@@ -1529,9 +1534,9 @@ OpenLLM seamlessly integrates with
 ```python
 import transformers
 
-agent = transformers.HfAgent("http://localhost:3000/hf/agent") # URL that runs the OpenLLM server
+agent = transformers.HfAgent('http://localhost:3000/hf/agent') # URL that runs the OpenLLM server
 
-agent.run("Is the following `text` positive or negative?", text="I don't like how this models is generate inputs")
+agent.run('Is the following `text` positive or negative?', text="I don't like how this models is generate inputs")
 ```
diff --git a/openllm-python/README.md b/openllm-python/README.md
index feed15cd..1b79c6b9 100644
--- a/openllm-python/README.md
+++ b/openllm-python/README.md
@@ -138,6 +138,7 @@ To interact with the server, you can visit the web UI at [http://0.0.0.0:3000/]
 
 ```python
 import openllm
+
 client = openllm.client.HTTPClient('http://localhost:3000')
 client.query('Explain to me the difference between "further" and "farther"')
 ```
@@ -1364,7 +1365,8 @@ llm = openllm.LLM('facebook/opt-2.7b')
 The main inference API is the streaming `generate_iterator` method:
 
 ```python
-async for generation in llm.generate_iterator('What is the meaning of life?'): print(generation.outputs[0].text)
+async for generation in llm.generate_iterator('What is the meaning of life?'):
+  print(generation.outputs[0].text)
 ```
 
 > [!NOTE]
@@ -1405,7 +1407,10 @@ specify the base_url to `llm-endpoint/v1` and you are good to go:
 
 ```python
 import openai
-client = openai.OpenAI(base_url='http://localhost:3000/v1', api_key='na') # Here the server is running on localhost:3000
+
+client = openai.OpenAI(
+  base_url='http://localhost:3000/v1', api_key='na'
+) # Here the server is running on localhost:3000
 completions = client.completions.create(
   prompt='Write me a tag line for an ice cream shop.', model=model, max_tokens=64, stream=stream
 )
@@ -1431,7 +1436,8 @@ import openllm
 
 llm = openllm.LLM('facebook/opt-2.7b')
 
-svc = bentoml.Service(name="llm-opt-service", runners=[llm.runner])
+svc = bentoml.Service(name='llm-opt-service', runners=[llm.runner])
+
 
 @svc.api(input=bentoml.io.Text(), output=bentoml.io.Text())
 async def prompt(input_text: str) -> str:
@@ -1449,12 +1455,15 @@ from llama_index.llms.openllm import OpenLLM
 
 llm = OpenLLM('HuggingFaceH4/zephyr-7b-alpha')
 
-llm.complete("The meaning of life is")
+llm.complete('The meaning of life is')
+
 
 async def main(prompt, **kwargs):
-  async for it in llm.astream_chat(prompt, **kwargs): print(it)
+  async for it in llm.astream_chat(prompt, **kwargs):
+    print(it)
 
-asyncio.run(main("The time at San Francisco is"))
+
+asyncio.run(main('The time at San Francisco is'))
 ```
 
 If there is a remote LLM Server running elsewhere, then you can use `llama_index.llms.openllm.OpenLLMAPI`:
@@ -1473,9 +1482,9 @@ To quickly start a local LLM with `langchain`, simply do the following:
 
 ```python
 from langchain.llms import OpenLLM
-llm = OpenLLM(model_name="llama", model_id='meta-llama/Llama-2-7b-hf')
+llm = OpenLLM(model_name='llama', model_id='meta-llama/Llama-2-7b-hf')
 
-llm("What is the difference between a duck and a goose? And why there are so many Goose in Canada?")
+llm('What is the difference between a duck and a goose? And why there are so many Goose in Canada?')
 ```
 
 > [!IMPORTANT]
@@ -1491,25 +1500,21 @@ it by specifying its URL:
 from langchain.llms import OpenLLM
 
 llm = OpenLLM(server_url='http://44.23.123.1:3000', server_type='grpc')
-llm("What is the difference between a duck and a goose? And why there are so many Goose in Canada?")
+llm('What is the difference between a duck and a goose? And why there are so many Goose in Canada?')
 ```
 
 To integrate a LangChain agent with BentoML, you can do the following:
 
 ```python
-llm = OpenLLM(
-    model_id='google/flan-t5-large',
-    embedded=False,
-    serialisation="legacy"
-)
-tools = load_tools(["serpapi", "llm-math"], llm=llm)
-agent = initialize_agent(
-    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION
-)
-svc = bentoml.Service("langchain-openllm", runners=[llm.runner])
+llm = OpenLLM(model_id='google/flan-t5-large', embedded=False, serialisation='legacy')
+tools = load_tools(['serpapi', 'llm-math'], llm=llm)
+agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)
+svc = bentoml.Service('langchain-openllm', runners=[llm.runner])
+
+
 @svc.api(input=Text(), output=Text())
 def chat(input_text: str):
-    return agent.run(input_text)
+  return agent.run(input_text)
 ```
 
 > [!NOTE]
@@ -1529,9 +1534,9 @@ OpenLLM seamlessly integrates with
 ```python
 import transformers
 
-agent = transformers.HfAgent("http://localhost:3000/hf/agent") # URL that runs the OpenLLM server
+agent = transformers.HfAgent('http://localhost:3000/hf/agent') # URL that runs the OpenLLM server
 
-agent.run("Is the following `text` positive or negative?", text="I don't like how this models is generate inputs")
+agent.run('Is the following `text` positive or negative?', text="I don't like how this models is generate inputs")
 ```