From 8aeeb46d2f40e4f26600d7bb9fa4355cb7d8d4ff Mon Sep 17 00:00:00 2001
From: Ryuichi Leo Takashige <leo@exolabs.net>
Date: Mon, 2 Feb 2026 21:33:16 +0000
Subject: [PATCH] failures: stop forcing serial processing for GPT-OSS models

---
 src/exo/worker/runner/runner.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/exo/worker/runner/runner.py b/src/exo/worker/runner/runner.py
index 3a5619f4..1c278994 100644
--- a/src/exo/worker/runner/runner.py
+++ b/src/exo/worker/runner/runner.py
@@ -101,15 +101,11 @@ def _should_use_serial_processing(
     """
     Determine if a ChatCompletion task requires serial processing.
 
-    GPT-OSS models have mixed cache types (KVCache + RotatingKVCache) that
-    don't work reliably with BatchGenerator's batched prefill.
+    Currently always returns False - batch mode handles all cases.
+    Post-processing (GPT-OSS, thinking models, tool calls) can be applied
+    per-request to the individual streams from the batch generator.
     """
-    from mlx_lm.models.gpt_oss import Model as GptOssModel
-
-    # GPT-OSS models don't work reliably with batched generation
-    if isinstance(model, GptOssModel):
-        return True
-
+    # All tasks can use batch mode - post-processing is per-request
     return False