dashboard: show macOS version in debug mode (#1454)

## Motivation

When debugging cluster issues, it's useful to see which macOS version each node is running — especially since version mismatches can cause compatibility problems. The OS version data is already collected by the identity gatherer but wasn't shown in the topology graph.

## Changes

- Added OS version label (e.g. "macOS 15.2") to each node in the topology graph when debug mode is enabled
- Renders below the existing TB and RDMA debug labels using the same styling conventions
- Sources data from the existing `nodeIdentities` store (no backend changes needed)

## Why It Works

The `nodeIdentities` store already contains `osVersion` for each node. We simply read it in the `TopologyGraph` component and append a text label in the debug section, following the exact same pattern as the TB and RDMA labels.

## Test Plan

### Manual Testing

- Enable debug mode in the dashboard
- Verify OS version label appears below TB/RDMA labels on each node
- Verify label disappears when debug mode is disabled

### Automated Testing

- Dashboard build passes (`npm run build`)

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: rltakashige <rl.takashige@gmail.com>
Co-authored-by: Ryuichi Leo Takashige <leo@exolabs.net>
feat: add enable_thinking toggle for thinking-capable models (#1457)
2026-02-12 15:11:30 -05:00 · 2026-02-12 17:56:55 +00:00 · 2026-02-12 17:35:24 +00:00 · 2026-02-12 16:38:09 +00:00
14 changed files with 237 additions and 88 deletions
--- a/bench/exo_bench.py
+++ b/bench/exo_bench.py
@@ -19,6 +19,11 @@ from urllib.parse import urlencode
 from loguru import logger
 from transformers import AutoTokenizer

+# Backoff constants for cluster settling retry
+_SETTLE_INITIAL_BACKOFF_S = 1.0
+_SETTLE_MAX_BACKOFF_S = 60.0
+_SETTLE_BACKOFF_MULTIPLIER = 2.0
+
 # Monkey-patch for transformers 5.x compatibility
 # Kimi's tokenization_kimi.py imports bytes_to_unicode from the old location
 # which was moved in transformers 5.0.0rc2
@@ -388,6 +393,66 @@ class PromptSizer:
        return content, tok


+def fetch_and_filter_placements(
+    client: ExoClient, full_model_id: str, args: argparse.Namespace
+) -> list[dict[str, Any]]:
+    previews_resp = client.request_json(
+        "GET", "/instance/previews", params={"model_id": full_model_id}
+    )
+    previews = previews_resp.get("previews") or []
+
+    selected: list[dict[str, Any]] = []
+    for p in previews:
+        if p.get("error") is not None:
+            continue
+        if not placement_filter(str(p.get("instance_meta", "")), args.instance_meta):
+            continue
+        if not sharding_filter(str(p.get("sharding", "")), args.sharding):
+            continue
+
+        instance = p.get("instance")
+        if not isinstance(instance, dict):
+            continue
+
+        n = nodes_used_in_instance(instance)
+        # Skip tensor ring single node as it is pointless when pipeline ring
+        if n == 1 and (
+            (args.sharding == "both" and "tensor" in p.get("sharding", "").lower())
+            or (
+                args.instance_meta == "both"
+                and "jaccl" in p.get("instance_meta", "").lower()
+            )
+        ):
+            continue
+
+        if (
+            args.skip_pipeline_jaccl
+            and (
+                args.instance_meta == "both"
+                and "jaccl" in p.get("instance_meta", "").lower()
+            )
+            and (
+                args.sharding == "both" and "pipeline" in p.get("sharding", "").lower()
+            )
+        ):
+            continue
+
+        if (
+            args.skip_tensor_ring
+            and (
+                args.instance_meta == "both"
+                and "ring" in p.get("instance_meta", "").lower()
+            )
+            and (args.sharding == "both" and "tensor" in p.get("sharding", "").lower())
+        ):
+            continue
+
+        if args.min_nodes <= n <= args.max_nodes:
+            selected.append(p)
+
+    return selected
+
+
 def main() -> int:
    ap = argparse.ArgumentParser(
        prog="exo-bench",
@@ -464,6 +529,12 @@ def main() -> int:
        action="store_true",
        help="Force all pp×tg combinations (cartesian product) even when lists have equal length.",
    )
+    ap.add_argument(
+        "--settle-timeout",
+        type=float,
+        default=0,
+        help="Max seconds to wait for the cluster to produce valid placements (0 = try once).",
+    )
    args = ap.parse_args()

    pp_list = parse_int_list(args.pp)
@@ -487,11 +558,6 @@ def main() -> int:
    client = ExoClient(args.host, args.port, timeout_s=args.timeout)
    short_id, full_model_id = resolve_model_short_id(client, args.model)

-    previews_resp = client.request_json(
-        "GET", "/instance/previews", params={"model_id": full_model_id}
-    )
-    previews = previews_resp.get("previews") or []
-
    tokenizer = load_tokenizer_for_bench(full_model_id)
    if tokenizer is None:
        raise RuntimeError("[exo-bench] tokenizer load failed")
@@ -503,54 +569,20 @@ def main() -> int:
        logger.error("[exo-bench] tokenizer usable but prompt sizing failed")
        raise

-    selected: list[dict[str, Any]] = []
-    for p in previews:
-        if p.get("error") is not None:
-            continue
-        if not placement_filter(str(p.get("instance_meta", "")), args.instance_meta):
-            continue
-        if not sharding_filter(str(p.get("sharding", "")), args.sharding):
-            continue
+    selected = fetch_and_filter_placements(client, full_model_id, args)

-        instance = p.get("instance")
-        if not isinstance(instance, dict):
-            continue
-
-        n = nodes_used_in_instance(instance)
-        # Skip tensor ring single node as it is pointless when pipeline ring
-        if n == 1 and (
-            (args.sharding == "both" and "tensor" in p.get("sharding", "").lower())
-            or (
-                args.instance_meta == "both"
-                and "jaccl" in p.get("instance_meta", "").lower()
+    if not selected and args.settle_timeout > 0:
+        backoff = _SETTLE_INITIAL_BACKOFF_S
+        deadline = time.monotonic() + args.settle_timeout
+        while not selected and time.monotonic() < deadline:
+            remaining = deadline - time.monotonic()
+            logger.warning(
+                f"No valid placements yet (cluster may still be settling). "
+                f"Retrying in {backoff:.1f}s ({remaining:.0f}s remaining)..."
            )
-        ):
-            continue
-
-        if (
-            args.skip_pipeline_jaccl
-            and (
-                args.instance_meta == "both"
-                and "jaccl" in p.get("instance_meta", "").lower()
-            )
-            and (
-                args.sharding == "both" and "pipeline" in p.get("sharding", "").lower()
-            )
-        ):
-            continue
-
-        if (
-            args.skip_tensor_ring
-            and (
-                args.instance_meta == "both"
-                and "ring" in p.get("instance_meta", "").lower()
-            )
-            and (args.sharding == "both" and "tensor" in p.get("sharding", "").lower())
-        ):
-            continue
-
-        if args.min_nodes <= n <= args.max_nodes:
-            selected.append(p)
+            time.sleep(min(backoff, remaining))
+            backoff = min(backoff * _SETTLE_BACKOFF_MULTIPLIER, _SETTLE_MAX_BACKOFF_S)
+            selected = fetch_and_filter_placements(client, full_model_id, args)

    if not selected:
        logger.error("No valid placements matched your filters.")
--- a/dashboard/src/lib/components/ChatForm.svelte
+++ b/dashboard/src/lib/components/ChatForm.svelte
@@ -12,6 +12,8 @@
    ttftMs,
    tps,
    totalTokens,
+    thinkingEnabled as thinkingEnabledStore,
+    setConversationThinking,
  } from "$lib/stores/app.svelte";
  import ChatAttachments from "./ChatAttachments.svelte";
  import ImageParamsPanel from "./ImageParamsPanel.svelte";
@@ -25,6 +27,7 @@
    autofocus?: boolean;
    showModelSelector?: boolean;
    modelTasks?: Record<string, string[]>;
+    modelCapabilities?: Record<string, string[]>;
  }

  let {
@@ -34,6 +37,7 @@
    autofocus = true,
    showModelSelector = false,
    modelTasks = {},
+    modelCapabilities = {},
  }: Props = $props();

  let message = $state("");
@@ -41,6 +45,7 @@
  let fileInputRef: HTMLInputElement | undefined = $state();
  let uploadedFiles = $state<ChatUploadedFile[]>([]);
  let isDragOver = $state(false);
+  const thinkingEnabled = $derived(thinkingEnabledStore());
  let loading = $derived(isLoading());
  const currentModel = $derived(selectedChatModel());
  const instanceData = $derived(instances());
@@ -95,6 +100,12 @@
    );
  });

+  const modelSupportsThinking = $derived(() => {
+    if (!currentModel) return false;
+    const caps = modelCapabilities[currentModel] || [];
+    return caps.includes("thinking") && caps.includes("text");
+  });
+
  const isEditOnlyWithoutImage = $derived(
    currentModel !== null &&
      modelSupportsOnlyImageEditing(currentModel) &&
@@ -282,7 +293,11 @@
      // Use image generation for text-to-image models
      generateImage(content);
    } else {
-      sendMessage(content, files);
+      sendMessage(
+        content,
+        files,
+        modelSupportsThinking() ? thinkingEnabled : null,
+      );
    }

    // Refocus the textarea after sending
@@ -520,6 +535,35 @@
            </div>
          {/if}
        </div>
+        <!-- Thinking toggle -->
+        {#if modelSupportsThinking()}
+          <button
+            type="button"
+            onclick={() => setConversationThinking(!thinkingEnabled)}
+            class="flex items-center gap-1.5 px-2 py-1 rounded text-xs font-mono tracking-wide transition-all duration-200 flex-shrink-0 cursor-pointer border {thinkingEnabled
+              ? 'bg-exo-yellow/15 border-exo-yellow/40 text-exo-yellow'
+              : 'bg-exo-medium-gray/30 border-exo-medium-gray/50 text-exo-light-gray/60 hover:text-exo-light-gray'}"
+            title={thinkingEnabled
+              ? "Thinking enabled — click to disable"
+              : "Thinking disabled — click to enable"}
+          >
+            <svg
+              class="w-3.5 h-3.5"
+              viewBox="0 0 24 24"
+              fill="none"
+              stroke="currentColor"
+              stroke-width="1.5"
+            >
+              <path
+                d="M12 2a7 7 0 0 0-7 7c0 2.38 1.19 4.47 3 5.74V17a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1v-2.26c1.81-1.27 3-3.36 3-5.74a7 7 0 0 0-7-7zM9 20h6M10 22h4"
+                stroke-linecap="round"
+                stroke-linejoin="round"
+              />
+            </svg>
+            <span>{thinkingEnabled ? "THINK" : "NO THINK"}</span>
+          </button>
+        {/if}
+
        <!-- Performance stats -->
        {#if currentTtft !== null || currentTps !== null}
          <div class="flex items-center gap-4 text-xs font-mono flex-shrink-0">
--- a/dashboard/src/lib/components/TopologyGraph.svelte
+++ b/dashboard/src/lib/components/TopologyGraph.svelte
@@ -7,6 +7,7 @@
    debugMode,
    nodeThunderboltBridge,
    nodeRdmaCtl,
+    nodeIdentities,
    type NodeInfo,
  } from "$lib/stores/app.svelte";

@@ -33,6 +34,7 @@
  const debugEnabled = $derived(debugMode());
  const tbBridgeData = $derived(nodeThunderboltBridge());
  const rdmaCtlData = $derived(nodeRdmaCtl());
+  const identitiesData = $derived(nodeIdentities());

  function getNodeLabel(nodeId: string): string {
    const node = data?.nodes?.[nodeId];
@@ -1177,6 +1179,22 @@
            .attr("font-size", debugFontSize)
            .attr("font-family", "SF Mono, Monaco, monospace")
            .text(rdmaText);
+          debugLabelY += debugLineHeight;
+        }
+
+        const identity = identitiesData[nodeInfo.id];
+        if (identity?.osVersion) {
+          nodeG
+            .append("text")
+            .attr("x", nodeInfo.x)
+            .attr("y", debugLabelY)
+            .attr("text-anchor", "middle")
+            .attr("fill", "rgba(179,179,179,0.7)")
+            .attr("font-size", debugFontSize)
+            .attr("font-family", "SF Mono, Monaco, monospace")
+            .text(
+              `macOS ${identity.osVersion}${identity.osBuildVersion ? ` (${identity.osBuildVersion})` : ""}`,
+            );
        }
      }
    });
--- a/dashboard/src/lib/stores/app.svelte.ts
+++ b/dashboard/src/lib/stores/app.svelte.ts
@@ -296,6 +296,7 @@ export interface Conversation {
  modelId: string | null;
  sharding: string | null;
  instanceType: string | null;
+  enableThinking: boolean | null;
 }

 const STORAGE_KEY = "exo-conversations";
@@ -605,6 +606,7 @@ class AppStore {
          modelId: conversation.modelId ?? null,
          sharding: conversation.sharding ?? null,
          instanceType: conversation.instanceType ?? null,
+          enableThinking: conversation.enableThinking ?? null,
        }));
      }
    } catch (error) {
@@ -794,6 +796,7 @@ class AppStore {
      modelId: derivedModelId,
      sharding: derivedSharding,
      instanceType: derivedInstanceType,
+      enableThinking: null,
    };

    this.conversations.unshift(conversation);
@@ -819,6 +822,7 @@ class AppStore {
    this.hasStartedChat = true;
    this.isTopologyMinimized = true;
    this.isSidebarOpen = true; // Auto-open sidebar when chatting
+    this.thinkingEnabled = conversation.enableThinking ?? true;
    this.refreshConversationModelFromInstances();

    return true;
@@ -1932,6 +1936,11 @@ class AppStore {
    }
  }

+  /**
+   * Whether thinking is enabled for the current conversation
+   */
+  thinkingEnabled = $state(true);
+
  /**
   * Selected model for chat (can be set by the UI)
   */
@@ -2110,6 +2119,7 @@ class AppStore {
      textContent?: string;
      preview?: string;
    }[],
+    enableThinking?: boolean | null,
  ): Promise<void> {
    if ((!content.trim() && (!files || files.length === 0)) || this.isLoading)
      return;
@@ -2257,6 +2267,9 @@ class AppStore {
          stream: true,
          logprobs: true,
          top_logprobs: 5,
+          ...(enableThinking != null && {
+            enable_thinking: enableThinking,
+          }),
        }),
      });

@@ -2915,6 +2928,18 @@ class AppStore {
    );
  }

+  /**
+   * Update the thinking preference for the active conversation
+   */
+  setConversationThinking(enabled: boolean) {
+    this.thinkingEnabled = enabled;
+    const conv = this.getActiveConversation();
+    if (conv) {
+      conv.enableThinking = enabled;
+      this.saveConversationsToStorage();
+    }
+  }
+
  /**
   * Start a download on a specific node
   */
@@ -3028,6 +3053,7 @@ export const isLoadingPreviews = () => appStore.isLoadingPreviews;
 export const lastUpdate = () => appStore.lastUpdate;
 export const isTopologyMinimized = () => appStore.isTopologyMinimized;
 export const selectedChatModel = () => appStore.selectedChatModel;
+export const thinkingEnabled = () => appStore.thinkingEnabled;
 export const debugMode = () => appStore.getDebugMode();
 export const topologyOnlyMode = () => appStore.getTopologyOnlyMode();
 export const chatSidebarVisible = () => appStore.getChatSidebarVisible();
@@ -3043,7 +3069,8 @@ export const sendMessage = (
    textContent?: string;
    preview?: string;
  }[],
-) => appStore.sendMessage(content, files);
+  enableThinking?: boolean | null,
+) => appStore.sendMessage(content, files, enableThinking);
 export const generateImage = (prompt: string, modelId?: string) =>
  appStore.generateImage(prompt, modelId);
 export const editImage = (
@@ -3086,6 +3113,8 @@ export const deleteAllConversations = () => appStore.deleteAllConversations();
 export const renameConversation = (id: string, name: string) =>
  appStore.renameConversation(id, name);
 export const getActiveConversation = () => appStore.getActiveConversation();
+export const setConversationThinking = (enabled: boolean) =>
+  appStore.setConversationThinking(enabled);

 // Sidebar actions
 export const isSidebarOpen = () => appStore.isSidebarOpen;
--- a/dashboard/src/routes/+page.svelte
+++ b/dashboard/src/routes/+page.svelte
@@ -190,6 +190,19 @@
    return tasks;
  });

+  const modelCapabilities = $derived(() => {
+    const caps: Record<string, string[]> = {};
+    for (const model of models) {
+      if (model.capabilities && model.capabilities.length > 0) {
+        caps[model.id] = model.capabilities;
+        if (model.hugging_face_id) {
+          caps[model.hugging_face_id] = model.capabilities;
+        }
+      }
+    }
+    return caps;
+  });
+
  // Helper to check if a model supports image generation
  function modelSupportsImageGeneration(modelId: string): boolean {
    const model = models.find(
@@ -2270,6 +2283,7 @@
                showHelperText={false}
                showModelSelector={true}
                modelTasks={modelTasks()}
+                modelCapabilities={modelCapabilities()}
              />
            </div>
          </div>
@@ -3049,6 +3063,7 @@
                placeholder="Ask anything"
                showModelSelector={true}
                modelTasks={modelTasks()}
+                modelCapabilities={modelCapabilities()}
              />
            </div>
          </div>
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@ dependencies = [
    "loguru>=0.7.3",
    "exo_pyo3_bindings", # rust bindings
    "anyio==4.11.0",
-    "mlx; sys_platform == 'darwin'",
+    "mlx==0.30.6; sys_platform == 'darwin'",
    "mlx[cpu]==0.30.6; sys_platform == 'linux'",
    "mlx-lm==0.30.6",
    "tiktoken>=0.12.0", # required for kimi k2 tokenizer
@@ -64,7 +64,6 @@ members = [

 [tool.uv.sources]
 exo_pyo3_bindings = { workspace = true }
-mlx = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git", marker = "sys_platform == 'darwin'" }
 #mlx-lm = { git = "https://github.com/davidmcc73/mlx-lm", branch = "stable" }
 # Uncomment to use local mlx/mlx-lm development versions:
 # mlx = { path = "/Users/Shared/mlx", editable=true }
--- a/src/exo/master/adapters/chat_completions.py
+++ b/src/exo/master/adapters/chat_completions.py
@@ -79,6 +79,7 @@ def chat_request_to_text_generation(
        seed=request.seed,
        stream=request.stream,
        tools=request.tools,
+        enable_thinking=request.enable_thinking,
        chat_template_messages=chat_template_messages
        if chat_template_messages
        else None,
--- a/src/exo/shared/types/api.py
+++ b/src/exo/shared/types/api.py
@@ -199,6 +199,7 @@ class ChatCompletionRequest(BaseModel):
    top_p: float | None = None
    top_k: int | None = None
    tools: list[dict[str, Any]] | None = None
+    enable_thinking: bool | None = None
    tool_choice: str | dict[str, Any] | None = None
    parallel_tool_calls: bool | None = None
    user: str | None = None
--- a/src/exo/shared/types/text_generation.py
+++ b/src/exo/shared/types/text_generation.py
@@ -40,5 +40,6 @@ class TextGenerationTaskParams(BaseModel, frozen=True):
    stop: str | list[str] | None = None
    seed: int | None = None
    chat_template_messages: list[dict[str, Any]] | None = None
+    enable_thinking: bool | None = None
    logprobs: bool = False
    top_logprobs: int | None = None
--- a/src/exo/utils/channels.py
+++ b/src/exo/utils/channels.py
@@ -141,9 +141,6 @@ class MpSender[T]:
        )
        self._state.buffer.join_thread()

-    def cancel_join(self) -> None:
-        self._state.buffer.cancel_join_thread()
-
    # == context manager support ==
    def __enter__(self) -> Self:
        return self
@@ -215,9 +212,6 @@ class MpReceiver[T]:
        )
        self._state.buffer.join_thread()

-    def cancel_join(self) -> None:
-        self._state.buffer.cancel_join_thread()
-
    # == iterator support ==
    def __iter__(self) -> Self:
        return self
--- a/src/exo/worker/engines/mlx/generator/generate.py
+++ b/src/exo/worker/engines/mlx/generator/generate.py
@@ -57,7 +57,6 @@ def prefill(
    sampler: Callable[[mx.array], mx.array],
    prompt_tokens: mx.array,
    cache: KVCacheType,
-    group: mx.distributed.Group | None = None,
 ) -> tuple[float, int, list[CacheSnapshot]]:
    """Prefill the KV cache with prompt tokens.

@@ -87,9 +86,6 @@ def prefill(

    set_pipeline_prefill(model, is_prefill=True)

-    mx_barrier(group)
-    logger.info("Ready to prefill")
-
    # Use max_tokens=1 because max_tokens=0 does not work.
    # We just throw away the generated token - we only care about filling the cache
    for _ in stream_generate(
@@ -309,9 +305,16 @@ def mlx_generate(
    )
    max_stop_len = max((len(s) for s in stop_sequences), default=0)

+    mx_barrier(group)
+    logger.info("Ready to prefill")
+
    # Prefill cache with all tokens except the last one
    prefill_tps, prefill_tokens, ssm_snapshots_list = prefill(
-        model, tokenizer, sampler, prompt_tokens[:-1], caches, group
+        model,
+        tokenizer,
+        sampler,
+        prompt_tokens[:-1],
+        caches,
    )
    cache_snapshots: list[CacheSnapshot] | None = ssm_snapshots_list or None

--- a/src/exo/worker/engines/mlx/utils_mlx.py
+++ b/src/exo/worker/engines/mlx/utils_mlx.py
@@ -462,11 +462,19 @@ def apply_chat_template(
        partial_assistant_content = cast(str, formatted_messages[-1].get("content", ""))
        formatted_messages = formatted_messages[:-1]

+    extra_kwargs: dict[str, Any] = {}
+    if task_params.enable_thinking is not None:
+        # Qwen3 and GLM use "enable_thinking"; DeepSeek uses "thinking".
+        # Jinja ignores unknown variables, so passing both is safe.
+        extra_kwargs["enable_thinking"] = task_params.enable_thinking
+        extra_kwargs["thinking"] = task_params.enable_thinking
+
    prompt: str = tokenizer.apply_chat_template(
        formatted_messages,
        tokenize=False,
        add_generation_prompt=True,
        tools=task_params.tools,
+        **extra_kwargs,
    )

    if partial_assistant_content:
--- a/src/exo/worker/runner/bootstrap.py
+++ b/src/exo/worker/runner/bootstrap.py
@@ -41,8 +41,6 @@ def entrypoint(
        main(bound_instance, event_sender, task_receiver)
    except ClosedResourceError:
        logger.warning("Runner communication closed unexpectedly")
-    except KeyboardInterrupt:
-        logger.info("Runner received interrupt, shutting down")
    except Exception as e:
        logger.opt(exception=e).warning(
            f"Runner {bound_instance.bound_runner_id} crashed with critical exception {e}"
@@ -57,9 +55,7 @@ def entrypoint(
        try:
            event_sender.close()
            task_receiver.close()
-        except Exception:
-            pass
        finally:
-            event_sender.cancel_join()
-            task_receiver.cancel_join()
+            event_sender.join()
+            task_receiver.join()
            logger.info("bye from the runner")
--- a/uv.lock
+++ b/uv.lock
@@ -377,8 +377,8 @@ dependencies = [
    { name = "hypercorn", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "loguru", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "mflux", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.6", source = { registry = "https://pypi.org/simple" }, extra = ["cpu"], marker = "sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.7.dev20260211+80de5e35", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git#80de5e35498d6ae759573225177482f0ca9ded80" }, marker = "sys_platform == 'darwin'" },
+    { name = "mlx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mlx", extra = ["cpu"], marker = "sys_platform == 'linux'" },
    { name = "mlx-lm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "msgspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "openai-harmony", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -416,7 +416,7 @@ requires-dist = [
    { name = "hypercorn", specifier = ">=0.18.0" },
    { name = "loguru", specifier = ">=0.7.3" },
    { name = "mflux", specifier = "==0.15.5" },
-    { name = "mlx", marker = "sys_platform == 'darwin'", git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git" },
+    { name = "mlx", marker = "sys_platform == 'darwin'", specifier = "==0.30.6" },
    { name = "mlx", extras = ["cpu"], marker = "sys_platform == 'linux'", specifier = "==0.30.6" },
    { name = "mlx-lm", specifier = "==0.30.6" },
    { name = "msgspec", specifier = ">=0.19.0" },
@@ -1020,8 +1020,8 @@ dependencies = [
    { name = "fonttools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "huggingface-hub", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "matplotlib", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.6", source = { registry = "https://pypi.org/simple" }, extra = ["cuda13"], marker = "sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.7.dev20260211+80de5e35", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git#80de5e35498d6ae759573225177482f0ca9ded80" }, marker = "sys_platform == 'darwin'" },
+    { name = "mlx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "mlx", extra = ["cuda13"], marker = "sys_platform == 'linux'" },
    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "opencv-python", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "piexif", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -1048,12 +1048,18 @@ wheels = [
 name = "mlx"
 version = "0.30.6"
 source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "sys_platform == 'linux'",
+dependencies = [
+    { name = "mlx-metal", marker = "sys_platform == 'darwin'" },
 ]
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/ae/5b/e460e144a34d5529e010056cccf50b538d56ed001473bc6b246018fd58cb/mlx-0.30.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ed86f8bffc174c2f259ca589ea25464c96cf69d1bb457074a2bf2ef53737e54f", size = 573515, upload-time = "2026-02-06T03:45:23.405Z" },
+    { url = "https://files.pythonhosted.org/packages/60/25/69833fefb9a3fef30b56792b1bcd022496c4fea83e45411d289b77ef7546/mlx-0.30.6-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:c52294958269e20f300639a17c1900ca8fc737d859ddda737f9811e94bd040e5", size = 573516, upload-time = "2026-02-06T03:45:24.618Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/6a/7e7fbeebc5cb51b6a5eba96b263a6298707bcbdc059f4b0b73e088bc3dea/mlx-0.30.6-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:b5b6636f7c49a4d86d8ec82643b972f45a144a7a9f3a967b27b2e6e22cf71e6a", size = 573592, upload-time = "2026-02-06T03:45:25.928Z" },
    { url = "https://files.pythonhosted.org/packages/93/06/280f6f2ba80520a7109730425eda0d966658793aa0d02d8be8d351f75253/mlx-0.30.6-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:67e6c9e30a9faeacc209917ef5523177cf9b086914b6b5d83ff886e4294b727d", size = 622011, upload-time = "2026-02-06T03:45:28.165Z" },
    { url = "https://files.pythonhosted.org/packages/fe/35/f872afbee9c079cc69924d9e9c46f5663adb7da58cba3511db082dd307c1/mlx-0.30.6-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:47db8b16fcb6f6c5a47c0bdb24ed377b41237017ac93aa6cb6aa206c9bdf82e4", size = 663650, upload-time = "2026-02-06T03:45:30.315Z" },
+    { url = "https://files.pythonhosted.org/packages/60/23/361dc7a5797634e4d7e9bdd6564c6b28f9b1246672632def2f91bf066b18/mlx-0.30.6-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:78804a89dcff4a838f7c2da72392fe87a523e95122a3c840e53df019122aad45", size = 575028, upload-time = "2026-02-06T03:45:31.549Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/69/1854484d414171586814dfbe8def95f75c4ea2c7341ba13ba8ee675f7c62/mlx-0.30.6-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:ec13584ab069665cc7ad34a05494d9291cd623aef6ae96be48875fc87cfc25d6", size = 575026, upload-time = "2026-02-06T03:45:33.072Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/b8/3adbc441924209a7e4c568308b2a0b54bd09aee6a68db5bae85304791e54/mlx-0.30.6-cp314-cp314-macosx_26_0_arm64.whl", hash = "sha256:b2c5e8a090a753ef99a1380a4d059c983083f36198864f6df9faaf1223d083df", size = 575041, upload-time = "2026-02-06T03:45:34.814Z" },
    { url = "https://files.pythonhosted.org/packages/3f/54/9d9e06804fb2088202a2cdf60458e00b221f71420bea285720b60f9e82b5/mlx-0.30.6-cp314-cp314-manylinux_2_35_aarch64.whl", hash = "sha256:9ceddede4af0de31d1f6b3099f70e5469d60cd7c546975dedbdbeab3519cab3f", size = 624002, upload-time = "2026-02-06T03:45:36Z" },
    { url = "https://files.pythonhosted.org/packages/42/92/3140a15a50cb1f9267a6552171e1dfa577861de53e093124bc43707f2a0e/mlx-0.30.6-cp314-cp314-manylinux_2_35_x86_64.whl", hash = "sha256:4a6ffd2d16728cf95f63a1b555d7c2eaeea686a0e6b73228bd265411cb5d77a4", size = 663569, upload-time = "2026-02-06T03:45:37.242Z" },
 ]
@@ -1066,14 +1072,6 @@ cuda13 = [
    { name = "mlx-cuda-13", marker = "sys_platform == 'linux'" },
 ]

-[[package]]
-name = "mlx"
-version = "0.30.7.dev20260211+80de5e35"
-source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git#80de5e35498d6ae759573225177482f0ca9ded80" }
-resolution-markers = [
-    "sys_platform == 'darwin'",
-]
-
 [[package]]
 name = "mlx-cpu"
 version = "0.30.6"
@@ -1104,7 +1102,7 @@ version = "0.30.6"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "mlx", version = "0.30.7.dev20260211+80de5e35", source = { git = "https://github.com/rltakashige/mlx-jaccl-fix-small-recv.git#80de5e35498d6ae759573225177482f0ca9ded80" }, marker = "sys_platform == 'darwin'" },
+    { name = "mlx", marker = "sys_platform == 'darwin'" },
    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "protobuf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -1116,6 +1114,16 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/20/5f/01d281f1fa8a1521d5936659beb4f5ab1f32b463d059263cf9d4cef969d9/mlx_lm-0.30.6-py3-none-any.whl", hash = "sha256:a7405bd581eacc4bf8209d7a6b7f23629585a0d7c6740c2a97e51fee35b3b0e1", size = 379451, upload-time = "2026-02-04T21:27:43.222Z" },
 ]

+[[package]]
+name = "mlx-metal"
+version = "0.30.6"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f3/85/44406b521f920248fad621334d4dc15e77660a494edf890e7cbee33bf38d/mlx_metal-0.30.6-py3-none-macosx_14_0_arm64.whl", hash = "sha256:ea6d0c973def9a5b4f652cc77036237db3f88c9d0af63701d76b5fddde99b820", size = 38437818, upload-time = "2026-02-06T03:44:56.19Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/cb/10a516995f7d0c154b0d7e633c54b51e96977a86a355105b6474cfcbe0d0/mlx_metal-0.30.6-py3-none-macosx_15_0_arm64.whl", hash = "sha256:0f8cb94634d07e06a372d6ad9a090f38a18bab1ff19a140aede60eacf707bb94", size = 38433701, upload-time = "2026-02-06T03:44:59.678Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/7d/70cb272f7373c334709f210ed8420511fc9d64d05a7a646c0b3b94c29c04/mlx_metal-0.30.6-py3-none-macosx_26_0_arm64.whl", hash = "sha256:d761ae26304f2c4b454eeea7f612a56919d9e5e57dbb1dc0788f8e34aa6f41c2", size = 47718448, upload-time = "2026-02-06T03:45:03.133Z" },
+]
+
 [[package]]
 name = "more-itertools"
 version = "10.8.0"