dashboard: show macOS version in debug mode (#1454)

## Motivation

When debugging cluster issues, it's useful to see which macOS version each node is running — especially since version mismatches can cause compatibility problems. The OS version data is already collected by the identity gatherer but wasn't shown in the topology graph.

## Changes

- Added OS version label (e.g. "macOS 15.2") to each node in the topology graph when debug mode is enabled
- Renders below the existing TB and RDMA debug labels using the same styling conventions
- Sources data from the existing `nodeIdentities` store (no backend changes needed)

## Why It Works

The `nodeIdentities` store already contains `osVersion` for each node. We simply read it in the `TopologyGraph` component and append a text label in the debug section, following the exact same pattern as the TB and RDMA labels.

## Test Plan

### Manual Testing

- Enable debug mode in the dashboard
- Verify OS version label appears below TB/RDMA labels on each node
- Verify label disappears when debug mode is disabled

### Automated Testing

- Dashboard build passes (`npm run build`)

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: rltakashige <rl.takashige@gmail.com>
Co-authored-by: Ryuichi Leo Takashige <leo@exolabs.net>
feat: add enable_thinking toggle for thinking-capable models (#1457)
2026-02-12 23:21:44 -05:00 · 2026-02-12 17:56:55 +00:00 · 2026-02-12 17:35:24 +00:00
8 changed files with 119 additions and 2 deletions
--- a/dashboard/src/lib/components/ChatForm.svelte
+++ b/dashboard/src/lib/components/ChatForm.svelte
@@ -12,6 +12,8 @@
    ttftMs,
    tps,
    totalTokens,
+    thinkingEnabled as thinkingEnabledStore,
+    setConversationThinking,
  } from "$lib/stores/app.svelte";
  import ChatAttachments from "./ChatAttachments.svelte";
  import ImageParamsPanel from "./ImageParamsPanel.svelte";
@@ -25,6 +27,7 @@
    autofocus?: boolean;
    showModelSelector?: boolean;
    modelTasks?: Record<string, string[]>;
+    modelCapabilities?: Record<string, string[]>;
  }

  let {
@@ -34,6 +37,7 @@
    autofocus = true,
    showModelSelector = false,
    modelTasks = {},
+    modelCapabilities = {},
  }: Props = $props();

  let message = $state("");
@@ -41,6 +45,7 @@
  let fileInputRef: HTMLInputElement | undefined = $state();
  let uploadedFiles = $state<ChatUploadedFile[]>([]);
  let isDragOver = $state(false);
+  const thinkingEnabled = $derived(thinkingEnabledStore());
  let loading = $derived(isLoading());
  const currentModel = $derived(selectedChatModel());
  const instanceData = $derived(instances());
@@ -95,6 +100,12 @@
    );
  });

+  const modelSupportsThinking = $derived(() => {
+    if (!currentModel) return false;
+    const caps = modelCapabilities[currentModel] || [];
+    return caps.includes("thinking") && caps.includes("text");
+  });
+
  const isEditOnlyWithoutImage = $derived(
    currentModel !== null &&
      modelSupportsOnlyImageEditing(currentModel) &&
@@ -282,7 +293,11 @@
      // Use image generation for text-to-image models
      generateImage(content);
    } else {
-      sendMessage(content, files);
+      sendMessage(
+        content,
+        files,
+        modelSupportsThinking() ? thinkingEnabled : null,
+      );
    }

    // Refocus the textarea after sending
@@ -520,6 +535,35 @@
            </div>
          {/if}
        </div>
+        <!-- Thinking toggle -->
+        {#if modelSupportsThinking()}
+          <button
+            type="button"
+            onclick={() => setConversationThinking(!thinkingEnabled)}
+            class="flex items-center gap-1.5 px-2 py-1 rounded text-xs font-mono tracking-wide transition-all duration-200 flex-shrink-0 cursor-pointer border {thinkingEnabled
+              ? 'bg-exo-yellow/15 border-exo-yellow/40 text-exo-yellow'
+              : 'bg-exo-medium-gray/30 border-exo-medium-gray/50 text-exo-light-gray/60 hover:text-exo-light-gray'}"
+            title={thinkingEnabled
+              ? "Thinking enabled — click to disable"
+              : "Thinking disabled — click to enable"}
+          >
+            <svg
+              class="w-3.5 h-3.5"
+              viewBox="0 0 24 24"
+              fill="none"
+              stroke="currentColor"
+              stroke-width="1.5"
+            >
+              <path
+                d="M12 2a7 7 0 0 0-7 7c0 2.38 1.19 4.47 3 5.74V17a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1v-2.26c1.81-1.27 3-3.36 3-5.74a7 7 0 0 0-7-7zM9 20h6M10 22h4"
+                stroke-linecap="round"
+                stroke-linejoin="round"
+              />
+            </svg>
+            <span>{thinkingEnabled ? "THINK" : "NO THINK"}</span>
+          </button>
+        {/if}
+
        <!-- Performance stats -->
        {#if currentTtft !== null || currentTps !== null}
          <div class="flex items-center gap-4 text-xs font-mono flex-shrink-0">
--- a/dashboard/src/lib/components/TopologyGraph.svelte
+++ b/dashboard/src/lib/components/TopologyGraph.svelte
@@ -7,6 +7,7 @@
    debugMode,
    nodeThunderboltBridge,
    nodeRdmaCtl,
+    nodeIdentities,
    type NodeInfo,
  } from "$lib/stores/app.svelte";

@@ -33,6 +34,7 @@
  const debugEnabled = $derived(debugMode());
  const tbBridgeData = $derived(nodeThunderboltBridge());
  const rdmaCtlData = $derived(nodeRdmaCtl());
+  const identitiesData = $derived(nodeIdentities());

  function getNodeLabel(nodeId: string): string {
    const node = data?.nodes?.[nodeId];
@@ -1177,6 +1179,22 @@
            .attr("font-size", debugFontSize)
            .attr("font-family", "SF Mono, Monaco, monospace")
            .text(rdmaText);
+          debugLabelY += debugLineHeight;
+        }
+
+        const identity = identitiesData[nodeInfo.id];
+        if (identity?.osVersion) {
+          nodeG
+            .append("text")
+            .attr("x", nodeInfo.x)
+            .attr("y", debugLabelY)
+            .attr("text-anchor", "middle")
+            .attr("fill", "rgba(179,179,179,0.7)")
+            .attr("font-size", debugFontSize)
+            .attr("font-family", "SF Mono, Monaco, monospace")
+            .text(
+              `macOS ${identity.osVersion}${identity.osBuildVersion ? ` (${identity.osBuildVersion})` : ""}`,
+            );
        }
      }
    });
--- a/dashboard/src/lib/stores/app.svelte.ts
+++ b/dashboard/src/lib/stores/app.svelte.ts
@@ -296,6 +296,7 @@ export interface Conversation {
  modelId: string | null;
  sharding: string | null;
  instanceType: string | null;
+  enableThinking: boolean | null;
 }

 const STORAGE_KEY = "exo-conversations";
@@ -605,6 +606,7 @@ class AppStore {
          modelId: conversation.modelId ?? null,
          sharding: conversation.sharding ?? null,
          instanceType: conversation.instanceType ?? null,
+          enableThinking: conversation.enableThinking ?? null,
        }));
      }
    } catch (error) {
@@ -794,6 +796,7 @@ class AppStore {
      modelId: derivedModelId,
      sharding: derivedSharding,
      instanceType: derivedInstanceType,
+      enableThinking: null,
    };

    this.conversations.unshift(conversation);
@@ -819,6 +822,7 @@ class AppStore {
    this.hasStartedChat = true;
    this.isTopologyMinimized = true;
    this.isSidebarOpen = true; // Auto-open sidebar when chatting
+    this.thinkingEnabled = conversation.enableThinking ?? true;
    this.refreshConversationModelFromInstances();

    return true;
@@ -1932,6 +1936,11 @@ class AppStore {
    }
  }

+  /**
+   * Whether thinking is enabled for the current conversation
+   */
+  thinkingEnabled = $state(true);
+
  /**
   * Selected model for chat (can be set by the UI)
   */
@@ -2110,6 +2119,7 @@ class AppStore {
      textContent?: string;
      preview?: string;
    }[],
+    enableThinking?: boolean | null,
  ): Promise<void> {
    if ((!content.trim() && (!files || files.length === 0)) || this.isLoading)
      return;
@@ -2257,6 +2267,9 @@ class AppStore {
          stream: true,
          logprobs: true,
          top_logprobs: 5,
+          ...(enableThinking != null && {
+            enable_thinking: enableThinking,
+          }),
        }),
      });

@@ -2915,6 +2928,18 @@ class AppStore {
    );
  }

+  /**
+   * Update the thinking preference for the active conversation
+   */
+  setConversationThinking(enabled: boolean) {
+    this.thinkingEnabled = enabled;
+    const conv = this.getActiveConversation();
+    if (conv) {
+      conv.enableThinking = enabled;
+      this.saveConversationsToStorage();
+    }
+  }
+
  /**
   * Start a download on a specific node
   */
@@ -3028,6 +3053,7 @@ export const isLoadingPreviews = () => appStore.isLoadingPreviews;
 export const lastUpdate = () => appStore.lastUpdate;
 export const isTopologyMinimized = () => appStore.isTopologyMinimized;
 export const selectedChatModel = () => appStore.selectedChatModel;
+export const thinkingEnabled = () => appStore.thinkingEnabled;
 export const debugMode = () => appStore.getDebugMode();
 export const topologyOnlyMode = () => appStore.getTopologyOnlyMode();
 export const chatSidebarVisible = () => appStore.getChatSidebarVisible();
@@ -3043,7 +3069,8 @@ export const sendMessage = (
    textContent?: string;
    preview?: string;
  }[],
-) => appStore.sendMessage(content, files);
+  enableThinking?: boolean | null,
+) => appStore.sendMessage(content, files, enableThinking);
 export const generateImage = (prompt: string, modelId?: string) =>
  appStore.generateImage(prompt, modelId);
 export const editImage = (
@@ -3086,6 +3113,8 @@ export const deleteAllConversations = () => appStore.deleteAllConversations();
 export const renameConversation = (id: string, name: string) =>
  appStore.renameConversation(id, name);
 export const getActiveConversation = () => appStore.getActiveConversation();
+export const setConversationThinking = (enabled: boolean) =>
+  appStore.setConversationThinking(enabled);

 // Sidebar actions
 export const isSidebarOpen = () => appStore.isSidebarOpen;
--- a/dashboard/src/routes/+page.svelte
+++ b/dashboard/src/routes/+page.svelte
@@ -190,6 +190,19 @@
    return tasks;
  });

+  const modelCapabilities = $derived(() => {
+    const caps: Record<string, string[]> = {};
+    for (const model of models) {
+      if (model.capabilities && model.capabilities.length > 0) {
+        caps[model.id] = model.capabilities;
+        if (model.hugging_face_id) {
+          caps[model.hugging_face_id] = model.capabilities;
+        }
+      }
+    }
+    return caps;
+  });
+
  // Helper to check if a model supports image generation
  function modelSupportsImageGeneration(modelId: string): boolean {
    const model = models.find(
@@ -2270,6 +2283,7 @@
                showHelperText={false}
                showModelSelector={true}
                modelTasks={modelTasks()}
+                modelCapabilities={modelCapabilities()}
              />
            </div>
          </div>
@@ -3049,6 +3063,7 @@
                placeholder="Ask anything"
                showModelSelector={true}
                modelTasks={modelTasks()}
+                modelCapabilities={modelCapabilities()}
              />
            </div>
          </div>
--- a/src/exo/master/adapters/chat_completions.py
+++ b/src/exo/master/adapters/chat_completions.py
@@ -79,6 +79,7 @@ def chat_request_to_text_generation(
        seed=request.seed,
        stream=request.stream,
        tools=request.tools,
+        enable_thinking=request.enable_thinking,
        chat_template_messages=chat_template_messages
        if chat_template_messages
        else None,
--- a/src/exo/shared/types/api.py
+++ b/src/exo/shared/types/api.py
@@ -199,6 +199,7 @@ class ChatCompletionRequest(BaseModel):
    top_p: float | None = None
    top_k: int | None = None
    tools: list[dict[str, Any]] | None = None
+    enable_thinking: bool | None = None
    tool_choice: str | dict[str, Any] | None = None
    parallel_tool_calls: bool | None = None
    user: str | None = None
--- a/src/exo/shared/types/text_generation.py
+++ b/src/exo/shared/types/text_generation.py
@@ -40,5 +40,6 @@ class TextGenerationTaskParams(BaseModel, frozen=True):
    stop: str | list[str] | None = None
    seed: int | None = None
    chat_template_messages: list[dict[str, Any]] | None = None
+    enable_thinking: bool | None = None
    logprobs: bool = False
    top_logprobs: int | None = None
--- a/src/exo/worker/engines/mlx/utils_mlx.py
+++ b/src/exo/worker/engines/mlx/utils_mlx.py
@@ -462,11 +462,19 @@ def apply_chat_template(
        partial_assistant_content = cast(str, formatted_messages[-1].get("content", ""))
        formatted_messages = formatted_messages[:-1]

+    extra_kwargs: dict[str, Any] = {}
+    if task_params.enable_thinking is not None:
+        # Qwen3 and GLM use "enable_thinking"; DeepSeek uses "thinking".
+        # Jinja ignores unknown variables, so passing both is safe.
+        extra_kwargs["enable_thinking"] = task_params.enable_thinking
+        extra_kwargs["thinking"] = task_params.enable_thinking
+
    prompt: str = tokenizer.apply_chat_template(
        formatted_messages,
        tokenize=False,
        add_generation_prompt=True,
        tools=task_params.tools,
+        **extra_kwargs,
    )

    if partial_assistant_content: