Compare commits

...

2 Commits

Author SHA1 Message Date
Alex Cheema
6950f94109 dashboard: show macOS version in debug mode (#1454)
## Motivation

When debugging cluster issues, it's useful to see which macOS version
each node is running — especially since version mismatches can cause
compatibility problems. The OS version data is already collected by the
identity gatherer but wasn't shown in the topology graph.

## Changes

- Added OS version label (e.g. "macOS 15.2") to each node in the
topology graph when debug mode is enabled
- Renders below the existing TB and RDMA debug labels using the same
styling conventions
- Sources data from the existing `nodeIdentities` store (no backend
changes needed)

## Why It Works

The `nodeIdentities` store already contains `osVersion` for each node.
We simply read it in the `TopologyGraph` component and append a text
label in the debug section, following the exact same pattern as the TB
and RDMA labels.

## Test Plan

### Manual Testing
<!-- Hardware: MacBook Pro -->
- Enable debug mode in the dashboard
- Verify OS version label appears below TB/RDMA labels on each node
- Verify label disappears when debug mode is disabled

### Automated Testing
- Dashboard build passes (`npm run build`)

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: rltakashige <rl.takashige@gmail.com>
Co-authored-by: Ryuichi Leo Takashige <leo@exolabs.net>
2026-02-12 17:56:55 +00:00
Alex Cheema
d0c44273db feat: add enable_thinking toggle for thinking-capable models (#1457)
## Motivation

Fixes #1456. Models like DeepSeek V3.2, Qwen3, and GLM-4.7 always run in
thinking mode because their chat templates auto-inject `<think>`. Users
need a way to disable thinking for models that support both modes.

## Changes

**API**: Added `enable_thinking: bool | None` to `ChatCompletionRequest`
and `TextGenerationTaskParams`. Passed through the adapter to
`tokenizer.apply_chat_template()` as a kwarg (only when explicitly set,
so models without the template variable are unaffected).

**Dashboard**: Added a thinking toggle button in the chat input area.
Visible only when the selected model has both "text" and "thinking"
capabilities.

## Why It Works

Most thinking model chat templates (DeepSeek, Qwen3, GLM) accept
`enable_thinking` as a Jinja template variable. Passing
`enable_thinking=False` prevents the template from injecting `<think>`,
matching the vLLM convention.

## Test Plan

### Manual Testing
- `curl` with `"enable_thinking": false` against a thinking model — no
`<think>` in output
- Dashboard toggle visible for thinking models, hidden for text-only
models

### Automated Testing
- basedpyright: 0 errors
- ruff: clean
- pytest: 188 passed
- dashboard build: success

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-12 17:35:24 +00:00
8 changed files with 119 additions and 2 deletions

View File

@@ -12,6 +12,8 @@
ttftMs,
tps,
totalTokens,
thinkingEnabled as thinkingEnabledStore,
setConversationThinking,
} from "$lib/stores/app.svelte";
import ChatAttachments from "./ChatAttachments.svelte";
import ImageParamsPanel from "./ImageParamsPanel.svelte";
@@ -25,6 +27,7 @@
autofocus?: boolean;
showModelSelector?: boolean;
modelTasks?: Record<string, string[]>;
modelCapabilities?: Record<string, string[]>;
}
let {
@@ -34,6 +37,7 @@
autofocus = true,
showModelSelector = false,
modelTasks = {},
modelCapabilities = {},
}: Props = $props();
let message = $state("");
@@ -41,6 +45,7 @@
let fileInputRef: HTMLInputElement | undefined = $state();
let uploadedFiles = $state<ChatUploadedFile[]>([]);
let isDragOver = $state(false);
const thinkingEnabled = $derived(thinkingEnabledStore());
let loading = $derived(isLoading());
const currentModel = $derived(selectedChatModel());
const instanceData = $derived(instances());
@@ -95,6 +100,12 @@
);
});
const modelSupportsThinking = $derived(() => {
if (!currentModel) return false;
const caps = modelCapabilities[currentModel] || [];
return caps.includes("thinking") && caps.includes("text");
});
const isEditOnlyWithoutImage = $derived(
currentModel !== null &&
modelSupportsOnlyImageEditing(currentModel) &&
@@ -282,7 +293,11 @@
// Use image generation for text-to-image models
generateImage(content);
} else {
sendMessage(content, files);
sendMessage(
content,
files,
modelSupportsThinking() ? thinkingEnabled : null,
);
}
// Refocus the textarea after sending
@@ -520,6 +535,35 @@
</div>
{/if}
</div>
<!-- Thinking toggle -->
{#if modelSupportsThinking()}
<button
type="button"
onclick={() => setConversationThinking(!thinkingEnabled)}
class="flex items-center gap-1.5 px-2 py-1 rounded text-xs font-mono tracking-wide transition-all duration-200 flex-shrink-0 cursor-pointer border {thinkingEnabled
? 'bg-exo-yellow/15 border-exo-yellow/40 text-exo-yellow'
: 'bg-exo-medium-gray/30 border-exo-medium-gray/50 text-exo-light-gray/60 hover:text-exo-light-gray'}"
title={thinkingEnabled
? "Thinking enabled — click to disable"
: "Thinking disabled — click to enable"}
>
<svg
class="w-3.5 h-3.5"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="1.5"
>
<path
d="M12 2a7 7 0 0 0-7 7c0 2.38 1.19 4.47 3 5.74V17a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1v-2.26c1.81-1.27 3-3.36 3-5.74a7 7 0 0 0-7-7zM9 20h6M10 22h4"
stroke-linecap="round"
stroke-linejoin="round"
/>
</svg>
<span>{thinkingEnabled ? "THINK" : "NO THINK"}</span>
</button>
{/if}
<!-- Performance stats -->
{#if currentTtft !== null || currentTps !== null}
<div class="flex items-center gap-4 text-xs font-mono flex-shrink-0">

View File

@@ -7,6 +7,7 @@
debugMode,
nodeThunderboltBridge,
nodeRdmaCtl,
nodeIdentities,
type NodeInfo,
} from "$lib/stores/app.svelte";
@@ -33,6 +34,7 @@
const debugEnabled = $derived(debugMode());
const tbBridgeData = $derived(nodeThunderboltBridge());
const rdmaCtlData = $derived(nodeRdmaCtl());
const identitiesData = $derived(nodeIdentities());
function getNodeLabel(nodeId: string): string {
const node = data?.nodes?.[nodeId];
@@ -1177,6 +1179,22 @@
.attr("font-size", debugFontSize)
.attr("font-family", "SF Mono, Monaco, monospace")
.text(rdmaText);
debugLabelY += debugLineHeight;
}
const identity = identitiesData[nodeInfo.id];
if (identity?.osVersion) {
nodeG
.append("text")
.attr("x", nodeInfo.x)
.attr("y", debugLabelY)
.attr("text-anchor", "middle")
.attr("fill", "rgba(179,179,179,0.7)")
.attr("font-size", debugFontSize)
.attr("font-family", "SF Mono, Monaco, monospace")
.text(
`macOS ${identity.osVersion}${identity.osBuildVersion ? ` (${identity.osBuildVersion})` : ""}`,
);
}
}
});

View File

@@ -296,6 +296,7 @@ export interface Conversation {
modelId: string | null;
sharding: string | null;
instanceType: string | null;
enableThinking: boolean | null;
}
const STORAGE_KEY = "exo-conversations";
@@ -605,6 +606,7 @@ class AppStore {
modelId: conversation.modelId ?? null,
sharding: conversation.sharding ?? null,
instanceType: conversation.instanceType ?? null,
enableThinking: conversation.enableThinking ?? null,
}));
}
} catch (error) {
@@ -794,6 +796,7 @@ class AppStore {
modelId: derivedModelId,
sharding: derivedSharding,
instanceType: derivedInstanceType,
enableThinking: null,
};
this.conversations.unshift(conversation);
@@ -819,6 +822,7 @@ class AppStore {
this.hasStartedChat = true;
this.isTopologyMinimized = true;
this.isSidebarOpen = true; // Auto-open sidebar when chatting
this.thinkingEnabled = conversation.enableThinking ?? true;
this.refreshConversationModelFromInstances();
return true;
@@ -1932,6 +1936,11 @@ class AppStore {
}
}
/**
* Whether thinking is enabled for the current conversation
*/
thinkingEnabled = $state(true);
/**
* Selected model for chat (can be set by the UI)
*/
@@ -2110,6 +2119,7 @@ class AppStore {
textContent?: string;
preview?: string;
}[],
enableThinking?: boolean | null,
): Promise<void> {
if ((!content.trim() && (!files || files.length === 0)) || this.isLoading)
return;
@@ -2257,6 +2267,9 @@ class AppStore {
stream: true,
logprobs: true,
top_logprobs: 5,
...(enableThinking != null && {
enable_thinking: enableThinking,
}),
}),
});
@@ -2915,6 +2928,18 @@ class AppStore {
);
}
/**
* Update the thinking preference for the active conversation
*/
setConversationThinking(enabled: boolean) {
this.thinkingEnabled = enabled;
const conv = this.getActiveConversation();
if (conv) {
conv.enableThinking = enabled;
this.saveConversationsToStorage();
}
}
/**
* Start a download on a specific node
*/
@@ -3028,6 +3053,7 @@ export const isLoadingPreviews = () => appStore.isLoadingPreviews;
export const lastUpdate = () => appStore.lastUpdate;
export const isTopologyMinimized = () => appStore.isTopologyMinimized;
export const selectedChatModel = () => appStore.selectedChatModel;
export const thinkingEnabled = () => appStore.thinkingEnabled;
export const debugMode = () => appStore.getDebugMode();
export const topologyOnlyMode = () => appStore.getTopologyOnlyMode();
export const chatSidebarVisible = () => appStore.getChatSidebarVisible();
@@ -3043,7 +3069,8 @@ export const sendMessage = (
textContent?: string;
preview?: string;
}[],
) => appStore.sendMessage(content, files);
enableThinking?: boolean | null,
) => appStore.sendMessage(content, files, enableThinking);
export const generateImage = (prompt: string, modelId?: string) =>
appStore.generateImage(prompt, modelId);
export const editImage = (
@@ -3086,6 +3113,8 @@ export const deleteAllConversations = () => appStore.deleteAllConversations();
export const renameConversation = (id: string, name: string) =>
appStore.renameConversation(id, name);
export const getActiveConversation = () => appStore.getActiveConversation();
export const setConversationThinking = (enabled: boolean) =>
appStore.setConversationThinking(enabled);
// Sidebar actions
export const isSidebarOpen = () => appStore.isSidebarOpen;

View File

@@ -190,6 +190,19 @@
return tasks;
});
const modelCapabilities = $derived(() => {
const caps: Record<string, string[]> = {};
for (const model of models) {
if (model.capabilities && model.capabilities.length > 0) {
caps[model.id] = model.capabilities;
if (model.hugging_face_id) {
caps[model.hugging_face_id] = model.capabilities;
}
}
}
return caps;
});
// Helper to check if a model supports image generation
function modelSupportsImageGeneration(modelId: string): boolean {
const model = models.find(
@@ -2270,6 +2283,7 @@
showHelperText={false}
showModelSelector={true}
modelTasks={modelTasks()}
modelCapabilities={modelCapabilities()}
/>
</div>
</div>
@@ -3049,6 +3063,7 @@
placeholder="Ask anything"
showModelSelector={true}
modelTasks={modelTasks()}
modelCapabilities={modelCapabilities()}
/>
</div>
</div>

View File

@@ -79,6 +79,7 @@ def chat_request_to_text_generation(
seed=request.seed,
stream=request.stream,
tools=request.tools,
enable_thinking=request.enable_thinking,
chat_template_messages=chat_template_messages
if chat_template_messages
else None,

View File

@@ -199,6 +199,7 @@ class ChatCompletionRequest(BaseModel):
top_p: float | None = None
top_k: int | None = None
tools: list[dict[str, Any]] | None = None
enable_thinking: bool | None = None
tool_choice: str | dict[str, Any] | None = None
parallel_tool_calls: bool | None = None
user: str | None = None

View File

@@ -40,5 +40,6 @@ class TextGenerationTaskParams(BaseModel, frozen=True):
stop: str | list[str] | None = None
seed: int | None = None
chat_template_messages: list[dict[str, Any]] | None = None
enable_thinking: bool | None = None
logprobs: bool = False
top_logprobs: int | None = None

View File

@@ -462,11 +462,19 @@ def apply_chat_template(
partial_assistant_content = cast(str, formatted_messages[-1].get("content", ""))
formatted_messages = formatted_messages[:-1]
extra_kwargs: dict[str, Any] = {}
if task_params.enable_thinking is not None:
# Qwen3 and GLM use "enable_thinking"; DeepSeek uses "thinking".
# Jinja ignores unknown variables, so passing both is safe.
extra_kwargs["enable_thinking"] = task_params.enable_thinking
extra_kwargs["thinking"] = task_params.enable_thinking
prompt: str = tokenizer.apply_chat_template(
formatted_messages,
tokenize=False,
add_generation_prompt=True,
tools=task_params.tools,
**extra_kwargs,
)
if partial_assistant_content: