Compare commits

...

17 Commits

Author SHA1 Message Date
Sami Khan
d611f55332 testing macmon 2026-02-04 15:13:58 +05:00
Sami Khan
66174b6509 test macmon 2026-02-04 12:10:04 +05:00
Sami Khan
7a2abfa0ed test override 2026-02-04 11:25:32 +05:00
Sami Khan
5aea62c8ef fix test flow 2026-02-04 10:37:34 +05:00
Sami Khan
32ce382445 fix path 2026-02-04 10:08:25 +05:00
Sami Khan
a4c42993e0 networksetup fix 2026-02-04 09:45:38 +05:00
Sami Khan
38d03ce1fa macmon in path 2026-02-04 09:32:04 +05:00
Sami Khan
ad0b1a2ce9 Add macmon to CI and restore E2E tests for model launch and chat 2026-02-04 09:13:33 +05:00
Sami Khan
6f7c9000cf Simplify to basic UI element tests (no snapshots) 2026-02-04 07:50:17 +05:00
Sami Khan
c9ff05f012 Simplify to basic UI element tests (no snapshots) 2026-02-04 07:44:29 +05:00
Sami Khan
164f8fb38c Remove E2E tests, keep only visual snapshots for CI 2026-02-04 07:31:34 +05:00
Sami Khan
698eb9ad17 Skip model-launch tests in CI 2026-02-04 07:11:59 +05:00
Sami Khan
2ef29eeb5f Fix CI: add uv sync step to dashboard tests 2026-02-04 06:04:29 +05:00
Sami Khan
e847bbd675 Fix CI: pre-install Python deps, increase timeout, add @types/node 2026-02-04 05:28:03 +05:00
Sami Khan
8f1ca88e5d remove mock tests 2026-02-04 01:23:12 +05:00
Sami Khan
075c5c545e Add dashboard Playwright tests with CI 2026-02-04 01:02:39 +05:00
Alex Cheema
acb97127bf Normalize TextGenerationTaskParams.input to list[InputMessage] (#1360)
## Motivation

With the addition of the Responses API, we introduced `str |
list[InputMessage]` as the type for `TextGenerationTaskParams.input`
since the Responses API supports sending input as a plain string. But
there was no reason to leak that flexibility past the API adapter
boundary — it just meant every downstream consumer had to do `if
isinstance(messages, str):` checks, adding complexity for no benefit.

## Changes

- Changed `TextGenerationTaskParams.input` from `str |
list[InputMessage]` to `list[InputMessage]`
- Each API adapter (Chat Completions, Claude Messages, Responses) now
normalizes to `list[InputMessage]` at the boundary
- Removed `isinstance(task_params.input, str)` branches in
`utils_mlx.py` and `runner.py`
- Wrapped string inputs in `[InputMessage(role="user", content=...)]` in
the warmup path and all test files

## Why It Works

The API adapters are the only place where we deal with raw user input
formats. By normalizing there, all downstream code (worker, runner, MLX
engine) can just assume `list[InputMessage]` and skip the type-checking
branches. The type system (`basedpyright`) catches any missed call sites
at compile time.

## Test Plan

### Automated Testing
- `uv run basedpyright` — 0 errors
- `uv run ruff check` — passes
- `nix fmt` — applied
- `uv run pytest` — 174 passed, 1 skipped

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 06:01:56 -08:00
26 changed files with 606 additions and 62 deletions

View File

@@ -143,3 +143,139 @@ jobs:
export HOME="$RUNNER_TEMP"
export EXO_TESTS=1
EXO_RESOURCES_DIR="$PWD/resources" $TEST_ENV/bin/python -m pytest src -m "not slow" --import-mode=importlib
dashboard-tests:
name: Dashboard E2E Tests
runs-on: macos-26
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
lfs: false
- uses: cachix/install-nix-action@v31
with:
nix_path: nixpkgs=channel:nixos-unstable
- uses: cachix/cachix-action@v14
name: Configure Cachix
with:
name: exo
authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
- name: Build Metal packages
run: |
if nix build .#metal-toolchain 2>/dev/null; then
echo "metal-toolchain built successfully (likely cache hit)"
else
echo "metal-toolchain build failed, extracting from Xcode..."
NAR_HASH="sha256-ayR5mXN4sZAddwKEG2OszGRF93k9ZFc7H0yi2xbylQw="
NAR_NAME="metal-toolchain-17C48.nar"
WORK_DIR="${RUNNER_TEMP}/metal-work"
mkdir -p "$WORK_DIR"
xcodebuild -downloadComponent MetalToolchain
DMG_PATH=$(find /System/Library/AssetsV2/com_apple_MobileAsset_MetalToolchain -name '*.dmg' 2>/dev/null | head -1)
if [ -z "$DMG_PATH" ]; then
echo "Error: Could not find Metal toolchain DMG"
exit 1
fi
echo "Found DMG at: $DMG_PATH"
hdiutil attach "$DMG_PATH" -mountpoint "${WORK_DIR}/metal-dmg"
cp -R "${WORK_DIR}/metal-dmg/Metal.xctoolchain" "${WORK_DIR}/metal-export"
hdiutil detach "${WORK_DIR}/metal-dmg"
nix nar pack "${WORK_DIR}/metal-export" > "${WORK_DIR}/${NAR_NAME}"
STORE_PATH=$(nix store add --mode flat "${WORK_DIR}/${NAR_NAME}")
echo "Added NAR to store: $STORE_PATH"
rm -rf "$WORK_DIR"
nix build .#metal-toolchain
fi
nix build .#mlx
- name: Install macmon for hardware monitoring
run: brew install macmon
- name: Load nix develop environment
run: nix run github:nicknovitski/nix-develop/v1
- name: Sync Python dependencies
run: uv sync --all-packages
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: '20'
cache: 'npm'
cache-dependency-path: dashboard/package-lock.json
- name: Install dashboard dependencies
working-directory: dashboard
run: npm ci
- name: Install Playwright browsers
working-directory: dashboard
run: npx playwright install chromium --with-deps
- name: Build dashboard
working-directory: dashboard
run: npm run build
- name: Verify macmon is accessible
run: |
echo "PATH: $PATH"
which macmon || echo "macmon not in PATH"
macmon --version
# Test macmon actually works - capture stderr too
echo "Testing macmon pipe output (with stderr)..."
timeout 5 macmon pipe --interval 1000 2>&1 || echo "macmon pipe exit code: $?"
# Try running macmon raw (not pipe mode)
echo "Testing macmon raw output..."
macmon raw 2>&1 | head -5 || echo "macmon raw failed"
- name: Verify Python can find macmon
run: |
echo "Testing shutil.which from uv run python..."
uv run python -c "import shutil; print('Python shutil.which macmon:', shutil.which('macmon'))"
- name: Run Playwright tests
working-directory: dashboard
run: |
export PATH="/usr/sbin:/usr/bin:/opt/homebrew/bin:$PATH"
echo "Effective PATH: $PATH"
which macmon && echo "macmon found at $(which macmon)"
npm test
env:
CI: true
- name: Upload test results
uses: actions/upload-artifact@v4
if: always()
with:
name: playwright-report
path: dashboard/playwright-report/
retention-days: 30
- name: Upload video recordings
uses: actions/upload-artifact@v4
if: always()
with:
name: test-videos
path: dashboard/test-results/
retention-days: 30
- name: Upload snapshot diffs
uses: actions/upload-artifact@v4
if: failure()
with:
name: snapshot-diffs
path: dashboard/tests/**/*-snapshots/*-diff.png
retention-days: 30

7
.gitignore vendored
View File

@@ -29,5 +29,12 @@ dashboard/build/
dashboard/node_modules/
dashboard/.svelte-kit/
# playwright
dashboard/test-results/
dashboard/playwright-report/
dashboard/playwright/.cache/
dashboard/tests/**/*-snapshots/*-actual.png
dashboard/tests/**/*-snapshots/*-diff.png
# host config snapshots
hosts_*.json

View File

@@ -14,12 +14,13 @@
"mode-watcher": "^1.1.0"
},
"devDependencies": {
"@playwright/test": "^1.41.0",
"@sveltejs/adapter-static": "^3.0.10",
"@sveltejs/kit": "^2.48.4",
"@sveltejs/vite-plugin-svelte": "^5.0.0",
"@tailwindcss/vite": "^4.0.0",
"@types/d3": "^7.4.3",
"@types/node": "^22",
"@types/node": "^22.19.8",
"d3": "^7.9.0",
"prettier": "^3.4.2",
"prettier-plugin-svelte": "^3.3.3",
@@ -518,6 +519,22 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@playwright/test": {
"version": "1.58.1",
"resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.58.1.tgz",
"integrity": "sha512-6LdVIUERWxQMmUSSQi0I53GgCBYgM2RpGngCPY7hSeju+VrKjq3lvs7HpJoPbDiY5QM5EYRtRX5fvrinnMAz3w==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"playwright": "1.58.1"
},
"bin": {
"playwright": "cli.js"
},
"engines": {
"node": ">=18"
}
},
"node_modules/@polka/url": {
"version": "1.0.0-next.29",
"resolved": "https://registry.npmjs.org/@polka/url/-/url-1.0.0-next.29.tgz",
@@ -1515,9 +1532,9 @@
"license": "MIT"
},
"node_modules/@types/node": {
"version": "22.19.1",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.1.tgz",
"integrity": "sha512-LCCV0HdSZZZb34qifBsyWlUmok6W7ouER+oQIGBScS8EsZsQbrtFTUrDX4hOl+CS6p7cnNC4td+qrSVGSCTUfQ==",
"version": "22.19.8",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.8.tgz",
"integrity": "sha512-ebO/Yl+EAvVe8DnMfi+iaAyIqYdK0q/q0y0rw82INWEKJOBe6b/P3YWE8NW7oOlF/nXFNrHwhARrN/hdgDkraA==",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -2655,6 +2672,53 @@
"url": "https://github.com/sponsors/jonschlinkert"
}
},
"node_modules/playwright": {
"version": "1.58.1",
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.58.1.tgz",
"integrity": "sha512-+2uTZHxSCcxjvGc5C891LrS1/NlxglGxzrC4seZiVjcYVQfUa87wBL6rTDqzGjuoWNjnBzRqKmF6zRYGMvQUaQ==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"playwright-core": "1.58.1"
},
"bin": {
"playwright": "cli.js"
},
"engines": {
"node": ">=18"
},
"optionalDependencies": {
"fsevents": "2.3.2"
}
},
"node_modules/playwright-core": {
"version": "1.58.1",
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.58.1.tgz",
"integrity": "sha512-bcWzOaTxcW+VOOGBCQgnaKToLJ65d6AqfLVKEWvexyS3AS6rbXl+xdpYRMGSRBClPvyj44njOWoxjNdL/H9UNg==",
"dev": true,
"license": "Apache-2.0",
"bin": {
"playwright-core": "cli.js"
},
"engines": {
"node": ">=18"
}
},
"node_modules/playwright/node_modules/fsevents": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
"integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
"node_modules/postcss": {
"version": "8.5.6",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz",

View File

@@ -8,18 +8,23 @@
"build": "vite build",
"preview": "vite preview",
"prepare": "svelte-kit sync || echo ''",
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json"
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
"test": "playwright test",
"test:e2e": "playwright test tests/e2e",
"test:visual": "playwright test tests/visual",
"test:update-snapshots": "playwright test tests/visual --update-snapshots"
},
"devDependencies": {
"prettier": "^3.4.2",
"prettier-plugin-svelte": "^3.3.3",
"@playwright/test": "^1.41.0",
"@sveltejs/adapter-static": "^3.0.10",
"@sveltejs/kit": "^2.48.4",
"@sveltejs/vite-plugin-svelte": "^5.0.0",
"@tailwindcss/vite": "^4.0.0",
"@types/d3": "^7.4.3",
"@types/node": "^22",
"@types/node": "^22.19.8",
"d3": "^7.9.0",
"prettier": "^3.4.2",
"prettier-plugin-svelte": "^3.3.3",
"svelte": "^5.0.0",
"svelte-check": "^4.0.0",
"tailwindcss": "^4.0.0",

View File

@@ -0,0 +1,43 @@
/// <reference types="node" />
import { defineConfig, devices } from "@playwright/test";
export default defineConfig({
  testDir: "./tests",
  fullyParallel: true,
  // Refuse to run if a stray test.only was committed, but only on CI.
  forbidOnly: !!process.env.CI,
  retries: process.env.CI ? 2 : 0,
  // CI runners are resource-constrained; serialize tests there.
  workers: process.env.CI ? 1 : undefined,
  reporter: [["html", { open: "never" }], ["list"]],
  use: {
    baseURL: "http://localhost:52415",
    trace: "on-first-retry",
    video: "on",
    screenshot: "only-on-failure",
  },
  projects: [
    {
      name: "chromium",
      use: { ...devices["Desktop Chrome"] },
    },
  ],
  webServer: {
    // Boot the exo server from the repo root; readiness is probed via /node_id.
    command: "cd .. && uv run exo",
    url: "http://localhost:52415/node_id",
    reuseExistingServer: !process.env.CI,
    timeout: 300000, // 5 minutes - CI needs time to install dependencies
    env: {
      ...process.env,
      // Ensure macmon and system tools are accessible
      PATH: `/usr/sbin:/usr/bin:/opt/homebrew/bin:${process.env.PATH}`,
      // Override memory detection for CI (macmon may not work on CI runners)
      // 24GB is typical for GitHub Actions macos-26 runners
      ...(process.env.CI ? { OVERRIDE_MEMORY_MB: "24000" } : {}),
    },
  },
  // Screenshot-comparison tolerances for the visual tests.
  expect: {
    toHaveScreenshot: {
      maxDiffPixelRatio: 0.05,
      threshold: 0.2,
    },
  },
});

View File

@@ -407,6 +407,7 @@
<!-- Custom dropdown -->
<div class="relative flex-1 max-w-xs">
<button
data-testid="chat-model-selector"
bind:this={dropdownButtonRef}
type="button"
onclick={() => (isModelDropdownOpen = !isModelDropdownOpen)}
@@ -587,6 +588,7 @@
>
<textarea
data-testid="chat-input"
bind:this={textareaRef}
bind:value={message}
onkeydown={handleKeydown}
@@ -606,6 +608,7 @@
></textarea>
<button
data-testid="send-button"
type="submit"
disabled={!canSend || loading || isEditOnlyWithoutImage}
class="px-2.5 sm:px-4 py-1.5 sm:py-2 rounded text-xs sm:text-xs tracking-[0.1em] sm:tracking-[0.15em] uppercase font-medium transition-all duration-200 whitespace-nowrap

View File

@@ -237,6 +237,9 @@
class="group flex {message.role === 'user'
? 'justify-end'
: 'justify-start'}"
data-testid={message.role === "user"
? "user-message"
: "assistant-message"}
>
<div
class={message.role === "user"

View File

@@ -977,6 +977,7 @@
<!-- Launch Button -->
<button
data-testid="launch-button"
onclick={onLaunch}
disabled={isLaunching || !canFit}
class="w-full py-2 text-sm font-mono tracking-wider uppercase border transition-all duration-200

View File

@@ -1663,12 +1663,14 @@
<div
class="flex-1 relative bg-exo-dark-gray/40 rounded-lg overflow-hidden"
>
<TopologyGraph
class="w-full h-full"
highlightedNodes={highlightedNodes()}
filteredNodes={nodeFilter}
onNodeClick={togglePreviewNodeFilter}
/>
<div data-testid="topology-graph" class="w-full h-full">
<TopologyGraph
class="w-full h-full"
highlightedNodes={highlightedNodes()}
filteredNodes={nodeFilter}
onNodeClick={togglePreviewNodeFilter}
/>
</div>
<!-- Thunderbolt Bridge Cycle Warning -->
{#if tbBridgeCycles.length > 0}
@@ -1782,12 +1784,14 @@
class="flex-1 relative bg-exo-dark-gray/40 mx-4 mb-4 rounded-lg overflow-hidden"
>
<!-- The main topology graph - full container -->
<TopologyGraph
class="w-full h-full"
highlightedNodes={highlightedNodes()}
filteredNodes={nodeFilter}
onNodeClick={togglePreviewNodeFilter}
/>
<div data-testid="topology-graph" class="w-full h-full">
<TopologyGraph
class="w-full h-full"
highlightedNodes={highlightedNodes()}
filteredNodes={nodeFilter}
onNodeClick={togglePreviewNodeFilter}
/>
</div>
<!-- Thunderbolt Bridge Cycle Warning -->
{#if tbBridgeCycles.length > 0}
@@ -2363,6 +2367,7 @@
<!-- Model Dropdown (Custom) -->
<div class="flex-shrink-0 mb-3 relative">
<button
data-testid="model-dropdown"
type="button"
onclick={() => (isModelDropdownOpen = !isModelDropdownOpen)}
class="w-full bg-exo-medium-gray/50 border border-exo-yellow/30 rounded pl-3 pr-8 py-2.5 text-sm font-mono text-left tracking-wide cursor-pointer transition-all duration-200 hover:border-exo-yellow/50 focus:outline-none focus:border-exo-yellow/70 {isModelDropdownOpen
@@ -2499,6 +2504,7 @@
model.id,
)}
<button
data-testid="model-option"
type="button"
onclick={() => {
if (modelCanFit) {
@@ -2777,6 +2783,7 @@
{#each allPreviews as apiPreview, i}
<div
role="group"
data-testid="model-card"
onmouseenter={() => {
if (apiPreview.memory_delta_by_node) {
hoveredPreviewNodes = new Set(

View File

@@ -0,0 +1,68 @@
import { test, expect } from "@playwright/test";
import {
waitForTopologyLoaded,
waitForModelCards,
waitForChatReady,
waitForAssistantMessage,
sendChatMessage,
selectModelFromLaunchDropdown,
} from "../helpers/wait-for-ready";
// Full round trip: launch a small model, send a prompt, and verify that a
// non-empty assistant reply appears in the transcript.
test.describe("Chat Message", () => {
  test("should send a message and receive a response", async ({ page }) => {
    // Increase timeout for this test since it involves model loading and inference
    test.setTimeout(600000); // 10 minutes
    await page.goto("/");
    await waitForTopologyLoaded(page);
    // First select the model from the dropdown (model cards appear after selection)
    await selectModelFromLaunchDropdown(page, /qwen.*0\.6b/i);
    // Now wait for model cards to appear
    await waitForModelCards(page);
    // Find and click on the model card (should already be filtered to Qwen)
    const modelCard = page.locator('[data-testid="model-card"]').first();
    await expect(modelCard).toBeVisible({ timeout: 10000 });
    // Click the launch button
    const launchButton = modelCard.locator('[data-testid="launch-button"]');
    await launchButton.click();
    // Wait for the model to be ready (may take time to download)
    await expect(
      page
        .locator('[data-testid="instance-status"]')
        .filter({ hasText: /READY/i })
        .first(),
    ).toBeVisible({ timeout: 300000 }); // 5 minutes for download
    // Wait for chat to be ready
    await waitForChatReady(page);
    // Select the model in the chat selector if needed
    const modelSelector = page.locator('[data-testid="chat-model-selector"]');
    if (await modelSelector.isVisible()) {
      await modelSelector.click();
      await page.locator("text=/qwen.*0\\.6b/i").first().click();
    }
    // Send a simple message
    await sendChatMessage(page, "What is 2+2?");
    // Wait for assistant response
    await waitForAssistantMessage(page, 120000); // 2 minutes for inference
    // Verify the assistant message is visible
    const assistantMessage = page
      .locator('[data-testid="assistant-message"]')
      .last();
    await expect(assistantMessage).toBeVisible();
    // The response should contain something (not empty)
    const messageContent = await assistantMessage.textContent();
    expect(messageContent).toBeTruthy();
    expect(messageContent!.length).toBeGreaterThan(0);
  });
});

View File

@@ -0,0 +1,36 @@
import { test, expect } from "@playwright/test";
import {
waitForTopologyLoaded,
waitForModelCards,
selectModelFromLaunchDropdown,
} from "../helpers/wait-for-ready";
// Launch flow only: pick Qwen3-0.6B from the dropdown, press LAUNCH, and wait
// until the instance reports READY or at least starts downloading.
test.describe("Launch Instance", () => {
  test("should launch Qwen3-0.6B-4bit model", async ({ page }) => {
    await page.goto("/");
    await waitForTopologyLoaded(page);
    // First select the model from the dropdown (model cards appear after selection)
    await selectModelFromLaunchDropdown(page, /qwen.*0\.6b/i);
    // Now wait for model cards to appear
    await waitForModelCards(page);
    // Find and click on the model card (should already be filtered to Qwen)
    const modelCard = page.locator('[data-testid="model-card"]').first();
    await expect(modelCard).toBeVisible({ timeout: 10000 });
    // Click the launch button
    const launchButton = modelCard.locator('[data-testid="launch-button"]');
    await launchButton.click();
    // Wait for the model to start (status should change to READY or show download progress)
    // The model may need to download first, so we wait with a longer timeout
    await expect(
      page
        .locator('[data-testid="instance-status"]')
        .filter({ hasText: /READY|downloading/i })
        .first(),
    ).toBeVisible({ timeout: 300000 }); // 5 minutes for download
  });
});

View File

@@ -0,0 +1,117 @@
import { expect, type Page } from "@playwright/test";
// Absolute origin for direct API requests (page.request bypasses the
// baseURL configured in playwright.config).
const BASE_URL = "http://localhost:52415";
/**
 * Poll the /node_id endpoint every 500ms until the API answers with a
 * successful response, or throw once timeoutMs has elapsed.
 */
export async function waitForApiReady(
  page: Page,
  timeoutMs = 30000,
): Promise<void> {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    let ready = false;
    try {
      const response = await page.request.get(`${BASE_URL}/node_id`);
      ready = response.ok();
    } catch {
      // Server not accepting connections yet — keep polling.
    }
    if (ready) {
      return;
    }
    await page.waitForTimeout(500);
  }
  throw new Error(`API did not become ready within ${timeoutMs}ms`);
}
/**
 * Wait until a topology graph is rendered.
 *
 * Uses .first(): the dashboard renders `data-testid="topology-graph"` in two
 * layout containers, and a strict-mode expect() on a locator matching more
 * than one element throws instead of passing. (The homepage test already
 * guards with .first() for the same reason.)
 */
export async function waitForTopologyLoaded(page: Page): Promise<void> {
  await expect(
    page.locator('[data-testid="topology-graph"]').first(),
  ).toBeVisible({
    timeout: 30000,
  });
}
/** Block until at least one model card has rendered. */
export async function waitForModelCards(page: Page): Promise<void> {
  const firstCard = page.locator('[data-testid="model-card"]').first();
  await expect(firstCard).toBeVisible({ timeout: 30000 });
}
/**
 * Open the Launch Instance model dropdown and click the first entry whose
 * text matches modelPattern.
 */
export async function selectModelFromLaunchDropdown(
  page: Page,
  modelPattern: RegExp | string,
): Promise<void> {
  // Click the model dropdown in the Launch Instance panel
  const trigger = page.locator('button:has-text("SELECT MODEL")');
  await expect(trigger).toBeVisible({ timeout: 30000 });
  await trigger.click();
  // Wait for dropdown menu to appear and select the model
  const match = page
    .locator("button")
    .filter({ hasText: modelPattern })
    .first();
  await expect(match).toBeVisible({ timeout: 10000 });
  await match.click();
}
/** Wait until both halves of the chat composer (input + send) are visible. */
export async function waitForChatReady(page: Page): Promise<void> {
  const timeout = 10000;
  await expect(page.locator('[data-testid="chat-input"]')).toBeVisible({
    timeout,
  });
  await expect(page.locator('[data-testid="send-button"]')).toBeVisible({
    timeout,
  });
}
/** Wait for the most recent assistant message bubble to appear. */
export async function waitForAssistantMessage(
  page: Page,
  timeoutMs = 60000,
): Promise<void> {
  const lastReply = page.locator('[data-testid="assistant-message"]').last();
  await expect(lastReply).toBeVisible({ timeout: timeoutMs });
}
/**
 * Poll the send button's label until streaming has finished — i.e. the label
 * no longer reads PROCESSING or shows an ellipsis. Throws on timeout.
 */
export async function waitForStreamingComplete(
  page: Page,
  timeoutMs = 120000,
): Promise<void> {
  const deadline = Date.now() + timeoutMs;
  const sendButton = page.locator('[data-testid="send-button"]');
  while (Date.now() < deadline) {
    const label = await sendButton.textContent();
    const streaming =
      !label || label.includes("PROCESSING") || label.includes("...");
    if (!streaming) {
      return;
    }
    await page.waitForTimeout(500);
  }
  throw new Error(`Streaming did not complete within ${timeoutMs}ms`);
}
/** Choose a model by display name from the chat model selector. */
export async function selectModel(
  page: Page,
  modelName: string,
): Promise<void> {
  await page.locator('[data-testid="chat-model-selector"]').click();
  await page.locator(`text=${modelName}`).click();
}
/** Type a message into the chat box and submit it. */
export async function sendChatMessage(
  page: Page,
  message: string,
): Promise<void> {
  await page.locator('[data-testid="chat-input"]').fill(message);
  await page.locator('[data-testid="send-button"]').click();
}
/** Click the launch button on the model card at modelCardIndex (default 0). */
export async function launchModel(
  page: Page,
  modelCardIndex = 0,
): Promise<void> {
  const card = page.locator('[data-testid="model-card"]').nth(modelCardIndex);
  await card.locator('[data-testid="launch-button"]').click();
}

View File

@@ -0,0 +1,26 @@
import { test, expect } from "@playwright/test";
import { waitForTopologyLoaded } from "../helpers/wait-for-ready";
// Basic UI presence checks for the chat composer — no model is launched.
test.describe("Chat Interface", () => {
  test("should display chat input and send button", async ({ page }) => {
    await page.goto("/");
    await waitForTopologyLoaded(page);
    // Both halves of the composer must be rendered.
    await expect(page.locator('[data-testid="chat-input"]')).toBeVisible();
    await expect(page.locator('[data-testid="send-button"]')).toBeVisible();
  });

  test("should allow typing in chat input", async ({ page }) => {
    await page.goto("/");
    await waitForTopologyLoaded(page);
    const input = page.locator('[data-testid="chat-input"]');
    await expect(input).toBeVisible();
    // Typed text must round-trip through the bound value.
    await input.fill("Test message");
    await expect(input).toHaveValue("Test message");
  });
});

View File

@@ -0,0 +1,16 @@
import { test, expect } from "@playwright/test";
import { waitForTopologyLoaded } from "../helpers/wait-for-ready";
// Smoke test: the dashboard loads and its core elements are on screen.
test.describe("Homepage", () => {
  test("should load and display key elements", async ({ page }) => {
    await page.goto("/");
    await waitForTopologyLoaded(page);
    // Core UI elements that every page load must show.
    const essentials = [
      page.locator('[data-testid="topology-graph"]').first(),
      page.locator('[data-testid="chat-input"]'),
      page.locator('[data-testid="send-button"]'),
    ];
    for (const element of essentials) {
      await expect(element).toBeVisible();
    }
  });
});

View File

@@ -66,7 +66,9 @@ def chat_request_to_text_generation(
return TextGenerationTaskParams(
model=request.model,
input=input_messages if input_messages else "",
input=input_messages
if input_messages
else [InputMessage(role="user", content="")],
instructions=instructions,
max_output_tokens=request.max_tokens,
temperature=request.temperature,

View File

@@ -141,7 +141,9 @@ def claude_request_to_text_generation(
return TextGenerationTaskParams(
model=request.model,
input=input_messages if input_messages else "",
input=input_messages
if input_messages
else [InputMessage(role="user", content="")],
instructions=instructions,
max_output_tokens=request.max_tokens,
temperature=request.temperature,

View File

@@ -43,10 +43,10 @@ def _extract_content(content: str | list[ResponseContentPart]) -> str:
def responses_request_to_text_generation(
request: ResponsesRequest,
) -> TextGenerationTaskParams:
input_value: str | list[InputMessage]
input_value: list[InputMessage]
built_chat_template: list[dict[str, Any]] | None = None
if isinstance(request.input, str):
input_value = request.input
input_value = [InputMessage(role="user", content=request.input)]
else:
input_messages: list[InputMessage] = []
chat_template_messages: list[dict[str, Any]] = []
@@ -95,7 +95,11 @@ def responses_request_to_text_generation(
}
)
input_value = input_messages if input_messages else ""
input_value = (
input_messages
if input_messages
else [InputMessage(role="user", content="")]
)
built_chat_template = chat_template_messages if chat_template_messages else None
return TextGenerationTaskParams(

View File

@@ -28,7 +28,7 @@ from exo.shared.types.profiling import (
)
from exo.shared.types.tasks import TaskStatus
from exo.shared.types.tasks import TextGeneration as TextGenerationTask
from exo.shared.types.text_generation import TextGenerationTaskParams
from exo.shared.types.text_generation import InputMessage, TextGenerationTaskParams
from exo.shared.types.worker.instances import (
InstanceMeta,
MlxRingInstance,
@@ -136,7 +136,9 @@ async def test_master():
command_id=CommandId(),
task_params=TextGenerationTaskParams(
model=ModelId("llama-3.2-1b"),
input="Hello, how are you?",
input=[
InputMessage(role="user", content="Hello, how are you?")
],
),
)
),
@@ -189,7 +191,7 @@ async def test_master():
assert isinstance(events[2].event.task, TextGenerationTask)
assert events[2].event.task.task_params == TextGenerationTaskParams(
model=ModelId("llama-3.2-1b"),
input="Hello, how are you?",
input=[InputMessage(role="user", content="Hello, how are you?")],
)
await master.shutdown()

View File

@@ -28,7 +28,7 @@ class TextGenerationTaskParams(BaseModel, frozen=True):
"""
model: ModelId
input: str | list[InputMessage]
input: list[InputMessage]
instructions: str | None = None
max_output_tokens: int | None = None
temperature: float | None = None

View File

@@ -17,7 +17,7 @@ from exo.shared.types.api import (
from exo.shared.types.common import ModelId
from exo.shared.types.memory import Memory
from exo.shared.types.mlx import KVCacheType
from exo.shared.types.text_generation import TextGenerationTaskParams
from exo.shared.types.text_generation import InputMessage, TextGenerationTaskParams
from exo.shared.types.worker.runner_response import (
GenerationResponse,
)
@@ -100,7 +100,7 @@ def warmup_inference(
tokenizer=tokenizer,
task_params=TextGenerationTaskParams(
model=ModelId(""),
input=content,
input=[InputMessage(role="user", content=content)],
),
)

View File

@@ -436,16 +436,11 @@ def apply_chat_template(
)
# Convert input to messages
if isinstance(task_params.input, str):
# Simple string input becomes a single user message
formatted_messages.append({"role": "user", "content": task_params.input})
else:
# List of InputMessage
for msg in task_params.input:
if not msg.content:
logger.warning("Received message with empty content, skipping")
continue
formatted_messages.append({"role": msg.role, "content": msg.content})
for msg in task_params.input:
if not msg.content:
logger.warning("Received message with empty content, skipping")
continue
formatted_messages.append({"role": msg.role, "content": msg.content})
prompt: str = tokenizer.apply_chat_template(
formatted_messages,

View File

@@ -918,15 +918,10 @@ def _check_for_debug_prompts(task_params: TextGenerationTaskParams) -> None:
Extracts the first user input text and checks for debug triggers.
"""
prompt: str
if isinstance(task_params.input, str):
prompt = task_params.input
else:
# List of InputMessage - get first message content
if len(task_params.input) == 0:
logger.debug("Empty message list in debug prompt check")
return
prompt = task_params.input[0].content
if len(task_params.input) == 0:
logger.debug("Empty message list in debug prompt check")
return
prompt = task_params.input[0].content
if not prompt:
return

View File

@@ -14,7 +14,7 @@ from exo.shared.constants import EXO_MODELS_DIR
from exo.shared.models.model_cards import ModelCard, ModelTask
from exo.shared.types.common import ModelId
from exo.shared.types.memory import Memory
from exo.shared.types.text_generation import TextGenerationTaskParams
from exo.shared.types.text_generation import InputMessage, TextGenerationTaskParams
from exo.shared.types.worker.shards import PipelineShardMetadata, TensorShardMetadata
from exo.worker.engines.mlx import Model
from exo.worker.engines.mlx.generator.generate import mlx_generate
@@ -114,7 +114,7 @@ def run_gpt_oss_pipeline_device(
task = TextGenerationTaskParams(
model=DEFAULT_GPT_OSS_MODEL_ID,
input=prompt_text,
input=[InputMessage(role="user", content=prompt_text)],
max_output_tokens=max_tokens,
)
@@ -182,7 +182,7 @@ def run_gpt_oss_tensor_parallel_device(
task = TextGenerationTaskParams(
model=DEFAULT_GPT_OSS_MODEL_ID,
input=prompt_text,
input=[InputMessage(role="user", content=prompt_text)],
max_output_tokens=max_tokens,
)

View File

@@ -2,7 +2,7 @@ from typing import cast
import exo.worker.plan as plan_mod
from exo.shared.types.tasks import Task, TaskId, TaskStatus, TextGeneration
from exo.shared.types.text_generation import TextGenerationTaskParams
from exo.shared.types.text_generation import InputMessage, TextGenerationTaskParams
from exo.shared.types.worker.instances import BoundInstance, InstanceId
from exo.shared.types.worker.runners import (
RunnerIdle,
@@ -59,7 +59,9 @@ def test_plan_forwards_pending_chat_completion_when_runner_ready():
instance_id=INSTANCE_1_ID,
task_status=TaskStatus.Pending,
command_id=COMMAND_1_ID,
task_params=TextGenerationTaskParams(model=MODEL_A_ID, input=""),
task_params=TextGenerationTaskParams(
model=MODEL_A_ID, input=[InputMessage(role="user", content="")]
),
)
result = plan_mod.plan(
@@ -106,7 +108,9 @@ def test_plan_does_not_forward_chat_completion_if_any_runner_not_ready():
instance_id=INSTANCE_1_ID,
task_status=TaskStatus.Pending,
command_id=COMMAND_1_ID,
task_params=TextGenerationTaskParams(model=MODEL_A_ID, input=""),
task_params=TextGenerationTaskParams(
model=MODEL_A_ID, input=[InputMessage(role="user", content="")]
),
)
result = plan_mod.plan(
@@ -150,7 +154,9 @@ def test_plan_does_not_forward_tasks_for_other_instances():
instance_id=other_instance_id,
task_status=TaskStatus.Pending,
command_id=COMMAND_1_ID,
task_params=TextGenerationTaskParams(model=MODEL_A_ID, input=""),
task_params=TextGenerationTaskParams(
model=MODEL_A_ID, input=[InputMessage(role="user", content="")]
),
)
result = plan_mod.plan(
@@ -198,7 +204,9 @@ def test_plan_ignores_non_pending_or_non_chat_tasks():
instance_id=INSTANCE_1_ID,
task_status=TaskStatus.Complete,
command_id=COMMAND_1_ID,
task_params=TextGenerationTaskParams(model=MODEL_A_ID, input=""),
task_params=TextGenerationTaskParams(
model=MODEL_A_ID, input=[InputMessage(role="user", content="")]
),
)
other_task_id = TaskId("other-task")

View File

@@ -22,7 +22,7 @@ from exo.shared.types.tasks import (
TaskStatus,
TextGeneration,
)
from exo.shared.types.text_generation import TextGenerationTaskParams
from exo.shared.types.text_generation import InputMessage, TextGenerationTaskParams
from exo.shared.types.worker.runner_response import GenerationResponse
from exo.shared.types.worker.runners import (
RunnerConnected,
@@ -86,7 +86,7 @@ SHUTDOWN_TASK = Shutdown(
CHAT_PARAMS = TextGenerationTaskParams(
model=MODEL_A_ID,
input="hello",
input=[InputMessage(role="user", content="hello")],
stream=True,
max_output_tokens=4,
temperature=0.0,

View File

@@ -23,7 +23,7 @@ from exo.shared.types.tasks import (
Task,
TextGeneration,
)
from exo.shared.types.text_generation import TextGenerationTaskParams
from exo.shared.types.text_generation import InputMessage, TextGenerationTaskParams
from exo.shared.types.worker.instances import (
BoundInstance,
Instance,
@@ -196,7 +196,11 @@ async def execute_test(test: Tests, instance: Instance, hn: str) -> list[Event]:
task_params=TextGenerationTaskParams(
model=test.model_id,
instructions="You are a helpful assistant",
input="What is the capital of France?",
input=[
InputMessage(
role="user", content="What is the capital of France?"
)
],
),
command_id=CommandId("yo"),
instance_id=iid,