diff --git a/core/http/react-ui/src/pages/Nodes.jsx b/core/http/react-ui/src/pages/Nodes.jsx
index 8326d81bc..bc6383b72 100644
--- a/core/http/react-ui/src/pages/Nodes.jsx
+++ b/core/http/react-ui/src/pages/Nodes.jsx
@@ -1198,12 +1198,38 @@ export default function Nodes() {
- {models.map(m => {
- const stCfg = modelStateConfig[m.state] || modelStateConfig.idle
- return (
-
+ {(() => {
+ // Pre-compute per-model replica counts so the disambiguation
+ // pill only renders when this node actually hosts >1 replica
+ // of the same model. Single-replica deployments stay clean.
+ const replicaCounts = {}
+ models.forEach(m => { replicaCounts[m.model_name] = (replicaCounts[m.model_name] || 0) + 1 })
+ return models.map(m => {
+ const stCfg = modelStateConfig[m.state] || modelStateConfig.idle
+ const showReplica = (replicaCounts[m.model_name] || 0) > 1
+ // Per-replica process key — what the worker stores logs under and what the
+ // store's GetLines/Subscribe match on for replica-scoped filtering.
+ const processKey = `${m.model_name}#${m.replica_index ?? 0}`
+ return (
+
|
{m.model_name}
+ {showReplica && (
+
+ rep {m.replica_index ?? 0}
+
+ )}
|
{
e.preventDefault()
- navigate(`/app/node-backend-logs/${node.id}/${encodeURIComponent(m.model_name)}`)
+ // Send the replica-scoped process key (modelName#replicaIndex).
+ // The worker's BackendLogStore returns only this replica's lines
+ // when given the full key; a future "merged" toggle in the logs
+ // page can navigate to the bare modelName URL to use aggregation.
+ navigate(`/app/node-backend-logs/${node.id}/${encodeURIComponent(processKey)}`)
}}
style={{ fontSize: '0.75rem', color: 'var(--color-primary)' }}
- title="View backend logs"
+ title={showReplica ? `View backend logs for replica ${m.replica_index ?? 0}` : 'View backend logs'}
>
@@ -1249,7 +1279,8 @@ export default function Nodes() {
|
)
- })}
+ })
+ })()}
)}
diff --git a/pkg/model/backend_log_store.go b/pkg/model/backend_log_store.go
index c20b387d8..720460c22 100644
--- a/pkg/model/backend_log_store.go
+++ b/pkg/model/backend_log_store.go
@@ -2,12 +2,19 @@ package model
import (
"sort"
+ "strings"
"sync"
"time"
"github.com/emirpasic/gods/v2/queues/circularbuffer"
)
+// replicaSeparator separates a model ID from the replica index in the
+// supervisor's process key (e.g. "qwen3-0.6b#0"). Mirrored from the
+// worker's buildProcessKey — duplicated as a constant here to keep this
+// package free of CLI imports.
+const replicaSeparator = "#"
+
// BackendLogLine represents a single line of output from a backend process.
type BackendLogLine struct {
Timestamp time.Time `json:"timestamp"`
@@ -88,29 +95,79 @@ func (s *BackendLogStore) AppendLine(modelID, stream, text string) {
}
// GetLines returns a copy of all log lines for a model, or an empty slice.
+//
+// When modelID contains no replica suffix (no `#`), it's treated as a model
+// prefix and the lines from all `modelID#N` replicas are merged in
+// timestamp order. This keeps the existing per-model logs UI working in
+// distributed mode after the worker started using `modelID#replicaIndex`
+// as its process key (multi-replica refactor) — the UI asks for "qwen3-0.6b"
+// and gets the union of all replicas' logs.
+//
+// When modelID contains a `#` (e.g. "qwen3-0.6b#0"), it's treated as an
+// exact process key for per-replica filtering by callers that need it.
func (s *BackendLogStore) GetLines(modelID string) []BackendLogLine {
s.mu.RLock()
- buf, ok := s.buffers[modelID]
+ exactBuf, exactOK := s.buffers[modelID]
s.mu.RUnlock()
- if !ok {
+
+ // Exact match — single key. Caller knew the full process key.
+ if exactOK {
+ exactBuf.mu.Lock()
+ lines := exactBuf.queue.Values()
+ exactBuf.mu.Unlock()
+ return lines
+ }
+
+ // No exact match: aggregate any replicas if modelID looks like a model prefix.
+ if strings.Contains(modelID, replicaSeparator) {
return []BackendLogLine{}
}
- buf.mu.Lock()
- lines := buf.queue.Values()
- buf.mu.Unlock()
- return lines
-}
-
-// ListModels returns a sorted list of model IDs that have log buffers.
-func (s *BackendLogStore) ListModels() []string {
+ prefix := modelID + replicaSeparator
+ var matching []*backendLogBuffer
s.mu.RLock()
- models := make([]string, 0, len(s.buffers))
- for id := range s.buffers {
- models = append(models, id)
+ for k, b := range s.buffers {
+ if strings.HasPrefix(k, prefix) {
+ matching = append(matching, b)
+ }
}
s.mu.RUnlock()
+ if len(matching) == 0 {
+ return []BackendLogLine{}
+ }
+
+ // Merge the per-replica buffers and sort by timestamp so the operator
+ // sees a single coherent timeline rather than per-replica blocks.
+ var merged []BackendLogLine
+ for _, b := range matching {
+ b.mu.Lock()
+ merged = append(merged, b.queue.Values()...)
+ b.mu.Unlock()
+ }
+ sort.SliceStable(merged, func(i, j int) bool { return merged[i].Timestamp.Before(merged[j].Timestamp) })
+ return merged
+}
+
+// ListModels returns a sorted list of model IDs that have log buffers.
+// Replica suffixes (`#N`) are stripped and the result is deduplicated, so
+// callers see one entry per loaded model regardless of replica count.
+func (s *BackendLogStore) ListModels() []string {
+ s.mu.RLock()
+ seen := make(map[string]struct{}, len(s.buffers))
+ for id := range s.buffers {
+ base := id
+ if i := strings.Index(id, replicaSeparator); i >= 0 {
+ base = id[:i]
+ }
+ seen[base] = struct{}{}
+ }
+ s.mu.RUnlock()
+
+ models := make([]string, 0, len(seen))
+ for id := range seen {
+ models = append(models, id)
+ }
sort.Strings(models)
return models
}
@@ -145,23 +202,107 @@ func (s *BackendLogStore) Remove(modelID string) {
// Subscribe returns a channel that receives new log lines for the given model
// in real-time, plus an unsubscribe function. The channel has a buffer of 100
// lines to absorb short bursts without blocking the writer.
+//
+// Like GetLines, a modelID without a `#` separator subscribes to every
+// matching `modelID#N` replica buffer that exists at subscribe time, so the
+// stream merges all replicas. Subscribers are NOT auto-attached to replicas
+// that come up later — callers needing dynamic membership should resubscribe.
func (s *BackendLogStore) Subscribe(modelID string) (chan BackendLogLine, func()) {
ch := make(chan BackendLogLine, 100)
- buf := s.getOrCreateBuffer(modelID)
- buf.mu.Lock()
- id := buf.nextSubID
- buf.nextSubID++
- buf.subscribers[id] = ch
- buf.mu.Unlock()
+ // Per-replica caller (full process key) — exact subscription.
+ if strings.Contains(modelID, replicaSeparator) {
+ buf := s.getOrCreateBuffer(modelID)
+ buf.mu.Lock()
+ id := buf.nextSubID
+ buf.nextSubID++
+ buf.subscribers[id] = ch
+ buf.mu.Unlock()
+ unsubscribe := func() {
+ buf.mu.Lock()
+ if _, exists := buf.subscribers[id]; exists {
+ delete(buf.subscribers, id)
+ close(ch)
+ }
+ buf.mu.Unlock()
+ }
+ return ch, unsubscribe
+ }
+
+ // Aggregated caller: subscribe to the bare-modelID buffer (for back-compat
+ // with single-replica writers that still write to the un-suffixed key) AND
+ // to every existing `modelID#N` replica buffer. Each per-buffer subscription
+ // receives lines into its own channel; we fan them in to `ch` here.
+ type subRef struct {
+ buf *backendLogBuffer
+ id int
+ ch chan BackendLogLine
+ }
+ var refs []subRef
+
+ subscribe := func(buf *backendLogBuffer) {
+ bufCh := make(chan BackendLogLine, 100)
+ buf.mu.Lock()
+ id := buf.nextSubID
+ buf.nextSubID++
+ buf.subscribers[id] = bufCh
+ buf.mu.Unlock()
+ refs = append(refs, subRef{buf: buf, id: id, ch: bufCh})
+ }
+
+ if buf, ok := func() (*backendLogBuffer, bool) {
+ s.mu.RLock()
+ b, ok := s.buffers[modelID]
+ s.mu.RUnlock()
+ return b, ok
+ }(); ok {
+ subscribe(buf)
+ }
+
+ prefix := modelID + replicaSeparator
+ s.mu.RLock()
+ for k, b := range s.buffers {
+ if strings.HasPrefix(k, prefix) {
+ subscribe(b)
+ }
+ }
+ s.mu.RUnlock()
+
+ // Fan-in goroutine: forward every per-buffer channel into the merged
+ // channel until all source channels close, then close the merged channel.
+ if len(refs) == 0 {
+ // No source buffers yet: still return a channel so callers don't crash;
+ // sync.OnceFunc makes a double-unsubscribe safe (no double-close panic).
+ unsubscribe := sync.OnceFunc(func() { close(ch) })
+ return ch, unsubscribe
+ }
+
+ var fanWG sync.WaitGroup
+ closeOnce := sync.OnceFunc(func() { close(ch) })
+ for _, r := range refs {
+ fanWG.Add(1)
+ go func(c chan BackendLogLine) {
+ defer fanWG.Done()
+ for line := range c {
+ select {
+ case ch <- line:
+ default: // drop on slow consumer to match non-aggregated behavior
+ }
+ }
+ }(r.ch)
+ }
+ go func() { fanWG.Wait(); closeOnce() }()
 unsubscribe := func() {
- buf.mu.Lock()
- if _, exists := buf.subscribers[id]; exists {
- delete(buf.subscribers, id)
- close(ch)
+ for _, r := range refs {
+ r.buf.mu.Lock()
+ if c, exists := r.buf.subscribers[r.id]; exists {
+ delete(r.buf.subscribers, r.id)
+ close(c) // closes the per-buffer source channel; fan-in goroutine exits
+ }
+ r.buf.mu.Unlock()
 }
- buf.mu.Unlock()
+ // Do NOT close ch here: fan-in goroutines may still be draining buffered lines from the just-closed sources, and a send on a closed channel panics even inside select; the fanWG waiter closes ch once every source drains.
 }
return ch, unsubscribe
diff --git a/pkg/model/backend_log_store_test.go b/pkg/model/backend_log_store_test.go
new file mode 100644
index 000000000..eceff0b30
--- /dev/null
+++ b/pkg/model/backend_log_store_test.go
@@ -0,0 +1,140 @@
+package model
+
+import (
+ "reflect"
+ "sort"
+ "testing"
+ "time"
+)
+
+// TestGetLines_PrefixAggregation pins the multi-replica behavior added when
+// the worker's process key changed from `modelID` to `modelID#replicaIndex`.
+// The frontend still asks for logs of `qwen3-0.6b`, but the actual buffers
+// live under `qwen3-0.6b#0` and `qwen3-0.6b#1` — without aggregation,
+// operators see no logs in distributed mode.
+func TestGetLines_PrefixAggregation(t *testing.T) {
+ s := NewBackendLogStore(100)
+
+ // Two replicas of the same model, plus a different model that should
+ // never leak in. AppendLine timestamps via time.Now(), so add small
+ // sleeps so the merged order is deterministic.
+ s.AppendLine("qwen3-0.6b#0", "stderr", "r0-line-1")
+ time.Sleep(2 * time.Millisecond)
+ s.AppendLine("qwen3-0.6b#1", "stderr", "r1-line-1")
+ time.Sleep(2 * time.Millisecond)
+ s.AppendLine("qwen3-0.6b#0", "stdout", "r0-line-2")
+ time.Sleep(2 * time.Millisecond)
+ s.AppendLine("other-model#0", "stderr", "should-not-appear")
+
+ got := s.GetLines("qwen3-0.6b")
+ var texts []string
+ for _, l := range got {
+ texts = append(texts, l.Text)
+ }
+ want := []string{"r0-line-1", "r1-line-1", "r0-line-2"}
+ if !reflect.DeepEqual(texts, want) {
+ t.Fatalf("aggregated texts = %v, want %v", texts, want)
+ }
+
+ // Per-replica filtering: full process key returns only that replica.
+ r0 := s.GetLines("qwen3-0.6b#0")
+ if len(r0) != 2 {
+ t.Fatalf("replica 0 should have 2 lines, got %d", len(r0))
+ }
+ for _, l := range r0 {
+ if l.Text == "r1-line-1" {
+ t.Fatalf("replica 0 must not include replica 1's lines")
+ }
+ }
+
+ // No matching replica: empty slice (not nil; existing callers rely on len()).
+ if got := s.GetLines("never-loaded-model"); len(got) != 0 {
+ t.Fatalf("unknown model should yield empty slice, got %v", got)
+ }
+}
+
+// TestListModels_DedupReplicas confirms the /v1/backend-logs listing shows
+// one entry per model, not one per replica — operators don't think about
+// replica indexes; they pick a model.
+func TestListModels_DedupReplicas(t *testing.T) {
+ s := NewBackendLogStore(100)
+ s.AppendLine("model-a#0", "stderr", "x")
+ s.AppendLine("model-a#1", "stderr", "y")
+ s.AppendLine("model-b#0", "stderr", "z")
+ s.AppendLine("model-c", "stderr", "no-replica-suffix") // back-compat for non-distributed
+
+ got := s.ListModels()
+ sort.Strings(got)
+ want := []string{"model-a", "model-b", "model-c"}
+ if !reflect.DeepEqual(got, want) {
+ t.Fatalf("ListModels = %v, want %v", got, want)
+ }
+}
+
+// TestSubscribe_AggregatesAcrossReplicas confirms the WebSocket streaming
+// path (the live tail UI) receives lines from every replica when the
+// caller subscribes by bare modelID.
+func TestSubscribe_AggregatesAcrossReplicas(t *testing.T) {
+ s := NewBackendLogStore(100)
+
+ // Pre-create both replica buffers so Subscribe can find them.
+ s.AppendLine("model-a#0", "stderr", "preload-r0")
+ s.AppendLine("model-a#1", "stderr", "preload-r1")
+
+ ch, unsubscribe := s.Subscribe("model-a")
+ defer unsubscribe()
+
+ // Emit one line per replica after subscribing.
+ s.AppendLine("model-a#0", "stderr", "live-r0")
+ s.AppendLine("model-a#1", "stderr", "live-r1")
+ // Different model — must not appear.
+ s.AppendLine("model-b#0", "stderr", "leak-check")
+
+ seen := map[string]bool{}
+ deadline := time.After(500 * time.Millisecond)
+ for len(seen) < 2 {
+ select {
+ case line, ok := <-ch:
+ if !ok {
+ t.Fatalf("subscribe channel closed early; saw %v", seen)
+ }
+ seen[line.Text] = true
+ if line.Text == "leak-check" {
+ t.Fatalf("subscribe leaked a line from a different model")
+ }
+ case <-deadline:
+ t.Fatalf("timed out waiting for fan-in lines; saw %v", seen)
+ }
+ }
+ if !seen["live-r0"] || !seen["live-r1"] {
+ t.Fatalf("missing live lines from replicas: saw %v", seen)
+ }
+}
+
+// TestSubscribe_PerReplicaFilter pins that callers passing the full process
+// key get only that replica — useful for a future per-replica logs view.
+func TestSubscribe_PerReplicaFilter(t *testing.T) {
+ s := NewBackendLogStore(100)
+
+ ch, unsubscribe := s.Subscribe("model-a#0")
+ defer unsubscribe()
+
+ s.AppendLine("model-a#0", "stderr", "wanted")
+ s.AppendLine("model-a#1", "stderr", "unwanted")
+
+ select {
+ case line := <-ch:
+ if line.Text != "wanted" {
+ t.Fatalf("expected line from replica 0, got %q", line.Text)
+ }
+ case <-time.After(500 * time.Millisecond):
+ t.Fatalf("no line received from replica-scoped subscription")
+ }
+
+ // Drain quickly: confirm replica 1 didn't leak in.
+ select {
+ case line := <-ch:
+ t.Fatalf("replica-scoped sub leaked line from replica 1: %q", line.Text)
+ case <-time.After(50 * time.Millisecond):
+ }
+}