diff --git a/core/http/endpoints/openai/transcription.go b/core/http/endpoints/openai/transcription.go
index 6312e3cb9..81f89b927 100644
--- a/core/http/endpoints/openai/transcription.go
+++ b/core/http/endpoints/openai/transcription.go
@@ -257,10 +257,36 @@ func streamTranscription(c echo.Context, req backend.TranscriptionRequest, ml *m
 			"delta": finalResult.Text,
 		})
 	}
-	_ = writeEvent(map[string]any{
+	// done carries the assembled text plus, when the backend produced them,
+	// per-segment timings, audio duration, and detected language. The OpenAI
+	// streaming spec only specifies `text`; the extra fields are an additive
+	// extension so streaming clients (e.g. notetaker) can build the same
+	// TranscriptionResultSeconds shape they get from the JSON response path
+	// without us forcing them off SSE just to recover segments. Spec-compliant
+	// clients ignore unknown fields.
+	doneEvent := map[string]any{
 		"type": "transcript.text.done",
 		"text": finalResult.Text,
-	})
+	}
+	if finalResult.Language != "" {
+		doneEvent["language"] = finalResult.Language
+	}
+	if finalResult.Duration > 0 {
+		doneEvent["duration"] = finalResult.Duration
+	}
+	if len(finalResult.Segments) > 0 {
+		segs := make([]map[string]any, 0, len(finalResult.Segments))
+		for _, seg := range finalResult.Segments {
+			segs = append(segs, map[string]any{
+				"id":    seg.Id,
+				"start": seg.Start.Seconds(),
+				"end":   seg.End.Seconds(),
+				"text":  seg.Text,
+			})
+		}
+		doneEvent["segments"] = segs
+	}
+	_ = writeEvent(doneEvent)
 	_, _ = fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n")
 	c.Response().Flush()
 	return nil
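
For reference, a minimal client-side sketch of consuming the extended done event. The `doneEvent` struct and its field names below are hypothetical and not part of LocalAI's public API; they only mirror the JSON shape the hunk above emits, and the optional fields simply decode to zero values when the backend omits them:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// doneEvent is an illustrative mapping of the extended
// transcript.text.done payload; only "type" and "text" are
// guaranteed by the OpenAI streaming spec.
type doneEvent struct {
	Type     string  `json:"type"`
	Text     string  `json:"text"`
	Language string  `json:"language,omitempty"`
	Duration float64 `json:"duration,omitempty"`
	Segments []struct {
		ID    int     `json:"id"`
		Start float64 `json:"start"`
		End   float64 `json:"end"`
		Text  string  `json:"text"`
	} `json:"segments,omitempty"`
}

func main() {
	// A sample frame as the endpoint could emit it after this change.
	raw := `{"type":"transcript.text.done","text":"hello world",` +
		`"language":"en","duration":1.5,` +
		`"segments":[{"id":0,"start":0,"end":1.5,"text":"hello world"}]}`

	var ev doneEvent
	if err := json.Unmarshal([]byte(raw), &ev); err != nil {
		panic(err)
	}
	fmt.Printf("%s (%.1fs, %d segments)\n", ev.Text, ev.Duration, len(ev.Segments))
}
```

A spec-compliant client that only reads `text` decodes the same frame unchanged, which is what makes the extension additive.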