diff --git a/core/config/meta/registry.go b/core/config/meta/registry.go
index 84fc9afda..b7ffa9290 100644
--- a/core/config/meta/registry.go
+++ b/core/config/meta/registry.go
@@ -457,6 +457,55 @@ func DefaultRegistry() map[string]FieldMetaOverride {
 			Component:   "json-editor",
 			Order:       78,
 		},
+		"pipeline.voice_recognition.enforce": {
+			Section:     "pipeline",
+			Label:       "Voice Gate Enforce",
+			Description: "Whether the gate rejects unauthorized speakers. Enabled (default) drops unauthorized utterances before the LLM. Disabled still resolves and surfaces the speaker (for the conversation.item.speaker event and personalization) but never drops a turn.",
+			Component:   "toggle",
+			Order:       80,
+		},
+		"pipeline.voice_recognition.identity.announce": {
+			Section:     "pipeline",
+			Label:       "Speaker Identity Announce",
+			Description: "Emit a conversation.item.speaker event to the client naming the recognized speaker. When set, identity is resolved on every turn even if 'when' is 'first'.",
+			Component:   "toggle",
+			Order:       81,
+		},
+		"pipeline.voice_recognition.identity.announce_unknown": {
+			Section:     "pipeline",
+			Label:       "Speaker Identity Announce Unknown",
+			Description: "Also emit the conversation.item.speaker event (with matched=false) when no confident match is found. Default only announces on a match.",
+			Component:   "toggle",
+			Order:       82,
+		},
+		"pipeline.voice_recognition.identity.personalize": {
+			Section:     "pipeline",
+			Label:       "Speaker Identity Personalize",
+			Description: "Inform the LLM who is speaking so it can tailor replies. Enables the name and system-note injection below.",
+			Component:   "toggle",
+			Order:       83,
+		},
+		"pipeline.voice_recognition.identity.inject_name": {
+			Section:     "pipeline",
+			Label:       "Speaker Identity Inject Name",
+			Description: "Personalization: set the per-message OpenAI 'name' field on each user turn to the recognized speaker.",
+			Component:   "toggle",
+			Order:       84,
+		},
+		"pipeline.voice_recognition.identity.inject_system_note": {
+			Section:     "pipeline",
+			Label:       "Speaker Identity Inject System Note",
+			Description: "Personalization: append a 'The current speaker is <name>.' note to the system message reflecting the latest speaker.",
+			Component:   "toggle",
+			Order:       85,
+		},
+		"pipeline.voice_recognition.identity.note_unknown": {
+			Section:     "pipeline",
+			Label:       "Speaker Identity Note Unknown",
+			Description: "Personalization: when the speaker is unidentified, append 'The current speaker is unknown.' to the system message so the model can ask who it is talking to.",
+			Component:   "toggle",
+			Order:       86,
+		},
 		"pipeline.max_history_items": {
 			Section:     "pipeline",
 			Label:       "Max History Items",
diff --git a/core/config/model_config.go b/core/config/model_config.go
index 9586beea3..5dbfd2026 100644
--- a/core/config/model_config.go
+++ b/core/config/model_config.go
@@ -769,6 +769,13 @@ type PipelineVoiceRecognition struct {
 	Allow VoiceRecognitionAllow `yaml:"allow,omitempty" json:"allow,omitempty"`
 	// References are the authorized reference speakers (verify mode).
 	References []VoiceReference `yaml:"references,omitempty" json:"references,omitempty"`
+	// Enforce controls the authorization gate. A nil value or true rejects
+	// unauthorized speakers (the historical behavior). false resolves the
+	// speaker's identity for surfacing/personalization but never drops a turn.
+	Enforce *bool `yaml:"enforce,omitempty" json:"enforce,omitempty"`
+	// Identity surfaces the recognized speaker to the client and the LLM. It is
+	// independent of Enforce: identity can be surfaced without gating.
+	Identity *VoiceIdentityConfig `yaml:"identity,omitempty" json:"identity,omitempty"`
 }
 
 // @Description VoiceRecognitionAllow filters authorized registry identities.
@@ -785,6 +792,25 @@ type VoiceReference struct {
 	Audio string `yaml:"audio,omitempty" json:"audio,omitempty"`
 }
 
+// @Description VoiceIdentityConfig surfaces the recognized speaker to the realtime
+// client and the LLM. When set, identity is resolved on every turn even if the
+// gate's When is "first" (the gate still authorizes only once).
+type VoiceIdentityConfig struct {
+	// Announce emits a conversation.item.speaker event to the client.
+	Announce bool `yaml:"announce,omitempty" json:"announce,omitempty"`
+	// AnnounceUnknown also emits the event when there is no confident match.
+	AnnounceUnknown bool `yaml:"announce_unknown,omitempty" json:"announce_unknown,omitempty"`
+	// Personalize informs the LLM who is speaking.
+	Personalize bool `yaml:"personalize,omitempty" json:"personalize,omitempty"`
+	// InjectName sets the per-message name field on each user turn.
+	InjectName bool `yaml:"inject_name,omitempty" json:"inject_name,omitempty"`
+	// InjectSystemNote maintains a "current speaker" note in the system message.
+	InjectSystemNote bool `yaml:"inject_system_note,omitempty" json:"inject_system_note,omitempty"`
+	// NoteUnknown adds a "the current speaker is unknown" note (enables the model
+	// to ask who it is talking to).
+	NoteUnknown bool `yaml:"note_unknown,omitempty" json:"note_unknown,omitempty"`
+}
+
 // VoiceGateEnabled reports whether a voice-recognition gate is configured. The
 // mere presence of the block is the intent signal: a present-but-incomplete
 // block (e.g. missing model) must fail closed at construction, not be silently
@@ -793,6 +819,28 @@ func (p Pipeline) VoiceGateEnabled() bool {
 	return p.VoiceRecognition != nil
 }
 
+// EnforceGate reports whether the gate rejects unauthorized speakers. A nil
+// Enforce means "enforce" so existing configs keep gating.
+func (p PipelineVoiceRecognition) EnforceGate() bool {
+	return p.Enforce == nil || *p.Enforce
+}
+
+// IdentityEnabled reports whether the speaker's identity must be resolved for
+// surfacing or personalization.
+func (p PipelineVoiceRecognition) IdentityEnabled() bool {
+	return p.Identity != nil && (p.Identity.Announce || p.Identity.Personalize)
+}
+
+// AnnounceEnabled reports whether to emit the conversation.item.speaker event.
+func (p PipelineVoiceRecognition) AnnounceEnabled() bool {
+	return p.Identity != nil && p.Identity.Announce
+}
+
+// PersonalizeEnabled reports whether to inform the LLM of the speaker.
+func (p PipelineVoiceRecognition) PersonalizeEnabled() bool {
+	return p.Identity != nil && p.Identity.Personalize
+}
+
 // Normalize fills in defaults in place for omitted fields.
 func (v *PipelineVoiceRecognition) Normalize() {
 	if v.Mode == "" {
diff --git a/core/config/voice_gate_test.go b/core/config/voice_gate_test.go
index 5c7782f1c..c0d25bf82 100644
--- a/core/config/voice_gate_test.go
+++ b/core/config/voice_gate_test.go
@@ -70,4 +70,32 @@ var _ = Describe("PipelineVoiceRecognition", func() {
 			Expect((Pipeline{VoiceRecognition: &PipelineVoiceRecognition{}}).VoiceGateEnabled()).To(BeTrue())
 		})
 	})
+
+	Describe("Enforce / Identity helpers", func() {
+		It("treats a nil Enforce as enforcing (backward compatible)", func() {
+			v := PipelineVoiceRecognition{Model: "spk"}
+			Expect(v.EnforceGate()).To(BeTrue())
+		})
+		It("honors an explicit enforce:false", func() {
+			off := false
+			v := PipelineVoiceRecognition{Model: "spk", Enforce: &off}
+			Expect(v.EnforceGate()).To(BeFalse())
+		})
+		It("reports identity disabled when no identity block is set", func() {
+			v := PipelineVoiceRecognition{Model: "spk"}
+			Expect(v.IdentityEnabled()).To(BeFalse())
+			Expect(v.AnnounceEnabled()).To(BeFalse())
+			Expect(v.PersonalizeEnabled()).To(BeFalse())
+		})
+		It("reports identity enabled when announce or personalize is on", func() {
+			v := PipelineVoiceRecognition{Model: "spk", Identity: &VoiceIdentityConfig{Announce: true}}
+			Expect(v.IdentityEnabled()).To(BeTrue())
+			Expect(v.AnnounceEnabled()).To(BeTrue())
+			Expect(v.PersonalizeEnabled()).To(BeFalse())
+
+			v2 := PipelineVoiceRecognition{Model: "spk", Identity: &VoiceIdentityConfig{Personalize: true}}
+			Expect(v2.IdentityEnabled()).To(BeTrue())
+			Expect(v2.PersonalizeEnabled()).To(BeTrue())
+		})
+	})
 })
diff --git a/core/http/endpoints/openai/realtime.go b/core/http/endpoints/openai/realtime.go
index 343ef4c07..8de50e580 100644
--- a/core/http/endpoints/openai/realtime.go
+++ b/core/http/endpoints/openai/realtime.go
@@ -1311,28 +1311,32 @@ func commitUtterance(ctx context.Context, utt []byte, session *Session, conv *Co
 	// turn wastes only transcription compute, which has no side effects. The
 	// transcript is still emitted to the same peer that sent the audio, which
 	// reveals nothing new to them.
-	type gateOutcome struct {
-		allowed bool
-		matched string
-		reason  string
-		err     error
+	// Resolve the speaker when the gate must authorize this turn, or when identity
+	// surfacing/personalization needs a fresh identity. Identity resolution
+	// ignores the when:first short-circuit (that only skips re-authorization).
+	type resolveOutcome struct {
+		res resolution
+		err error
 	}
-	var gateCh chan gateOutcome
-	runGate := false
+	var resolveCh chan resolveOutcome
+	runResolve := false
 	if session.voiceGate != nil && session.InputAudioTranscription != nil {
-		skip := false
-		if session.voiceGate.cfg.When == config.VoiceGateWhenFirst {
+		enforce := session.voiceGate.cfg.EnforceGate()
+		gateNeedsAuth := enforce
+		if enforce && session.voiceGate.cfg.When == config.VoiceGateWhenFirst {
 			session.gateMu.Lock()
-			skip = session.voiceVerified
+			if session.voiceVerified {
+				gateNeedsAuth = false
+			}
 			session.gateMu.Unlock()
 		}
-		if !skip {
-			runGate = true
-			gateCh = make(chan gateOutcome, 1)
+		if gateNeedsAuth || session.voiceGate.cfg.IdentityEnabled() {
+			runResolve = true
+			resolveCh = make(chan resolveOutcome, 1)
 			wavPath := f.Name()
 			go func() {
-				allowed, matched, reason, gerr := session.voiceGate.Authorize(ctx, wavPath)
-				gateCh <- gateOutcome{allowed: allowed, matched: matched, reason: reason, err: gerr}
+				r, rerr := session.voiceGate.Resolve(ctx, wavPath)
+				resolveCh <- resolveOutcome{res: r, err: rerr}
 			}()
 		}
 	}
@@ -1348,8 +1352,8 @@ func commitUtterance(ctx context.Context, utt []byte, session *Session, conv *Co
 		if err != nil {
 			// Drain the gate goroutine before returning so its in-flight read of
 			// the temp WAV finishes before the deferred os.Remove fires.
-			if runGate {
-				<-gateCh
+			if runResolve {
+				<-resolveCh
 			}
 			sendError(t, "transcription_failed", err.Error(), "", "event_TODO")
 			return
@@ -1361,41 +1365,58 @@ func commitUtterance(ctx context.Context, utt []byte, session *Session, conv *Co
 		return
 	}
 
-	// Join on the gate before any side-effecting step.
-	if runGate {
-		out := <-gateCh
-		allowed := out.allowed
-		reason := out.reason
+	// Join on the resolution before any side-effecting step.
+	var speaker *types.Speaker
+	if runResolve {
+		out := <-resolveCh
+		enforce := session.voiceGate.cfg.EnforceGate()
+
 		if out.err != nil {
-			// Fail closed: a gate that cannot decide must not let audio through.
-			xlog.Error("voice recognition gate error", "error", out.err)
-			allowed = false
-			reason = "verification error"
-		}
-		alreadyVerified := false
-		if session.voiceGate.cfg.When == config.VoiceGateWhenFirst {
-			session.gateMu.Lock()
-			alreadyVerified = session.voiceVerified
-			session.gateMu.Unlock()
-		}
-		proceed, markVerified := session.voiceGate.decide(alreadyVerified, allowed)
-		if !proceed {
-			xlog.Debug("voice recognition gate rejected utterance", "reason", reason)
-			if session.voiceGate.cfg.OnReject == config.VoiceGateRejectEvent {
-				sendError(t, "speaker_not_authorized", "speaker not authorized: "+reason, "", "event_TODO")
+			if enforce {
+				// Fail closed: a gate that cannot decide must not let audio through.
+				xlog.Error("voice recognition gate error", "error", out.err)
+				if session.voiceGate.cfg.OnReject == config.VoiceGateRejectEvent {
+					sendError(t, "speaker_not_authorized", "speaker not authorized: verification error", "", "event_TODO")
+				}
+				return
 			}
-			return
+			// Non-enforcing: degrade to an unknown speaker and continue.
+			xlog.Warn("voice identity resolve failed; continuing as unknown speaker", "error", out.err)
+		} else {
+			s := out.res.speaker
+			speaker = &s
 		}
-		xlog.Debug("voice recognition gate authorized utterance", "speaker", out.matched)
-		if markVerified {
-			session.gateMu.Lock()
-			session.voiceVerified = true
-			session.gateMu.Unlock()
+
+		if enforce {
+			alreadyVerified := false
+			if session.voiceGate.cfg.When == config.VoiceGateWhenFirst {
+				session.gateMu.Lock()
+				alreadyVerified = session.voiceVerified
+				session.gateMu.Unlock()
+			}
+			allowed, reason := false, "verification error"
+			if out.err == nil {
+				allowed, reason = session.voiceGate.authorize(out.res)
+			}
+			proceed, markVerified := session.voiceGate.decide(alreadyVerified, allowed)
+			if !proceed {
+				xlog.Debug("voice recognition gate rejected utterance", "reason", reason)
+				if session.voiceGate.cfg.OnReject == config.VoiceGateRejectEvent {
+					sendError(t, "speaker_not_authorized", "speaker not authorized: "+reason, "", "event_TODO")
+				}
+				return
+			}
+			if markVerified {
+				session.gateMu.Lock()
+				session.voiceVerified = true
+				session.gateMu.Unlock()
+			}
+			xlog.Debug("voice recognition gate authorized utterance", "speaker", out.res.speaker.Name)
 		}
 	}
 
 	if !session.TranscriptionOnly {
-		generateResponse(ctx, session, utt, transcript, conv, t)
+		generateResponse(ctx, session, utt, transcript, speaker, conv, t)
 	}
 }
 
@@ -1419,15 +1440,28 @@ func runVAD(ctx context.Context, session *Session, adata []int16) ([]schema.VADS
 	return resp.Segments, nil
 }
 
+// speakerNote renders the system-prompt note for the current speaker. Returns
+// an empty string when there is no name and unknown notes are disabled.
+func speakerNote(s *types.Speaker, noteUnknown bool) string {
+	if s != nil && s.Matched && s.Name != "" {
+		return "The current speaker is " + s.Name + "."
+	}
+	if noteUnknown {
+		return "The current speaker is unknown."
+	}
+	return ""
+}
+
 // Function to generate a response based on the conversation
-func generateResponse(ctx context.Context, session *Session, utt []byte, transcript string, conv *Conversation, t Transport) {
+func generateResponse(ctx context.Context, session *Session, utt []byte, transcript string, speaker *types.Speaker, conv *Conversation, t Transport) {
 	xlog.Debug("Generating realtime response...")
 
 	// Create user message item
 	item := types.MessageItemUnion{
 		User: &types.MessageItemUser{
-			ID:     generateItemID(),
-			Status: types.ItemStatusCompleted,
+			ID:      generateItemID(),
+			Status:  types.ItemStatusCompleted,
+			Speaker: speaker,
 			Content: []types.MessageContentInput{
 				{
 					Type:       types.MessageContentTypeInputAudio,
@@ -1445,6 +1479,17 @@ func generateResponse(ctx context.Context, session *Session, utt []byte, transcr
 		Item: item,
 	})
 
+	// Surface the recognized speaker to the client. Skip the event for an
+	// unidentified speaker unless announce_unknown is set.
+	if speaker != nil && session.voiceGate != nil && session.voiceGate.cfg.AnnounceEnabled() {
+		if speaker.Matched || session.voiceGate.cfg.Identity.AnnounceUnknown {
+			sendEvent(t, types.ConversationItemSpeakerEvent{
+				ItemID:  item.User.ID,
+				Speaker: *speaker,
+			})
+		}
+	}
+
 	triggerResponse(ctx, session, conv, t, nil)
 }
 
@@ -1508,6 +1553,8 @@ func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversa
 	})
 
 	imgIndex := 0
+	var lastUserSpeaker *types.Speaker
+	personalize := session.voiceGate != nil && session.voiceGate.cfg.PersonalizeEnabled()
 	conv.Lock.Lock()
 	items := trimRealtimeItems(conv.Items, session.MaxHistoryItems)
 	for _, item := range items {
@@ -1515,6 +1562,11 @@ func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversa
 			msg := schema.Message{
 				Role: string(types.MessageRoleUser),
 			}
+			lastUserSpeaker = item.User.Speaker
+			if personalize && session.voiceGate.cfg.Identity.InjectName &&
+				item.User.Speaker != nil && item.User.Speaker.Matched && item.User.Speaker.Name != "" {
+				msg.Name = item.User.Speaker.Name
+			}
 			textContent := ""
 			nrOfImgsInMessage := 0
 			for _, content := range item.User.Content {
@@ -1601,6 +1653,13 @@ func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversa
 	}
 	conv.Lock.Unlock()
 
+	if personalize && session.voiceGate.cfg.Identity.InjectSystemNote {
+		if note := speakerNote(lastUserSpeaker, session.voiceGate.cfg.Identity.NoteUnknown); note != "" {
+			conversationHistory[0].StringContent += "\n\n" + note
+			conversationHistory[0].Content = conversationHistory[0].StringContent
+		}
+	}
+
 	var images []string
 	for _, m := range conversationHistory {
 		images = append(images, m.StringImages...)
diff --git a/core/http/endpoints/openai/realtime_doubles_test.go b/core/http/endpoints/openai/realtime_doubles_test.go
index accd6af51..727ce7dcc 100644
--- a/core/http/endpoints/openai/realtime_doubles_test.go
+++ b/core/http/endpoints/openai/realtime_doubles_test.go
@@ -83,6 +83,8 @@ type fakeModel struct {
 	predictChunkDeltas [][]*proto.ChatDelta
 	predictResp        backend.LLMResponse
 	predictErr         error
+
+	lastMessages schema.Messages
 }
 
 func (m *fakeModel) VAD(context.Context, *schema.VADRequest) (*schema.VADResponse, error) {
@@ -93,7 +95,8 @@ func (m *fakeModel) Transcribe(context.Context, string, string, bool, bool, stri
 	return m.transcribeFinal, nil
 }
 
-func (m *fakeModel) Predict(_ context.Context, _ schema.Messages, _, _, _ []string, cb func(string, backend.TokenUsage) bool, _ []types.ToolUnion, _ *types.ToolChoiceUnion, _, _ *int, _ map[string]float64) (func() (backend.LLMResponse, error), error) {
+func (m *fakeModel) Predict(_ context.Context, msgs schema.Messages, _, _, _ []string, cb func(string, backend.TokenUsage) bool, _ []types.ToolUnion, _ *types.ToolChoiceUnion, _, _ *int, _ map[string]float64) (func() (backend.LLMResponse, error), error) {
+	m.lastMessages = msgs
 	if m.predictErr != nil {
 		return nil, m.predictErr
 	}
diff --git a/core/http/endpoints/openai/realtime_speaker_event_test.go b/core/http/endpoints/openai/realtime_speaker_event_test.go
new file mode 100644
index 000000000..fbbe5ded9
--- /dev/null
+++ b/core/http/endpoints/openai/realtime_speaker_event_test.go
@@ -0,0 +1,54 @@
+package openai
+
+import (
+	"encoding/json"
+
+	"github.com/mudler/LocalAI/core/http/endpoints/openai/types"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("ConversationItemSpeakerEvent", func() {
+	It("marshals with the conversation.item.speaker type and nested speaker", func() {
+		ev := types.ConversationItemSpeakerEvent{
+			ItemID:  "item_123",
+			Speaker: types.Speaker{Name: "Jeremy", ID: "spk_1", Labels: map[string]string{"family": "yes"}, Confidence: 92, Distance: 0.1, Matched: true},
+		}
+		b, err := json.Marshal(ev)
+		Expect(err).ToNot(HaveOccurred())
+
+		var got map[string]any
+		Expect(json.Unmarshal(b, &got)).To(Succeed())
+		Expect(got["type"]).To(Equal("conversation.item.speaker"))
+		Expect(got["item_id"]).To(Equal("item_123"))
+
+		spk := got["speaker"].(map[string]any)
+		Expect(spk["name"]).To(Equal("Jeremy"))
+		Expect(spk["id"]).To(Equal("spk_1"))
+		Expect(spk["matched"]).To(Equal(true))
+		Expect(spk["labels"]).To(HaveKeyWithValue("family", "yes"))
+	})
+
+	It("omits labels when the speaker has none", func() {
+		ev := types.ConversationItemSpeakerEvent{ItemID: "i", Speaker: types.Speaker{Name: "Jeremy", Matched: true}}
+		b, err := json.Marshal(ev)
+		Expect(err).ToNot(HaveOccurred())
+		var got map[string]any
+		Expect(json.Unmarshal(b, &got)).To(Succeed())
+		spk := got["speaker"].(map[string]any)
+		_, hasLabels := spk["labels"]
+		Expect(hasLabels).To(BeFalse())
+	})
+
+	It("omits the name for an unknown speaker but keeps matched=false", func() {
+		ev := types.ConversationItemSpeakerEvent{ItemID: "i", Speaker: types.Speaker{Matched: false}}
+		b, err := json.Marshal(ev)
+		Expect(err).ToNot(HaveOccurred())
+		var got map[string]any
+		Expect(json.Unmarshal(b, &got)).To(Succeed())
+		spk := got["speaker"].(map[string]any)
+		_, hasName := spk["name"]
+		Expect(hasName).To(BeFalse())
+		Expect(spk["matched"]).To(Equal(false))
+	})
+})
diff --git a/core/http/endpoints/openai/realtime_voicegate.go b/core/http/endpoints/openai/realtime_voicegate.go
index 54332536f..9bd6f10f2 100644
--- a/core/http/endpoints/openai/realtime_voicegate.go
+++ b/core/http/endpoints/openai/realtime_voicegate.go
@@ -7,6 +7,7 @@ import (
 
 	"github.com/mudler/LocalAI/core/backend"
 	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/http/endpoints/openai/types"
 	"github.com/mudler/LocalAI/core/services/voicerecognition"
 	"github.com/mudler/LocalAI/pkg/model"
 )
@@ -29,6 +30,32 @@ type voiceGate struct {
 	verifyFn func(ctx context.Context, uttWav, refWav string) (bool, error)
 }
 
+// resolution is the outcome of resolving a committed utterance's speaker. It
+// carries the surfacing-facing Speaker plus the metadata the policy layer needs
+// (labels for the allow-list) and a human reason when no usable identity exists.
+type resolution struct {
+	speaker types.Speaker     // name/id/confidence/distance/matched
+	labels  map[string]string // identify-mode metadata labels, for the allow-list
+	found   bool              // a candidate identity existed at all
+	reason  string            // why-unknown / deny reason at the resolve level
+}
+
+// confidence maps a cosine distance to a 0..100 score relative to the match
+// threshold, mirroring the /v1/voice/identify endpoint.
+func confidence(distance, threshold float32) float32 {
+	if threshold <= 0 {
+		return 0
+	}
+	c := (1 - distance/threshold) * 100
+	if c < 0 {
+		return 0
+	}
+	if c > 100 {
+		return 100
+	}
+	return c
+}
+
 // newVoiceGate builds a gate from a pipeline's voice_recognition config. It
 // validates fail-fast (before loading the model), loads the recognition model
 // config, wires the real backend seams, and pre-embeds references for verify
@@ -89,91 +116,143 @@ func newVoiceGate(
 	return g, nil
 }
 
-// Authorize embeds the utterance and decides allow/deny.
-//
-//	allowed: speaker is authorized.
-//	matched: matched person's name (informational), empty if none.
-//	reason:  human-readable deny reason.
-//	err:     backend failure (caller should fail closed).
-func (g *voiceGate) Authorize(ctx context.Context, wavPath string) (allowed bool, matched string, reason string, err error) {
+// Resolve embeds the utterance once and resolves the speaker's identity. It does
+// NOT apply the authorization policy (see authorize). On a backend error it
+// returns the error and a resolution whose reason explains the failure.
+func (g *voiceGate) Resolve(ctx context.Context, wavPath string) (resolution, error) {
 	if g.cfg.Mode == config.VoiceGateModeVerify {
-		return g.authorizeVerify(ctx, wavPath)
+		return g.resolveVerify(ctx, wavPath)
 	}
-	return g.authorizeIdentify(ctx, wavPath)
+	return g.resolveIdentify(ctx, wavPath)
 }
 
-func (g *voiceGate) authorizeIdentify(ctx context.Context, wavPath string) (bool, string, string, error) {
+func (g *voiceGate) resolveIdentify(ctx context.Context, wavPath string) (resolution, error) {
 	emb, err := g.embedFn(ctx, wavPath)
 	if err != nil {
-		return false, "", "embed failed", err
+		return resolution{reason: "embed failed"}, err
 	}
 	if len(emb) == 0 {
-		return false, "", "no speech detected", nil
+		return resolution{reason: "no speech detected"}, nil
 	}
 	matches, err := g.registry.Identify(ctx, emb, 1)
 	if err != nil {
-		return false, "", "identify failed", err
+		return resolution{reason: "identify failed"}, err
 	}
 	if len(matches) == 0 {
-		return false, "", "unknown speaker", nil
+		return resolution{reason: "unknown speaker"}, nil
 	}
 	m := matches[0]
-	if m.Distance > g.cfg.Threshold {
-		return false, m.Metadata.Name, "distance above threshold", nil
+	matched := m.Distance <= g.cfg.Threshold
+	r := resolution{
+		speaker: types.Speaker{
+			Name:       m.Metadata.Name,
+			ID:         m.Metadata.ID,
+			Labels:     m.Metadata.Labels,
+			Distance:   m.Distance,
+			Confidence: confidence(m.Distance, g.cfg.Threshold),
+			Matched:    matched,
+		},
+		labels: m.Metadata.Labels,
+		found:  true,
 	}
-	if !g.allowMatch(m.Metadata) {
-		return false, m.Metadata.Name, "speaker not in allow list", nil
+	if !matched {
+		r.reason = "distance above threshold"
 	}
-	return true, m.Metadata.Name, "", nil
+	return r, nil
+}
+
+func (g *voiceGate) resolveVerify(ctx context.Context, wavPath string) (resolution, error) {
+	if g.cfg.AntiSpoofing {
+		for _, ref := range g.refAudios {
+			ok, err := g.verifyFn(ctx, wavPath, ref.Audio)
+			if err != nil {
+				return resolution{reason: "verify failed"}, err
+			}
+			if ok {
+				return resolution{
+					speaker: types.Speaker{Name: ref.Name, Confidence: 100, Matched: true},
+					found:   true,
+				}, nil
+			}
+		}
+		return resolution{reason: "no reference matched"}, nil
+	}
+
+	emb, err := g.embedFn(ctx, wavPath)
+	if err != nil {
+		return resolution{reason: "embed failed"}, err
+	}
+	if len(emb) == 0 {
+		return resolution{reason: "no speech detected"}, nil
+	}
+	for _, ref := range g.refEmbeds {
+		d := cosineDistance(emb, ref.emb)
+		if d <= g.cfg.Threshold {
+			return resolution{
+				speaker: types.Speaker{Name: ref.name, Distance: d, Confidence: confidence(d, g.cfg.Threshold), Matched: true},
+				found:   true,
+			}, nil
+		}
+	}
+	return resolution{reason: "no reference matched"}, nil
+}
+
+// authorize applies the gate's policy to an already-resolved identity.
+func (g *voiceGate) authorize(r resolution) (allowed bool, reason string) {
+	if g.cfg.Mode == config.VoiceGateModeVerify {
+		if r.speaker.Matched {
+			return true, ""
+		}
+		if r.reason == "" {
+			return false, "no reference matched"
+		}
+		return false, r.reason
+	}
+	if !r.found {
+		return false, r.reason
+	}
+	if !r.speaker.Matched {
+		return false, "distance above threshold"
+	}
+	if !g.allowMatch(r.speaker.Name, r.labels) {
+		return false, "speaker not in allow list"
+	}
+	return true, ""
 }
 
 // allowMatch reports whether a matched identity is authorized. An empty allow
 // (no names and no labels) authorizes any registered speaker.
-func (g *voiceGate) allowMatch(meta voicerecognition.Metadata) bool {
+func (g *voiceGate) allowMatch(name string, labels map[string]string) bool {
 	a := g.cfg.Allow
 	if len(a.Names) == 0 && len(a.Labels) == 0 {
 		return true
 	}
 	for _, n := range a.Names {
-		if n == meta.Name {
+		if n == name {
 			return true
 		}
 	}
 	for _, l := range a.Labels {
-		if _, ok := meta.Labels[l]; ok {
+		if _, ok := labels[l]; ok {
 			return true
 		}
 	}
 	return false
 }
 
-func (g *voiceGate) authorizeVerify(ctx context.Context, wavPath string) (bool, string, string, error) {
-	if g.cfg.AntiSpoofing {
-		for _, r := range g.refAudios {
-			ok, err := g.verifyFn(ctx, wavPath, r.Audio)
-			if err != nil {
-				return false, "", "verify failed", err
-			}
-			if ok {
-				return true, r.Name, "", nil
-			}
-		}
-		return false, "", "no reference matched", nil
+// Authorize is the legacy convenience wrapper: resolve then apply policy.
+//
+//	allowed: speaker is authorized.
+//	matched: matched person's name (informational), empty if none.
+//	reason:  human-readable deny reason.
+//	err:     backend failure (caller should fail closed).
+func (g *voiceGate) Authorize(ctx context.Context, wavPath string) (allowed bool, matched string, reason string, err error) {
+	r, rerr := g.Resolve(ctx, wavPath)
+	if rerr != nil {
+		return false, "", r.reason, rerr
 	}
-
-	emb, err := g.embedFn(ctx, wavPath)
-	if err != nil {
-		return false, "", "embed failed", err
-	}
-	if len(emb) == 0 {
-		return false, "", "no speech detected", nil
-	}
-	for _, r := range g.refEmbeds {
-		if cosineDistance(emb, r.emb) <= g.cfg.Threshold {
-			return true, r.name, "", nil
-		}
-	}
-	return false, "", "no reference matched", nil
+	allowed, reason = g.authorize(r)
+	return allowed, r.speaker.Name, reason, nil
 }
 
 // decide interprets an Authorize result against the gate's when-policy and the
diff --git a/core/http/endpoints/openai/realtime_voicegate_integration_test.go b/core/http/endpoints/openai/realtime_voicegate_integration_test.go
index f8aae72c5..b0f7f0b49 100644
--- a/core/http/endpoints/openai/realtime_voicegate_integration_test.go
+++ b/core/http/endpoints/openai/realtime_voicegate_integration_test.go
@@ -152,3 +152,252 @@ var _ = Describe("realtime voice gate integration (commitUtterance)", func() {
 		Expect(tr2.countEvents(types.ServerEventTypeResponseDone)).To(BeNumerically(">=", 1))
 	})
 })
+
+var _ = Describe("realtime speaker surfacing (commitUtterance)", func() {
+	utt := make([]byte, 32)
+
+	It("emits conversation.item.speaker for a confident match when announce is on", func() {
+		session, _ := itSession(itGate("alice", "alice", []float32{1, 0, 0}, nil,
+			config.VoiceGateWhenEvery, config.VoiceGateRejectEvent))
+		session.voiceGate.cfg.Identity = &config.VoiceIdentityConfig{Announce: true}
+		tr := &fakeTransport{}
+
+		commitUtterance(context.Background(), utt, session, &Conversation{}, tr)
+
+		Expect(tr.countEvents(types.ServerEventTypeConversationItemSpeaker)).To(Equal(1))
+	})
+
+	It("does not emit the speaker event for an unknown speaker unless announce_unknown is set", func() {
+		// match distance above threshold => not matched
+		gate := &voiceGate{
+			cfg: config.PipelineVoiceRecognition{
+				Mode: config.VoiceGateModeIdentify, Threshold: 0.25,
+				When: config.VoiceGateWhenEvery, OnReject: config.VoiceGateRejectEvent,
+				Enforce:  boolPtr(false),
+				Identity: &config.VoiceIdentityConfig{Announce: true},
+			},
+			registry: &fakeRegistry{matches: []voicerecognition.Match{
+				{Distance: 0.9, Metadata: voicerecognition.Metadata{Name: "alice"}},
+			}},
+			embedFn: func(context.Context, string) ([]float32, error) { return []float32{1, 0, 0}, nil },
+		}
+		session, _ := itSession(gate)
+		tr := &fakeTransport{}
+
+		commitUtterance(context.Background(), utt, session, &Conversation{}, tr)
+		Expect(tr.countEvents(types.ServerEventTypeConversationItemSpeaker)).To(Equal(0))
+
+		gate.cfg.Identity.AnnounceUnknown = true
+		tr2 := &fakeTransport{}
+		commitUtterance(context.Background(), utt, session, &Conversation{}, tr2)
+		Expect(tr2.countEvents(types.ServerEventTypeConversationItemSpeaker)).To(Equal(1))
+	})
+
+	It("never drops a turn when enforce is false even for a disallowed speaker", func() {
+		session, _ := itSession(itGate("bob", "alice", []float32{1, 0, 0}, nil,
+			config.VoiceGateWhenEvery, config.VoiceGateRejectEvent))
+		session.voiceGate.cfg.Enforce = boolPtr(false)
+		tr := &fakeTransport{}
+
+		commitUtterance(context.Background(), utt, session, &Conversation{}, tr)
+
+		Expect(hasSpeakerNotAuthorized(tr)).To(BeFalse())
+		Expect(tr.countEvents(types.ServerEventTypeResponseDone)).To(BeNumerically(">=", 1))
+	})
+})
+
+var _ = Describe("realtime speaker personalization (triggerResponseAtTurn)", func() {
+	utt := make([]byte, 32)
+
+	findRole := func(msgs schema.Messages, role string) *schema.Message {
+		for i := range msgs {
+			if msgs[i].Role == role {
+				return &msgs[i]
+			}
+		}
+		return nil
+	}
+
+	It("sets the user message name and a current-speaker system note", func() {
+		session, m := itSession(itGate("alice", "alice", []float32{1, 0, 0}, nil,
+			config.VoiceGateWhenEvery, config.VoiceGateRejectEvent))
+		session.voiceGate.cfg.Identity = &config.VoiceIdentityConfig{
+			Personalize: true, InjectName: true, InjectSystemNote: true,
+		}
+		session.Instructions = "You are helpful."
+		tr := &fakeTransport{}
+
+		commitUtterance(context.Background(), utt, session, &Conversation{}, tr)
+
+		user := findRole(m.lastMessages, "user")
+		Expect(user).ToNot(BeNil())
+		Expect(user.Name).To(Equal("alice"))
+		sys := findRole(m.lastMessages, "system")
+		Expect(sys).ToNot(BeNil())
+		Expect(sys.StringContent).To(ContainSubstring("The current speaker is alice."))
+	})
+
+	It("omits the unknown note unless note_unknown is set", func() {
+		base := func() (*Session, *fakeModel) {
+			gate := &voiceGate{
+				cfg: config.PipelineVoiceRecognition{
+					Mode: config.VoiceGateModeIdentify, Threshold: 0.25,
+					When: config.VoiceGateWhenEvery, OnReject: config.VoiceGateRejectEvent,
+					Enforce:  boolPtr(false),
+					Identity: &config.VoiceIdentityConfig{Personalize: true, InjectSystemNote: true},
+				},
+				registry: &fakeRegistry{matches: []voicerecognition.Match{
+					{Distance: 0.9, Metadata: voicerecognition.Metadata{Name: "alice"}},
+				}},
+				embedFn: func(context.Context, string) ([]float32, error) { return []float32{1, 0, 0}, nil },
+			}
+			s, m := itSession(gate)
+			s.Instructions = "You are helpful."
+			return s, m
+		}
+
+		s1, m1 := base()
+		commitUtterance(context.Background(), utt, s1, &Conversation{}, &fakeTransport{})
+		Expect(findRole(m1.lastMessages, "system").StringContent).ToNot(ContainSubstring("unknown"))
+
+		s2, m2 := base()
+		s2.voiceGate.cfg.Identity.NoteUnknown = true
+		commitUtterance(context.Background(), utt, s2, &Conversation{}, &fakeTransport{})
+		Expect(findRole(m2.lastMessages, "system").StringContent).To(ContainSubstring("The current speaker is unknown."))
+	})
+})
+
+var _ = Describe("realtime when:first with identity (commitUtterance)", func() {
+	utt := make([]byte, 32)
+
+	// statefulIdentityGate builds a when:first identify gate with an Identity
+	// block (so identity is resolved every turn) whose embedFn is driven by a
+	// per-turn counter: the failOnSecond flag makes the second and later embeds
+	// return an error, exercising the stricter fail-closed path on a re-resolve.
+	statefulIdentityGate := func(failOnSecond bool) *voiceGate {
+		calls := 0
+		return &voiceGate{
+			cfg: config.PipelineVoiceRecognition{
+				Mode:      config.VoiceGateModeIdentify,
+				Threshold: 0.25,
+				When:      config.VoiceGateWhenFirst,
+				OnReject:  config.VoiceGateRejectEvent,
+				Allow:     config.VoiceRecognitionAllow{Names: []string{"alice"}},
+				Identity:  &config.VoiceIdentityConfig{Announce: true, Personalize: true, InjectName: true},
+			},
+			registry: &fakeRegistry{matches: []voicerecognition.Match{
+				{Distance: 0.1, Metadata: voicerecognition.Metadata{Name: "alice"}},
+			}},
+			embedFn: func(context.Context, string) ([]float32, error) {
+				calls++
+				if failOnSecond && calls > 1 {
+					return nil, errors.New("embed backend down")
+				}
+				return []float32{1, 0, 0}, nil
+			},
+		}
+	}
+
+	It("re-resolves identity every turn and fails closed when a later embed errors", func() {
+		gate := statefulIdentityGate(true)
+		session, _ := itSession(gate)
+		conv := &Conversation{} // shared so voiceVerified persists across turns
+
+		// Turn 1: authorized; identity resolved, speaker surfaced, response runs.
+		tr1 := &fakeTransport{}
+		commitUtterance(context.Background(), utt, session, conv, tr1)
+		Expect(hasSpeakerNotAuthorized(tr1)).To(BeFalse())
+		Expect(tr1.countEvents(types.ServerEventTypeConversationItemSpeaker)).To(Equal(1))
+		Expect(tr1.countEvents(types.ServerEventTypeResponseDone)).To(BeNumerically(">=", 1))
+
+		// Turn 2: when:first would skip re-authorization, but the Identity block
+		// forces a fresh resolve. That resolve now errors, and because the gate
+		// enforces, the turn is dropped fail-closed rather than riding on the
+		// cached first verification.
+		tr2 := &fakeTransport{}
+		commitUtterance(context.Background(), utt, session, conv, tr2)
+		Expect(hasSpeakerNotAuthorized(tr2)).To(BeTrue())
+		Expect(tr2.countEvents(types.ServerEventTypeResponseDone)).To(Equal(0))
+	})
+
+	It("re-resolves identity every turn so a later turn still surfaces and names the speaker", func() {
+		gate := statefulIdentityGate(false)
+		session, m := itSession(gate)
+		conv := &Conversation{}
+
+		tr1 := &fakeTransport{}
+		commitUtterance(context.Background(), utt, session, conv, tr1)
+		Expect(hasSpeakerNotAuthorized(tr1)).To(BeFalse())
+		Expect(tr1.countEvents(types.ServerEventTypeResponseDone)).To(BeNumerically(">=", 1))
+
+		// Turn 2: authorization is skipped (when:first, already verified) but the
+		// speaker event still fires and the per-message name is set, proving the
+		// per-turn re-resolution (not the cached first verification) drove it.
+		tr2 := &fakeTransport{}
+		commitUtterance(context.Background(), utt, session, conv, tr2)
+		Expect(tr2.countEvents(types.ServerEventTypeConversationItemSpeaker)).To(Equal(1))
+		var lastUser *schema.Message
+		for i := range m.lastMessages {
+			if m.lastMessages[i].Role == "user" {
+				lastUser = &m.lastMessages[i]
+			}
+		}
+		Expect(lastUser).ToNot(BeNil())
+		Expect(lastUser.Name).To(Equal("alice"))
+	})
+})
+
+var _ = Describe("realtime multi-speaker history attribution (triggerResponse)", func() {
+	userAudioItem := func(name, transcript string) *types.MessageItemUnion {
+		return &types.MessageItemUnion{
+			User: &types.MessageItemUser{
+				ID:      generateItemID(),
+				Status:  types.ItemStatusCompleted,
+				Speaker: &types.Speaker{Name: name, Matched: true},
+				Content: []types.MessageContentInput{
+					{Type: types.MessageContentTypeInputAudio, Transcript: transcript},
+				},
+			},
+		}
+	}
+
+	It("attributes each user turn to its own speaker and notes the latest one", func() {
+		session, m := itSession(itGate("alice", "alice", []float32{1, 0, 0}, nil,
+			config.VoiceGateWhenEvery, config.VoiceGateRejectEvent))
+		session.Instructions = "You are helpful."
+		session.MaxHistoryItems = 10 // keep both items; 0 would mean "no trim" too
+		session.voiceGate.cfg.Identity = &config.VoiceIdentityConfig{
+			Personalize: true, InjectName: true, InjectSystemNote: true,
+		}
+
+		conv := &Conversation{Items: []*types.MessageItemUnion{
+			userAudioItem("alice", "hello there"),
+			userAudioItem("bob", "what is the weather"),
+		}}
+		tr := &fakeTransport{}
+
+		triggerResponse(context.Background(), session, conv, tr, nil)
+
+		var users []*schema.Message
+		var sys *schema.Message
+		for i := range m.lastMessages {
+			switch m.lastMessages[i].Role {
+			case "user":
+				users = append(users, &m.lastMessages[i])
+			case "system":
+				if sys == nil {
+					sys = &m.lastMessages[i]
+				}
+			}
+		}
+		Expect(users).To(HaveLen(2))
+		Expect(users[0].Name).To(Equal("alice"))
+		Expect(users[1].Name).To(Equal("bob"))
+
+		Expect(sys).ToNot(BeNil())
+		Expect(sys.StringContent).To(ContainSubstring("The current speaker is bob."))
+		Expect(sys.StringContent).ToNot(ContainSubstring("alice"))
+	})
+})
+
+func boolPtr(b bool) *bool { return &b }
diff --git a/core/http/endpoints/openai/realtime_voicegate_test.go b/core/http/endpoints/openai/realtime_voicegate_test.go
index bdbbc2f4a..3d9b458e1 100644
--- a/core/http/endpoints/openai/realtime_voicegate_test.go
+++ b/core/http/endpoints/openai/realtime_voicegate_test.go
@@ -10,6 +10,82 @@ import (
 	. "github.com/onsi/gomega"
 )
 
+var _ = Describe("voiceGate.Resolve + authorize", func() {
+	mkGate := func(allow []string) *voiceGate {
+		return &voiceGate{
+			cfg: config.PipelineVoiceRecognition{
+				Mode:      config.VoiceGateModeIdentify,
+				Threshold: 0.25,
+				Allow:     config.VoiceRecognitionAllow{Names: allow},
+			},
+			registry: &fakeRegistry{matches: []voicerecognition.Match{
+				{Distance: 0.1, Metadata: voicerecognition.Metadata{ID: "spk_1", Name: "alice", Labels: map[string]string{"family": "yes"}}},
+			}},
+			embedFn: func(context.Context, string) ([]float32, error) { return []float32{1, 0, 0}, nil },
+		}
+	}
+
+	It("resolves a confident identity with name, id and a 0..100 confidence", func() {
+		r, err := mkGate(nil).Resolve(context.Background(), "x.wav")
+		Expect(err).ToNot(HaveOccurred())
+		Expect(r.found).To(BeTrue())
+		Expect(r.speaker.Name).To(Equal("alice"))
+		Expect(r.speaker.ID).To(Equal("spk_1"))
+		Expect(r.speaker.Matched).To(BeTrue())
+		Expect(r.speaker.Confidence).To(BeNumerically(">", 0))
+		Expect(r.speaker.Confidence).To(BeNumerically("<=", 100))
+		Expect(r.speaker.Labels).To(HaveKeyWithValue("family", "yes"))
+	})
+
+	It("marks a candidate above the threshold as not matched", func() {
+		g := mkGate(nil)
+		g.registry = &fakeRegistry{matches: []voicerecognition.Match{
+			{Distance: 0.9, Metadata: voicerecognition.Metadata{Name: "alice"}},
+		}}
+		r, err := g.Resolve(context.Background(), "x.wav")
+		Expect(err).ToNot(HaveOccurred())
+		Expect(r.found).To(BeTrue())
+		Expect(r.speaker.Matched).To(BeFalse())
+		Expect(r.speaker.Name).To(Equal("alice")) // name still surfaced
+	})
+
+	It("authorize allows a confident match in the allow list", func() {
+		g := mkGate([]string{"alice"})
+		r, _ := g.Resolve(context.Background(), "x.wav")
+		allowed, reason := g.authorize(r)
+		Expect(allowed).To(BeTrue())
+		Expect(reason).To(BeEmpty())
+	})
+
+	It("authorize denies a confident match outside the allow list", func() {
+		g := mkGate([]string{"bob"})
+		r, _ := g.Resolve(context.Background(), "x.wav")
+		allowed, reason := g.authorize(r)
+		Expect(allowed).To(BeFalse())
+		Expect(reason).To(Equal("speaker not in allow list"))
+	})
+
+	It("authorize allows by label when names do not match", func() {
+		g := mkGate(nil)
+		g.cfg.Allow = config.VoiceRecognitionAllow{Labels: []string{"family"}}
+		r, _ := g.Resolve(context.Background(), "x.wav")
+		allowed, _ := g.authorize(r)
+		Expect(allowed).To(BeTrue())
+	})
+})
+
+var _ = Describe("confidence", func() {
+	It("is 100 at zero distance", func() {
+		Expect(confidence(0, 0.25)).To(BeNumerically("~", 100, 1e-4))
+	})
+	It("clamps to 0 above the threshold", func() {
+		Expect(confidence(0.5, 0.25)).To(BeNumerically("~", 0, 1e-4))
+	})
+	It("is 0 for a non-positive threshold", func() {
+		Expect(confidence(0.1, 0)).To(BeNumerically("~", 0, 1e-4))
+	})
+})
+
 var _ = Describe("cosineDistance", func() {
 	It("is 0 for identical vectors", func() {
 		Expect(cosineDistance([]float32{1, 0, 0}, []float32{1, 0, 0})).To(BeNumerically("~", 0, 1e-6))
diff --git a/core/http/endpoints/openai/types/message_item.go b/core/http/endpoints/openai/types/message_item.go
index 52997fe8c..88d680648 100644
--- a/core/http/endpoints/openai/types/message_item.go
+++ b/core/http/endpoints/openai/types/message_item.go
@@ -102,6 +102,10 @@ type MessageItemUser struct {
 
 	// The status of the item. Has no effect on the conversation.
 	Status ItemStatus `json:"status,omitempty"`
+
+	// Speaker is the recognized speaker for this audio turn (LocalAI extension).
+	// Used to attribute past turns when rebuilding the LLM message history.
+	Speaker *Speaker `json:"speaker,omitempty"`
 }
 
 func (m MessageItemUser) MessageItemType() MessageItemType {
diff --git a/core/http/endpoints/openai/types/server_events.go b/core/http/endpoints/openai/types/server_events.go
index bae680fd5..8183a8b78 100644
--- a/core/http/endpoints/openai/types/server_events.go
+++ b/core/http/endpoints/openai/types/server_events.go
@@ -20,6 +20,9 @@ const (
 	ServerEventTypeConversationItemInputAudioTranscriptionFailed    ServerEventType = "conversation.item.input_audio_transcription.failed"
 	ServerEventTypeConversationItemTruncated                        ServerEventType = "conversation.item.truncated"
 	ServerEventTypeConversationItemDeleted                          ServerEventType = "conversation.item.deleted"
+	// ServerEventTypeConversationItemSpeaker is a LocalAI extension: it reports
+	// the recognized speaker for a user audio item. OpenAI clients ignore it.
+	ServerEventTypeConversationItemSpeaker ServerEventType = "conversation.item.speaker"
 	ServerEventTypeInputAudioBufferCommitted                        ServerEventType = "input_audio_buffer.committed"
 	ServerEventTypeInputAudioBufferCleared                          ServerEventType = "input_audio_buffer.cleared"
 	ServerEventTypeInputAudioBufferSpeechStarted                    ServerEventType = "input_audio_buffer.speech_started"
@@ -335,6 +338,33 @@ func (m ConversationItemAddedEvent) MarshalJSON() ([]byte, error) {
 	return json.Marshal(shadow)
 }
 
+// ConversationItemSpeakerEvent reports the recognized speaker for a user audio
+// item. LocalAI extension; not part of the OpenAI Realtime API.
+type ConversationItemSpeakerEvent struct {
+	ServerEventBase
+	// ItemID is the conversation item this speaker belongs to.
+	ItemID string `json:"item_id"`
+	// Speaker is the recognized identity.
+	Speaker Speaker `json:"speaker"`
+}
+
+func (m ConversationItemSpeakerEvent) ServerEventType() ServerEventType {
+	return ServerEventTypeConversationItemSpeaker
+}
+
+func (m ConversationItemSpeakerEvent) MarshalJSON() ([]byte, error) {
+	type typeAlias ConversationItemSpeakerEvent
+	type typeWrapper struct {
+		typeAlias
+		Type ServerEventType `json:"type"`
+	}
+	shadow := typeWrapper{
+		typeAlias: typeAlias(m),
+		Type:      m.ServerEventType(),
+	}
+	return json.Marshal(shadow)
+}
+
 // Returned when a conversation item is finalized.
 //
 // The event will include the full content of the Item except for audio data, which can be retrieved separately with a `conversation.item.retrieve` event if needed.
diff --git a/core/http/endpoints/openai/types/speaker.go b/core/http/endpoints/openai/types/speaker.go
new file mode 100644
index 000000000..a5b02c927
--- /dev/null
+++ b/core/http/endpoints/openai/types/speaker.go
@@ -0,0 +1,14 @@
+package types
+
+// Speaker is the recognized speaker for a committed audio turn. It is a LocalAI
+// extension to the OpenAI Realtime schema, carried on the user conversation item
+// and surfaced via the conversation.item.speaker event. Confidence is a 0..100
+// score relative to the match threshold (same formula as /v1/voice/identify).
+type Speaker struct {
+	Name       string            `json:"name,omitempty"`
+	ID         string            `json:"id,omitempty"`
+	Labels     map[string]string `json:"labels,omitempty"`
+	Confidence float32           `json:"confidence"`
+	Distance   float32           `json:"distance"`
+	Matched    bool              `json:"matched"`
+}
diff --git a/docs/content/features/openai-realtime.md b/docs/content/features/openai-realtime.md
index 51f8960bf..48cfc9332 100644
--- a/docs/content/features/openai-realtime.md
+++ b/docs/content/features/openai-realtime.md
@@ -141,6 +141,8 @@ The API follows the OpenAI Realtime API protocol for handling sessions, audio bu
 
 A pipeline realtime model can require speaker verification before it responds. Add a `voice_recognition` block under `pipeline`. When present, each committed utterance is verified against authorized speakers; unauthorized utterances are dropped before the LLM runs (no LLM call, no tool execution, no TTS). The session stays open.
 
+The same block also drives two optional, independent behaviors: an authorization gate (`enforce`) and speaker surfacing/personalization (`identity`). Set `enforce: false` to keep recognizing the speaker without ever rejecting a turn.
+
 ```yaml
 name: my-realtime
 pipeline:
@@ -152,6 +154,7 @@ pipeline:
     model: speaker-recognition   # the speaker-recognition backend model
     mode: identify               # "identify" (registry) or "verify" (references)
     threshold: 0.25              # cosine distance; <= passes
+    enforce: true                # authorization gate (default true)
     when: every                  # "every" (default) or "first"
     on_reject: drop_event        # "drop_event" (default) or "drop_silent"
     anti_spoofing: false         # optional liveness check (verify mode)
@@ -170,19 +173,73 @@ pipeline:
         audio: /models/voices/bob.wav
 ```
 
+### Identifying speakers without gating
+
+To recognize who is speaking and surface it to the client and the LLM without ever rejecting a turn, set `enforce: false` and add an `identity` block. The `identity` block works with or without the gate; when it is set, the speaker is resolved on every turn even if `when: first`.
+
+```yaml
+name: my-realtime
+pipeline:
+  vad: silero-vad
+  transcription: whisper
+  llm: qwen
+  tts: kokoro
+  voice_recognition:
+    model: speaker-recognition
+    mode: identify
+    threshold: 0.25
+    # Authorization gate. Defaults to enforcing (rejects unauthorized speakers).
+    # Set enforce:false to identify the speaker WITHOUT rejecting anyone.
+    enforce: false
+    when: every
+    # Surface the recognized speaker to the client and the LLM. Works with or
+    # without enforce; when set, identity is resolved on every turn even if
+    # when:first.
+    identity:
+      announce: true            # emit the conversation.item.speaker event
+      announce_unknown: false   # also emit it when there is no confident match
+      personalize: true         # tell the LLM who is speaking
+      inject_name: true         # set the per-message OpenAI name field
+      inject_system_note: true  # append a "current speaker" line to the system message
+      note_unknown: false       # append a "speaker is unknown" note when unidentified
+```
+
 | Field | Meaning |
 |-------|---------|
 | `model` | Speaker-recognition backend model name. |
 | `mode` | `identify` matches against speakers registered via `/v1/voice/register`; `verify` matches against the `references` audios. |
 | `threshold` | Maximum cosine distance that still counts as a match (default ~0.25). |
-| `when` | `every` verifies each utterance; `first` verifies once then trusts the session. |
+| `enforce` | Authorization gate. `true` (or omitted) rejects unauthorized speakers (the gating behavior above). `false` resolves and surfaces the speaker without ever dropping a turn. |
+| `when` | `every` verifies each utterance; `first` verifies once then trusts the session. When an `identity` block is set, the speaker is still resolved on every turn even with `first`. |
 | `on_reject` | `drop_event` drops and emits a `speaker_not_authorized` error event; `drop_silent` drops quietly. |
 | `anti_spoofing` | Verify mode only: runs the backend liveness check (slower). |
 | `allow.names` / `allow.labels` | identify mode: which registry identities are authorized. Empty = any registered speaker. |
 | `references` | verify mode: authorized reference speakers; the utterance passes if it matches any. |
+| `identity.announce` | Emit the `conversation.item.speaker` event to the client (see below). |
+| `identity.announce_unknown` | Also emit that event when there is no confident match. By default the event is emitted only on a match. |
+| `identity.personalize` | Inform the LLM who is speaking. |
+| `identity.inject_name` | Set the per-message OpenAI `name` field on each user turn. |
+| `identity.inject_system_note` | Append a `The current speaker is <Name>.` line to the system message. |
+| `identity.note_unknown` | When unidentified, append `The current speaker is unknown.` (lets the model ask who it is talking to). |
 
 `identify` mode requires the voice registry (speakers registered through `/v1/voice/register`). `verify` mode needs no registry: reference audios are embedded once at model load.
 
+### The `conversation.item.speaker` event
+
+When `identity.announce` is enabled, the server emits a `conversation.item.speaker` event after the user conversation item, naming the recognized speaker:
+
+```json
+{
+  "type": "conversation.item.speaker",
+  "item_id": "item_abc",
+  "speaker": { "name": "Jeremy", "id": "spk_1", "labels": { "role": "owner" }, "confidence": 92.0, "distance": 0.1, "matched": true }
+}
+```
+
+`confidence` is a 0-100 score, `distance` is the cosine distance, and `matched` is `true` when a confident match was found. `labels` carries any labels attached to the registered speaker (identify mode); it is omitted when the speaker has none. The `name` and `id` fields are omitted when empty. By default the event is emitted only on a match; set `identity.announce_unknown: true` to also emit it (with `matched: false`) when no speaker is identified.
+
+This event is a LocalAI extension to the OpenAI Realtime API and is server-emitted only. Standard OpenAI Realtime clients ignore event types they do not recognize, so enabling it is non-breaking.
+
 ## Examples
 
 - [Realtime voice assistant demo (Go)](https://github.com/localai-org/localai-realtime-demo): a minimal Go client for the Realtime (WebSocket) API with a full talk-back voice loop and an example tool call. Ships a `docker compose` setup that brings up a realtime-capable LocalAI for you.
diff --git a/tests/e2e/e2e_suite_test.go b/tests/e2e/e2e_suite_test.go
index 72ac88b74..5a257bdb0 100644
--- a/tests/e2e/e2e_suite_test.go
+++ b/tests/e2e/e2e_suite_test.go
@@ -275,6 +275,40 @@ var _ = BeforeSuite(func() {
 	Expect(err).ToNot(HaveOccurred())
 	Expect(os.WriteFile(filepath.Join(modelsPath, "realtime-pipeline-gated.yaml"), gatedData, 0644)).To(Succeed())
 
+	// Identity-surfacing pipeline: the same speaker backend, but enforce:false
+	// (never drop a turn) plus an identity block so the server emits the
+	// conversation.item.speaker event and personalizes the LLM turn. Used by the
+	// speaker-identity e2e specs.
+	identityCfg := map[string]any{
+		"name": "realtime-pipeline-identity",
+		"pipeline": map[string]any{
+			"vad":           "mock-vad",
+			"transcription": "mock-stt",
+			"llm":           "mock-llm",
+			"tts":           "mock-tts",
+			"voice_recognition": map[string]any{
+				"model":     "mock-speaker",
+				"mode":      "verify",
+				"threshold": 0.25,
+				"when":      "every",
+				"enforce":   false,
+				"references": []map[string]any{
+					{"name": "e2e-speaker", "audio": voiceRefPath},
+				},
+				"identity": map[string]any{
+					"announce":           true,
+					"announce_unknown":   true,
+					"personalize":        true,
+					"inject_name":        true,
+					"inject_system_note": true,
+				},
+			},
+		},
+	}
+	identityData, err := yaml.Marshal(identityCfg)
+	Expect(err).ToNot(HaveOccurred())
+	Expect(os.WriteFile(filepath.Join(modelsPath, "realtime-pipeline-identity.yaml"), identityData, 0644)).To(Succeed())
+
 	// Router model setup: a score classifier (mock-backend Score) selects
 	// between two candidate chat models based on keyword matches against the
 	// candidate label fragments. Exercises the full RouteModel middleware path
diff --git a/tests/e2e/realtime_speaker_identity_test.go b/tests/e2e/realtime_speaker_identity_test.go
new file mode 100644
index 000000000..41a15f0fb
--- /dev/null
+++ b/tests/e2e/realtime_speaker_identity_test.go
@@ -0,0 +1,95 @@
+package e2e_test
+
+import (
+	"encoding/base64"
+	"time"
+
+	"github.com/gorilla/websocket"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+// These specs drive the speaker-identity surfacing end to end against a real
+// LocalAI server over a real WebSocket, using the mock backend's VoiceEmbed
+// (DC-biased PCM -> one of two orthogonal speaker vectors). The pipeline is
+// realtime-pipeline-identity: verify mode with enforce:false plus an identity
+// block, so the server resolves the speaker, emits a conversation.item.speaker
+// event, and never drops a turn.
+var _ = Describe("Realtime speaker identity surfacing", Label("Realtime"), func() {
+	// open connects to the identity pipeline and disables server VAD so the
+	// test can commit the input buffer manually.
+	open := func() *websocket.Conn {
+		c := connectWS("realtime-pipeline-identity")
+		created := readServerEvent(c, 30*time.Second)
+		Expect(created["type"]).To(Equal("session.created"))
+		sendClientEvent(c, disableVADEvent())
+		drainUntil(c, "session.updated", 10*time.Second)
+		return c
+	}
+
+	commit := func(c *websocket.Conn, pcm []byte) {
+		sendClientEvent(c, map[string]any{
+			"type":  "input_audio_buffer.append",
+			"audio": base64.StdEncoding.EncodeToString(pcm),
+		})
+		sendClientEvent(c, map[string]any{"type": "input_audio_buffer.commit"})
+	}
+
+	// collectUntilDone reads events until response.done (or timeout), returning
+	// the conversation.item.speaker event (nil if none) and whether the turn
+	// reached response.done.
+	collectUntilDone := func(c *websocket.Conn, timeout time.Duration) (speaker map[string]any, gotDone bool) {
+		deadline := time.Now().Add(timeout)
+		for time.Now().Before(deadline) {
+			evt := readServerEvent(c, time.Until(deadline))
+			switch evt["type"] {
+			case "conversation.item.speaker":
+				speaker = evt
+			case "response.done":
+				return speaker, true
+			}
+		}
+		return speaker, false
+	}
+
+	It("emits conversation.item.speaker naming an authorized speaker and still responds", func() {
+		c := open()
+		defer func() { _ = c.Close() }()
+
+		// Positive DC bias matches the enrolled reference speaker.
+		commit(c, pcmWithDC(300, 16000, 1000, 8000))
+		drainUntil(c, "input_audio_buffer.committed", 30*time.Second)
+
+		speaker, gotDone := collectUntilDone(c, 60*time.Second)
+		Expect(speaker).ToNot(BeNil(), "expected a conversation.item.speaker event")
+		Expect(speaker["item_id"]).ToNot(BeEmpty())
+
+		spk, ok := speaker["speaker"].(map[string]any)
+		Expect(ok).To(BeTrue(), "speaker payload should be an object")
+		Expect(spk["matched"]).To(Equal(true))
+		Expect(spk["name"]).To(Equal("e2e-speaker"))
+
+		Expect(gotDone).To(BeTrue(), "enforce:false should let the turn reach response.done")
+	})
+
+	It("emits an unknown speaker event and still responds when enforce is false", func() {
+		c := open()
+		defer func() { _ = c.Close() }()
+
+		// Negative DC bias is a different speaker that matches no reference.
+		commit(c, pcmWithDC(300, 16000, 1000, -8000))
+		drainUntil(c, "input_audio_buffer.committed", 30*time.Second)
+
+		speaker, gotDone := collectUntilDone(c, 60*time.Second)
+		Expect(speaker).ToNot(BeNil(), "announce_unknown should still emit the event")
+
+		spk, ok := speaker["speaker"].(map[string]any)
+		Expect(ok).To(BeTrue(), "speaker payload should be an object")
+		Expect(spk["matched"]).To(Equal(false))
+		// name is omitted for an unidentified speaker.
+		_, hasName := spk["name"]
+		Expect(hasName).To(BeFalse())
+
+		Expect(gotDone).To(BeTrue(), "enforce:false must not drop an unauthorized speaker")
+	})
+})