diff --git a/core/http/endpoints/openai/realtime.go b/core/http/endpoints/openai/realtime.go
index 3c7a5b477..b34ee031e 100644
--- a/core/http/endpoints/openai/realtime.go
+++ b/core/http/endpoints/openai/realtime.go
@@ -36,6 +36,10 @@ const (
 	// XXX: Presently it seems all ASR/VAD backends use 16Khz. If a backend uses 24Khz then it will likely still work, but have reduced performance
 	localSampleRate         = 16000
 	defaultRemoteSampleRate = 24000
+	// Maximum audio buffer size in bytes (100MB) to prevent memory exhaustion
+	maxAudioBufferSize = 100 * 1024 * 1024
+	// Maximum WebSocket message size in bytes (10MB) to prevent DoS attacks
+	maxWebSocketMessageSize = 10 * 1024 * 1024
 )
 
 // A model can be "emulated" that is: transcribe audio to text -> feed text to the LLM -> generate audio as result
@@ -170,6 +174,9 @@ func Realtime(application *application.Application) echo.HandlerFunc {
 		}
 		defer ws.Close()
 
+		// Set maximum message size to prevent DoS attacks
+		ws.SetReadLimit(maxWebSocketMessageSize)
+
 		// Extract query parameters from Echo context before passing to websocket handler
 		model := c.QueryParam("model")
 
@@ -373,8 +380,17 @@ func registerRealtime(application *application.Application, model string) func(
 				continue
 			}
 
-			// Append to InputAudioBuffer
+			// Check buffer size limits before appending
 			session.AudioBufferLock.Lock()
+			newSize := len(session.InputAudioBuffer) + len(decodedAudio)
+			if newSize > maxAudioBufferSize {
+				session.AudioBufferLock.Unlock()
+				xlog.Error("audio buffer size limit exceeded", "current_size", len(session.InputAudioBuffer), "incoming_size", len(decodedAudio), "limit", maxAudioBufferSize)
+				sendError(c, "buffer_size_exceeded", fmt.Sprintf("Audio buffer size limit exceeded (max %d bytes)", maxAudioBufferSize), "", "")
+				continue
+			}
+
+			// Append to InputAudioBuffer
 			session.InputAudioBuffer = append(session.InputAudioBuffer, decodedAudio...)
 			session.AudioBufferLock.Unlock()
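
The core of the last hunk is a check-then-append guard: the size test and the append happen under a single lock acquisition, so no concurrent writer can push the buffer past the limit in between. Below is a minimal, self-contained Go sketch of that pattern. `maxAudioBufferSize` mirrors the constant in the diff, but `Session`, `appendAudio`, and `errBufferSizeExceeded` are hypothetical stand-ins, not the actual types from `realtime.go`.

```go
package main

import (
	"errors"
	"fmt"
	"sync"
)

// maxAudioBufferSize mirrors the 100MB limit introduced in the diff.
const maxAudioBufferSize = 100 * 1024 * 1024

// Session is a hypothetical stand-in for the realtime session type;
// only the fields relevant to the buffer check are included.
type Session struct {
	AudioBufferLock  sync.Mutex
	InputAudioBuffer []byte
}

var errBufferSizeExceeded = errors.New("audio buffer size limit exceeded")

// appendAudio performs the same check-then-append pattern as the diff:
// the size test and the append run under one lock acquisition, so the
// buffer can never grow past the limit even with concurrent callers.
func (s *Session) appendAudio(decoded []byte) error {
	s.AudioBufferLock.Lock()
	defer s.AudioBufferLock.Unlock()

	if len(s.InputAudioBuffer)+len(decoded) > maxAudioBufferSize {
		return fmt.Errorf("%w: current=%d incoming=%d limit=%d",
			errBufferSizeExceeded, len(s.InputAudioBuffer), len(decoded), maxAudioBufferSize)
	}
	s.InputAudioBuffer = append(s.InputAudioBuffer, decoded...)
	return nil
}

func main() {
	s := &Session{}
	if err := s.appendAudio(make([]byte, 1024)); err != nil {
		fmt.Println("append rejected:", err)
		return
	}
	fmt.Println("buffered bytes:", len(s.InputAudioBuffer))
}
```

One design note on the first hunk: the diff doesn't show which WebSocket package backs `ws`, but assuming it is gorilla/websocket (or another library with the same `SetReadLimit(int64)` semantics), a message exceeding the limit causes the read to fail with an error and the connection to be closed, rather than the oversized message being buffered. That makes the 10MB read limit a cheap first line of defense in front of the 100MB per-session buffer cap.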