From b95cf79a6dfcc78e8aae49b06c0c330b98229bfb Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Wed, 12 Feb 2025 17:03:53 +0100 Subject: [PATCH] refactor(event cache): move the gist of deduplication into `BloomFilterDeduplicator` --- .../src/event_cache/deduplicator.rs | 47 ++++++++++++++++++- crates/matrix-sdk/src/event_cache/room/mod.rs | 37 ++------------- 2 files changed, 49 insertions(+), 35 deletions(-) diff --git a/crates/matrix-sdk/src/event_cache/deduplicator.rs b/crates/matrix-sdk/src/event_cache/deduplicator.rs index c9b8e7cd6..611e37ec5 100644 --- a/crates/matrix-sdk/src/event_cache/deduplicator.rs +++ b/crates/matrix-sdk/src/event_cache/deduplicator.rs @@ -18,7 +18,8 @@ use std::{collections::BTreeSet, fmt, sync::Mutex}; use growable_bloom_filter::{GrowableBloom, GrowableBloomBuilder}; -use tracing::warn; +use ruma::OwnedEventId; +use tracing::{debug, warn}; use super::room::events::{Event, RoomEvents}; @@ -72,6 +73,48 @@ impl BloomFilterDeduplicator { Self { bloom_filter: Mutex::new(bloom_filter) } } + /// Find duplicates in the given collection of events, and return both + /// valid events (those with an event id) as well as the event ids of + /// duplicate events. + pub fn filter_duplicate_events<'a, I>( + &'a self, + events: I, + room_events: &'a RoomEvents, + ) -> (Vec, Vec) + where + I: Iterator + 'a, + { + let mut duplicated_event_ids = Vec::new(); + + let events = self + .scan_and_learn(events, room_events) + .filter_map(|decorated_event| match decorated_event { + Decoration::Unique(event) => Some(event), + Decoration::Duplicated(event) => { + debug!(event_id = ?event.event_id(), "Found a duplicated event"); + + duplicated_event_ids.push( + event + .event_id() + // SAFETY: An event with no ID is decorated as + // `Decoration::Invalid`. Thus, it's + // safe to unwrap the `Option` here. + .expect("The event has no ID"), + ); + + // Keep the new event! + Some(event) + } + Decoration::Invalid(event) => { + warn!(?event, "Found an event with no ID"); + None + } + }) + .collect::>(); + + (events, duplicated_event_ids) + } + /// Scan a collection of events and detect duplications. /// /// This method takes a collection of events `new_events_to_scan` and @@ -82,7 +125,7 @@ impl BloomFilterDeduplicator { /// Each scanned event will update `Self`'s internal state. /// /// `existing_events` represents all events of a room that already exist. - pub fn scan_and_learn<'a, I>( + fn scan_and_learn<'a, I>( &'a self, new_events_to_scan: I, existing_events: &'a RoomEvents, diff --git a/crates/matrix-sdk/src/event_cache/room/mod.rs b/crates/matrix-sdk/src/event_cache/room/mod.rs index b31bc228c..d06bf9f6e 100644 --- a/crates/matrix-sdk/src/event_cache/room/mod.rs +++ b/crates/matrix-sdk/src/event_cache/room/mod.rs @@ -540,13 +540,10 @@ mod private { }; use once_cell::sync::OnceCell; use ruma::{serde::Raw, OwnedEventId, OwnedRoomId, RoomId}; - use tracing::{debug, error, instrument, trace, warn}; + use tracing::{error, instrument, trace}; use super::{chunk_debug_string, events::RoomEvents}; - use crate::event_cache::{ - deduplicator::{BloomFilterDeduplicator, Decoration}, - EventCacheError, - }; + use crate::event_cache::{deduplicator::BloomFilterDeduplicator, EventCacheError}; /// State for a single room's event cache. /// @@ -668,34 +665,8 @@ mod private { where I: Iterator + 'a, { - let mut duplicated_event_ids = Vec::new(); - - let events = self - .deduplicator - .scan_and_learn(events, &self.events) - .filter_map(|decorated_event| match decorated_event { - Decoration::Unique(event) => Some(event), - Decoration::Duplicated(event) => { - debug!(event_id = ?event.event_id(), "Found a duplicated event"); - - duplicated_event_ids.push( - event - .event_id() - // SAFETY: An event with no ID is decorated as - // `Decoration::Invalid`. Thus, it's - // safe to unwrap the `Option` here. - .expect("The event has no ID"), - ); - - // Keep the new event! - Some(event) - } - Decoration::Invalid(event) => { - warn!(?event, "Found an event with no ID"); - None - } - }) - .collect::>(); + let (events, duplicated_event_ids) = + self.deduplicator.filter_duplicate_events(events, &self.events); let all_duplicates = !events.is_empty() && events.len() == duplicated_event_ids.len();