From 3aa0a905b23e7da0bede174a5f4b74262793145e Mon Sep 17 00:00:00 2001 From: Ivan Enderlin Date: Wed, 20 Mar 2024 08:32:45 +0100 Subject: [PATCH 1/4] feat(sdk): `LinkedChunk::replace_gap_at` returns `&Chunk`. --- .../src/event_cache/linked_chunk.rs | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/crates/matrix-sdk/src/event_cache/linked_chunk.rs b/crates/matrix-sdk/src/event_cache/linked_chunk.rs index 49702fc0b..3abc79adc 100644 --- a/crates/matrix-sdk/src/event_cache/linked_chunk.rs +++ b/crates/matrix-sdk/src/event_cache/linked_chunk.rs @@ -241,17 +241,21 @@ impl LinkedChunk { /// /// Because the `chunk_identifier` can represent non-gap chunk, this method /// returns a `Result`. + /// + /// The returned `Chunk` represents the newly created `Chunk` that contains + /// the first items. pub fn replace_gap_at( &mut self, items: I, chunk_identifier: ChunkIdentifier, - ) -> Result<(), LinkedChunkError> + ) -> Result<&Chunk, LinkedChunkError> where I: IntoIterator, I::IntoIter: ExactSizeIterator, { let chunk_identifier_generator = self.chunk_identifier_generator.clone(); let chunk_ptr; + let new_chunk_ptr; { let chunk = self @@ -269,8 +273,7 @@ impl LinkedChunk { // Insert a new items chunk… .insert_next(Chunk::new_items_leaked( chunk_identifier_generator.generate_next().unwrap(), - )) - // … and insert the items. + )) // … and insert the items. .push_items(items, &chunk_identifier_generator); ( @@ -283,6 +286,11 @@ impl LinkedChunk { } }; + new_chunk_ptr = chunk + .next + // SAFETY: A new `Chunk` has just been inserted, so it exists. + .unwrap(); + // Now that new items have been pushed, we can unlink the gap chunk. chunk.unlink(); @@ -305,7 +313,10 @@ impl LinkedChunk { // pointer, which is valid and well aligned. let _chunk_boxed = unsafe { Box::from_raw(chunk_ptr.as_ptr()) }; - Ok(()) + Ok( + // SAFETY: `new_chunk_ptr` is valid, non-null and well-aligned. + unsafe { new_chunk_ptr.as_ref() }, + ) } /// Get the chunk as a reference, from its identifier, if it exists. @@ -1445,7 +1456,9 @@ mod tests { let gap_identifier = linked_chunk.chunk_identifier(Chunk::is_gap).unwrap(); assert_eq!(gap_identifier, ChunkIdentifier(1)); - linked_chunk.replace_gap_at(['d', 'e', 'f', 'g', 'h'], gap_identifier)?; + let new_chunk = + linked_chunk.replace_gap_at(['d', 'e', 'f', 'g', 'h'], gap_identifier)?; + assert_eq!(new_chunk.identifier(), ChunkIdentifier(3)); assert_items_eq!( linked_chunk, ['a', 'b'] ['d', 'e', 'f'] ['g', 'h'] ['l', 'm'] @@ -1463,7 +1476,8 @@ mod tests { let gap_identifier = linked_chunk.chunk_identifier(Chunk::is_gap).unwrap(); assert_eq!(gap_identifier, ChunkIdentifier(5)); - linked_chunk.replace_gap_at(['w', 'x', 'y', 'z'], gap_identifier)?; + let new_chunk = linked_chunk.replace_gap_at(['w', 'x', 'y', 'z'], gap_identifier)?; + assert_eq!(new_chunk.identifier(), ChunkIdentifier(6)); assert_items_eq!( linked_chunk, ['a', 'b'] ['d', 'e', 'f'] ['g', 'h'] ['l', 'm'] ['w', 'x', 'y'] ['z'] From a248ec75e2ecdb05fc7a079a77b6d7ef55a48ec7 Mon Sep 17 00:00:00 2001 From: Ivan Enderlin Date: Wed, 20 Mar 2024 12:01:39 +0100 Subject: [PATCH 2/4] feat(sdk): Optimise how `insert_gap_at` works when inserting at first position. Imagine we have this linked chunk: ```rust assert_items_eq!(linked_chunk, ['a'] [-] ['b', 'c'] ['d', 'e', 'f']); ``` Before the patch, when we were running: ```rust let position_of_d = linked_chunk.item_position(|item| *item == 'd').unwrap(); linked_chunk.insert_gap_at((), position_of_d)?; ``` it was taking `['d', 'e', 'f']` to split it at index 0, so `[]` + `['d', 'e', 'f']`, and then was inserting a gap in between, thus resulting into: ```rust assert_items_eq!(linked_chunk, ['a'] [-] ['b', 'c'] [] [-] ['d', 'e', 'f']); ``` The problem is that it creates a useless empty chunk. It's a waste of space, and rapidly, of computation. When used with `EventCache`, this problem occurs every time a backpagination occurs (because it executes a `replace_gap_at` to insert the new item, and then executes a `insert_gap_at` if the backpagination contains another `prev_batch` token). With this patch, the result of the operation is now: ```rust assert_items_eq!(linked_chunk, ['a'] [-] ['b', 'c'] [-] ['d', 'e', 'f']); ``` The `assert_items_eq!` macro has been updated to be able to assert empty chunks. The `test_insert_gap_at` has been updated to test all edge cases. --- .../src/event_cache/linked_chunk.rs | 131 ++++++++++++------ 1 file changed, 87 insertions(+), 44 deletions(-) diff --git a/crates/matrix-sdk/src/event_cache/linked_chunk.rs b/crates/matrix-sdk/src/event_cache/linked_chunk.rs index 3abc79adc..742e6aeab 100644 --- a/crates/matrix-sdk/src/event_cache/linked_chunk.rs +++ b/crates/matrix-sdk/src/event_cache/linked_chunk.rs @@ -197,6 +197,30 @@ impl LinkedChunk { .chunk_mut(chunk_identifier) .ok_or(LinkedChunkError::InvalidChunkIdentifier { identifier: chunk_identifier })?; + // If `item_index` is 0, we don't want to split the current items chunk to + // insert a new gap chunk, otherwise it would create an empty current items + // chunk. Let's handle this case in particular. + // + // Of course this optimisation applies if there is a previous chunk. Remember + // the invariant: a `Gap` cannot be the first chunk. + if item_index == 0 && chunk.is_items() && chunk.previous.is_some() { + let previous_chunk = chunk + .previous_mut() + // SAFETY: The `previous` chunk exists because we have tested + // `chunk.previous.is_some()` in the `if` statement. + .unwrap(); + + previous_chunk.insert_next(Chunk::new_gap_leaked( + chunk_identifier_generator.generate_next().unwrap(), + content, + )); + + // We don't need to update `self.last` because we have inserted a new chunk + // before `chunk`. + + return Ok(()); + } + let chunk = match &mut chunk.content { ChunkContent::Gap(..) => { return Err(LinkedChunkError::ChunkIsAGap { identifier: chunk_identifier }); @@ -345,7 +369,7 @@ impl LinkedChunk { } } - /// Search for a chunk, and return its identifier. + /// Search backward for a chunk, and return its identifier. pub fn chunk_identifier<'a, P>(&'a self, mut predicate: P) -> Option where P: FnMut(&'a Chunk) -> bool, @@ -353,7 +377,7 @@ impl LinkedChunk { self.rchunks().find_map(|chunk| predicate(chunk).then(|| chunk.identifier())) } - /// Search for an item, and return its position. + /// Search backward for an item, and return its position. pub fn item_position<'a, P>(&'a self, mut predicate: P) -> Option where P: FnMut(&'a Item) -> bool, @@ -952,75 +976,60 @@ mod tests { }; macro_rules! assert_items_eq { - ( @_ [ $iterator:ident, $chunk_index:ident, $item_index:ident ] { [-] $( $rest:tt )* } { $( $accumulator:tt )* } ) => { + ( @_ [ $iterator:ident ] { [-] $( $rest:tt )* } { $( $accumulator:tt )* } ) => { assert_items_eq!( @_ - [ $iterator, $chunk_index, $item_index ] + [ $iterator ] { $( $rest )* } { $( $accumulator )* - $chunk_index += 1; + { + let chunk = $iterator .next().expect("next chunk (expect gap)"); + assert!(chunk.is_gap(), "chunk "); + } } ) }; - ( @_ [ $iterator:ident, $chunk_index:ident, $item_index:ident ] { [ $( $item:expr ),* ] $( $rest:tt )* } { $( $accumulator:tt )* } ) => { + ( @_ [ $iterator:ident ] { [ $( $item:expr ),* ] $( $rest:tt )* } { $( $accumulator:tt )* } ) => { assert_items_eq!( @_ - [ $iterator, $chunk_index, $item_index ] + [ $iterator ] { $( $rest )* } { $( $accumulator )* - let _expected_chunk_identifier = $iterator .peek().unwrap().1.chunk_identifier(); - $( - assert_matches!( - $iterator .next(), - Some((chunk_index, Position(chunk_identifier, item_index), & $item )) => { - // Ensure the chunk index (from the enumeration) is correct. - assert_eq!(chunk_index, $chunk_index); - // Ensure the chunk identifier is the same for all items in this chunk. - assert_eq!(chunk_identifier, _expected_chunk_identifier); - // Ensure the item has the expected position. - assert_eq!(item_index, $item_index); - } - ); - $item_index += 1; - )* - $item_index = 0; - $chunk_index += 1; + { + let chunk = $iterator .next().expect("next chunk (expect items)"); + assert!(chunk.is_items()); + + let ChunkContent::Items(items) = chunk.content() else { unreachable!() }; + + let mut items_iterator = items.iter(); + + $( + assert_eq!(items_iterator.next(), Some(& $item )); + )* + + assert!(items_iterator.next().is_none(), "no more items"); + } } ) }; - ( @_ [ $iterator:ident, $chunk_index:ident, $item_index:ident ] {} { $( $accumulator:tt )* } ) => { + ( @_ [ $iterator:ident ] {} { $( $accumulator:tt )* } ) => { { - let mut $chunk_index = 0; - let mut $item_index = 0; $( $accumulator )* + assert!( $iterator .next().is_none(), "no more chunks"); } }; ( $linked_chunk:expr, $( $all:tt )* ) => { assert_items_eq!( @_ - [ iterator, _chunk_index, _item_index ] + [ iterator ] { $( $all )* } { - let mut iterator = $linked_chunk - .chunks() - .enumerate() - .filter_map(|(chunk_index, chunk)| match &chunk.content { - ChunkContent::Gap(..) => None, - ChunkContent::Items(items) => { - let identifier = chunk.identifier(); - - Some(items.iter().enumerate().map(move |(item_index, item)| { - (chunk_index, Position(identifier, item_index), item) - })) - } - }) - .flatten() - .peekable(); + let mut iterator = $linked_chunk.chunks(); } ) } @@ -1403,14 +1412,48 @@ mod tests { assert_items_eq!(linked_chunk, ['a'] [-] ['b', 'c'] ['d', 'e', 'f']); } - // Insert at the beginning of a chunk. + // Insert at the beginning of a chunk + it's the first chunk. { let position_of_a = linked_chunk.item_position(|item| *item == 'a').unwrap(); linked_chunk.insert_gap_at((), position_of_a)?; + // A new empty chunk is created as the first chunk. assert_items_eq!(linked_chunk, [] [-] ['a'] [-] ['b', 'c'] ['d', 'e', 'f']); } + // Insert at the beginning of a chunk. + { + let position_of_d = linked_chunk.item_position(|item| *item == 'd').unwrap(); + linked_chunk.insert_gap_at((), position_of_d)?; + + // A new empty chunk is NOT created, i.e. `['d', 'e', 'f']` is not + // split into `[]` + `['d', 'e', 'f']` because it's a waste of + // space. + assert_items_eq!(linked_chunk, [] [-] ['a'] [-] ['b', 'c'] [-] ['d', 'e', 'f']); + } + + // Insert in an empty chunk + it's the first chunk. + { + let position_of_first_empty_chunk = Position(ChunkIdentifier(0), 0); + assert_matches!( + linked_chunk.insert_gap_at((), position_of_first_empty_chunk), + Err(LinkedChunkError::InvalidItemIndex { index: 0 }) + ); + } + + // Insert in an empty chunk. + { + // Replace a gap by empty items. + let gap_identifier = linked_chunk.chunk_identifier(Chunk::is_gap).unwrap(); + let position = linked_chunk.replace_gap_at([], gap_identifier)?.first_position(); + + assert_items_eq!(linked_chunk, [] [-] ['a'] [-] ['b', 'c'] [] ['d', 'e', 'f']); + + linked_chunk.insert_gap_at((), position)?; + + assert_items_eq!(linked_chunk, [] [-] ['a'] [-] ['b', 'c'] [-] [] ['d', 'e', 'f']); + } + // Insert in a chunk that does not exist. { assert_matches!( From 57b68614af71b81cf2ce8eea5fd0809ab5524811 Mon Sep 17 00:00:00 2001 From: Ivan Enderlin Date: Wed, 20 Mar 2024 15:50:51 +0100 Subject: [PATCH 3/4] doc(sdk): US vs UK strike again. --- crates/matrix-sdk/src/event_cache/linked_chunk.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/matrix-sdk/src/event_cache/linked_chunk.rs b/crates/matrix-sdk/src/event_cache/linked_chunk.rs index 742e6aeab..f0d617b68 100644 --- a/crates/matrix-sdk/src/event_cache/linked_chunk.rs +++ b/crates/matrix-sdk/src/event_cache/linked_chunk.rs @@ -369,7 +369,7 @@ impl LinkedChunk { } } - /// Search backward for a chunk, and return its identifier. + /// Search backwards for a chunk, and return its identifier. pub fn chunk_identifier<'a, P>(&'a self, mut predicate: P) -> Option where P: FnMut(&'a Chunk) -> bool, @@ -377,7 +377,7 @@ impl LinkedChunk { self.rchunks().find_map(|chunk| predicate(chunk).then(|| chunk.identifier())) } - /// Search backward for an item, and return its position. + /// Search backwards for an item, and return its position. pub fn item_position<'a, P>(&'a self, mut predicate: P) -> Option where P: FnMut(&'a Item) -> bool, @@ -385,7 +385,7 @@ impl LinkedChunk { self.ritems().find_map(|(item_position, item)| predicate(item).then_some(item_position)) } - /// Iterate over the chunks, backward. + /// Iterate over the chunks, backwards. /// /// It iterates from the last to the first chunk. pub fn rchunks(&self) -> LinkedChunkIterBackward<'_, Item, Gap, CAP> { From 962c0bf4fda41eae404f7ff37c3715268d057b2c Mon Sep 17 00:00:00 2001 From: Ivan Enderlin Date: Wed, 20 Mar 2024 15:57:04 +0100 Subject: [PATCH 4/4] doc(sdk): Improve `SAFETY` paragraphs, and replace `unwrap`s by `expect`s. --- crates/matrix-sdk/src/event_cache/linked_chunk.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/crates/matrix-sdk/src/event_cache/linked_chunk.rs b/crates/matrix-sdk/src/event_cache/linked_chunk.rs index f0d617b68..0630d8b81 100644 --- a/crates/matrix-sdk/src/event_cache/linked_chunk.rs +++ b/crates/matrix-sdk/src/event_cache/linked_chunk.rs @@ -208,7 +208,7 @@ impl LinkedChunk { .previous_mut() // SAFETY: The `previous` chunk exists because we have tested // `chunk.previous.is_some()` in the `if` statement. - .unwrap(); + .expect("Previous chunk must be present"); previous_chunk.insert_next(Chunk::new_gap_leaked( chunk_identifier_generator.generate_next().unwrap(), @@ -266,8 +266,8 @@ impl LinkedChunk { /// Because the `chunk_identifier` can represent non-gap chunk, this method /// returns a `Result`. /// - /// The returned `Chunk` represents the newly created `Chunk` that contains - /// the first items. + /// This method returns a reference to the (first if many) newly created + /// `Chunk` that contains the `items`. pub fn replace_gap_at( &mut self, items: I, @@ -333,12 +333,14 @@ impl LinkedChunk { // Re-box the chunk, and let Rust does its job. // - // SAFETY: `chunk` is unlinked but it still exists in memory! We have its - // pointer, which is valid and well aligned. + // SAFETY: `chunk` is unlinked and not borrowed anymore. `LinkedChunk` doesn't + // use it anymore, it's a leak. It is time to re-`Box` it and drop it. let _chunk_boxed = unsafe { Box::from_raw(chunk_ptr.as_ptr()) }; Ok( - // SAFETY: `new_chunk_ptr` is valid, non-null and well-aligned. + // SAFETY: `new_chunk_ptr` is valid, non-null and well-aligned. It's taken from + // `chunk`, and that's how the entire `LinkedChunk` type works. Pointer construction + // safety is guaranteed by `Chunk::new_items_leaked` and `Chunk::new_gap_leaked`. unsafe { new_chunk_ptr.as_ref() }, ) }