From b2dd5ce02d4eeb8b457f0bd8cf0d344b4e1f3299 Mon Sep 17 00:00:00 2001 From: Shrey Patel Date: Tue, 19 Aug 2025 17:05:01 +0100 Subject: [PATCH] feat(search): add deletion from index --- crates/matrix-sdk-search/src/index.rs | 20 ++++++-- crates/matrix-sdk-search/src/schema.rs | 65 +++++++++++++++----------- crates/matrix-sdk-search/src/writer.rs | 29 +++++++----- 3 files changed, 71 insertions(+), 43 deletions(-) diff --git a/crates/matrix-sdk-search/src/index.rs b/crates/matrix-sdk-search/src/index.rs index 9d8167f9c..7a399ff2a 100644 --- a/crates/matrix-sdk-search/src/index.rs +++ b/crates/matrix-sdk-search/src/index.rs @@ -34,6 +34,8 @@ use crate::{ /// A struct to represent the operations on a [`RoomIndex`] pub(crate) enum RoomIndexOperation { Add(TantivyDocument), + Remove(OwnedEventId), + Noop, } /// A struct that holds all data pertaining to a particular room's @@ -66,8 +68,8 @@ impl RoomIndex { let query_parser = QueryParser::for_index(&index, schema.default_search_fields()); Ok(Self { + writer: SearchIndexWriter::new(writer, schema.clone()), schema, - writer: writer.into(), reader, query_parser, room_id: room_id.to_owned(), @@ -128,16 +130,24 @@ impl RoomIndex { /// /// This which will add/remove/edit an event in the index based on the /// event type. - pub fn handle_event(&mut self, event: AnySyncMessageLikeEvent) -> Result<(), IndexError> { + pub fn handle_event( + &mut self, + event: AnySyncMessageLikeEvent, + rules: &RedactionRules, + ) -> Result<(), IndexError> { let event_id = event.event_id().to_owned(); - match self.schema.handle_event(event)? { + match self.schema.handle_event(event, rules)? 
{ RoomIndexOperation::Add(document) => { if !self.contains(&event_id) { - self.writer.add_document(document)?; + self.writer.add(document)?; } } - } + RoomIndexOperation::Remove(event_id) => { + self.writer.remove(&event_id); + } + RoomIndexOperation::Noop => {} + }; Ok(()) } diff --git a/crates/matrix-sdk-search/src/schema.rs b/crates/matrix-sdk-search/src/schema.rs index 3cee31aa4..0d28754eb 100644 --- a/crates/matrix-sdk-search/src/schema.rs +++ b/crates/matrix-sdk-search/src/schema.rs @@ -12,14 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. -use ruma::events::{ - AnySyncMessageLikeEvent, MessageLikeEventContent, RedactContent, - RedactedMessageLikeEventContent, SyncMessageLikeEvent, room::message::MessageType, +use ruma::{ + events::{ + AnySyncMessageLikeEvent, SyncMessageLikeEvent, + room::{ + message::{MessageType, RoomMessageEventContent}, + redaction::SyncRoomRedactionEvent, + }, + }, + room_version_rules::RedactionRules, }; use tantivy::{ DateTime, TantivyDocument, doc, schema::{DateOptions, DateTimePrecision, Field, INDEXED, STORED, STRING, Schema, TEXT}, }; +use tracing::trace; use crate::{ error::{IndexError, IndexSchemaError}, @@ -48,21 +55,19 @@ pub(crate) struct RoomMessageSchema { } impl RoomMessageSchema { - /// Given an [`AnySyncMessageLikeEvent`] and a function to convert the - /// content into a String to be indexed, return a [`TantivyDocument`] to - /// index. + /// Given a [`SyncMessageLikeEvent`] holding room message content, return a + /// [`TantivyDocument`] to index. Only unredacted events with a text + /// message type are supported; anything else yields an error. 
+ fn make_doc( &self, - event: SyncMessageLikeEvent, - get_body_from_content: F, - ) -> Result - where - ::Redacted: RedactedMessageLikeEventContent, - F: FnOnce(&C) -> Result, - { + event: SyncMessageLikeEvent, + ) -> Result { let unredacted = event.as_original().ok_or(IndexError::CannotIndexRedactedMessage)?; - let body = get_body_from_content(&unredacted.content)?; + let body = match &unredacted.content.msgtype { + MessageType::Text(content) => Ok(content.body.clone()), + _ => Err(IndexError::MessageTypeNotSupported), + }?; Ok(doc!( self.event_id_field => unredacted.event_id.to_string(), @@ -116,22 +121,28 @@ impl MatrixSearchIndexSchema for RoomMessageSchema { fn handle_event( &self, event: AnySyncMessageLikeEvent, + rules: &RedactionRules, ) -> Result { match event { // m.room.message behaviour - AnySyncMessageLikeEvent::RoomMessage(event) => self - .make_doc(event, |content| match &content.msgtype { - MessageType::Text(content) => Ok(content.body.clone()), - _ => Err(IndexError::MessageTypeNotSupported), - }) - .map(RoomIndexOperation::Add), + AnySyncMessageLikeEvent::RoomMessage(event) => { + self.make_doc(event).map(RoomIndexOperation::Add) + } - // new MSC-1767 m.message behaviour - AnySyncMessageLikeEvent::Message(event) => self - .make_doc(event, |content| { - content.text.find_plain().ok_or(IndexError::EmptyMessage).map(|v| v.to_owned()) - }) - .map(RoomIndexOperation::Add), + AnySyncMessageLikeEvent::RoomRedaction(redaction_event) => { + if let SyncRoomRedactionEvent::Original(redaction_event) = redaction_event { + if let Some(redacted_event_id) = redaction_event.redacts(rules) { + Ok(RoomIndexOperation::Remove(redacted_event_id)) + } else { + // If not acting on anything, we can just ignore it. + trace!("Room redaction in indexing redacts nothing, ignoring."); + Ok(RoomIndexOperation::Noop) + } + } else { + // If redaction itself is redacted, we can ignore it. 
+ Ok(RoomIndexOperation::Noop) + } + } _ => Err(IndexError::MessageTypeNotSupported), } diff --git a/crates/matrix-sdk-search/src/writer.rs b/crates/matrix-sdk-search/src/writer.rs index 146f2d98a..e0a9b6ed4 100644 --- a/crates/matrix-sdk-search/src/writer.rs +++ b/crates/matrix-sdk-search/src/writer.rs @@ -12,25 +12,32 @@ // See the License for the specific language governing permissions and // limitations under the License. -use tantivy::{IndexWriter, TantivyDocument, TantivyError}; +use ruma::EventId; +use tantivy::{IndexWriter, TantivyDocument, TantivyError, Term}; -use crate::{OpStamp, error::IndexError}; +use crate::{ + OpStamp, + error::IndexError, + schema::{MatrixSearchIndexSchema, RoomMessageSchema}, +}; pub(crate) struct SearchIndexWriter { inner: IndexWriter, last_commit_opstamp: OpStamp, -} - -impl From for SearchIndexWriter { - fn from(writer: IndexWriter) -> Self { - SearchIndexWriter { last_commit_opstamp: writer.commit_opstamp(), inner: writer } - } + schema: RoomMessageSchema, } impl SearchIndexWriter { - pub(crate) fn add_document(&self, document: TantivyDocument) -> Result { - Ok(self.inner.add_document(document)?) // TODO: This is blocking. Handle - // it. + pub(crate) fn new(writer: IndexWriter, schema: RoomMessageSchema) -> Self { + Self { last_commit_opstamp: writer.commit_opstamp(), inner: writer, schema } + } + + pub(crate) fn add(&self, document: TantivyDocument) -> Result { + Ok(self.inner.add_document(document)?) // TODO: This is blocking. Handle it. + } + + pub(crate) fn remove(&self, event_id: &EventId) { + self.inner.delete_term(Term::from_field_text(self.schema.primary_key(), event_id.as_str())); } pub(crate) fn commit(&mut self) -> Result {