feat(search): add deletion from index

This commit is contained in:
Shrey Patel
2025-08-19 17:05:01 +01:00
committed by Jorge Martin Espinosa
parent 1f2b4f87bc
commit b2dd5ce02d
3 changed files with 71 additions and 43 deletions

View File

@@ -34,6 +34,8 @@ use crate::{
/// An enum representing the operations that can be applied to a [`RoomIndex`].
pub(crate) enum RoomIndexOperation {
/// Add the given document to the index.
Add(TantivyDocument),
/// Remove the event with the given ID from the index.
Remove(OwnedEventId),
/// Do nothing; the index is left unchanged.
Noop,
}
/// A struct that holds all data pertaining to a particular room's
@@ -66,8 +68,8 @@ impl RoomIndex {
let query_parser = QueryParser::for_index(&index, schema.default_search_fields());
Ok(Self {
writer: SearchIndexWriter::new(writer, schema.clone()),
schema,
writer: writer.into(),
reader,
query_parser,
room_id: room_id.to_owned(),
@@ -128,16 +130,24 @@ impl RoomIndex {
///
/// This will add/remove/edit an event in the index based on the
/// event type.
pub fn handle_event(&mut self, event: AnySyncMessageLikeEvent) -> Result<(), IndexError> {
pub fn handle_event(
&mut self,
event: AnySyncMessageLikeEvent,
rules: &RedactionRules,
) -> Result<(), IndexError> {
let event_id = event.event_id().to_owned();
match self.schema.handle_event(event)? {
match self.schema.handle_event(event, rules)? {
RoomIndexOperation::Add(document) => {
if !self.contains(&event_id) {
self.writer.add_document(document)?;
self.writer.add(document)?;
}
}
}
RoomIndexOperation::Remove(event_id) => {
self.writer.remove(&event_id);
}
RoomIndexOperation::Noop => {}
};
Ok(())
}

View File

@@ -12,14 +12,21 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use ruma::events::{
AnySyncMessageLikeEvent, MessageLikeEventContent, RedactContent,
RedactedMessageLikeEventContent, SyncMessageLikeEvent, room::message::MessageType,
use ruma::{
events::{
AnySyncMessageLikeEvent, SyncMessageLikeEvent,
room::{
message::{MessageType, RoomMessageEventContent},
redaction::SyncRoomRedactionEvent,
},
},
room_version_rules::RedactionRules,
};
use tantivy::{
DateTime, TantivyDocument, doc,
schema::{DateOptions, DateTimePrecision, Field, INDEXED, STORED, STRING, Schema, TEXT},
};
use tracing::trace;
use crate::{
error::{IndexError, IndexSchemaError},
@@ -48,21 +55,19 @@ pub(crate) struct RoomMessageSchema {
}
impl RoomMessageSchema {
/// Given an [`AnySyncMessageLikeEvent`] and a function to convert the
/// content into a String to be indexed, return a [`TantivyDocument`] to
/// index.
fn make_doc<C: MessageLikeEventContent + RedactContent, F>(
/// Given a [`SyncMessageLikeEvent<RoomMessageEventContent>`], return a
/// [`TantivyDocument`] to index. Fails if the event is redacted or is not
/// a text message.
fn make_doc(
&self,
event: SyncMessageLikeEvent<C>,
get_body_from_content: F,
) -> Result<TantivyDocument, IndexError>
where
<C as RedactContent>::Redacted: RedactedMessageLikeEventContent,
F: FnOnce(&C) -> Result<String, IndexError>,
{
event: SyncMessageLikeEvent<RoomMessageEventContent>,
) -> Result<TantivyDocument, IndexError> {
let unredacted = event.as_original().ok_or(IndexError::CannotIndexRedactedMessage)?;
let body = get_body_from_content(&unredacted.content)?;
let body = match &unredacted.content.msgtype {
MessageType::Text(content) => Ok(content.body.clone()),
_ => Err(IndexError::MessageTypeNotSupported),
}?;
Ok(doc!(
self.event_id_field => unredacted.event_id.to_string(),
@@ -116,22 +121,28 @@ impl MatrixSearchIndexSchema for RoomMessageSchema {
fn handle_event(
&self,
event: AnySyncMessageLikeEvent,
rules: &RedactionRules,
) -> Result<RoomIndexOperation, IndexError> {
match event {
// m.room.message behaviour
AnySyncMessageLikeEvent::RoomMessage(event) => self
.make_doc(event, |content| match &content.msgtype {
MessageType::Text(content) => Ok(content.body.clone()),
_ => Err(IndexError::MessageTypeNotSupported),
})
.map(RoomIndexOperation::Add),
AnySyncMessageLikeEvent::RoomMessage(event) => {
self.make_doc(event).map(RoomIndexOperation::Add)
}
// new MSC-1767 m.message behaviour
AnySyncMessageLikeEvent::Message(event) => self
.make_doc(event, |content| {
content.text.find_plain().ok_or(IndexError::EmptyMessage).map(|v| v.to_owned())
})
.map(RoomIndexOperation::Add),
AnySyncMessageLikeEvent::RoomRedaction(redaction_event) => {
if let SyncRoomRedactionEvent::Original(redaction_event) = redaction_event {
if let Some(redacted_event_id) = redaction_event.redacts(rules) {
Ok(RoomIndexOperation::Remove(redacted_event_id))
} else {
// If not acting on anything, we can just ignore it.
trace!("Room redaction in indexing redacts nothing, ignoring.");
Ok(RoomIndexOperation::Noop)
}
} else {
// If redaction itself is redacted, we can ignore it.
Ok(RoomIndexOperation::Noop)
}
}
_ => Err(IndexError::MessageTypeNotSupported),
}

View File

@@ -12,25 +12,32 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use tantivy::{IndexWriter, TantivyDocument, TantivyError};
use ruma::EventId;
use tantivy::{IndexWriter, TantivyDocument, TantivyError, Term};
use crate::{OpStamp, error::IndexError};
use crate::{
OpStamp,
error::IndexError,
schema::{MatrixSearchIndexSchema, RoomMessageSchema},
};
pub(crate) struct SearchIndexWriter {
inner: IndexWriter,
last_commit_opstamp: OpStamp,
}
impl From<IndexWriter> for SearchIndexWriter {
fn from(writer: IndexWriter) -> Self {
SearchIndexWriter { last_commit_opstamp: writer.commit_opstamp(), inner: writer }
}
schema: RoomMessageSchema,
}
impl SearchIndexWriter {
pub(crate) fn add_document(&self, document: TantivyDocument) -> Result<OpStamp, IndexError> {
Ok(self.inner.add_document(document)?) // TODO: This is blocking. Handle
// it.
pub(crate) fn new(writer: IndexWriter, schema: RoomMessageSchema) -> Self {
Self { last_commit_opstamp: writer.commit_opstamp(), inner: writer, schema }
}
pub(crate) fn add(&self, document: TantivyDocument) -> Result<OpStamp, IndexError> {
Ok(self.inner.add_document(document)?) // TODO: This is blocking. Handle it.
}
pub(crate) fn remove(&self, event_id: &EventId) {
self.inner.delete_term(Term::from_field_text(self.schema.primary_key(), event_id.as_str()));
}
pub(crate) fn commit(&mut self) -> Result<OpStamp, TantivyError> {