Add tombstoned attachment pruner (#1991)

This commit is contained in:
Leendert de Borst
2026-05-03 22:11:00 +02:00
committed by Leendert de Borst
parent 30e2d6dec3
commit 849b2ba8fc
8 changed files with 272 additions and 16 deletions

View File

@@ -432,11 +432,13 @@ export class AttachmentQueries {
VALUES (?, ?, ?, ?, ?, ?, ?)`;
/**
* Soft delete an attachment.
* Soft delete an attachment. Also zeroes the Blob bytes so storage is reclaimed
* immediately while the row remains as a tombstone for LWW sync.
*/
public static readonly SOFT_DELETE = `
UPDATE Attachments
SET IsDeleted = 1,
Blob = X'',
UpdatedAt = ?
WHERE Id = ?`;
}

View File

@@ -261,7 +261,7 @@ class VaultMutate(
* if any rows were pruned, persist the cleaned version and reload the
* in-memory database so subsequent reads see the pruned state.
*
* All errors are swallowed: pruning is best-effort and must never block
* All errors are swallowed: pruning is best-effort and must never block
* the surrounding upload.
*/
private fun pruneLocalVault() {

View File

@@ -433,7 +433,14 @@ class ItemRepository(database: VaultDatabase) : BaseRepository(database) {
// Soft delete related data
softDeleteByForeignKey("TotpCodes", "ItemId", itemId)
softDeleteByForeignKey("Attachments", "ItemId", itemId)
// Soft delete attachments AND zero their blob bytes — tombstone stays
// for sync but storage is reclaimed immediately.
executeUpdate(
"UPDATE Attachments SET IsDeleted = 1, Blob = X'', UpdatedAt = ? WHERE ItemId = ? AND IsDeleted = 0",
arrayOf(now, itemId),
)
softDeleteByForeignKey("Passkeys", "ItemId", itemId)
if (tableExists("ItemTags")) {

View File

@@ -192,7 +192,14 @@ public class ItemRepository: BaseRepository {
// Soft delete related data
try softDeleteByForeignKey(table: "TotpCodes", foreignKey: "ItemId", foreignKeyValue: itemId)
try softDeleteByForeignKey(table: "Attachments", foreignKey: "ItemId", foreignKeyValue: itemId)
// Soft delete attachments AND zero their blob bytes so storage is reclaimed
// immediately while the row remains as a tombstone for LWW sync.
try client.executeUpdate(
"UPDATE Attachments SET IsDeleted = 1, Blob = X'', UpdatedAt = ? WHERE ItemId = ? AND IsDeleted = 0",
params: [now, itemId]
)
try softDeleteByForeignKey(table: "Passkeys", foreignKey: "ItemId", foreignKeyValue: itemId)
if try tableExists("ItemTags") {

View File

@@ -322,7 +322,14 @@ export class ItemRepository extends BaseRepository {
// Soft delete related data
await this.softDeleteByForeignKey('TotpCodes', 'ItemId', itemId);
await this.softDeleteByForeignKey('Attachments', 'ItemId', itemId);
// Soft delete attachments AND zero their blob bytes — tombstone stays for
// sync but storage is reclaimed immediately. X'' is SQLite's empty-blob literal.
await this.client.executeUpdate(
`UPDATE Attachments SET IsDeleted = 1, Blob = X'', UpdatedAt = ? WHERE ItemId = ? AND IsDeleted = 0`,
[now, itemId]
);
await this.softDeleteByForeignKey('Passkeys', 'ItemId', itemId);
if (await this.tableExists('ItemTags')) {
await this.softDeleteByForeignKey('ItemTags', 'ItemId', itemId);
@@ -460,7 +467,10 @@ export class ItemRepository extends BaseRepository {
(totp) => [totp.Name, totp.SecretKey, now, totp.Id]
);
// 5. Handle Attachments (insert new, update existing, soft-delete removed)
// 5. Handle Attachments (insert new, update existing, soft-delete removed).
// Override the default soft-delete SQL so removed attachments also have
// their Blob bytes cleared. The tombstone row remains for LWW sync, but
// storage drops on next save.
await this.syncRelatedEntities(
'Attachments',
'ItemId',
@@ -470,7 +480,8 @@ export class ItemRepository extends BaseRepository {
(att) => [att.Id, att.Filename, att.Blob as Uint8Array, item.Id, now, now, 0],
`INSERT INTO Attachments (Id, Filename, Blob, ItemId, CreatedAt, UpdatedAt, IsDeleted) VALUES (?, ?, ?, ?, ?, ?, ?)`,
`UPDATE Attachments SET Filename = ?, Blob = ?, UpdatedAt = ? WHERE Id = ?`,
(att) => [att.Filename, att.Blob as Uint8Array, now, att.Id]
(att) => [att.Filename, att.Blob as Uint8Array, now, att.Id],
`UPDATE Attachments SET IsDeleted = 1, Blob = X'', UpdatedAt = ? WHERE Id = ?`
);
return 1;
@@ -729,18 +740,22 @@ export class ItemRepository extends BaseRepository {
toParams: (entity: T) => (string | number | null | Uint8Array)[],
insertQuery: string,
updateQuery?: string,
toUpdateParams?: (entity: T) => (string | number | null | Uint8Array)[]
toUpdateParams?: (entity: T) => (string | number | null | Uint8Array)[],
// Optional override for the soft-delete SQL. Useful for tables that carry
// blob payloads (e.g. Attachments) and want the bytes cleared in the same
// statement. Must accept params [updatedAt, id] in that order.
softDeleteSql?: string
): Promise<void> {
const now = this.now();
const currentIds = currentEntities.map(e => e.Id);
const deleteSql = softDeleteSql
?? `UPDATE ${tableName} SET IsDeleted = 1, UpdatedAt = ? WHERE Id = ?`;
// Delete entities that were removed
const toDelete = originalIds.filter(id => !currentIds.includes(id));
for (const id of toDelete) {
await this.client.executeUpdate(
`UPDATE ${tableName} SET IsDeleted = 1, UpdatedAt = ? WHERE Id = ?`,
[now, id]
);
await this.client.executeUpdate(deleteSql, [now, id]);
}
// Update existing entities when their data changed

View File

@@ -9,7 +9,7 @@
<thead class="text-xs text-gray-700 uppercase bg-gray-50 dark:bg-gray-700 dark:text-gray-400">
<tr>
<th scope="col" class="px-6 py-3">Filename</th>
<th scope="col" class="px-6 py-3">Created At</th>
<th scope="col" class="px-6 py-3">Size</th>
</tr>
</thead>
<tbody>
@@ -27,7 +27,7 @@
}
</td>
<td class="px-6 py-4">
@attachment.CreatedAt.ToLocalTime().ToString("g")
@FormatSize(attachment.Blob?.Length ?? 0)
</td>
</tr>
}
@@ -48,6 +48,24 @@
[Parameter]
public ICollection<Attachment> Attachments { get; set; } = new List<Attachment>();
/// <summary>
/// Renders a byte count as a short human-readable string using the current culture.
/// </summary>
/// <param name="bytes">Size in bytes.</param>
/// <returns>
/// The raw count suffixed with "B" below 1024 bytes, otherwise the value divided by
/// 1024 ("KB") or 1024 * 1024 ("MB"), formatted with one decimal place.
/// </returns>
private static string FormatSize(int bytes)
{
    const double BytesPerKib = 1024d;
    const double BytesPerMib = BytesPerKib * 1024d;
    var culture = System.Globalization.CultureInfo.CurrentCulture;

    // Guards are evaluated top to bottom, so the smallest matching unit wins.
    return bytes switch
    {
        _ when bytes < BytesPerKib => string.Create(culture, $"{bytes} B"),
        _ when bytes < BytesPerMib => string.Create(culture, $"{(bytes / BytesPerKib):F1} KB"),
        _ => string.Create(culture, $"{(bytes / BytesPerMib):F1} MB"),
    };
}
private async Task DownloadAttachment(Attachment attachment)
{
try

View File

@@ -580,6 +580,9 @@ public sealed class ItemService(HttpClient httpClient, DbService dbService, Conf
{
attachment.IsDeleted = true;
attachment.UpdatedAt = deleteDateTime;
// Reclaim attachment bytes immediately. Tombstone row stays for sync.
attachment.Blob = Array.Empty<byte>();
}
foreach (var totp in item.TotpCodes)
@@ -642,6 +645,9 @@ public sealed class ItemService(HttpClient httpClient, DbService dbService, Conf
{
attachment.IsDeleted = true;
attachment.UpdatedAt = deleteDateTime;
// Reclaim attachment bytes immediately. Tombstone row stays for sync.
attachment.Blob = Array.Empty<byte>();
}
foreach (var totp in item.TotpCodes)
@@ -1215,6 +1221,11 @@ public sealed class ItemService(HttpClient httpClient, DbService dbService, Conf
{
attachmentToRemove.IsDeleted = true;
attachmentToRemove.UpdatedAt = updateDateTime;
// Drop the blob bytes immediately. The tombstone row stays so the deletion
// syncs to other devices via LWW; an empty blob keeps the column non-null
// while reclaiming the storage on next save.
attachmentToRemove.Blob = Array.Empty<byte>();
}
// Process attachments from the new item (excluding deleted ones - they're handled above)

View File

@@ -69,6 +69,9 @@ pub struct PruneStats {
/// Number of orphan logos soft-deleted (no remaining active item references them)
#[serde(default)]
pub logos_pruned: u32,
/// Number of tombstoned attachments whose blob bytes were cleared.
#[serde(default)]
pub attachment_blobs_cleared: u32,
}
/// Output of the prune operation.
@@ -176,12 +179,13 @@ pub fn prune_vault(input: PruneInput) -> VaultResult<PruneOutput> {
}
}
// Mark related Attachments as deleted
// Mark related Attachments as deleted and drop their blob bytes. Leaves the
// column non-null while reclaiming the storage on the next save.
if let Some(attachments_table) = input.tables.iter().find(|t| t.name == "Attachments") {
let related_count = count_related_records(&attachments_table.records, "ItemId", item_id);
if related_count > 0 {
statements.push(SqlStatement {
sql: "UPDATE Attachments SET IsDeleted = 1, UpdatedAt = ? WHERE ItemId = ? AND IsDeleted = 0".to_string(),
sql: "UPDATE Attachments SET IsDeleted = 1, Blob = X'', UpdatedAt = ? WHERE ItemId = ? AND IsDeleted = 0".to_string(),
params: vec![
serde_json::json!(now_str),
serde_json::json!(item_id),
@@ -271,6 +275,38 @@ pub fn prune_vault(input: PruneInput) -> VaultResult<PruneOutput> {
}
}
// Pass 3 — sweep tombstoned attachments that still carry blob bytes.
// Older client versions could leave attachments with IsDeleted=1 but
// a non-empty Blob, which inflates the encrypted vault for no reason.
// This pass empties those blobs in place. The attachments tombstoned by
// Pass 1 in this same call are already cleared there, so this pass only
// catches historical leftovers.
if let Some(attachments_table) = input.tables.iter().find(|t| t.name == "Attachments") {
for attachment in &attachments_table.records {
let is_deleted = attachment.get("IsDeleted")
.map(|v| v.as_i64() == Some(1) || v.as_bool() == Some(true))
.unwrap_or(false);
if !is_deleted {
continue;
}
if !attachment_has_blob_bytes(attachment) {
continue;
}
if let Some(attachment_id) = attachment.get("Id").and_then(|v| v.as_str()) {
statements.push(SqlStatement {
sql: "UPDATE Attachments SET Blob = X'', UpdatedAt = ? WHERE Id = ?".to_string(),
params: vec![
serde_json::json!(now_str),
serde_json::json!(attachment_id),
],
});
stats.attachment_blobs_cleared += 1;
}
}
}
Ok(PruneOutput {
success: true,
statements,
@@ -278,6 +314,17 @@ pub fn prune_vault(input: PruneInput) -> VaultResult<PruneOutput> {
})
}
/// Reports whether the record's `Blob` field carries any payload.
///
/// A missing field, JSON `null`, an empty string and an empty array all count
/// as "no bytes"; any other JSON value is treated as carrying data.
fn attachment_has_blob_bytes(attachment: &Record) -> bool {
    attachment.get("Blob").map_or(false, |blob| match blob {
        serde_json::Value::Null => false,
        serde_json::Value::String(text) => !text.is_empty(),
        serde_json::Value::Array(items) => !items.is_empty(),
        _ => true,
    })
}
/// Prune vault using JSON strings.
/// Convenience function for FFI.
pub fn prune_vault_json(input_json: &str) -> VaultResult<String> {
@@ -348,6 +395,21 @@ mod tests {
record
}
/// Builds an attachment record for tests with the given id, owning item,
/// deletion flag and blob value. `UpdatedAt` is a fixed timestamp and
/// `IsDeleted` is stored as the integer 1 or 0.
fn make_attachment_record(
    id: &str,
    item_id: &str,
    is_deleted: bool,
    blob: serde_json::Value,
) -> Record {
    let deleted_flag = if is_deleted { 1 } else { 0 };
    let fields = vec![
        ("Id", serde_json::json!(id)),
        ("ItemId", serde_json::json!(item_id)),
        ("UpdatedAt", serde_json::json!("2024-01-01T00:00:00Z")),
        ("IsDeleted", serde_json::json!(deleted_flag)),
        ("Blob", blob),
    ];
    fields
        .into_iter()
        .map(|(column, value)| (column.to_string(), value))
        .collect()
}
fn make_item_with_logo(
id: &str,
logo_id: Option<&str>,
@@ -679,4 +741,138 @@ mod tests {
assert_eq!(output.stats.logos_pruned, 0);
assert_eq!(logo_update_count(&output), 0);
}
/// The Attachments UPDATE emitted while purging an item must also empty the
/// blob, and the sweeper pass must not count that same row again.
#[test]
fn test_trash_purge_clears_attachment_blobs() {
    const TIMESTAMP_FORMAT: &str = "%Y-%m-%dT%H:%M:%S%.3fZ";
    let current = Utc::now();
    let current_time = current.format(TIMESTAMP_FORMAT).to_string();
    // 60 days back, i.e. older than the 30-day retention used below.
    // NOTE(review): presumably this is the item's deletion timestamp — confirm against make_item_record.
    let sixty_days_ago = (current - Duration::days(60)).format(TIMESTAMP_FORMAT).to_string();

    let items = TableData {
        name: "Items".to_string(),
        records: vec![make_item_record("item-1", Some(&sixty_days_ago), false)],
    };
    // One attachment that is still active and still carries blob data.
    let attachments = TableData {
        name: "Attachments".to_string(),
        records: vec![make_attachment_record(
            "att-1",
            "item-1",
            false,
            serde_json::json!("aGVsbG8="),
        )],
    };

    let output = prune_vault(PruneInput {
        tables: vec![items, attachments],
        retention_days: 30,
        current_time,
    })
    .unwrap();

    assert_eq!(output.stats.items_pruned, 1);
    assert_eq!(output.stats.attachments_pruned, 1);

    // The trash-purge UPDATE for Attachments should also clear the blob.
    let purge_update = output
        .statements
        .iter()
        .find(|stmt| stmt.sql.starts_with("UPDATE Attachments"))
        .expect("expected an UPDATE Attachments statement");
    assert!(
        purge_update.sql.contains("Blob = X''"),
        "attachment trash purge must zero the blob: {}",
        purge_update.sql
    );

    // The sweeper counter must stay at zero for this row.
    assert_eq!(output.stats.attachment_blobs_cleared, 0);
}
/// An attachment that is already soft-deleted but still holds blob data must
/// get exactly one sweeper UPDATE targeting its id.
#[test]
fn test_sweeper_clears_blob_on_already_tombstoned_attachment() {
    let current_time = Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
    let tombstone = make_attachment_record(
        "att-old",
        "item-1",
        true,
        serde_json::json!("aGVsbG8="),
    );
    let tables = vec![
        TableData {
            name: "Items".to_string(),
            records: Vec::new(),
        },
        TableData {
            name: "Attachments".to_string(),
            records: vec![tombstone],
        },
    ];

    let output = prune_vault(PruneInput {
        tables,
        retention_days: 30,
        current_time,
    })
    .unwrap();

    assert_eq!(output.stats.attachment_blobs_cleared, 1);

    let sweeper_update = output
        .statements
        .iter()
        .find(|stmt| stmt.sql.starts_with("UPDATE Attachments SET Blob = X''"))
        .expect("expected the sweeper UPDATE");
    // Parameter order is [updated_at, attachment_id].
    assert_eq!(sweeper_update.params.len(), 2);
    assert_eq!(sweeper_update.params[1], serde_json::json!("att-old"));
}
/// Soft-deleted attachments whose blob is already empty (empty string, empty
/// array, or null) must produce no statements and no cleared-blob count.
#[test]
fn test_sweeper_skips_already_empty_blob() {
    let current_time = Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();

    // Each of these blob shapes counts as "already cleared" and must be skipped.
    let already_cleared = vec![
        make_attachment_record("att-empty-string", "item-1", true, serde_json::json!("")),
        make_attachment_record("att-empty-array", "item-1", true, serde_json::json!([])),
        make_attachment_record("att-null", "item-1", true, serde_json::Value::Null),
    ];
    let tables = vec![
        TableData {
            name: "Items".to_string(),
            records: Vec::new(),
        },
        TableData {
            name: "Attachments".to_string(),
            records: already_cleared,
        },
    ];

    let output = prune_vault(PruneInput {
        tables,
        retention_days: 30,
        current_time,
    })
    .unwrap();

    assert_eq!(output.stats.attachment_blobs_cleared, 0);
    assert!(output.statements.is_empty());
}
/// An attachment that is not soft-deleted must keep its blob: no statements
/// are emitted and the cleared-blob counter stays at zero.
#[test]
fn test_sweeper_skips_active_attachment_with_blob() {
    let current_time = Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
    let live_attachment = make_attachment_record(
        "att-active",
        "item-1",
        false,
        serde_json::json!("aGVsbG8="),
    );
    let tables = vec![
        TableData {
            name: "Items".to_string(),
            records: Vec::new(),
        },
        TableData {
            name: "Attachments".to_string(),
            records: vec![live_attachment],
        },
    ];

    let output = prune_vault(PruneInput {
        tables,
        retention_days: 30,
        current_time,
    })
    .unwrap();

    assert_eq!(output.stats.attachment_blobs_cleared, 0);
    assert!(output.statements.is_empty());
}
}