refactor(db): slightly improve insert performance (#10318)

This just removes an unnecessary foreign key constraint, since we
already do the garbage collection manually in the database service.
However, as part of getting here I tried a couple of other variants
along the way:

- Changing the order of the primary key from `(hash, blocklist_hash,
  idx)` to `(blocklist_hash, idx, hash)` so that inserts would be
  naturally ordered. However, this requires a new index on `blocks
  (hash)` so that we can still look up blocks by hash, and it turns out
  to be strictly worse than what we already have.
- Removing the primary key entirely, along with the `WITHOUT ROWID`, to
  make it a rowid table with no required insert order, plus the same
  index as above. This is faster while the table is small, but becomes
  slower once it grows large (presumably due to maintaining the two
  indexes). Both variants are sketched below.
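For illustration, the two discarded variants would look roughly like
this (a hypothetical sketch as SQL executed from Go; the index name and
exact DDL are mine, not the actual migration files):

```go
// Hypothetical sketches of the two discarded schema variants; the real
// schema file is shown in the diff below, these are illustration only.
const (
	// Variant 1: primary key reordered so that inserts for one
	// blocklist are naturally ordered, plus a secondary index so that
	// lookups by block hash still work.
	reorderedPKSchema = `
CREATE TABLE IF NOT EXISTS blocks (
    hash BLOB NOT NULL,
    blocklist_hash BLOB NOT NULL,
    idx INTEGER NOT NULL,
    offset INTEGER NOT NULL,
    size INTEGER NOT NULL,
    PRIMARY KEY(blocklist_hash, idx, hash)
) STRICT, WITHOUT ROWID;
CREATE INDEX IF NOT EXISTS blocks_hash ON blocks (hash);
`

	// Variant 2: plain rowid table with no enforced order at all,
	// relying on the same secondary index for hash lookups. Inserts
	// then maintain both the table and the index, which is presumably
	// what makes it slow once the table grows.
	rowidSchema = `
CREATE TABLE IF NOT EXISTS blocks (
    hash BLOB NOT NULL,
    blocklist_hash BLOB NOT NULL,
    idx INTEGER NOT NULL,
    offset INTEGER NOT NULL,
    size INTEGER NOT NULL
) STRICT;
CREATE INDEX IF NOT EXISTS blocks_hash ON blocks (hash);
`
)
```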

These are the benchmark results for current `main`, the second
alternative above ("Index(hash)" in the chart), and this proposal,
which retains the combined primary key ("combined"). Overall it ends up
being about 65% faster.

<img width="764" height="452" alt="Screenshot 2025-08-29 at 14 36 28"
src="https://github.com/user-attachments/assets/bff3f9d1-916a-485f-91b7-b54b477f5aac"
/>

Ref #10264
Author: Jakob Borg
Date: 2025-08-29 15:26:23 +02:00
Committed by: GitHub
Parent: b59443f136
Commit: c918299eab
3 changed files with 25 additions and 19 deletions


@@ -7,7 +7,6 @@
 package sqlite

 import (
-	"context"
 	"fmt"
 	"testing"
 	"time"
@@ -30,19 +29,14 @@ func BenchmarkUpdate(b *testing.B) {
 			b.Fatal(err)
 		}
 	})
-	svc := db.Service(time.Hour).(*Service)
 	fs := make([]protocol.FileInfo, 100)
 	seed := 0
-	size := 1000
+	const numBlocks = 1000
+	size := 10000
 	for size < 200_000 {
-		t0 := time.Now()
-		if err := svc.periodic(context.Background()); err != nil {
-			b.Fatal(err)
-		}
-		b.Log("garbage collect in", time.Since(t0))
 		for {
 			local, err := db.CountLocal(folderID, protocol.LocalDeviceID)
 			if err != nil {
@@ -53,7 +47,7 @@ func BenchmarkUpdate(b *testing.B) {
 			}
 			fs := make([]protocol.FileInfo, 1000)
 			for i := range fs {
-				fs[i] = genFile(rand.String(24), 64, 0)
+				fs[i] = genFile(rand.String(24), numBlocks, 0)
 			}
 			if err := db.Update(folderID, protocol.LocalDeviceID, fs); err != nil {
 				b.Fatal(err)
@@ -63,7 +57,7 @@ func BenchmarkUpdate(b *testing.B) {
 		b.Run(fmt.Sprintf("n=Insert100Loc/size=%d", size), func(b *testing.B) {
 			for range b.N {
 				for i := range fs {
-					fs[i] = genFile(rand.String(24), 64, 0)
+					fs[i] = genFile(rand.String(24), numBlocks, 0)
 				}
 				if err := db.Update(folderID, protocol.LocalDeviceID, fs); err != nil {
 					b.Fatal(err)
@@ -146,6 +140,20 @@ func BenchmarkUpdate(b *testing.B) {
 			b.ReportMetric(float64(count)/b.Elapsed().Seconds(), "files/s")
 		})
+		b.Run(fmt.Sprintf("n=AllLocalBlocksWithHash/size=%d", size), func(b *testing.B) {
+			count := 0
+			for range b.N {
+				it, errFn := db.AllLocalBlocksWithHash(folderID, globalFi.Blocks[0].Hash)
+				for range it {
+					count++
+				}
+				if err := errFn(); err != nil {
+					b.Fatal(err)
+				}
+			}
+			b.ReportMetric(float64(count)/b.Elapsed().Seconds(), "blocks/s")
+		})
 		b.Run(fmt.Sprintf("n=GetDeviceSequenceLoc/size=%d", size), func(b *testing.B) {
 			for range b.N {
 				_, err := db.GetDeviceSequence(folderID, protocol.LocalDeviceID)
@@ -193,7 +201,7 @@ func BenchmarkUpdate(b *testing.B) {
 			b.ReportMetric(float64(count)/b.Elapsed().Seconds(), "files/s")
 		})
-		size <<= 1
+		size += 1000
 	}
 }
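As an aside, the `it, errFn := db.AllLocalBlocksWithHash(...)` shape in
the new benchmark is a range-over-func iterator paired with an error
function checked after iteration finishes. A toy illustration of that
pattern (the `numbers` function is a hypothetical stand-in, not the
real API):

```go
package main

import (
	"fmt"
	"iter"
)

// numbers returns a sequence plus an error function, mimicking the
// iterator shape used by AllLocalBlocksWithHash in the benchmark above.
// A real implementation would record any error hit while scanning rows
// and return it from the second function.
func numbers(n int) (iter.Seq[int], func() error) {
	var err error // always nil in this toy version
	seq := func(yield func(int) bool) {
		for i := range n {
			if !yield(i) {
				return
			}
		}
	}
	return seq, func() error { return err }
}

func main() {
	it, errFn := numbers(3)
	count := 0
	for range it { // consume the sequence, counting elements
		count++
	}
	if err := errFn(); err != nil { // deferred error check after the loop
		panic(err)
	}
	fmt.Println(count) // prints 3
}
```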


@@ -114,11 +114,9 @@ func (s *folderDB) Update(device protocol.DeviceID, fs []protocol.FileInfo) erro
 		if err != nil {
 			return wrap(err, "marshal blocklist")
 		}
-		if _, err := insertBlockListStmt.Exec(f.BlocksHash, bs); err != nil {
+		if res, err := insertBlockListStmt.Exec(f.BlocksHash, bs); err != nil {
 			return wrap(err, "insert blocklist")
-		}
-		if device == protocol.LocalDeviceID {
+		} else if aff, _ := res.RowsAffected(); aff != 0 && device == protocol.LocalDeviceID {
 			// Insert all blocks
 			if err := s.insertBlocksLocked(txp, f.BlocksHash, f.Blocks); err != nil {
 				return wrap(err, "insert blocks")
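The `RowsAffected` guard is the subtle part of this hunk. Assuming
`insertBlockListStmt` is a conflict-ignoring insert (e.g. `INSERT OR
IGNORE`; the statement text isn't shown here), zero affected rows means
the blocklist row already existed, and therefore its blocks rows are
already in place, so the expensive per-block inserts can be skipped
entirely. The same logic spelled out as a sketch, using the names from
the diff above:

```go
// Sketch only: res comes from a conflict-ignoring insert of the
// blocklist row (an assumption about the prepared statement).
res, err := insertBlockListStmt.Exec(f.BlocksHash, bs)
if err != nil {
	return wrap(err, "insert blocklist")
}
// aff == 0 means the blocklist already existed, so its blocks rows are
// already present and re-inserting them would be wasted work.
if aff, _ := res.RowsAffected(); aff != 0 && device == protocol.LocalDeviceID {
	if err := s.insertBlocksLocked(txp, f.BlocksHash, f.Blocks); err != nil {
		return wrap(err, "insert blocks")
	}
}
```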


@@ -21,14 +21,14 @@ CREATE TABLE IF NOT EXISTS blocklists (
 --
 -- For all local files we store the blocks individually for quick lookup. A
 -- given block can exist in multiple blocklists and at multiple offsets in a
--- blocklist.
+-- blocklist. We eschew most indexes here as inserting millions of blocks is
+-- common and performance is critical.
 CREATE TABLE IF NOT EXISTS blocks (
     hash BLOB NOT NULL,
     blocklist_hash BLOB NOT NULL,
     idx INTEGER NOT NULL,
     offset INTEGER NOT NULL,
     size INTEGER NOT NULL,
-    PRIMARY KEY (hash, blocklist_hash, idx),
-    FOREIGN KEY(blocklist_hash) REFERENCES blocklists(blocklist_hash) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED
+    PRIMARY KEY(hash, blocklist_hash, idx)
 ) STRICT, WITHOUT ROWID
 ;
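For context on why dropping the foreign key is safe: the `ON DELETE
CASCADE` only ensured that blocks rows disappeared together with their
blocklist, and the database service's periodic maintenance already
performs that cleanup itself, while every insert paid for the
constraint check. A minimal sketch of such manual garbage collection
(hypothetical function and query, not the actual periodic job):

```go
package sqlite

import (
	"context"
	"database/sql"
)

// gcOrphanedBlocks is a hypothetical sketch of the manual garbage
// collection that makes the ON DELETE CASCADE foreign key redundant:
// drop blocks rows whose owning blocklist row no longer exists.
func gcOrphanedBlocks(ctx context.Context, db *sql.DB) error {
	_, err := db.ExecContext(ctx, `
		DELETE FROM blocks
		WHERE blocklist_hash NOT IN (SELECT blocklist_hash FROM blocklists)
	`)
	return err
}
```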