From c918299eab1a8fae2299bb64bb3cf471f2defb5f Mon Sep 17 00:00:00 2001 From: Jakob Borg Date: Fri, 29 Aug 2025 15:26:23 +0200 Subject: [PATCH] refactor(db): slightly improve insert performance (#10318) This just removes an unnecessary foreign key constraint, where we already do the garbage collection manually in the database service. However, as part of getting here I tried a couple of other variants along the way: - Changing the order of the primary key from `(hash, blocklist_hash, idx)` to `(blocklist_hash, idx, hash)` so that inserts would be naturally ordered. However this requires a new index `on blocks (hash)` so that we can still look up blocks by hash, and turns out to be strictly worse than what we already have. - Removing the primary key entirely and the `WITHOUT ROWID` to make it a rowid table without any required order, and an index as above. This is faster when the table is small, but becomes slower when it's large (due to dual indexes I guess). These are the benchmark results from current `main`, the second alternative below ("Index(hash)") and this proposal that retains the combined primary key ("combined"). Overall it ends up being about 65% faster. Screenshot 2025-08-29 at 14 36 28 Ref #10264 --- internal/db/sqlite/db_bench_test.go | 32 ++++++++++++------- internal/db/sqlite/folderdb_update.go | 6 ++-- .../db/sqlite/sql/schema/folder/50-blocks.sql | 6 ++-- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/internal/db/sqlite/db_bench_test.go b/internal/db/sqlite/db_bench_test.go index 7af664373..8478c0cba 100644 --- a/internal/db/sqlite/db_bench_test.go +++ b/internal/db/sqlite/db_bench_test.go @@ -7,7 +7,6 @@ package sqlite import ( - "context" "fmt" "testing" "time" @@ -30,19 +29,14 @@ func BenchmarkUpdate(b *testing.B) { b.Fatal(err) } }) - svc := db.Service(time.Hour).(*Service) fs := make([]protocol.FileInfo, 100) + seed := 0 + size := 1000 + const numBlocks = 1000 - size := 10000 for size < 200_000 { - t0 := time.Now() - if err := svc.periodic(context.Background()); err != nil { - b.Fatal(err) - } - b.Log("garbage collect in", time.Since(t0)) - for { local, err := db.CountLocal(folderID, protocol.LocalDeviceID) if err != nil { @@ -53,7 +47,7 @@ func BenchmarkUpdate(b *testing.B) { } fs := make([]protocol.FileInfo, 1000) for i := range fs { - fs[i] = genFile(rand.String(24), 64, 0) + fs[i] = genFile(rand.String(24), numBlocks, 0) } if err := db.Update(folderID, protocol.LocalDeviceID, fs); err != nil { b.Fatal(err) @@ -63,7 +57,7 @@ func BenchmarkUpdate(b *testing.B) { b.Run(fmt.Sprintf("n=Insert100Loc/size=%d", size), func(b *testing.B) { for range b.N { for i := range fs { - fs[i] = genFile(rand.String(24), 64, 0) + fs[i] = genFile(rand.String(24), numBlocks, 0) } if err := db.Update(folderID, protocol.LocalDeviceID, fs); err != nil { b.Fatal(err) @@ -146,6 +140,20 @@ func BenchmarkUpdate(b *testing.B) { b.ReportMetric(float64(count)/b.Elapsed().Seconds(), "files/s") }) + b.Run(fmt.Sprintf("n=AllLocalBlocksWithHash/size=%d", size), func(b *testing.B) { + count := 0 + for range b.N { + it, errFn := db.AllLocalBlocksWithHash(folderID, globalFi.Blocks[0].Hash) + for range it { + count++ + } + if err := errFn(); err != nil { + b.Fatal(err) + } + } + b.ReportMetric(float64(count)/b.Elapsed().Seconds(), "blocks/s") + }) + b.Run(fmt.Sprintf("n=GetDeviceSequenceLoc/size=%d", size), func(b *testing.B) { for range b.N { _, err := db.GetDeviceSequence(folderID, protocol.LocalDeviceID) @@ -193,7 +201,7 @@ func BenchmarkUpdate(b *testing.B) { b.ReportMetric(float64(count)/b.Elapsed().Seconds(), "files/s") }) - size <<= 1 + size += 1000 } } diff --git a/internal/db/sqlite/folderdb_update.go b/internal/db/sqlite/folderdb_update.go index 76f54d1d3..53ef44569 100644 --- a/internal/db/sqlite/folderdb_update.go +++ b/internal/db/sqlite/folderdb_update.go @@ -114,11 +114,9 @@ func (s *folderDB) Update(device protocol.DeviceID, fs []protocol.FileInfo) erro if err != nil { return wrap(err, "marshal blocklist") } - if _, err := insertBlockListStmt.Exec(f.BlocksHash, bs); err != nil { + if res, err := insertBlockListStmt.Exec(f.BlocksHash, bs); err != nil { return wrap(err, "insert blocklist") - } - - if device == protocol.LocalDeviceID { + } else if aff, _ := res.RowsAffected(); aff != 0 && device == protocol.LocalDeviceID { // Insert all blocks if err := s.insertBlocksLocked(txp, f.BlocksHash, f.Blocks); err != nil { return wrap(err, "insert blocks") diff --git a/internal/db/sqlite/sql/schema/folder/50-blocks.sql b/internal/db/sqlite/sql/schema/folder/50-blocks.sql index 3c2ca8bc1..18c3817fe 100644 --- a/internal/db/sqlite/sql/schema/folder/50-blocks.sql +++ b/internal/db/sqlite/sql/schema/folder/50-blocks.sql @@ -21,14 +21,14 @@ CREATE TABLE IF NOT EXISTS blocklists ( -- -- For all local files we store the blocks individually for quick lookup. A -- given block can exist in multiple blocklists and at multiple offsets in a --- blocklist. +-- blocklist. We eschew most indexes here as inserting millions of blocks is +-- common and performance is critical. CREATE TABLE IF NOT EXISTS blocks ( hash BLOB NOT NULL, blocklist_hash BLOB NOT NULL, idx INTEGER NOT NULL, offset INTEGER NOT NULL, size INTEGER NOT NULL, - PRIMARY KEY (hash, blocklist_hash, idx), - FOREIGN KEY(blocklist_hash) REFERENCES blocklists(blocklist_hash) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED + PRIMARY KEY(hash, blocklist_hash, idx) ) STRICT, WITHOUT ROWID ;