mirror of
https://github.com/syncthing/syncthing.git
synced 2025-12-23 22:18:14 -05:00
refactor(db): slightly improve insert performance (#10318)
This just removes an unnecessary foreign key constraint, where we
already do the garbage collection manually in the database service.
However, as part of getting here I tried a couple of other variants
along the way:
- Changing the order of the primary key from `(hash, blocklist_hash,
idx)` to `(blocklist_hash, idx, hash)` so that inserts would be
naturally ordered. However this requires a new index `on blocks (hash)`
so that we can still look up blocks by hash, and turns out to be
strictly worse than what we already have.
- Removing the primary key entirely and the `WITHOUT ROWID` to make it a
rowid table without any required order, and an index as above. This is
faster when the table is small, but becomes slower when it's large (due
to dual indexes I guess).
These are the benchmark results from current `main`, the second
alternative below ("Index(hash)") and this proposal that retains the
combined primary key ("combined"). Overall it ends up being about 65%
faster.
<img width="764" height="452" alt="Screenshot 2025-08-29 at 14 36 28"
src="https://github.com/user-attachments/assets/bff3f9d1-916a-485f-91b7-b54b477f5aac"
/>
Ref #10264
This commit is contained in:
@@ -7,7 +7,6 @@
|
||||
package sqlite
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -30,19 +29,14 @@ func BenchmarkUpdate(b *testing.B) {
|
||||
b.Fatal(err)
|
||||
}
|
||||
})
|
||||
svc := db.Service(time.Hour).(*Service)
|
||||
|
||||
fs := make([]protocol.FileInfo, 100)
|
||||
|
||||
seed := 0
|
||||
size := 1000
|
||||
const numBlocks = 1000
|
||||
|
||||
size := 10000
|
||||
for size < 200_000 {
|
||||
t0 := time.Now()
|
||||
if err := svc.periodic(context.Background()); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
b.Log("garbage collect in", time.Since(t0))
|
||||
|
||||
for {
|
||||
local, err := db.CountLocal(folderID, protocol.LocalDeviceID)
|
||||
if err != nil {
|
||||
@@ -53,7 +47,7 @@ func BenchmarkUpdate(b *testing.B) {
|
||||
}
|
||||
fs := make([]protocol.FileInfo, 1000)
|
||||
for i := range fs {
|
||||
fs[i] = genFile(rand.String(24), 64, 0)
|
||||
fs[i] = genFile(rand.String(24), numBlocks, 0)
|
||||
}
|
||||
if err := db.Update(folderID, protocol.LocalDeviceID, fs); err != nil {
|
||||
b.Fatal(err)
|
||||
@@ -63,7 +57,7 @@ func BenchmarkUpdate(b *testing.B) {
|
||||
b.Run(fmt.Sprintf("n=Insert100Loc/size=%d", size), func(b *testing.B) {
|
||||
for range b.N {
|
||||
for i := range fs {
|
||||
fs[i] = genFile(rand.String(24), 64, 0)
|
||||
fs[i] = genFile(rand.String(24), numBlocks, 0)
|
||||
}
|
||||
if err := db.Update(folderID, protocol.LocalDeviceID, fs); err != nil {
|
||||
b.Fatal(err)
|
||||
@@ -146,6 +140,20 @@ func BenchmarkUpdate(b *testing.B) {
|
||||
b.ReportMetric(float64(count)/b.Elapsed().Seconds(), "files/s")
|
||||
})
|
||||
|
||||
b.Run(fmt.Sprintf("n=AllLocalBlocksWithHash/size=%d", size), func(b *testing.B) {
|
||||
count := 0
|
||||
for range b.N {
|
||||
it, errFn := db.AllLocalBlocksWithHash(folderID, globalFi.Blocks[0].Hash)
|
||||
for range it {
|
||||
count++
|
||||
}
|
||||
if err := errFn(); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
b.ReportMetric(float64(count)/b.Elapsed().Seconds(), "blocks/s")
|
||||
})
|
||||
|
||||
b.Run(fmt.Sprintf("n=GetDeviceSequenceLoc/size=%d", size), func(b *testing.B) {
|
||||
for range b.N {
|
||||
_, err := db.GetDeviceSequence(folderID, protocol.LocalDeviceID)
|
||||
@@ -193,7 +201,7 @@ func BenchmarkUpdate(b *testing.B) {
|
||||
b.ReportMetric(float64(count)/b.Elapsed().Seconds(), "files/s")
|
||||
})
|
||||
|
||||
size <<= 1
|
||||
size += 1000
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -114,11 +114,9 @@ func (s *folderDB) Update(device protocol.DeviceID, fs []protocol.FileInfo) erro
|
||||
if err != nil {
|
||||
return wrap(err, "marshal blocklist")
|
||||
}
|
||||
if _, err := insertBlockListStmt.Exec(f.BlocksHash, bs); err != nil {
|
||||
if res, err := insertBlockListStmt.Exec(f.BlocksHash, bs); err != nil {
|
||||
return wrap(err, "insert blocklist")
|
||||
}
|
||||
|
||||
if device == protocol.LocalDeviceID {
|
||||
} else if aff, _ := res.RowsAffected(); aff != 0 && device == protocol.LocalDeviceID {
|
||||
// Insert all blocks
|
||||
if err := s.insertBlocksLocked(txp, f.BlocksHash, f.Blocks); err != nil {
|
||||
return wrap(err, "insert blocks")
|
||||
|
||||
@@ -21,14 +21,14 @@ CREATE TABLE IF NOT EXISTS blocklists (
|
||||
--
|
||||
-- For all local files we store the blocks individually for quick lookup. A
|
||||
-- given block can exist in multiple blocklists and at multiple offsets in a
|
||||
-- blocklist.
|
||||
-- blocklist. We eschew most indexes here as inserting millions of blocks is
|
||||
-- common and performance is critical.
|
||||
CREATE TABLE IF NOT EXISTS blocks (
|
||||
hash BLOB NOT NULL,
|
||||
blocklist_hash BLOB NOT NULL,
|
||||
idx INTEGER NOT NULL,
|
||||
offset INTEGER NOT NULL,
|
||||
size INTEGER NOT NULL,
|
||||
PRIMARY KEY (hash, blocklist_hash, idx),
|
||||
FOREIGN KEY(blocklist_hash) REFERENCES blocklists(blocklist_hash) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED
|
||||
PRIMARY KEY(hash, blocklist_hash, idx)
|
||||
) STRICT, WITHOUT ROWID
|
||||
;
|
||||
|
||||
Reference in New Issue
Block a user