From 9ee208b441decde2383936ee3f5593e761d35395 Mon Sep 17 00:00:00 2001 From: Jakob Borg Date: Fri, 12 Sep 2025 09:27:41 +0000 Subject: [PATCH] chore(sqlite): use normalised tables for file names and versions (#10383) This changes the files table to use normalisation for the names and versions. The idea is that these are often common between all remote devices, and repeating an integer is more efficient than repeating a long string. A new benchmark bears this out; for a database with 100k files shared between 31 devices, with some worst case assumption on version vector size, the database is reduced in size by 50% and the test finishes quicker: Current: db_bench_test.go:322: Total size: 6263.70 MiB --- PASS: TestBenchmarkSizeManyFilesRemotes (1084.89s) New: db_bench_test.go:326: Total size: 3049.95 MiB --- PASS: TestBenchmarkSizeManyFilesRemotes (776.97s) The other benchmarks end up about the same within the margin of variability, with one possible exception being that RemoteNeed seems to be a little slower on average: old files/s new files/s Update/n=RemoteNeed/size=1000-8 5.051k 4.654k Update/n=RemoteNeed/size=2000-8 5.201k 4.384k Update/n=RemoteNeed/size=4000-8 4.943k 4.242k Update/n=RemoteNeed/size=8000-8 5.099k 3.527k Update/n=RemoteNeed/size=16000-8 3.686k 3.847k Update/n=RemoteNeed/size=30000-8 4.456k 3.482k I'm not sure why, possibly that query can be optimised anyhow. 
Signed-off-by: Jakob Borg --- cmd/syncthing/perfstats_unix.go | 21 +------ internal/db/sqlite/basedb.go | 52 +++++++++++++++- internal/db/sqlite/db_bench_test.go | 62 ++++++++++++++++++- internal/db/sqlite/db_service.go | 31 ++++++++++ internal/db/sqlite/folderdb_counts.go | 4 +- internal/db/sqlite/folderdb_global.go | 35 ++++++----- internal/db/sqlite/folderdb_local.go | 20 +++--- internal/db/sqlite/folderdb_open.go | 13 ++-- internal/db/sqlite/folderdb_update.go | 62 +++++++++++++++---- .../migrations/folder/05-normalize-files.sql | 53 ++++++++++++++++ .../db/sqlite/sql/schema/folder/20-files.sql | 32 +++++++--- lib/osutil/osutil.go | 19 ++++++ 12 files changed, 332 insertions(+), 72 deletions(-) create mode 100644 internal/db/sqlite/sql/migrations/folder/05-normalize-files.sql diff --git a/cmd/syncthing/perfstats_unix.go b/cmd/syncthing/perfstats_unix.go index 890320b94..b6230dc4e 100644 --- a/cmd/syncthing/perfstats_unix.go +++ b/cmd/syncthing/perfstats_unix.go @@ -18,6 +18,7 @@ import ( "github.com/syncthing/syncthing/lib/build" "github.com/syncthing/syncthing/lib/locations" + "github.com/syncthing/syncthing/lib/osutil" "github.com/syncthing/syncthing/lib/protocol" "golang.org/x/exp/constraints" ) @@ -61,7 +62,7 @@ func savePerfStats(file string) { rss, rate(prevIn, in, timeDiff, 1e3), rate(prevOut, out, timeDiff, 1e3), - dirsize(locations.Get(locations.Database))/1024, + osutil.DirSize(locations.Get(locations.Database))/1024, ) prevTime = t @@ -84,21 +85,3 @@ func rate[T number](prev, cur T, d time.Duration, div float64) float64 { rate := float64(diff) / d.Seconds() / div return rate } - -func dirsize(location string) int64 { - entries, err := os.ReadDir(location) - if err != nil { - return 0 - } - - var size int64 - for _, entry := range entries { - fi, err := entry.Info() - if err != nil { - continue - } - size += fi.Size() - } - - return size -} diff --git a/internal/db/sqlite/basedb.go b/internal/db/sqlite/basedb.go index 90ccc767d..b6c32de8b 100644 --- 
a/internal/db/sqlite/basedb.go +++ b/internal/db/sqlite/basedb.go @@ -7,6 +7,7 @@ package sqlite import ( + "context" "database/sql" "embed" "io/fs" @@ -26,7 +27,7 @@ import ( ) const ( - currentSchemaVersion = 4 + currentSchemaVersion = 5 applicationIDMain = 0x53546d6e // "STmn", Syncthing main database applicationIDFolder = 0x53546664 // "STfd", Syncthing folder database ) @@ -87,7 +88,31 @@ func openBase(path string, maxConns int, pragmas, schemaScripts, migrationScript }, } - tx, err := db.sql.Beginx() + // Create a specific connection for the schema setup and migration to + // run in. We do this because we need to disable foreign keys for the + // duration, which is a thing that needs to happen outside of a + // transaction and affects the connection it's run on. So we need to a) + // make sure all our commands run on this specific connection (which the + // transaction accomplishes naturally) and b) make sure these pragmas + // don't leak to anyone else afterwards. + ctx := context.TODO() + conn, err := db.sql.Connx(ctx) + if err != nil { + return nil, wrap(err) + } + defer func() { + _, _ = conn.ExecContext(ctx, "PRAGMA foreign_keys = ON") + _, _ = conn.ExecContext(ctx, "PRAGMA legacy_alter_table = OFF") + conn.Close() + }() + if _, err := conn.ExecContext(ctx, "PRAGMA foreign_keys = OFF"); err != nil { + return nil, wrap(err) + } + if _, err := conn.ExecContext(ctx, "PRAGMA legacy_alter_table = ON"); err != nil { + return nil, wrap(err) + } + + tx, err := conn.BeginTxx(ctx, nil) if err != nil { return nil, wrap(err) } @@ -124,6 +149,22 @@ func openBase(path string, maxConns int, pragmas, schemaScripts, migrationScript return nil, wrap(err) } } + + // Run the initial schema scripts once more. This is generally a + // no-op. However, dropping a table removes associated triggers etc, + // and that's a thing we sometimes do in migrations. 
To avoid having + // to repeat the setup of associated triggers and indexes in the + // migration, we re-run the initial schema scripts. + for _, script := range schemaScripts { + if err := db.runScripts(tx, script); err != nil { + return nil, wrap(err) + } + } + + // Finally, ensure nothing we've done along the way has violated key integrity. + if _, err := conn.ExecContext(ctx, "PRAGMA foreign_key_check"); err != nil { + return nil, wrap(err) + } } // Set the current schema version, if not already set @@ -271,7 +312,12 @@ nextScript: // also statement-internal semicolons in the triggers. for _, stmt := range strings.Split(string(bs), "\n;") { if _, err := tx.Exec(s.expandTemplateVars(stmt)); err != nil { - return wrap(err, stmt) + if strings.Contains(stmt, "syncthing:ignore-failure") { + // We're ok with this failing. Just note it. + slog.Debug("Script failed, but with ignore-failure annotation", slog.String("script", scr), slogutil.Error(wrap(err, stmt))) + } else { + return wrap(err, stmt) + } } } } diff --git a/internal/db/sqlite/db_bench_test.go b/internal/db/sqlite/db_bench_test.go index 5c4f07b14..b35432e33 100644 --- a/internal/db/sqlite/db_bench_test.go +++ b/internal/db/sqlite/db_bench_test.go @@ -8,11 +8,13 @@ package sqlite import ( "fmt" + "os" "testing" "time" "github.com/syncthing/syncthing/internal/timeutil" "github.com/syncthing/syncthing/lib/config" + "github.com/syncthing/syncthing/lib/osutil" "github.com/syncthing/syncthing/lib/protocol" "github.com/syncthing/syncthing/lib/rand" ) @@ -223,7 +225,7 @@ func BenchmarkUpdate(b *testing.B) { } func TestBenchmarkDropAllRemote(t *testing.T) { - if testing.Short() { + if testing.Short() || os.Getenv("LONG_TEST") == "" { t.Skip("slow test") } @@ -266,3 +268,61 @@ func TestBenchmarkDropAllRemote(t *testing.T) { d := time.Since(t0) t.Log("drop all took", d) } + +func TestBenchmarkSizeManyFilesRemotes(t *testing.T) { + // Reports the database size for a setup with many files and many remote + // devices 
each announcing every file, with fairly long file names and
:= slog.With("folder", fdb.folderID, "fdb", fdb.baseName) + + res, err := fdb.stmt(` + DELETE FROM file_names + WHERE NOT EXISTS (SELECT 1 FROM files f WHERE f.name_idx = idx) + `).Exec() + if err != nil { + return wrap(err, "delete names") + } + if aff, err := res.RowsAffected(); err == nil { + l.DebugContext(ctx, "Removed old file names", "affected", aff) + } + + res, err = fdb.stmt(` + DELETE FROM file_versions + WHERE NOT EXISTS (SELECT 1 FROM files f WHERE f.version_idx = idx) + `).Exec() + if err != nil { + return wrap(err, "delete versions") + } + if aff, err := res.RowsAffected(); err == nil { + l.DebugContext(ctx, "Removed old file versions", "affected", aff) + } + + return nil +} + func garbageCollectOldDeletedLocked(ctx context.Context, fdb *folderDB) error { l := slog.With("folder", fdb.folderID, "fdb", fdb.baseName) if fdb.deleteRetention <= 0 { diff --git a/internal/db/sqlite/folderdb_counts.go b/internal/db/sqlite/folderdb_counts.go index d83da2793..e7394d835 100644 --- a/internal/db/sqlite/folderdb_counts.go +++ b/internal/db/sqlite/folderdb_counts.go @@ -84,7 +84,7 @@ func (s *folderDB) needSizeRemote(device protocol.DeviceID) (db.Counts, error) { WHERE g.local_flags & {{.FlagLocalGlobal}} != 0 AND NOT g.deleted AND g.local_flags & {{.LocalInvalidFlags}} = 0 AND NOT EXISTS ( SELECT 1 FROM FILES f INNER JOIN devices d ON d.idx = f.device_idx - WHERE f.name = g.name AND f.version = g.version AND d.device_id = ? + WHERE f.name_idx = g.name_idx AND f.version_idx = g.version_idx AND d.device_id = ? ) GROUP BY g.type, g.local_flags, g.deleted @@ -94,7 +94,7 @@ func (s *folderDB) needSizeRemote(device protocol.DeviceID) (db.Counts, error) { WHERE g.local_flags & {{.FlagLocalGlobal}} != 0 AND g.deleted AND g.local_flags & {{.LocalInvalidFlags}} = 0 AND EXISTS ( SELECT 1 FROM FILES f INNER JOIN devices d ON d.idx = f.device_idx - WHERE f.name = g.name AND d.device_id = ? 
AND NOT f.deleted AND f.local_flags & {{.LocalInvalidFlags}} = 0 + WHERE f.name_idx = g.name_idx AND d.device_id = ? AND NOT f.deleted AND f.local_flags & {{.LocalInvalidFlags}} = 0 ) GROUP BY g.type, g.local_flags, g.deleted `).Select(&res, device.String(), diff --git a/internal/db/sqlite/folderdb_global.go b/internal/db/sqlite/folderdb_global.go index 256c164bc..e3d90de30 100644 --- a/internal/db/sqlite/folderdb_global.go +++ b/internal/db/sqlite/folderdb_global.go @@ -27,7 +27,8 @@ func (s *folderDB) GetGlobalFile(file string) (protocol.FileInfo, bool, error) { SELECT fi.fiprotobuf, bl.blprotobuf FROM fileinfos fi INNER JOIN files f on fi.sequence = f.sequence LEFT JOIN blocklists bl ON bl.blocklist_hash = f.blocklist_hash - WHERE f.name = ? AND f.local_flags & {{.FlagLocalGlobal}} != 0 + INNER JOIN file_names n ON f.name_idx = n.idx + WHERE n.name = ? AND f.local_flags & {{.FlagLocalGlobal}} != 0 `).Get(&ind, file) if errors.Is(err, sql.ErrNoRows) { return protocol.FileInfo{}, false, nil @@ -49,8 +50,9 @@ func (s *folderDB) GetGlobalAvailability(file string) ([]protocol.DeviceID, erro err := s.stmt(` SELECT d.device_id FROM files f INNER JOIN devices d ON d.idx = f.device_idx - INNER JOIN files g ON g.version = f.version AND g.name = f.name - WHERE g.name = ? AND g.local_flags & {{.FlagLocalGlobal}} != 0 AND f.device_idx != {{.LocalDeviceIdx}} + INNER JOIN files g ON g.version_idx = f.version_idx AND g.name_idx = f.name_idx + INNER JOIN file_names n ON f.name_idx = n.idx + WHERE n.name = ? 
AND g.local_flags & {{.FlagLocalGlobal}} != 0 AND f.device_idx != {{.LocalDeviceIdx}} ORDER BY d.device_id `).Select(&devStrs, file) if errors.Is(err, sql.ErrNoRows) { @@ -74,9 +76,10 @@ func (s *folderDB) GetGlobalAvailability(file string) ([]protocol.DeviceID, erro func (s *folderDB) AllGlobalFiles() (iter.Seq[db.FileMetadata], func() error) { it, errFn := iterStructs[db.FileMetadata](s.stmt(` - SELECT f.sequence, f.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags FROM files f + SELECT f.sequence, n.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags FROM files f + INNER JOIN file_names n ON f.name_idx = n.idx WHERE f.local_flags & {{.FlagLocalGlobal}} != 0 - ORDER BY f.name + ORDER BY n.name `).Queryx()) return itererr.Map(it, errFn, func(m db.FileMetadata) (db.FileMetadata, error) { m.Name = osutil.NativeFilename(m.Name) @@ -93,9 +96,10 @@ func (s *folderDB) AllGlobalFilesPrefix(prefix string) (iter.Seq[db.FileMetadata end := prefixEnd(prefix) it, errFn := iterStructs[db.FileMetadata](s.stmt(` - SELECT f.sequence, f.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags FROM files f - WHERE f.name >= ? AND f.name < ? AND f.local_flags & {{.FlagLocalGlobal}} != 0 - ORDER BY f.name + SELECT f.sequence, n.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags FROM files f + INNER JOIN file_names n ON f.name_idx = n.idx + WHERE n.name >= ? AND n.name < ? 
AND f.local_flags & {{.FlagLocalGlobal}} != 0 + ORDER BY n.name `).Queryx(prefix, end)) return itererr.Map(it, errFn, func(m db.FileMetadata) (db.FileMetadata, error) { m.Name = osutil.NativeFilename(m.Name) @@ -109,7 +113,7 @@ func (s *folderDB) AllNeededGlobalFiles(device protocol.DeviceID, order config.P case config.PullOrderRandom: selectOpts = "ORDER BY RANDOM()" case config.PullOrderAlphabetic: - selectOpts = "ORDER BY g.name ASC" + selectOpts = "ORDER BY n.name ASC" case config.PullOrderSmallestFirst: selectOpts = "ORDER BY g.size ASC" case config.PullOrderLargestFirst: @@ -137,9 +141,10 @@ func (s *folderDB) AllNeededGlobalFiles(device protocol.DeviceID, order config.P func (s *folderDB) neededGlobalFilesLocal(selectOpts string) (iter.Seq[protocol.FileInfo], func() error) { // Select all the non-ignored files with the need bit set. it, errFn := iterStructs[indirectFI](s.stmt(` - SELECT fi.fiprotobuf, bl.blprotobuf, g.name, g.size, g.modified FROM fileinfos fi + SELECT fi.fiprotobuf, bl.blprotobuf, n.name, g.size, g.modified FROM fileinfos fi INNER JOIN files g on fi.sequence = g.sequence LEFT JOIN blocklists bl ON bl.blocklist_hash = g.blocklist_hash + INNER JOIN file_names n ON g.name_idx = n.idx WHERE g.local_flags & {{.FlagLocalIgnored}} = 0 AND g.local_flags & {{.FlagLocalNeeded}} != 0 ` + selectOpts).Queryx()) return itererr.Map(it, errFn, indirectFI.FileInfo) @@ -155,24 +160,26 @@ func (s *folderDB) neededGlobalFilesRemote(device protocol.DeviceID, selectOpts // non-deleted and valid remote file (of any version) it, errFn := iterStructs[indirectFI](s.stmt(` - SELECT fi.fiprotobuf, bl.blprotobuf, g.name, g.size, g.modified FROM fileinfos fi + SELECT fi.fiprotobuf, bl.blprotobuf, n.name, g.size, g.modified FROM fileinfos fi INNER JOIN files g on fi.sequence = g.sequence LEFT JOIN blocklists bl ON bl.blocklist_hash = g.blocklist_hash + INNER JOIN file_names n ON g.name_idx = n.idx WHERE g.local_flags & {{.FlagLocalGlobal}} != 0 AND NOT g.deleted AND 
g.local_flags & {{.LocalInvalidFlags}} = 0 AND NOT EXISTS ( SELECT 1 FROM FILES f INNER JOIN devices d ON d.idx = f.device_idx - WHERE f.name = g.name AND f.version = g.version AND d.device_id = ? + WHERE f.name_idx = g.name_idx AND f.version_idx = g.version_idx AND d.device_id = ? ) UNION ALL - SELECT fi.fiprotobuf, bl.blprotobuf, g.name, g.size, g.modified FROM fileinfos fi + SELECT fi.fiprotobuf, bl.blprotobuf, n.name, g.size, g.modified FROM fileinfos fi INNER JOIN files g on fi.sequence = g.sequence LEFT JOIN blocklists bl ON bl.blocklist_hash = g.blocklist_hash + INNER JOIN file_names n ON g.name_idx = n.idx WHERE g.local_flags & {{.FlagLocalGlobal}} != 0 AND g.deleted AND g.local_flags & {{.LocalInvalidFlags}} = 0 AND EXISTS ( SELECT 1 FROM FILES f INNER JOIN devices d ON d.idx = f.device_idx - WHERE f.name = g.name AND d.device_id = ? AND NOT f.deleted AND f.local_flags & {{.LocalInvalidFlags}} = 0 + WHERE f.name_idx = g.name_idx AND d.device_id = ? AND NOT f.deleted AND f.local_flags & {{.LocalInvalidFlags}} = 0 ) `+selectOpts).Queryx( device.String(), diff --git a/internal/db/sqlite/folderdb_local.go b/internal/db/sqlite/folderdb_local.go index 53319a79d..6c1286937 100644 --- a/internal/db/sqlite/folderdb_local.go +++ b/internal/db/sqlite/folderdb_local.go @@ -32,7 +32,8 @@ func (s *folderDB) GetDeviceFile(device protocol.DeviceID, file string) (protoco INNER JOIN files f on fi.sequence = f.sequence LEFT JOIN blocklists bl ON bl.blocklist_hash = f.blocklist_hash INNER JOIN devices d ON f.device_idx = d.idx - WHERE d.device_id = ? AND f.name = ? + INNER JOIN file_names n ON f.name_idx = n.idx + WHERE d.device_id = ? AND n.name = ? 
`).Get(&ind, device.String(), file) if errors.Is(err, sql.ErrNoRows) { return protocol.FileInfo{}, false, nil @@ -87,14 +88,16 @@ func (s *folderDB) AllLocalFilesWithPrefix(device protocol.DeviceID, prefix stri INNER JOIN files f on fi.sequence = f.sequence LEFT JOIN blocklists bl ON bl.blocklist_hash = f.blocklist_hash INNER JOIN devices d ON d.idx = f.device_idx - WHERE d.device_id = ? AND f.name >= ? AND f.name < ? + INNER JOIN file_names n ON f.name_idx = n.idx + WHERE d.device_id = ? AND n.name >= ? AND n.name < ? `, device.String(), prefix, end)) return itererr.Map(it, errFn, indirectFI.FileInfo) } func (s *folderDB) AllLocalFilesWithBlocksHash(h []byte) (iter.Seq[db.FileMetadata], func() error) { return iterStructs[db.FileMetadata](s.stmt(` - SELECT f.sequence, f.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags FROM files f + SELECT f.sequence, n.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags FROM files f + INNER JOIN file_names n ON f.name_idx = n.idx WHERE f.device_idx = {{.LocalDeviceIdx}} AND f.blocklist_hash = ? `).Queryx(h)) } @@ -104,7 +107,8 @@ func (s *folderDB) AllLocalBlocksWithHash(hash []byte) (iter.Seq[db.BlockMapEntr // & blocklists is deferred (garbage collected) while the files list is // not. This filters out blocks that are in fact deleted. return iterStructs[db.BlockMapEntry](s.stmt(` - SELECT f.blocklist_hash as blocklisthash, b.idx as blockindex, b.offset, b.size, f.name as filename FROM files f + SELECT f.blocklist_hash as blocklisthash, b.idx as blockindex, b.offset, b.size, n.name as filename FROM files f + INNER JOIN file_names n ON f.name_idx = n.idx LEFT JOIN blocks b ON f.blocklist_hash = b.blocklist_hash WHERE f.device_idx = {{.LocalDeviceIdx}} AND b.hash = ? 
`).Queryx(hash)) @@ -170,10 +174,12 @@ func (s *folderDB) DebugFilePattern(out io.Writer, name string) error { } name = "%" + name + "%" res := itererr.Zip(iterStructs[hashFileMetadata](s.stmt(` - SELECT f.sequence, f.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags, f.version, f.blocklist_hash as blocklisthash, d.device_id as deviceid FROM files f + SELECT f.sequence, n.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags, v.version, f.blocklist_hash as blocklisthash, d.device_id as deviceid FROM files f INNER JOIN devices d ON d.idx = f.device_idx - WHERE f.name LIKE ? - ORDER BY f.name, f.device_idx + INNER JOIN file_names n ON n.idx = f.name_idx + INNER JOIN file_versions v ON v.idx = f.version_idx + WHERE n.name LIKE ? + ORDER BY n.name, f.device_idx `).Queryx(name))) delMap := map[bool]string{ diff --git a/internal/db/sqlite/folderdb_open.go b/internal/db/sqlite/folderdb_open.go index 11c602fa4..2540508e5 100644 --- a/internal/db/sqlite/folderdb_open.go +++ b/internal/db/sqlite/folderdb_open.go @@ -95,16 +95,13 @@ func openFolderDBForMigration(folder, path string, deleteRetention time.Duration func (s *folderDB) deviceIdxLocked(deviceID protocol.DeviceID) (int64, error) { devStr := deviceID.String() - if _, err := s.stmt(` - INSERT OR IGNORE INTO devices(device_id) - VALUES (?) - `).Exec(devStr); err != nil { - return 0, wrap(err) - } var idx int64 if err := s.stmt(` - SELECT idx FROM devices - WHERE device_id = ? + INSERT INTO devices(device_id) + VALUES (?) 
+ ON CONFLICT(device_id) DO UPDATE + SET device_id = excluded.device_id + RETURNING idx `).Get(&idx, devStr); err != nil { return 0, wrap(err) } diff --git a/internal/db/sqlite/folderdb_update.go b/internal/db/sqlite/folderdb_update.go index 53ef44569..27ee48f01 100644 --- a/internal/db/sqlite/folderdb_update.go +++ b/internal/db/sqlite/folderdb_update.go @@ -46,9 +46,33 @@ func (s *folderDB) Update(device protocol.DeviceID, fs []protocol.FileInfo) erro defer tx.Rollback() //nolint:errcheck txp := &txPreparedStmts{Tx: tx} + //nolint:sqlclosecheck + insertNameStmt, err := txp.Preparex(` + INSERT INTO file_names(name) + VALUES (?) + ON CONFLICT(name) DO UPDATE + SET name = excluded.name + RETURNING idx + `) + if err != nil { + return wrap(err, "prepare insert name") + } + + //nolint:sqlclosecheck + insertVersionStmt, err := txp.Preparex(` + INSERT INTO file_versions (version) + VALUES (?) + ON CONFLICT(version) DO UPDATE + SET version = excluded.version + RETURNING idx + `) + if err != nil { + return wrap(err, "prepare insert version") + } + //nolint:sqlclosecheck insertFileStmt, err := txp.Preparex(` - INSERT OR REPLACE INTO files (device_idx, remote_sequence, name, type, modified, size, version, deleted, local_flags, blocklist_hash) + INSERT OR REPLACE INTO files (device_idx, remote_sequence, type, modified, size, deleted, local_flags, blocklist_hash, name_idx, version_idx) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
RETURNING sequence `) @@ -102,8 +126,19 @@ func (s *folderDB) Update(device protocol.DeviceID, fs []protocol.FileInfo) erro prevRemoteSeq = f.Sequence remoteSeq = &f.Sequence } + + var nameIdx int64 + if err := insertNameStmt.Get(&nameIdx, f.Name); err != nil { + return wrap(err, "insert name") + } + + var versionIdx int64 + if err := insertVersionStmt.Get(&versionIdx, f.Version.String()); err != nil { + return wrap(err, "insert version") + } + var localSeq int64 - if err := insertFileStmt.Get(&localSeq, deviceIdx, remoteSeq, f.Name, f.Type, f.ModTime().UnixNano(), f.Size, f.Version.String(), f.IsDeleted(), f.LocalFlags, blockshash); err != nil { + if err := insertFileStmt.Get(&localSeq, deviceIdx, remoteSeq, f.Type, f.ModTime().UnixNano(), f.Size, f.IsDeleted(), f.LocalFlags, blockshash, nameIdx, versionIdx); err != nil { return wrap(err, "insert file") } @@ -246,7 +281,9 @@ func (s *folderDB) DropFilesNamed(device protocol.DeviceID, names []string) erro query, args, err := sqlx.In(` DELETE FROM files - WHERE device_idx = ? AND name IN (?) + WHERE device_idx = ? AND name_idx IN ( + SELECT idx FROM file_names WHERE name IN (?) + ) `, deviceIdx, names) if err != nil { return wrap(err) @@ -299,12 +336,13 @@ func (s *folderDB) recalcGlobalForFolder(txp *txPreparedStmts) error { // recalculate. //nolint:sqlclosecheck namesStmt, err := txp.Preparex(` - SELECT f.name FROM files f + SELECT n.name FROM files f + INNER JOIN file_names n ON n.idx = f.name_idx WHERE NOT EXISTS ( SELECT 1 FROM files g - WHERE g.name = f.name AND g.local_flags & ? != 0 + WHERE g.name_idx = f.name_idx AND g.local_flags & ? 
!= 0 ) - GROUP BY name + GROUP BY n.name `) if err != nil { return wrap(err) @@ -329,11 +367,13 @@ func (s *folderDB) recalcGlobalForFolder(txp *txPreparedStmts) error { func (s *folderDB) recalcGlobalForFile(txp *txPreparedStmts, file string) error { //nolint:sqlclosecheck selStmt, err := txp.Preparex(` - SELECT name, device_idx, sequence, modified, version, deleted, local_flags FROM files - WHERE name = ? + SELECT n.name, f.device_idx, f.sequence, f.modified, v.version, f.deleted, f.local_flags FROM files f + INNER JOIN file_versions v ON v.idx = f.version_idx + INNER JOIN file_names n ON n.idx = f.name_idx + WHERE n.name = ? `) if err != nil { - return wrap(err) + return wrap(err, "prepare select") } es, err := itererr.Collect(iterStructs[fileRow](selStmt.Queryx(file))) if err != nil { @@ -389,10 +429,10 @@ func (s *folderDB) recalcGlobalForFile(txp *txPreparedStmts, file string) error //nolint:sqlclosecheck upStmt, err = txp.Preparex(` UPDATE files SET local_flags = local_flags & ? - WHERE name = ? AND sequence != ? AND local_flags & ? != 0 + WHERE name_idx = (SELECT idx FROM file_names WHERE name = ?) AND sequence != ? AND local_flags & ? != 0 `) if err != nil { - return wrap(err) + return wrap(err, "prepare update") } if _, err := upStmt.Exec(^(protocol.FlagLocalNeeded | protocol.FlagLocalGlobal), global.Name, global.Sequence, protocol.FlagLocalNeeded|protocol.FlagLocalGlobal); err != nil { return wrap(err) diff --git a/internal/db/sqlite/sql/migrations/folder/05-normalize-files.sql b/internal/db/sqlite/sql/migrations/folder/05-normalize-files.sql new file mode 100644 index 000000000..44b9094b8 --- /dev/null +++ b/internal/db/sqlite/sql/migrations/folder/05-normalize-files.sql @@ -0,0 +1,53 @@ +-- Copyright (C) 2025 The Syncthing Authors. +-- +-- This Source Code Form is subject to the terms of the Mozilla Public +-- License, v. 2.0. If a copy of the MPL was not distributed with this file, +-- You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +-- Grab all unique names into the names table + +INSERT INTO file_names (idx, name) SELECT DISTINCT null, name FROM files +; + +-- Grab all unique versions into the versions table + +INSERT INTO file_versions (idx, version) SELECT DISTINCT null, version FROM files +; + +-- Create the new files table + +DROP TABLE IF EXISTS files_v5 +; + +CREATE TABLE files_v5 ( + device_idx INTEGER NOT NULL, + sequence INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + remote_sequence INTEGER, + name_idx INTEGER NOT NULL, -- changed + type INTEGER NOT NULL, + modified INTEGER NOT NULL, + size INTEGER NOT NULL, + version_idx INTEGER NOT NULL, -- changed + deleted INTEGER NOT NULL, + local_flags INTEGER NOT NULL, + blocklist_hash BLOB, + FOREIGN KEY(device_idx) REFERENCES devices(idx) ON DELETE CASCADE, + FOREIGN KEY(name_idx) REFERENCES file_names(idx), -- added + FOREIGN KEY(version_idx) REFERENCES file_versions(idx) -- added +) STRICT +; + +-- Populate the new files table and move it in place + +INSERT INTO files_v5 + SELECT f.device_idx, f.sequence, f.remote_sequence, n.idx as name_idx, f.type, f.modified, f.size, v.idx as version_idx, f.deleted, f.local_flags, f.blocklist_hash + FROM files f + INNER JOIN file_names n ON n.name = f.name + INNER JOIN file_versions v ON v.version = f.version +; + +DROP TABLE files +; + +ALTER TABLE files_v5 RENAME TO files +; diff --git a/internal/db/sqlite/sql/schema/folder/20-files.sql b/internal/db/sqlite/sql/schema/folder/20-files.sql index 6ac042948..5f7f6dcf0 100644 --- a/internal/db/sqlite/sql/schema/folder/20-files.sql +++ b/internal/db/sqlite/sql/schema/folder/20-files.sql @@ -25,15 +25,27 @@ CREATE TABLE IF NOT EXISTS files ( device_idx INTEGER NOT NULL, -- actual device ID or LocalDeviceID sequence INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, -- our local database sequence, for each and every entry remote_sequence INTEGER, -- remote device's sequence number, null for local or synthetic entries - name TEXT NOT NULL COLLATE BINARY, + name_idx 
INTEGER NOT NULL, type INTEGER NOT NULL, -- protocol.FileInfoType modified INTEGER NOT NULL, -- Unix nanos size INTEGER NOT NULL, - version TEXT NOT NULL COLLATE BINARY, + version_idx INTEGER NOT NULL, deleted INTEGER NOT NULL, -- boolean local_flags INTEGER NOT NULL, blocklist_hash BLOB, -- null when there are no blocks - FOREIGN KEY(device_idx) REFERENCES devices(idx) ON DELETE CASCADE + FOREIGN KEY(device_idx) REFERENCES devices(idx) ON DELETE CASCADE, + FOREIGN KEY(name_idx) REFERENCES file_names(idx), + FOREIGN KEY(version_idx) REFERENCES file_versions(idx) +) STRICT +; +CREATE TABLE IF NOT EXISTS file_names ( + idx INTEGER NOT NULL PRIMARY KEY, + name TEXT NOT NULL UNIQUE COLLATE BINARY +) STRICT +; +CREATE TABLE IF NOT EXISTS file_versions ( + idx INTEGER NOT NULL PRIMARY KEY, + version TEXT NOT NULL UNIQUE COLLATE BINARY ) STRICT ; -- FileInfos store the actual protobuf object. We do this separately to keep @@ -49,11 +61,17 @@ CREATE UNIQUE INDEX IF NOT EXISTS files_remote_sequence ON files (device_idx, re WHERE remote_sequence IS NOT NULL ; -- There can be only one file per folder, device, and name -CREATE UNIQUE INDEX IF NOT EXISTS files_device_name ON files (device_idx, name) -; --- We want to be able to look up & iterate files based on just folder and name -CREATE INDEX IF NOT EXISTS files_name_only ON files (name) +CREATE UNIQUE INDEX IF NOT EXISTS files_device_name ON files (device_idx, name_idx) ; -- We want to be able to look up & iterate files based on blocks hash CREATE INDEX IF NOT EXISTS files_blocklist_hash_only ON files (blocklist_hash, device_idx) WHERE blocklist_hash IS NOT NULL ; +-- We need to look by name_idx or version_idx for garbage collection. +-- This will fail pre-migration for v4 schemas, which is fine. +-- syncthing:ignore-failure +CREATE INDEX IF NOT EXISTS files_name_idx_only ON files (name_idx) +; +-- This will fail pre-migration for v4 schemas, which is fine. 
+-- syncthing:ignore-failure +CREATE INDEX IF NOT EXISTS files_version_idx_only ON files (version_idx) +; diff --git a/lib/osutil/osutil.go b/lib/osutil/osutil.go index 047b5101d..c70afdf92 100644 --- a/lib/osutil/osutil.go +++ b/lib/osutil/osutil.go @@ -8,6 +8,7 @@ package osutil import ( + "os" "path/filepath" "strings" "sync" @@ -142,3 +143,21 @@ func IsDeleted(ffs fs.Filesystem, name string) bool { } return false } + +func DirSize(location string) int64 { + entries, err := os.ReadDir(location) + if err != nil { + return 0 + } + + var size int64 + for _, entry := range entries { + fi, err := entry.Info() + if err != nil { + continue + } + size += fi.Size() + } + + return size +}