chore(sqlite): use normalised tables for file names and versions (#10383)

This changes the files table to use normalisation for the names and
versions. The idea is that these are often common between all remote
devices, and repeating an integer is more efficient than repeating a
long string. A new benchmark bears this out; for a database with 100k
files shared between 31 devices, with some worst-case assumptions on
version vector size, the database is reduced in size by 50% and the test
finishes quicker:

    Current:
        db_bench_test.go:322: Total size: 6263.70 MiB
    --- PASS: TestBenchmarkSizeManyFilesRemotes (1084.89s)

    New:
        db_bench_test.go:326: Total size: 3049.95 MiB
    --- PASS: TestBenchmarkSizeManyFilesRemotes (776.97s)

The other benchmarks end up about the same within the margin of
variability, with one possible exception being that RemoteNeed seems to
be a little slower on average:

                                          old files/s   new files/s
    Update/n=RemoteNeed/size=1000-8            5.051k        4.654k
    Update/n=RemoteNeed/size=2000-8            5.201k        4.384k
    Update/n=RemoteNeed/size=4000-8            4.943k        4.242k
    Update/n=RemoteNeed/size=8000-8            5.099k        3.527k
    Update/n=RemoteNeed/size=16000-8           3.686k        3.847k
    Update/n=RemoteNeed/size=30000-8           4.456k        3.482k

I'm not sure why; possibly that query can be optimised somehow.

Signed-off-by: Jakob Borg <jakob@kastelo.net>
This commit is contained in:
Jakob Borg
2025-09-12 09:27:41 +00:00
committed by GitHub
parent dd90e8ec7a
commit 9ee208b441
12 changed files with 332 additions and 72 deletions

View File

@@ -7,6 +7,7 @@
package sqlite
import (
"context"
"database/sql"
"embed"
"io/fs"
@@ -26,7 +27,7 @@ import (
)
const (
currentSchemaVersion = 4
currentSchemaVersion = 5
applicationIDMain = 0x53546d6e // "STmn", Syncthing main database
applicationIDFolder = 0x53546664 // "STfd", Syncthing folder database
)
@@ -87,7 +88,31 @@ func openBase(path string, maxConns int, pragmas, schemaScripts, migrationScript
},
}
tx, err := db.sql.Beginx()
// Create a specific connection for the schema setup and migration to
// run in. We do this because we need to disable foreign keys for the
// duration, which is a thing that needs to happen outside of a
// transaction and affects the connection it's run on. So we need to a)
// make sure all our commands run on this specific connection (which the
// transaction accomplishes naturally) and b) make sure these pragmas
// don't leak to anyone else afterwards.
ctx := context.TODO()
conn, err := db.sql.Connx(ctx)
if err != nil {
return nil, wrap(err)
}
defer func() {
_, _ = conn.ExecContext(ctx, "PRAGMA foreign_keys = ON")
_, _ = conn.ExecContext(ctx, "PRAGMA legacy_alter_table = OFF")
conn.Close()
}()
if _, err := conn.ExecContext(ctx, "PRAGMA foreign_keys = OFF"); err != nil {
return nil, wrap(err)
}
if _, err := conn.ExecContext(ctx, "PRAGMA legacy_alter_table = ON"); err != nil {
return nil, wrap(err)
}
tx, err := conn.BeginTxx(ctx, nil)
if err != nil {
return nil, wrap(err)
}
@@ -124,6 +149,22 @@ func openBase(path string, maxConns int, pragmas, schemaScripts, migrationScript
return nil, wrap(err)
}
}
// Run the initial schema scripts once more. This is generally a
// no-op. However, dropping a table removes associated triggers etc,
// and that's a thing we sometimes do in migrations. To avoid having
// to repeat the setup of associated triggers and indexes in the
// migration, we re-run the initial schema scripts.
for _, script := range schemaScripts {
if err := db.runScripts(tx, script); err != nil {
return nil, wrap(err)
}
}
// Finally, ensure nothing we've done along the way has violated key integrity.
if _, err := conn.ExecContext(ctx, "PRAGMA foreign_key_check"); err != nil {
return nil, wrap(err)
}
}
// Set the current schema version, if not already set
@@ -271,7 +312,12 @@ nextScript:
// also statement-internal semicolons in the triggers.
for _, stmt := range strings.Split(string(bs), "\n;") {
if _, err := tx.Exec(s.expandTemplateVars(stmt)); err != nil {
return wrap(err, stmt)
if strings.Contains(stmt, "syncthing:ignore-failure") {
// We're ok with this failing. Just note it.
slog.Debug("Script failed, but with ignore-failure annotation", slog.String("script", scr), slogutil.Error(wrap(err, stmt)))
} else {
return wrap(err, stmt)
}
}
}
}

View File

@@ -8,11 +8,13 @@ package sqlite
import (
"fmt"
"os"
"testing"
"time"
"github.com/syncthing/syncthing/internal/timeutil"
"github.com/syncthing/syncthing/lib/config"
"github.com/syncthing/syncthing/lib/osutil"
"github.com/syncthing/syncthing/lib/protocol"
"github.com/syncthing/syncthing/lib/rand"
)
@@ -223,7 +225,7 @@ func BenchmarkUpdate(b *testing.B) {
}
func TestBenchmarkDropAllRemote(t *testing.T) {
if testing.Short() {
if testing.Short() || os.Getenv("LONG_TEST") == "" {
t.Skip("slow test")
}
@@ -266,3 +268,61 @@ func TestBenchmarkDropAllRemote(t *testing.T) {
d := time.Since(t0)
t.Log("drop all took", d)
}
func TestBenchmarkSizeManyFilesRemotes(t *testing.T) {
	// Reports the database size for a setup with many files and many remote
	// devices, each announcing every file, with fairly long file names and
	// "worst case" version vectors.
	//
	// This is a manual benchmark rather than a real test: it only runs when
	// LONG_TEST is set and -short is not in effect, and its output is the
	// logged database size, not a pass/fail assertion.
	if testing.Short() || os.Getenv("LONG_TEST") == "" {
		t.Skip("slow test")
	}
	dir := t.TempDir()
	db, err := Open(dir)
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(func() {
		// NOTE(review): the database is also closed explicitly at the end of
		// the test before measuring; this assumes Close is safe to call
		// twice -- verify.
		if err := db.Close(); err != nil {
			t.Fatal(err)
		}
	})
	// This is equivalent to about 800 GiB in 100k files (i.e., 8 MiB per
	// file), shared between 31 devices where each have touched every file.
	const numFiles = 1e5   // total number of distinct files to insert
	const numRemotes = 30  // remote devices, each announcing every file
	const numBlocks = 64   // blocks per generated file
	const filenameLen = 64 // length of the random file names
	// Insert in batches of 1000 files, reusing the same slice per batch.
	fs := make([]protocol.FileInfo, 1000)
	n := 0   // files inserted so far
	seq := 0 // monotonically increasing sequence across all generated files
	for n < numFiles {
		for i := range fs {
			seq++
			fs[i] = genFile(rand.String(filenameLen), numBlocks, seq)
			// Build a "worst case" version vector with one counter entry
			// per remote device (short IDs starting at 42).
			for r := range numRemotes {
				fs[i].Version = fs[i].Version.Update(42 + protocol.ShortID(r))
			}
		}
		// Announce the batch once for the local device...
		if err := db.Update(folderID, protocol.LocalDeviceID, fs); err != nil {
			t.Fatal(err)
		}
		// ...and once more per remote device, using synthetic device IDs
		// whose first byte matches the version vector entries above.
		for r := range numRemotes {
			if err := db.Update(folderID, protocol.DeviceID{byte(42 + r)}, fs); err != nil {
				t.Fatal(err)
			}
		}
		n += len(fs)
		// Progress: files so far, and total file records across all devices.
		t.Log(n, (numRemotes+1)*n)
	}
	// Close before measuring so the on-disk size is final.
	if err := db.Close(); err != nil {
		t.Fatal(err)
	}
	size := osutil.DirSize(dir)
	t.Logf("Total size: %.02f MiB", float64(size)/1024/1024)
}

View File

@@ -125,6 +125,9 @@ func (s *Service) periodic(ctx context.Context) error {
if err := garbageCollectOldDeletedLocked(ctx, fdb); err != nil {
return wrap(err)
}
if err := garbageCollectNamesAndVersions(ctx, fdb); err != nil {
return wrap(err)
}
if err := garbageCollectBlocklistsAndBlocksLocked(ctx, fdb); err != nil {
return wrap(err)
}
@@ -152,6 +155,34 @@ func tidy(ctx context.Context, db *sqlx.DB) error {
return nil
}
// garbageCollectNamesAndVersions removes rows from the file_names and
// file_versions lookup tables that are no longer referenced by any row in
// the files table. The files table refers to these tables by integer index
// (name_idx, version_idx), so deleting file rows leaves orphaned lookup
// entries behind; this reclaims them during periodic maintenance.
func garbageCollectNamesAndVersions(ctx context.Context, fdb *folderDB) error {
	l := slog.With("folder", fdb.folderID, "fdb", fdb.baseName)
	// Delete names that no files row references any more.
	res, err := fdb.stmt(`
		DELETE FROM file_names
		WHERE NOT EXISTS (SELECT 1 FROM files f WHERE f.name_idx = idx)
	`).Exec()
	if err != nil {
		return wrap(err, "delete names")
	}
	// A RowsAffected error is deliberately ignored; it only gates debug logging.
	if aff, err := res.RowsAffected(); err == nil {
		l.DebugContext(ctx, "Removed old file names", "affected", aff)
	}
	// Same treatment for unreferenced version vectors.
	res, err = fdb.stmt(`
		DELETE FROM file_versions
		WHERE NOT EXISTS (SELECT 1 FROM files f WHERE f.version_idx = idx)
	`).Exec()
	if err != nil {
		return wrap(err, "delete versions")
	}
	if aff, err := res.RowsAffected(); err == nil {
		l.DebugContext(ctx, "Removed old file versions", "affected", aff)
	}
	return nil
}
func garbageCollectOldDeletedLocked(ctx context.Context, fdb *folderDB) error {
l := slog.With("folder", fdb.folderID, "fdb", fdb.baseName)
if fdb.deleteRetention <= 0 {

View File

@@ -84,7 +84,7 @@ func (s *folderDB) needSizeRemote(device protocol.DeviceID) (db.Counts, error) {
WHERE g.local_flags & {{.FlagLocalGlobal}} != 0 AND NOT g.deleted AND g.local_flags & {{.LocalInvalidFlags}} = 0 AND NOT EXISTS (
SELECT 1 FROM FILES f
INNER JOIN devices d ON d.idx = f.device_idx
WHERE f.name = g.name AND f.version = g.version AND d.device_id = ?
WHERE f.name_idx = g.name_idx AND f.version_idx = g.version_idx AND d.device_id = ?
)
GROUP BY g.type, g.local_flags, g.deleted
@@ -94,7 +94,7 @@ func (s *folderDB) needSizeRemote(device protocol.DeviceID) (db.Counts, error) {
WHERE g.local_flags & {{.FlagLocalGlobal}} != 0 AND g.deleted AND g.local_flags & {{.LocalInvalidFlags}} = 0 AND EXISTS (
SELECT 1 FROM FILES f
INNER JOIN devices d ON d.idx = f.device_idx
WHERE f.name = g.name AND d.device_id = ? AND NOT f.deleted AND f.local_flags & {{.LocalInvalidFlags}} = 0
WHERE f.name_idx = g.name_idx AND d.device_id = ? AND NOT f.deleted AND f.local_flags & {{.LocalInvalidFlags}} = 0
)
GROUP BY g.type, g.local_flags, g.deleted
`).Select(&res, device.String(),

View File

@@ -27,7 +27,8 @@ func (s *folderDB) GetGlobalFile(file string) (protocol.FileInfo, bool, error) {
SELECT fi.fiprotobuf, bl.blprotobuf FROM fileinfos fi
INNER JOIN files f on fi.sequence = f.sequence
LEFT JOIN blocklists bl ON bl.blocklist_hash = f.blocklist_hash
WHERE f.name = ? AND f.local_flags & {{.FlagLocalGlobal}} != 0
INNER JOIN file_names n ON f.name_idx = n.idx
WHERE n.name = ? AND f.local_flags & {{.FlagLocalGlobal}} != 0
`).Get(&ind, file)
if errors.Is(err, sql.ErrNoRows) {
return protocol.FileInfo{}, false, nil
@@ -49,8 +50,9 @@ func (s *folderDB) GetGlobalAvailability(file string) ([]protocol.DeviceID, erro
err := s.stmt(`
SELECT d.device_id FROM files f
INNER JOIN devices d ON d.idx = f.device_idx
INNER JOIN files g ON g.version = f.version AND g.name = f.name
WHERE g.name = ? AND g.local_flags & {{.FlagLocalGlobal}} != 0 AND f.device_idx != {{.LocalDeviceIdx}}
INNER JOIN files g ON g.version_idx = f.version_idx AND g.name_idx = f.name_idx
INNER JOIN file_names n ON f.name_idx = n.idx
WHERE n.name = ? AND g.local_flags & {{.FlagLocalGlobal}} != 0 AND f.device_idx != {{.LocalDeviceIdx}}
ORDER BY d.device_id
`).Select(&devStrs, file)
if errors.Is(err, sql.ErrNoRows) {
@@ -74,9 +76,10 @@ func (s *folderDB) GetGlobalAvailability(file string) ([]protocol.DeviceID, erro
func (s *folderDB) AllGlobalFiles() (iter.Seq[db.FileMetadata], func() error) {
it, errFn := iterStructs[db.FileMetadata](s.stmt(`
SELECT f.sequence, f.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags FROM files f
SELECT f.sequence, n.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags FROM files f
INNER JOIN file_names n ON f.name_idx = n.idx
WHERE f.local_flags & {{.FlagLocalGlobal}} != 0
ORDER BY f.name
ORDER BY n.name
`).Queryx())
return itererr.Map(it, errFn, func(m db.FileMetadata) (db.FileMetadata, error) {
m.Name = osutil.NativeFilename(m.Name)
@@ -93,9 +96,10 @@ func (s *folderDB) AllGlobalFilesPrefix(prefix string) (iter.Seq[db.FileMetadata
end := prefixEnd(prefix)
it, errFn := iterStructs[db.FileMetadata](s.stmt(`
SELECT f.sequence, f.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags FROM files f
WHERE f.name >= ? AND f.name < ? AND f.local_flags & {{.FlagLocalGlobal}} != 0
ORDER BY f.name
SELECT f.sequence, n.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags FROM files f
INNER JOIN file_names n ON f.name_idx = n.idx
WHERE n.name >= ? AND n.name < ? AND f.local_flags & {{.FlagLocalGlobal}} != 0
ORDER BY n.name
`).Queryx(prefix, end))
return itererr.Map(it, errFn, func(m db.FileMetadata) (db.FileMetadata, error) {
m.Name = osutil.NativeFilename(m.Name)
@@ -109,7 +113,7 @@ func (s *folderDB) AllNeededGlobalFiles(device protocol.DeviceID, order config.P
case config.PullOrderRandom:
selectOpts = "ORDER BY RANDOM()"
case config.PullOrderAlphabetic:
selectOpts = "ORDER BY g.name ASC"
selectOpts = "ORDER BY n.name ASC"
case config.PullOrderSmallestFirst:
selectOpts = "ORDER BY g.size ASC"
case config.PullOrderLargestFirst:
@@ -137,9 +141,10 @@ func (s *folderDB) AllNeededGlobalFiles(device protocol.DeviceID, order config.P
func (s *folderDB) neededGlobalFilesLocal(selectOpts string) (iter.Seq[protocol.FileInfo], func() error) {
// Select all the non-ignored files with the need bit set.
it, errFn := iterStructs[indirectFI](s.stmt(`
SELECT fi.fiprotobuf, bl.blprotobuf, g.name, g.size, g.modified FROM fileinfos fi
SELECT fi.fiprotobuf, bl.blprotobuf, n.name, g.size, g.modified FROM fileinfos fi
INNER JOIN files g on fi.sequence = g.sequence
LEFT JOIN blocklists bl ON bl.blocklist_hash = g.blocklist_hash
INNER JOIN file_names n ON g.name_idx = n.idx
WHERE g.local_flags & {{.FlagLocalIgnored}} = 0 AND g.local_flags & {{.FlagLocalNeeded}} != 0
` + selectOpts).Queryx())
return itererr.Map(it, errFn, indirectFI.FileInfo)
@@ -155,24 +160,26 @@ func (s *folderDB) neededGlobalFilesRemote(device protocol.DeviceID, selectOpts
// non-deleted and valid remote file (of any version)
it, errFn := iterStructs[indirectFI](s.stmt(`
SELECT fi.fiprotobuf, bl.blprotobuf, g.name, g.size, g.modified FROM fileinfos fi
SELECT fi.fiprotobuf, bl.blprotobuf, n.name, g.size, g.modified FROM fileinfos fi
INNER JOIN files g on fi.sequence = g.sequence
LEFT JOIN blocklists bl ON bl.blocklist_hash = g.blocklist_hash
INNER JOIN file_names n ON g.name_idx = n.idx
WHERE g.local_flags & {{.FlagLocalGlobal}} != 0 AND NOT g.deleted AND g.local_flags & {{.LocalInvalidFlags}} = 0 AND NOT EXISTS (
SELECT 1 FROM FILES f
INNER JOIN devices d ON d.idx = f.device_idx
WHERE f.name = g.name AND f.version = g.version AND d.device_id = ?
WHERE f.name_idx = g.name_idx AND f.version_idx = g.version_idx AND d.device_id = ?
)
UNION ALL
SELECT fi.fiprotobuf, bl.blprotobuf, g.name, g.size, g.modified FROM fileinfos fi
SELECT fi.fiprotobuf, bl.blprotobuf, n.name, g.size, g.modified FROM fileinfos fi
INNER JOIN files g on fi.sequence = g.sequence
LEFT JOIN blocklists bl ON bl.blocklist_hash = g.blocklist_hash
INNER JOIN file_names n ON g.name_idx = n.idx
WHERE g.local_flags & {{.FlagLocalGlobal}} != 0 AND g.deleted AND g.local_flags & {{.LocalInvalidFlags}} = 0 AND EXISTS (
SELECT 1 FROM FILES f
INNER JOIN devices d ON d.idx = f.device_idx
WHERE f.name = g.name AND d.device_id = ? AND NOT f.deleted AND f.local_flags & {{.LocalInvalidFlags}} = 0
WHERE f.name_idx = g.name_idx AND d.device_id = ? AND NOT f.deleted AND f.local_flags & {{.LocalInvalidFlags}} = 0
)
`+selectOpts).Queryx(
device.String(),

View File

@@ -32,7 +32,8 @@ func (s *folderDB) GetDeviceFile(device protocol.DeviceID, file string) (protoco
INNER JOIN files f on fi.sequence = f.sequence
LEFT JOIN blocklists bl ON bl.blocklist_hash = f.blocklist_hash
INNER JOIN devices d ON f.device_idx = d.idx
WHERE d.device_id = ? AND f.name = ?
INNER JOIN file_names n ON f.name_idx = n.idx
WHERE d.device_id = ? AND n.name = ?
`).Get(&ind, device.String(), file)
if errors.Is(err, sql.ErrNoRows) {
return protocol.FileInfo{}, false, nil
@@ -87,14 +88,16 @@ func (s *folderDB) AllLocalFilesWithPrefix(device protocol.DeviceID, prefix stri
INNER JOIN files f on fi.sequence = f.sequence
LEFT JOIN blocklists bl ON bl.blocklist_hash = f.blocklist_hash
INNER JOIN devices d ON d.idx = f.device_idx
WHERE d.device_id = ? AND f.name >= ? AND f.name < ?
INNER JOIN file_names n ON f.name_idx = n.idx
WHERE d.device_id = ? AND n.name >= ? AND n.name < ?
`, device.String(), prefix, end))
return itererr.Map(it, errFn, indirectFI.FileInfo)
}
func (s *folderDB) AllLocalFilesWithBlocksHash(h []byte) (iter.Seq[db.FileMetadata], func() error) {
return iterStructs[db.FileMetadata](s.stmt(`
SELECT f.sequence, f.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags FROM files f
SELECT f.sequence, n.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags FROM files f
INNER JOIN file_names n ON f.name_idx = n.idx
WHERE f.device_idx = {{.LocalDeviceIdx}} AND f.blocklist_hash = ?
`).Queryx(h))
}
@@ -104,7 +107,8 @@ func (s *folderDB) AllLocalBlocksWithHash(hash []byte) (iter.Seq[db.BlockMapEntr
// & blocklists is deferred (garbage collected) while the files list is
// not. This filters out blocks that are in fact deleted.
return iterStructs[db.BlockMapEntry](s.stmt(`
SELECT f.blocklist_hash as blocklisthash, b.idx as blockindex, b.offset, b.size, f.name as filename FROM files f
SELECT f.blocklist_hash as blocklisthash, b.idx as blockindex, b.offset, b.size, n.name as filename FROM files f
INNER JOIN file_names n ON f.name_idx = n.idx
LEFT JOIN blocks b ON f.blocklist_hash = b.blocklist_hash
WHERE f.device_idx = {{.LocalDeviceIdx}} AND b.hash = ?
`).Queryx(hash))
@@ -170,10 +174,12 @@ func (s *folderDB) DebugFilePattern(out io.Writer, name string) error {
}
name = "%" + name + "%"
res := itererr.Zip(iterStructs[hashFileMetadata](s.stmt(`
SELECT f.sequence, f.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags, f.version, f.blocklist_hash as blocklisthash, d.device_id as deviceid FROM files f
SELECT f.sequence, n.name, f.type, f.modified as modnanos, f.size, f.deleted, f.local_flags as localflags, v.version, f.blocklist_hash as blocklisthash, d.device_id as deviceid FROM files f
INNER JOIN devices d ON d.idx = f.device_idx
WHERE f.name LIKE ?
ORDER BY f.name, f.device_idx
INNER JOIN file_names n ON n.idx = f.name_idx
INNER JOIN file_versions v ON v.idx = f.version_idx
WHERE n.name LIKE ?
ORDER BY n.name, f.device_idx
`).Queryx(name)))
delMap := map[bool]string{

View File

@@ -95,16 +95,13 @@ func openFolderDBForMigration(folder, path string, deleteRetention time.Duration
func (s *folderDB) deviceIdxLocked(deviceID protocol.DeviceID) (int64, error) {
devStr := deviceID.String()
if _, err := s.stmt(`
INSERT OR IGNORE INTO devices(device_id)
VALUES (?)
`).Exec(devStr); err != nil {
return 0, wrap(err)
}
var idx int64
if err := s.stmt(`
SELECT idx FROM devices
WHERE device_id = ?
INSERT INTO devices(device_id)
VALUES (?)
ON CONFLICT(device_id) DO UPDATE
SET device_id = excluded.device_id
RETURNING idx
`).Get(&idx, devStr); err != nil {
return 0, wrap(err)
}

View File

@@ -46,9 +46,33 @@ func (s *folderDB) Update(device protocol.DeviceID, fs []protocol.FileInfo) erro
defer tx.Rollback() //nolint:errcheck
txp := &txPreparedStmts{Tx: tx}
//nolint:sqlclosecheck
insertNameStmt, err := txp.Preparex(`
INSERT INTO file_names(name)
VALUES (?)
ON CONFLICT(name) DO UPDATE
SET name = excluded.name
RETURNING idx
`)
if err != nil {
return wrap(err, "prepare insert name")
}
//nolint:sqlclosecheck
insertVersionStmt, err := txp.Preparex(`
INSERT INTO file_versions (version)
VALUES (?)
ON CONFLICT(version) DO UPDATE
SET version = excluded.version
RETURNING idx
`)
if err != nil {
return wrap(err, "prepare insert version")
}
//nolint:sqlclosecheck
insertFileStmt, err := txp.Preparex(`
INSERT OR REPLACE INTO files (device_idx, remote_sequence, name, type, modified, size, version, deleted, local_flags, blocklist_hash)
INSERT OR REPLACE INTO files (device_idx, remote_sequence, type, modified, size, deleted, local_flags, blocklist_hash, name_idx, version_idx)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
RETURNING sequence
`)
@@ -102,8 +126,19 @@ func (s *folderDB) Update(device protocol.DeviceID, fs []protocol.FileInfo) erro
prevRemoteSeq = f.Sequence
remoteSeq = &f.Sequence
}
var nameIdx int64
if err := insertNameStmt.Get(&nameIdx, f.Name); err != nil {
return wrap(err, "insert name")
}
var versionIdx int64
if err := insertVersionStmt.Get(&versionIdx, f.Version.String()); err != nil {
return wrap(err, "insert version")
}
var localSeq int64
if err := insertFileStmt.Get(&localSeq, deviceIdx, remoteSeq, f.Name, f.Type, f.ModTime().UnixNano(), f.Size, f.Version.String(), f.IsDeleted(), f.LocalFlags, blockshash); err != nil {
if err := insertFileStmt.Get(&localSeq, deviceIdx, remoteSeq, f.Type, f.ModTime().UnixNano(), f.Size, f.IsDeleted(), f.LocalFlags, blockshash, nameIdx, versionIdx); err != nil {
return wrap(err, "insert file")
}
@@ -246,7 +281,9 @@ func (s *folderDB) DropFilesNamed(device protocol.DeviceID, names []string) erro
query, args, err := sqlx.In(`
DELETE FROM files
WHERE device_idx = ? AND name IN (?)
WHERE device_idx = ? AND name_idx IN (
SELECT idx FROM file_names WHERE name IN (?)
)
`, deviceIdx, names)
if err != nil {
return wrap(err)
@@ -299,12 +336,13 @@ func (s *folderDB) recalcGlobalForFolder(txp *txPreparedStmts) error {
// recalculate.
//nolint:sqlclosecheck
namesStmt, err := txp.Preparex(`
SELECT f.name FROM files f
SELECT n.name FROM files f
INNER JOIN file_names n ON n.idx = f.name_idx
WHERE NOT EXISTS (
SELECT 1 FROM files g
WHERE g.name = f.name AND g.local_flags & ? != 0
WHERE g.name_idx = f.name_idx AND g.local_flags & ? != 0
)
GROUP BY name
GROUP BY n.name
`)
if err != nil {
return wrap(err)
@@ -329,11 +367,13 @@ func (s *folderDB) recalcGlobalForFolder(txp *txPreparedStmts) error {
func (s *folderDB) recalcGlobalForFile(txp *txPreparedStmts, file string) error {
//nolint:sqlclosecheck
selStmt, err := txp.Preparex(`
SELECT name, device_idx, sequence, modified, version, deleted, local_flags FROM files
WHERE name = ?
SELECT n.name, f.device_idx, f.sequence, f.modified, v.version, f.deleted, f.local_flags FROM files f
INNER JOIN file_versions v ON v.idx = f.version_idx
INNER JOIN file_names n ON n.idx = f.name_idx
WHERE n.name = ?
`)
if err != nil {
return wrap(err)
return wrap(err, "prepare select")
}
es, err := itererr.Collect(iterStructs[fileRow](selStmt.Queryx(file)))
if err != nil {
@@ -389,10 +429,10 @@ func (s *folderDB) recalcGlobalForFile(txp *txPreparedStmts, file string) error
//nolint:sqlclosecheck
upStmt, err = txp.Preparex(`
UPDATE files SET local_flags = local_flags & ?
WHERE name = ? AND sequence != ? AND local_flags & ? != 0
WHERE name_idx = (SELECT idx FROM file_names WHERE name = ?) AND sequence != ? AND local_flags & ? != 0
`)
if err != nil {
return wrap(err)
return wrap(err, "prepare update")
}
if _, err := upStmt.Exec(^(protocol.FlagLocalNeeded | protocol.FlagLocalGlobal), global.Name, global.Sequence, protocol.FlagLocalNeeded|protocol.FlagLocalGlobal); err != nil {
return wrap(err)

View File

@@ -0,0 +1,53 @@
-- Copyright (C) 2025 The Syncthing Authors.
--
-- This Source Code Form is subject to the terms of the Mozilla Public
-- License, v. 2.0. If a copy of the MPL was not distributed with this file,
-- You can obtain one at https://mozilla.org/MPL/2.0/.

-- Schema v5 migration: move file names and version vectors out of the
-- files table into the normalised lookup tables file_names and
-- file_versions, referencing them by integer index instead. Names and
-- versions are frequently identical across devices, so storing each string
-- once and repeating a small integer saves significant space.
-- Assumes file_names and file_versions already exist (created by the
-- initial schema scripts, which run before migrations).

-- Grab all unique names into the names table. Inserting NULL for idx lets
-- SQLite assign the next integer primary key; DISTINCT collapses to unique
-- names since the NULL column is constant.
INSERT INTO file_names (idx, name) SELECT DISTINCT null, name FROM files
;
-- Grab all unique versions into the versions table (same mechanism)
INSERT INTO file_versions (idx, version) SELECT DISTINCT null, version FROM files
;
-- Create the new files table: identical to the old one except that the
-- name and version text columns are replaced by indexes into the lookup
-- tables, with foreign keys enforcing the references.
DROP TABLE IF EXISTS files_v5
;
CREATE TABLE files_v5 (
    device_idx INTEGER NOT NULL,
    sequence INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
    remote_sequence INTEGER,
    name_idx INTEGER NOT NULL, -- changed
    type INTEGER NOT NULL,
    modified INTEGER NOT NULL,
    size INTEGER NOT NULL,
    version_idx INTEGER NOT NULL, -- changed
    deleted INTEGER NOT NULL,
    local_flags INTEGER NOT NULL,
    blocklist_hash BLOB,
    FOREIGN KEY(device_idx) REFERENCES devices(idx) ON DELETE CASCADE,
    FOREIGN KEY(name_idx) REFERENCES file_names(idx), -- added
    FOREIGN KEY(version_idx) REFERENCES file_versions(idx) -- added
) STRICT
;
-- Populate the new files table and move it in place. Note that dropping
-- the old table also drops its indexes and triggers; the migration runner
-- re-runs the initial schema scripts afterwards to recreate them.
INSERT INTO files_v5
SELECT f.device_idx, f.sequence, f.remote_sequence, n.idx as name_idx, f.type, f.modified, f.size, v.idx as version_idx, f.deleted, f.local_flags, f.blocklist_hash
FROM files f
INNER JOIN file_names n ON n.name = f.name
INNER JOIN file_versions v ON v.version = f.version
;
DROP TABLE files
;
ALTER TABLE files_v5 RENAME TO files
;

View File

@@ -25,15 +25,27 @@ CREATE TABLE IF NOT EXISTS files (
device_idx INTEGER NOT NULL, -- actual device ID or LocalDeviceID
sequence INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, -- our local database sequence, for each and every entry
remote_sequence INTEGER, -- remote device's sequence number, null for local or synthetic entries
name TEXT NOT NULL COLLATE BINARY,
name_idx INTEGER NOT NULL,
type INTEGER NOT NULL, -- protocol.FileInfoType
modified INTEGER NOT NULL, -- Unix nanos
size INTEGER NOT NULL,
version TEXT NOT NULL COLLATE BINARY,
version_idx INTEGER NOT NULL,
deleted INTEGER NOT NULL, -- boolean
local_flags INTEGER NOT NULL,
blocklist_hash BLOB, -- null when there are no blocks
FOREIGN KEY(device_idx) REFERENCES devices(idx) ON DELETE CASCADE
FOREIGN KEY(device_idx) REFERENCES devices(idx) ON DELETE CASCADE,
FOREIGN KEY(name_idx) REFERENCES file_names(idx),
FOREIGN KEY(version_idx) REFERENCES file_versions(idx)
) STRICT
;
CREATE TABLE IF NOT EXISTS file_names (
idx INTEGER NOT NULL PRIMARY KEY,
name TEXT NOT NULL UNIQUE COLLATE BINARY
) STRICT
;
CREATE TABLE IF NOT EXISTS file_versions (
idx INTEGER NOT NULL PRIMARY KEY,
version TEXT NOT NULL UNIQUE COLLATE BINARY
) STRICT
;
-- FileInfos store the actual protobuf object. We do this separately to keep
@@ -49,11 +61,17 @@ CREATE UNIQUE INDEX IF NOT EXISTS files_remote_sequence ON files (device_idx, re
WHERE remote_sequence IS NOT NULL
;
-- There can be only one file per folder, device, and name
CREATE UNIQUE INDEX IF NOT EXISTS files_device_name ON files (device_idx, name)
;
-- We want to be able to look up & iterate files based on just folder and name
CREATE INDEX IF NOT EXISTS files_name_only ON files (name)
CREATE UNIQUE INDEX IF NOT EXISTS files_device_name ON files (device_idx, name_idx)
;
-- We want to be able to look up & iterate files based on blocks hash
CREATE INDEX IF NOT EXISTS files_blocklist_hash_only ON files (blocklist_hash, device_idx) WHERE blocklist_hash IS NOT NULL
;
-- We need to look by name_idx or version_idx for garbage collection.
-- This will fail pre-migration for v4 schemas, which is fine.
-- syncthing:ignore-failure
CREATE INDEX IF NOT EXISTS files_name_idx_only ON files (name_idx)
;
-- This will fail pre-migration for v4 schemas, which is fine.
-- syncthing:ignore-failure
CREATE INDEX IF NOT EXISTS files_version_idx_only ON files (version_idx)
;