mirror of
https://github.com/syncthing/syncthing.git
synced 2025-12-31 09:59:14 -05:00
This changes the files table to use normalisation for the names and
versions. The idea is that these are often common between all remote
devices, and repeating an integer is more efficient than repeating a
long string. A new benchmark bears this out; for a database with 100k
files shared between 31 devices, with some worst-case assumptions about
version vector size, the database is reduced in size by 50% and the test
finishes quicker:
Current:
db_bench_test.go:322: Total size: 6263.70 MiB
--- PASS: TestBenchmarkSizeManyFilesRemotes (1084.89s)
New:
db_bench_test.go:326: Total size: 3049.95 MiB
--- PASS: TestBenchmarkSizeManyFilesRemotes (776.97s)
The other benchmarks end up about the same within the margin of
variability, with one possible exception being that RemoteNeed seems to
be a little slower on average:
old files/s new files/s
Update/n=RemoteNeed/size=1000-8 5.051k 4.654k
Update/n=RemoteNeed/size=2000-8 5.201k 4.384k
Update/n=RemoteNeed/size=4000-8 4.943k 4.242k
Update/n=RemoteNeed/size=8000-8 5.099k 3.527k
Update/n=RemoteNeed/size=16000-8 3.686k 3.847k
Update/n=RemoteNeed/size=30000-8 4.456k 3.482k
I'm not sure why; possibly that query can be optimised anyhow.
Signed-off-by: Jakob Borg <jakob@kastelo.net>
129 lines
3.9 KiB
Go
129 lines
3.9 KiB
Go
// Copyright (C) 2025 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
|
|
package sqlite
|
|
|
|
import (
|
|
"github.com/syncthing/syncthing/internal/db"
|
|
"github.com/syncthing/syncthing/lib/protocol"
|
|
)
|
|
|
|
type countsRow struct {
|
|
Type protocol.FileInfoType
|
|
Count int
|
|
Size int64
|
|
Deleted bool
|
|
LocalFlags protocol.FlagLocal `db:"local_flags"`
|
|
}
|
|
|
|
func (s *folderDB) CountLocal(device protocol.DeviceID) (db.Counts, error) {
|
|
var res []countsRow
|
|
if err := s.stmt(`
|
|
SELECT s.type, s.count, s.size, s.local_flags, s.deleted FROM counts s
|
|
INNER JOIN devices d ON d.idx = s.device_idx
|
|
WHERE d.device_id = ? AND s.local_flags & {{.FlagLocalIgnored}} = 0
|
|
`).Select(&res, device.String()); err != nil {
|
|
return db.Counts{}, wrap(err)
|
|
}
|
|
return summarizeCounts(res), nil
|
|
}
|
|
|
|
func (s *folderDB) CountNeed(device protocol.DeviceID) (db.Counts, error) {
|
|
if device == protocol.LocalDeviceID {
|
|
return s.needSizeLocal()
|
|
}
|
|
return s.needSizeRemote(device)
|
|
}
|
|
|
|
func (s *folderDB) CountGlobal() (db.Counts, error) {
|
|
var res []countsRow
|
|
err := s.stmt(`
|
|
SELECT s.type, s.count, s.size, s.local_flags, s.deleted FROM counts s
|
|
WHERE s.local_flags & {{.FlagLocalGlobal}} != 0 AND s.local_flags & {{.LocalInvalidFlags}} = 0
|
|
`).Select(&res)
|
|
if err != nil {
|
|
return db.Counts{}, wrap(err)
|
|
}
|
|
return summarizeCounts(res), nil
|
|
}
|
|
|
|
func (s *folderDB) CountReceiveOnlyChanged() (db.Counts, error) {
|
|
var res []countsRow
|
|
err := s.stmt(`
|
|
SELECT s.type, s.count, s.size, s.local_flags, s.deleted FROM counts s
|
|
WHERE local_flags & {{.FlagLocalReceiveOnly}} != 0
|
|
`).Select(&res)
|
|
if err != nil {
|
|
return db.Counts{}, wrap(err)
|
|
}
|
|
return summarizeCounts(res), nil
|
|
}
|
|
|
|
func (s *folderDB) needSizeLocal() (db.Counts, error) {
|
|
// The need size for the local device is the sum of entries with the
|
|
// need bit set.
|
|
var res []countsRow
|
|
err := s.stmt(`
|
|
SELECT s.type, s.count, s.size, s.local_flags, s.deleted FROM counts s
|
|
WHERE s.local_flags & {{.FlagLocalNeeded}} != 0
|
|
`).Select(&res)
|
|
if err != nil {
|
|
return db.Counts{}, wrap(err)
|
|
}
|
|
return summarizeCounts(res), nil
|
|
}
|
|
|
|
func (s *folderDB) needSizeRemote(device protocol.DeviceID) (db.Counts, error) {
|
|
var res []countsRow
|
|
// See neededGlobalFilesRemote for commentary as that is the same query without summing
|
|
if err := s.stmt(`
|
|
SELECT g.type, count(*) as count, sum(g.size) as size, g.local_flags, g.deleted FROM files g
|
|
WHERE g.local_flags & {{.FlagLocalGlobal}} != 0 AND NOT g.deleted AND g.local_flags & {{.LocalInvalidFlags}} = 0 AND NOT EXISTS (
|
|
SELECT 1 FROM FILES f
|
|
INNER JOIN devices d ON d.idx = f.device_idx
|
|
WHERE f.name_idx = g.name_idx AND f.version_idx = g.version_idx AND d.device_id = ?
|
|
)
|
|
GROUP BY g.type, g.local_flags, g.deleted
|
|
|
|
UNION ALL
|
|
|
|
SELECT g.type, count(*) as count, sum(g.size) as size, g.local_flags, g.deleted FROM files g
|
|
WHERE g.local_flags & {{.FlagLocalGlobal}} != 0 AND g.deleted AND g.local_flags & {{.LocalInvalidFlags}} = 0 AND EXISTS (
|
|
SELECT 1 FROM FILES f
|
|
INNER JOIN devices d ON d.idx = f.device_idx
|
|
WHERE f.name_idx = g.name_idx AND d.device_id = ? AND NOT f.deleted AND f.local_flags & {{.LocalInvalidFlags}} = 0
|
|
)
|
|
GROUP BY g.type, g.local_flags, g.deleted
|
|
`).Select(&res, device.String(),
|
|
device.String()); err != nil {
|
|
return db.Counts{}, wrap(err)
|
|
}
|
|
|
|
return summarizeCounts(res), nil
|
|
}
|
|
|
|
func summarizeCounts(res []countsRow) db.Counts {
|
|
c := db.Counts{
|
|
DeviceID: protocol.LocalDeviceID,
|
|
}
|
|
for _, r := range res {
|
|
switch {
|
|
case r.Deleted:
|
|
c.Deleted += r.Count
|
|
case r.Type == protocol.FileInfoTypeFile:
|
|
c.Files += r.Count
|
|
c.Bytes += r.Size
|
|
case r.Type == protocol.FileInfoTypeDirectory:
|
|
c.Directories += r.Count
|
|
c.Bytes += r.Size
|
|
case r.Type == protocol.FileInfoTypeSymlink:
|
|
c.Symlinks += r.Count
|
|
c.Bytes += r.Size
|
|
}
|
|
}
|
|
return c
|
|
}
|