Files
syncthing/internal/db/sqlite/folderdb_counts.go
Jakob Borg 9ee208b441 chore(sqlite): use normalised tables for file names and versions (#10383)
This changes the files table to use normalisation for the names and
versions. The idea is that these are often common between all remote
devices, and repeating an integer is more efficient than repeating a
long string. A new benchmark bears this out; for a database with 100k
files shared between 31 devices, with some worst case assumption on
version vector size, the database is reduced in size by 50% and the test
finishes quicker:

    Current:
        db_bench_test.go:322: Total size: 6263.70 MiB
    --- PASS: TestBenchmarkSizeManyFilesRemotes (1084.89s)

    New:
        db_bench_test.go:326: Total size: 3049.95 MiB
    --- PASS: TestBenchmarkSizeManyFilesRemotes (776.97s)

The other benchmarks end up about the same within the margin of
variability, with one possible exception being that RemoteNeed seems to
be a little slower on average:

                                          old files/s   new files/s
    Update/n=RemoteNeed/size=1000-8            5.051k        4.654k
    Update/n=RemoteNeed/size=2000-8            5.201k        4.384k
    Update/n=RemoteNeed/size=4000-8            4.943k        4.242k
    Update/n=RemoteNeed/size=8000-8            5.099k        3.527k
    Update/n=RemoteNeed/size=16000-8           3.686k        3.847k
    Update/n=RemoteNeed/size=30000-8           4.456k        3.482k

I'm not sure why, possibly that query can be optimised anyhow.

Signed-off-by: Jakob Borg <jakob@kastelo.net>
2025-09-12 09:27:41 +00:00

129 lines
3.9 KiB
Go

// Copyright (C) 2025 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package sqlite
import (
"github.com/syncthing/syncthing/internal/db"
"github.com/syncthing/syncthing/lib/protocol"
)
type countsRow struct {
Type protocol.FileInfoType
Count int
Size int64
Deleted bool
LocalFlags protocol.FlagLocal `db:"local_flags"`
}
func (s *folderDB) CountLocal(device protocol.DeviceID) (db.Counts, error) {
var res []countsRow
if err := s.stmt(`
SELECT s.type, s.count, s.size, s.local_flags, s.deleted FROM counts s
INNER JOIN devices d ON d.idx = s.device_idx
WHERE d.device_id = ? AND s.local_flags & {{.FlagLocalIgnored}} = 0
`).Select(&res, device.String()); err != nil {
return db.Counts{}, wrap(err)
}
return summarizeCounts(res), nil
}
func (s *folderDB) CountNeed(device protocol.DeviceID) (db.Counts, error) {
if device == protocol.LocalDeviceID {
return s.needSizeLocal()
}
return s.needSizeRemote(device)
}
func (s *folderDB) CountGlobal() (db.Counts, error) {
var res []countsRow
err := s.stmt(`
SELECT s.type, s.count, s.size, s.local_flags, s.deleted FROM counts s
WHERE s.local_flags & {{.FlagLocalGlobal}} != 0 AND s.local_flags & {{.LocalInvalidFlags}} = 0
`).Select(&res)
if err != nil {
return db.Counts{}, wrap(err)
}
return summarizeCounts(res), nil
}
func (s *folderDB) CountReceiveOnlyChanged() (db.Counts, error) {
var res []countsRow
err := s.stmt(`
SELECT s.type, s.count, s.size, s.local_flags, s.deleted FROM counts s
WHERE local_flags & {{.FlagLocalReceiveOnly}} != 0
`).Select(&res)
if err != nil {
return db.Counts{}, wrap(err)
}
return summarizeCounts(res), nil
}
func (s *folderDB) needSizeLocal() (db.Counts, error) {
// The need size for the local device is the sum of entries with the
// need bit set.
var res []countsRow
err := s.stmt(`
SELECT s.type, s.count, s.size, s.local_flags, s.deleted FROM counts s
WHERE s.local_flags & {{.FlagLocalNeeded}} != 0
`).Select(&res)
if err != nil {
return db.Counts{}, wrap(err)
}
return summarizeCounts(res), nil
}
func (s *folderDB) needSizeRemote(device protocol.DeviceID) (db.Counts, error) {
var res []countsRow
// See neededGlobalFilesRemote for commentary as that is the same query without summing
if err := s.stmt(`
SELECT g.type, count(*) as count, sum(g.size) as size, g.local_flags, g.deleted FROM files g
WHERE g.local_flags & {{.FlagLocalGlobal}} != 0 AND NOT g.deleted AND g.local_flags & {{.LocalInvalidFlags}} = 0 AND NOT EXISTS (
SELECT 1 FROM FILES f
INNER JOIN devices d ON d.idx = f.device_idx
WHERE f.name_idx = g.name_idx AND f.version_idx = g.version_idx AND d.device_id = ?
)
GROUP BY g.type, g.local_flags, g.deleted
UNION ALL
SELECT g.type, count(*) as count, sum(g.size) as size, g.local_flags, g.deleted FROM files g
WHERE g.local_flags & {{.FlagLocalGlobal}} != 0 AND g.deleted AND g.local_flags & {{.LocalInvalidFlags}} = 0 AND EXISTS (
SELECT 1 FROM FILES f
INNER JOIN devices d ON d.idx = f.device_idx
WHERE f.name_idx = g.name_idx AND d.device_id = ? AND NOT f.deleted AND f.local_flags & {{.LocalInvalidFlags}} = 0
)
GROUP BY g.type, g.local_flags, g.deleted
`).Select(&res, device.String(),
device.String()); err != nil {
return db.Counts{}, wrap(err)
}
return summarizeCounts(res), nil
}
func summarizeCounts(res []countsRow) db.Counts {
c := db.Counts{
DeviceID: protocol.LocalDeviceID,
}
for _, r := range res {
switch {
case r.Deleted:
c.Deleted += r.Count
case r.Type == protocol.FileInfoTypeFile:
c.Files += r.Count
c.Bytes += r.Size
case r.Type == protocol.FileInfoTypeDirectory:
c.Directories += r.Count
c.Bytes += r.Size
case r.Type == protocol.FileInfoTypeSymlink:
c.Symlinks += r.Count
c.Bytes += r.Size
}
}
return c
}