mirror of
https://github.com/syncthing/syncthing.git
synced 2026-01-01 02:19:06 -05:00
This changes the files table to use normalisation for the names and
versions. The idea is that these are often common between all remote
devices, and repeating an integer is more efficient than repeating a
long string. A new benchmark bears this out; for a database with 100k
files shared between 31 devices, with some worst case assumption on
version vector size, the database is reduced in size by 50% and the test
finishes quicker:
Current:
db_bench_test.go:322: Total size: 6263.70 MiB
--- PASS: TestBenchmarkSizeManyFilesRemotes (1084.89s)
New:
db_bench_test.go:326: Total size: 3049.95 MiB
--- PASS: TestBenchmarkSizeManyFilesRemotes (776.97s)
The other benchmarks end up about the same within the margin of
variability, with one possible exception being that RemoteNeed seems to
be a little slower on average:
old files/s new files/s
Update/n=RemoteNeed/size=1000-8 5.051k 4.654k
Update/n=RemoteNeed/size=2000-8 5.201k 4.384k
Update/n=RemoteNeed/size=4000-8 4.943k 4.242k
Update/n=RemoteNeed/size=8000-8 5.099k 3.527k
Update/n=RemoteNeed/size=16000-8 3.686k 3.847k
Update/n=RemoteNeed/size=30000-8 4.456k 3.482k
I'm not sure why, possibly that query can be optimised anyhow.
Signed-off-by: Jakob Borg <jakob@kastelo.net>
54 lines
1.6 KiB
SQL
54 lines
1.6 KiB
SQL
-- Copyright (C) 2025 The Syncthing Authors.
|
|
--
|
|
-- This Source Code Form is subject to the terms of the Mozilla Public
|
|
-- License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
|
-- You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
|
|
-- Grab all unique names into the names table
|
|
|
|
INSERT INTO file_names (idx, name) SELECT DISTINCT null, name FROM files
|
|
;
|
|
|
|
-- Grab all unique versions into the versions table
|
|
|
|
INSERT INTO file_versions (idx, version) SELECT DISTINCT null, version FROM files
|
|
;
|
|
|
|
-- Create the new files table
|
|
|
|
DROP TABLE IF EXISTS files_v5
|
|
;
|
|
|
|
CREATE TABLE files_v5 (
|
|
device_idx INTEGER NOT NULL,
|
|
sequence INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
|
remote_sequence INTEGER,
|
|
name_idx INTEGER NOT NULL, -- changed
|
|
type INTEGER NOT NULL,
|
|
modified INTEGER NOT NULL,
|
|
size INTEGER NOT NULL,
|
|
version_idx INTEGER NOT NULL, -- changed
|
|
deleted INTEGER NOT NULL,
|
|
local_flags INTEGER NOT NULL,
|
|
blocklist_hash BLOB,
|
|
FOREIGN KEY(device_idx) REFERENCES devices(idx) ON DELETE CASCADE,
|
|
FOREIGN KEY(name_idx) REFERENCES file_names(idx), -- added
|
|
FOREIGN KEY(version_idx) REFERENCES file_versions(idx) -- added
|
|
) STRICT
|
|
;
|
|
|
|
-- Populate the new files table and move it in place
|
|
|
|
INSERT INTO files_v5
|
|
SELECT f.device_idx, f.sequence, f.remote_sequence, n.idx as name_idx, f.type, f.modified, f.size, v.idx as version_idx, f.deleted, f.local_flags, f.blocklist_hash
|
|
FROM files f
|
|
INNER JOIN file_names n ON n.name = f.name
|
|
INNER JOIN file_versions v ON v.version = f.version
|
|
;
|
|
|
|
DROP TABLE files
|
|
;
|
|
|
|
ALTER TABLE files_v5 RENAME TO files
|
|
;
|