testsuite: add perftest.py to compare two rsync builds' transfer speed

A standalone dev tool (run directly, not via runtests.py) for catching
performance regressions between rsync releases.  Given two rsync binaries it
builds one deterministic test tree -- heavy-tailed file sizes, a directory
spine, symlinks, hard links and a spread of permission modes, modelled on the
gentestdata generator -- then runs the two binaries ALTERNATELY for N loops,
timing each transfer, and reports the mean and standard deviation per binary.

Each loop times a full copy into an emptied destination and an incremental
no-op against an already-synced one (rsync's scan/file-list/stat overhead,
where many regressions hide); --mode selects which of the two is timed.  The
first run of each binary is dropped to reduce page-cache impact, the run order
alternates to cancel drift, and a B-vs-A slowdown is flagged only when it
exceeds both the run-to-run noise and the --threshold percentage.
This commit is contained in:
Andrew Tridgell
2026-06-10 11:18:54 +10:00
parent b88080bd49
commit 8b042907e5

506
testsuite/perftest.py Executable file
View File

@@ -0,0 +1,506 @@
#!/usr/bin/env python3
"""Compare the transfer performance of two rsync binaries (local <-> local).
This is a standalone dev tool (run it directly, not via runtests.py) for
spotting performance regressions between rsync releases. Given two rsync
binaries it builds one test tree, then runs the two binaries ALTERNATELY for a
number of loops, timing each transfer, and reports the mean and standard
deviation of the transfer time for each binary.
Two transfers are timed each loop (see --mode):
* full -- a fresh copy into an emptied destination (end-to-end read+write).
* noop -- a re-run against an already-synced destination (rsync's own
scan / file-list / stat overhead, where many regressions hide).
The first measured run of each binary is dropped (see --warmup) because it
cold-loads the source into the page cache and is an outlier.
The test tree's shape (heavy-tailed file sizes, a directory spine, symlinks,
hard links and a spread of permission modes) follows the gentestdata.py
generator; it is deterministic for a given --seed.
Examples:
# Quick smoke run, same binary twice (means should match, no regression).
./perftest.py --files 200 --total-size 5M -n 3 ./rsync ./rsync
# Compare a released binary against a fresh build over 8 loops.
./perftest.py -n 8 ../old_versions/rsync_3.4.0 ./rsync
# Heavier tree, no-op (scan-overhead) timing only.
./perftest.py --files 50000 --total-size 2G --mode noop OLD/rsync NEW/rsync
"""
import argparse
import dataclasses
import math
import os
import random
import shlex
import shutil
import statistics
import struct
import subprocess
import sys
import tempfile
import time
# ---------------------------------------------------------------------------
# Test-tree generation (ported from gentestdata.py, kept self-contained).
# ---------------------------------------------------------------------------
# Marker file written at the root of every generated tree, recording the
# generation parameters.  NOTE(review): safe_rmtree() in this file does not
# look for the marker before deleting -- confirm whether that guard is wanted.
MARKER = ".perftest"
# Permission modes drawn at random for regular files (execs + read-only).
# 0o644 is listed twice, so it is drawn twice as often as the other modes.
FILE_MODES = [0o644, 0o644, 0o600, 0o640, 0o664, 0o444, 0o755, 0o750, 0o700]
# Directory modes; owner always keeps r-x so the tree stays traversable.
# 0o755 is listed twice, so it is drawn twice as often as the other modes.
DIR_MODES = [0o755, 0o755, 0o775, 0o750, 0o700, 0o555]
SIZE_SIGMA = 1.8  # sigma of the underlying lognormal size distribution
BASE_BUF_SIZE = 1 << 20  # 1 MiB shared random buffer for file content
def parse_size(s):
    """Parse a human size like 500M, 1.5GiB, 200KB, or a bare byte count.

    Suffixes are case-insensitive.  K/M/G/T and KiB/MiB/GiB/TiB are powers of
    1024; KB/MB/GB/TB are powers of 1000.  The result is truncated to an int.

    Raises argparse.ArgumentTypeError for an unknown suffix, a malformed
    number, a negative size, or a value too large to represent, so argparse
    reports every bad input as a normal usage error.
    """
    s = s.strip()
    units = {
        "": 1, "B": 1,
        "K": 1024, "KIB": 1024, "KB": 1000,
        "M": 1024**2, "MIB": 1024**2, "MB": 1000**2,
        "G": 1024**3, "GIB": 1024**3, "GB": 1000**3,
        "T": 1024**4, "TIB": 1024**4, "TB": 1000**4,
    }
    # Peel the trailing non-numeric characters off as the unit suffix.
    num, suffix = s, ""
    while num and not (num[-1].isdigit() or num[-1] == "."):
        suffix = num[-1] + suffix
        num = num[:-1]
    suffix = suffix.upper()
    if suffix not in units:
        raise argparse.ArgumentTypeError(f"unknown size suffix in {s!r}")
    try:
        value = float(num)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid size {s!r}") from None
    # float() accepts e.g. "1e999" (-> inf), which int() would turn into an
    # OverflowError; a negative size makes no sense for a file tree.
    if not math.isfinite(value) or value < 0:
        raise argparse.ArgumentTypeError(f"invalid size {s!r}")
    return int(value * units[suffix])
def human(n):
    """Render a byte count with a binary unit (B, KiB, MiB, GiB or TiB).

    Values below 1024 are printed verbatim with a "B" suffix; anything larger
    is scaled by 1024 per step and shown with one decimal.  TiB is the largest
    unit, so bigger values simply show a TiB figure above 1024.
    """
    if abs(n) < 1024:
        return f"{n}B"
    for unit in ("KiB", "MiB", "GiB"):
        n /= 1024
        if abs(n) < 1024:
            return f"{n:.1f}{unit}"
    # Whatever is left is reported in TiB regardless of magnitude.
    n /= 1024
    return f"{n:.1f}TiB"
def gen_sizes(n, total, rng):
    """Split `total` bytes into n heavy-tailed file sizes.

    Each file gets a lognormal weight (exp of a gauss(0, SIZE_SIGMA) draw from
    `rng`) and a share of `total` proportional to it, truncated to an int.
    The bytes lost to truncation are added to the largest file so the sizes
    sum to exactly `total`.  Returns an empty list when n is 0.
    """
    if n == 0:
        return []
    weights = []
    for _ in range(n):
        weights.append(math.exp(rng.gauss(0.0, SIZE_SIGMA)))
    weight_total = sum(weights)
    sizes = [int(w / weight_total * total) for w in weights]
    shortfall = total - sum(sizes)
    if sizes and shortfall:
        # Give the rounding remainder to the (first) largest file.
        biggest = sizes.index(max(sizes))
        sizes[biggest] += shortfall
    return sizes
def build_dirs(root, num_dirs, max_depth, rng):
    """Create the directory skeleton under `root` and return every path made.

    `root` itself is created first (it must not exist yet).  A single "spine"
    root/d1/d2/.../d<max_depth> is always built, then extra `dir<N>`
    directories are attached to randomly chosen parents until `num_dirs`
    directories exist in total or no parent shallower than `max_depth`
    remains.  The returned list starts with `root`, followed by the spine and
    then the random directories in creation order.
    """
    os.makedirs(root)
    created = [root]
    level = {root: 0}
    # Directories that may still receive children (depth < max_depth).
    parents = [root] if max_depth > 0 else []

    # The spine guarantees the tree really reaches max_depth.
    spine = root
    for depth in range(1, max_depth + 1):
        spine = os.path.join(spine, f"d{depth}")
        os.mkdir(spine)
        created.append(spine)
        level[spine] = depth
        if depth < max_depth:
            parents.append(spine)

    serial = 0
    while parents and len(created) < num_dirs:
        parent = rng.choice(parents)
        serial += 1
        child = os.path.join(parent, f"dir{serial}")
        os.mkdir(child)
        child_depth = level[parent] + 1
        created.append(child)
        level[child] = child_depth
        if child_depth < max_depth:
            parents.append(child)
    return created
def write_file(path, size, index, base):
    """Create `path` as a regular file of exactly `size` bytes.

    Files of 16 bytes or more start with a little-endian (index, size) header
    packed as two unsigned 64-bit integers; the rest of the file is the `base`
    buffer repeated and cut to length.  Smaller files hold only a prefix of
    `base`.
    """
    with open(path, "wb") as out:
        left = size
        if left >= 16:
            out.write(struct.pack("<QQ", index, size))
            left -= 16
        buf_len = len(base)
        while left > 0:
            # Whole buffer while it fits, then one final partial slice.
            piece = base[:left] if left < buf_len else base
            out.write(piece)
            left -= len(piece)
def rel_symlink(target, link_path):
    """Make `link_path` a symlink to `target`, stored as a relative path.

    The link text is `target` expressed relative to the directory that will
    contain the link.
    """
    link_dir = os.path.dirname(link_path)
    os.symlink(os.path.relpath(target, link_dir), link_path)
def safe_rmtree(path):
    """Delete the tree at `path`, including read-only directories.

    Every directory reached by os.walk is first set to mode 0o700 so that its
    entries can be unlinked; a failing chmod is ignored and left for
    shutil.rmtree to report.
    """
    for walked in os.walk(path):
        directory = walked[0]
        try:
            os.chmod(directory, 0o700)
        except OSError:
            # Best effort only: rmtree below surfaces any real problem.
            continue
    shutil.rmtree(path)
def generate_tree(root, args):
    """Populate `root` with the reproducible benchmark tree and describe it.

    Reads files, dirs, depth, symlinks, hardlinks, total_size and seed from
    `args`.  All randomness comes from one random.Random(args.seed), so the
    order of the draws below determines the tree and must not be rearranged.
    Link creation is best-effort: a link that fails with OSError is skipped
    and not counted.

    Returns a one-line summary of what was actually created.
    """
    file_count = args.files
    # Default link count: one per 20 files, at least one, none without files.
    default_links = max(1, file_count // 20) if file_count else 0

    if args.dirs is None:
        dir_count = max(args.depth, file_count // 20, 1)
    else:
        dir_count = args.dirs
    sym_count = default_links if args.symlinks is None else args.symlinks
    hard_count = default_links if args.hardlinks is None else args.hardlinks

    rng = random.Random(args.seed)
    filler = rng.randbytes(BASE_BUF_SIZE)
    dirs = build_dirs(root, dir_count, args.depth, rng)

    with open(os.path.join(root, MARKER), "w") as marker:
        marker.write(f"generated by perftest.py seed={args.seed} files={file_count} "
                     f"total={args.total_size}\n")

    # Regular files, each dropped into a randomly chosen directory.
    sizes = gen_sizes(file_count, args.total_size, rng)
    files = []
    for idx, nbytes in enumerate(sizes):
        dest = os.path.join(rng.choice(dirs), f"file{idx}.dat")
        write_file(dest, nbytes, idx, filler)
        files.append(dest)

    # Hard links to random existing files (none if there are no files).
    hard_made = 0
    for idx in range(hard_count if files else 0):
        source = rng.choice(files)
        name = f"hlink{idx}_{os.path.basename(source)}"
        try:
            os.link(source, os.path.join(rng.choice(dirs), name))
        except OSError:
            continue
        hard_made += 1

    # Symlinks: ~15% dangling, ~15% to a directory, the rest to a file.
    sym_made = 0
    for idx in range(sym_count):
        link = os.path.join(rng.choice(dirs), f"sym{idx}")
        roll = rng.random()
        try:
            if not files or roll < 0.15:
                os.symlink(f"../broken-target-{idx}", link)
            elif roll < 0.30:
                rel_symlink(rng.choice(dirs), link)
            else:
                rel_symlink(rng.choice(files), link)
        except OSError:
            continue
        sym_made += 1

    # Permissions are applied last, after all content has been written.
    for path in files:
        os.chmod(path, rng.choice(FILE_MODES))
    # Sub-directories are processed deepest-first; the root keeps its mode.
    subdirs = [d for d in dirs if d != root]
    subdirs.sort(key=lambda p: p.count(os.sep), reverse=True)
    for path in subdirs:
        os.chmod(path, rng.choice(DIR_MODES))

    biggest = max(sizes) if sizes else 0
    return (f"files={file_count} dirs={len(dirs)} symlinks={sym_made} hardlinks={hard_made} "
            f"total={human(sum(sizes))} biggest={human(biggest)} "
            f"seed={args.seed}")
# ---------------------------------------------------------------------------
# Benchmark.
# ---------------------------------------------------------------------------
@dataclasses.dataclass
class Binary:
    """One rsync executable taking part in the comparison."""

    # Short tag used in progress lines and tables: "A" / "B".
    label: str
    # Absolute path to the rsync binary.
    path: str
    # First line of `rsync --version`.
    version: str
def rsync_version(path):
    """Return the first line printed by `<path> --version`, stripped.

    Falls back to stderr when stdout is empty.  Never raises for a binary that
    cannot be started or that hangs for more than 15 seconds: a parenthesised
    placeholder string is returned instead.
    """
    try:
        proc = subprocess.run([path, "--version"], capture_output=True, text=True, timeout=15)
    except (OSError, subprocess.TimeoutExpired) as e:
        return f"(version unavailable: {e})"
    lines = (proc.stdout or proc.stderr or "").splitlines()
    if not lines:
        return "(no --version output)"
    return lines[0].strip()
def drop_caches():
    """Best-effort: flush dirty pages and drop the page/dentry/inode caches.

    Runs `sync`, then writes "3" to /proc/sys/vm/drop_caches (which needs
    root).  Returns True if the write succeeded and False if it failed with
    OSError.
    """
    subprocess.run(["sync"], check=False)
    try:
        with open("/proc/sys/vm/drop_caches", "w") as knob:
            knob.write("3\n")
    except OSError:
        return False
    else:
        return True
def time_transfer(binary, rsync_args, src, dest, timeout):
    """Run one `rsync <args> src/ dest/` and return its wall-clock seconds.

    `binary` supplies the executable (`.path`) and the tag used in error
    messages (`.label`); `rsync_args` is a list of extra rsync options and
    `timeout` is the limit in seconds for this single run.

    Raises RuntimeError if the transfer cannot be timed meaningfully: rsync
    exits non-zero, exceeds `timeout`, or cannot be started at all.  Every
    failure uses the same exception type so the caller needs one handler.
    """
    argv = [binary.path, *rsync_args, src + "/", dest + "/"]
    t0 = time.monotonic()
    try:
        # errors="replace": rsync may print file names that are not valid in
        # the locale encoding; that must not abort the benchmark.
        r = subprocess.run(argv, capture_output=True, text=True,
                           errors="replace", timeout=timeout)
    except subprocess.TimeoutExpired as e:
        raise RuntimeError(
            f"{binary.label} ({binary.path}) rsync timed out after {timeout}s:\n"
            f" cmd: {shlex.join(argv)}") from e
    except OSError as e:
        raise RuntimeError(
            f"{binary.label} ({binary.path}) rsync could not be run: {e}\n"
            f" cmd: {shlex.join(argv)}") from e
    elapsed = time.monotonic() - t0
    if r.returncode != 0:
        raise RuntimeError(
            f"{binary.label} ({binary.path}) rsync exited {r.returncode}:\n"
            f" cmd: {shlex.join(argv)}\n"
            f" {(r.stderr or r.stdout).strip()}")
    return elapsed
def run_benchmark(binaries, args, src, dest_full, dest_noop):
    """Run the alternating loops; return {label: {mode: [all samples]}}.

    Reads mode, warmup, runs, rsync_args, timeout and drop_caches from `args`.
    Each of the `warmup + runs` loops times every binary once per enabled
    mode.  The returned sample lists still include the leading warm-up runs.

    A "full" run wipes and recreates `dest_full` first; a "noop" run re-syncs
    into `dest_noop`, which is populated once up front.  Errors raised by
    time_transfer() propagate to the caller.
    """
    do_full = args.mode in ("both", "full")
    do_noop = args.mode in ("both", "noop")
    drop_warned = False

    def maybe_drop_caches():
        """Drop the caches if requested; warn once if that is not possible."""
        nonlocal drop_warned
        if not args.drop_caches:
            return
        if not drop_caches() and not drop_warned:
            drop_warned = True
            print("warning: --drop-caches requested but the caches could not "
                  "be dropped (needs root); timings are NOT cold-cache",
                  file=sys.stderr)

    # Pre-populate the shared no-op destination so every timed no-op run finds
    # nothing to do. Use binary A; its content is identical for B.
    if do_noop:
        time_transfer(binaries[0], args.rsync_args, src, dest_noop, args.timeout)

    samples = {b.label: {m: [] for m in ("full", "noop")} for b in binaries}
    total_loops = args.warmup + args.runs
    for loop in range(total_loops):
        tag = "warmup" if loop < args.warmup else f"run {loop - args.warmup + 1}/{args.runs}"
        # Alternate which binary goes first to cancel first-mover/thermal drift.
        order = binaries if loop % 2 == 0 else list(reversed(binaries))
        for b in order:
            if do_full:
                # Start every full copy from an empty destination.
                if os.path.exists(dest_full):
                    safe_rmtree(dest_full)
                os.mkdir(dest_full)
                maybe_drop_caches()
                t = time_transfer(b, args.rsync_args, src, dest_full, args.timeout)
                samples[b.label]["full"].append(t)
                _progress(b, "full", tag, t)
            if do_noop:
                maybe_drop_caches()
                t = time_transfer(b, args.rsync_args, src, dest_noop, args.timeout)
                samples[b.label]["noop"].append(t)
                _progress(b, "noop", tag, t)
    return samples
def _progress(binary, mode, tag, t):
excl = " (warmup, excluded)" if tag == "warmup" else ""
print(f" [{tag:>10}] {binary.label} {mode:<4} {t:8.3f}s{excl}")
# ---------------------------------------------------------------------------
# Reporting.
# ---------------------------------------------------------------------------
def _stats(times):
"""(n, mean, stddev, min, median) over the timing samples."""
n = len(times)
if n == 0:
return (0, 0.0, 0.0, 0.0, 0.0)
return (n, statistics.mean(times),
statistics.stdev(times) if n > 1 else 0.0,
min(times), statistics.median(times))
def report(binaries, samples, args):
"""Print the per-binary tables and the A-vs-B comparison; return exit code."""
print("\n" + "=" * 72)
for b in binaries:
print(f"{b.label}: {b.path}\n {b.version}")
print(f"rsync args: {' '.join(args.rsync_args)} "
f"(note: a full copy is not fsync'd unless you add --fsync)")
print("=" * 72)
modes = [m for m in ("full", "noop") if any(samples[b.label][m] for b in binaries)]
hdr = f"{'binary':<7}{'mode':<6}{'runs':>5}{'mean':>11}{'stddev':>11}{'min':>11}{'median':>11}"
for mode in modes:
print(f"\n{hdr}\n{'-' * len(hdr)}")
st = {}
for b in binaries:
# Drop the leading warm-up samples before computing statistics.
kept = samples[b.label][mode][args.warmup:]
st[b.label] = _stats(kept)
n, mean, sd, mn, md = st[b.label]
print(f"{b.label:<7}{mode:<6}{n:>5}{mean:>10.3f}s{sd:>10.3f}s"
f"{mn:>10.3f}s{md:>10.3f}s")
a, c = binaries[0].label, binaries[1].label
(na, ma, sda, *_), (nc, mc, sdc, *_) = st[a], st[c]
if na and nc and ma > 0:
delta = mc - ma
pct = delta / ma * 100.0
noise = max(sda, sdc)
# Flag only when B is slower beyond the run-to-run noise and a small
# relative threshold, so jitter doesn't cry "regression".
if delta > noise and pct > args.threshold:
verdict = f"REGRESSION (slower): {c} is {pct:+.1f}% vs {a}"
elif delta < -noise and -pct > args.threshold:
verdict = f"faster: {c} is {pct:+.1f}% vs {a}"
else:
verdict = f"no significant change: {pct:+.1f}% (within noise)"
print(f" {mode}: {a} {ma:.3f}s vs {c} {mc:.3f}s -> {verdict}")
if args.csv:
_write_csv(args.csv, binaries, samples)
print(f"\nraw per-run timings written to {args.csv}")
return 0
def _write_csv(path, binaries, samples):
with open(path, "w") as f:
f.write("binary,path,mode,run,warmup,seconds\n")
for b in binaries:
for mode in ("full", "noop"):
for i, t in enumerate(samples[b.label][mode]):
f.write(f"{b.label},{b.path},{mode},{i},{int(i == 0)},{t:.6f}\n")
# ---------------------------------------------------------------------------
# Main.
# ---------------------------------------------------------------------------
def main():
    """Command-line entry point: set up, benchmark, report, clean up.

    Parses the arguments, validates both rsync binaries, creates the scratch
    directory, generates the source tree (or uses --src), runs the benchmark
    and prints the report.  Always ends in sys.exit(): the report's return
    value on success, 2 if the run was aborted by an error, 130 on Ctrl-C.
    Invalid arguments exit through argparse's own error handling.

    Unless --keep is given, the scratch data is removed on every exit path
    after the scratch directory has been created, including failures and
    interrupts during tree generation.
    """
    ap = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("rsync_a", help="path to the first rsync binary (labelled A)")
    ap.add_argument("rsync_b", help="path to the second rsync binary (labelled B)")
    ap.add_argument("-n", "--runs", type=int, default=10,
                    help="measured loops per binary (default: 10)")
    ap.add_argument("--warmup", type=int, default=1,
                    help="leading runs per binary dropped from the stats to "
                         "reduce cache impact (default: 1)")
    ap.add_argument("--mode", choices=("both", "full", "noop"), default="both",
                    help="full=clean-dest copy, noop=re-sync scan overhead, "
                         "both (default)")
    ap.add_argument("--rsync-args", default="-aH",
                    help="rsync flags for the timed transfer (default: -aH)")
    ap.add_argument("--threshold", type=float, default=2.0,
                    help="percent slowdown above run-to-run noise before a "
                         "regression is flagged (default: 2.0)")
    # Tree-generation knobs (mirror gentestdata.py).
    ap.add_argument("--src", default=None,
                    help="benchmark this existing tree instead of generating one")
    ap.add_argument("-f", "--files", type=int, default=10000,
                    help="number of regular files to generate (default: 10000)")
    ap.add_argument("-s", "--total-size", type=parse_size, default="500M",
                    help="total size of all regular files (default: 500M)")
    ap.add_argument("-d", "--depth", type=int, default=10,
                    help="maximum directory tree depth (default: 10)")
    ap.add_argument("--dirs", type=int, default=None,
                    help="number of directories (default: max(depth, files/20))")
    ap.add_argument("--symlinks", type=int, default=None,
                    help="number of symlinks (default: files/20)")
    ap.add_argument("--hardlinks", type=int, default=None,
                    help="number of hard links (default: files/20)")
    ap.add_argument("--seed", type=int, default=1,
                    help="PRNG seed for a reproducible tree (default: 1)")
    ap.add_argument("--workdir", default=None,
                    help="scratch root for src/dest dirs (default: a tempdir)")
    ap.add_argument("--drop-caches", action="store_true",
                    help="sync + drop page/dentry/inode caches before each timed "
                         "run (needs root; cold-cache measurement)")
    ap.add_argument("--timeout", type=float, default=3600.0,
                    help="seconds before a single rsync run is abandoned "
                         "(default: 3600)")
    ap.add_argument("--keep", action="store_true",
                    help="keep the scratch tree on exit (default: remove it)")
    ap.add_argument("--csv", default=None,
                    help="write raw per-run timings to this CSV file")
    args = ap.parse_args()

    # Validate everything that needs no scratch space before creating any, so
    # a usage error never leaves a temporary directory behind.
    if args.runs < 2:
        ap.error("--runs must be >= 2 (need >=2 samples for a stddev)")
    if args.warmup < 0:
        ap.error("--warmup must be >= 0")
    if args.files < 0 or args.depth < 0 or args.total_size < 0:
        ap.error("--files, --depth and --total-size must not be negative")
    args.rsync_args = shlex.split(args.rsync_args)

    binaries = []
    for label, p in (("A", args.rsync_a), ("B", args.rsync_b)):
        path = os.path.abspath(p)
        if not (os.path.isfile(path) and os.access(path, os.X_OK)):
            ap.error(f"rsync {label} is not an executable file: {p}")
        binaries.append(Binary(label, path, rsync_version(path)))

    src = None
    if args.src:
        src = os.path.abspath(args.src)
        if not os.path.isdir(src):
            ap.error(f"--src is not a directory: {args.src}")

    # With --keep and --workdir the scratch tree gets a stable name so it can
    # be found again; otherwise a fresh unique temporary directory is used.
    if args.workdir:
        os.makedirs(args.workdir, exist_ok=True)
    if args.keep and args.workdir:
        workdir = os.path.join(args.workdir, "rsync-perftest")
        os.makedirs(workdir, exist_ok=True)
    else:
        workdir = tempfile.mkdtemp(prefix="rsync-perftest-", dir=args.workdir)
    dest_full = os.path.join(workdir, "dest_full")
    dest_noop = os.path.join(workdir, "dest_noop")

    generated = None
    rc = 1
    try:
        os.makedirs(dest_noop, exist_ok=True)
        if src is not None:
            print(f"using existing source tree {src}")
        else:
            src = os.path.join(workdir, "src")
            if os.path.lexists(src):
                # Left over from an earlier --keep run in the same --workdir.
                # Only replace it if it carries our marker file.
                if not os.path.isfile(os.path.join(src, MARKER)):
                    raise RuntimeError(
                        f"{src} already exists and has no {MARKER} marker; "
                        f"remove it or choose another --workdir")
                safe_rmtree(src)
            print(f"generating source tree in {src} ...")
            t0 = time.monotonic()
            # Record the path first so a partially built tree is cleaned up.
            generated = src
            summary = generate_tree(src, args)
            print(f" {summary} ({time.monotonic() - t0:.1f}s)")
        print(f"\nbenchmarking: warmup={args.warmup} runs={args.runs} mode={args.mode} "
              f"drop_caches={args.drop_caches}\n")
        samples = run_benchmark(binaries, args, src, dest_full, dest_noop)
        rc = report(binaries, samples, args)
    except (RuntimeError, subprocess.TimeoutExpired, OSError) as e:
        # Failed/timed-out transfers and filesystem errors (e.g. disk full).
        print(f"\nbenchmark aborted: {e}", file=sys.stderr)
        rc = 2
    except KeyboardInterrupt:
        print("\ninterrupted", file=sys.stderr)
        rc = 130
    finally:
        if args.keep:
            print(f"\nkept scratch tree: {workdir}")
        else:
            for d in (dest_full, dest_noop, generated):
                if d and os.path.exists(d):
                    safe_rmtree(d)
            # Remove the workdir itself if it is now empty (i.e. we made it).
            try:
                os.rmdir(workdir)
            except OSError:
                pass
    sys.exit(rc)


if __name__ == "__main__":
    main()
# vim: sw=4 et ft=python