Files
rsync/testsuite/abdiff.py
Andrew Tridgell 4ef775fa97 abdiff: A/B differential regression hunter for rsync
testsuite/abdiff.py runs the same benign transfer with two rsync binaries
(A = build under test, B = a baseline) and compares the OUTCOME -- exit code,
stderr, --stats "Literal data", the destination tree (content + full metadata),
the --itemize list, and (with --cost) peak process-group RSS. For benign input
the two must be indistinguishable; any divergence is a regression candidate.
It is a developer tool, NOT a runtests.py test (does not end in _test.py).

Capabilities:
- Scenario sweeps over options / path shapes / file types / sizes / modes /
  selection / placement / wire / transports, plus domain-knowledge pairwise +
  combo sweeps and a stochastic fuzzer/role matrix.
- Transport lanes: local, ssh split (lsh.sh), stdio-pipe daemon, a REAL TCP
  daemon (bound port + greeting/handshake/auth challenge-response), and the
  restricted rrsync wrapper (support/rrsh.sh; each binary paired with its own
  version's rrsync via --rrsync-a/--rrsync-b, since rrsync ships in the script).
- Stability gate: each binary is run N times and escalated on a candidate diff;
  nondeterministic scenarios are quarantined FLAKY, never reported as regressions.
- Parallel (-j, default 20) with a per-run findings log; --loop runs until
  --timelimit (or Ctrl-C), feeding the pool a half-random / half-systematic
  stream of new combinations. As root an "all" run also folds in the root-only
  sweeps (priv, daemonchroot).
- General coverage levers: a cost oracle (--cost, peak RSS over the whole process
  group), transport lifted as an orthogonal axis, a resume/redo sweep, and
  type-transition / nanosecond-mtime / scale (--scale N) fixtures.

Documented in testsuite/README.md.
2026-06-11 12:32:54 +10:00

2825 lines
121 KiB
Python

#!/usr/bin/env python3
"""abdiff.py -- differential A/B regression hunter for rsync.
Runs the same transfer with two rsync binaries (A = the build under test, e.g.
./rsync; B = a baseline, e.g. old_versions/rsync_3.4.1) and compares the
OUTCOME: exit code, error output, --stats "Literal data", the destination tree
(content + full metadata), and the --itemize change list.
Core oracle: for a BENIGN input a correctness/behaviour change between the two
builds must be invisible, so A and B must produce an identical destination tree
and both exit 0. Any divergence is a regression candidate (e.g. a refactor that
silently changes what a benign `rsync -a` transfers).
This is a developer tool, NOT a runtests.py test (it does not end in _test.py and
imports nothing from the test harness). Findings are printed and appended to a
log; minimize each into a testsuite/*_test.py.
Usage:
testsuite/abdiff.py [--rsync-a ./rsync] [--rsync-b old_versions/rsync_3.4.1]
[--sweep options|pathshape|all] [--workdir DIR] [--keep]
[--findings abdiff-findings.txt] [--only NAME] [--list]
Exit 0 iff no regression candidates were found.
"""
from __future__ import annotations
import argparse
import hashlib
import itertools
import os
import random
import re
import shutil
import signal
import stat
import subprocess
import sys
import threading
import time
from concurrent.futures import (FIRST_COMPLETED, ThreadPoolExecutor,
as_completed, wait)
from pathlib import Path
# ---------------------------------------------------------------------------
# config / globals (set in main)
RSYNC_A = "./rsync"  # A: the build under test (module docstring)
RSYNC_B = "old_versions/rsync_3.4.1"  # B: the baseline build
RRSYNC_A = None # rrsync wrapper paired with A/B (None -> in-tree support/rrsync)
RRSYNC_B = None
KEEP = False  # presumably mirrors the --keep flag from the usage text; consumer not visible here -- confirm
REPEAT = 2 # stability gate: run each binary N times; flaky scenarios quarantined
CMD_TIMEOUT = 120 # per-subprocess wall-clock guard, seconds (0 = unlimited)
COST = False # --cost: also compare peak process-group RSS (resource oracle)
SCALE_N = 2000 # --scale: element count for the scale-escalation fixtures
_supports_cache: dict = {}  # (binary, opt) -> bool, filled by supports()
_supports_lock = threading.Lock()  # guards _supports_cache
_tls = threading.local() # per-worker: .measure (bool) + .rss (peak bytes)
_PAGE = os.sysconf("SC_PAGE_SIZE")
def _group_rss(pgid):
"""Summed RSS (bytes) of every live process in process group pgid -- catches
rsync's forked generator/receiver/sender (and ssh/daemon children), which a
direct-child measure (/usr/bin/time) would miss."""
total = 0
for pid in os.listdir("/proc"):
if not pid.isdigit():
continue
try:
with open(f"/proc/{pid}/stat") as f:
fields = f.read().split()
if int(fields[4]) != pgid: # field 5 (0-idx 4) = pgrp
continue
with open(f"/proc/{pid}/statm") as f:
total += int(f.read().split()[1]) * _PAGE # resident pages
except (OSError, ValueError, IndexError):
continue
return total
def sh(cmd, cwd=None, env=None, timeout=None):
    """Run ``cmd`` capturing stdout/stderr as text.

    The child runs in its own session/process group with a wall-clock
    timeout so a wedged rsync (or its ssh/daemon children) can't hang a
    worker forever: on timeout the whole group is SIGKILLed, rc 124 is
    returned and stderr is prefixed with "[abdiff: TIMEOUT]".

    ``timeout`` defaults to CMD_TIMEOUT; 0 means unlimited. When the calling
    worker has set ``_tls.measure``, the peak summed RSS of the child's
    process group is sampled while it runs and stored in ``_tls.rss``
    (None if nothing was sampled).

    Returns a subprocess.CompletedProcess whose stdout/stderr are never None.
    """
    if timeout is None:
        timeout = CMD_TIMEOUT
    p = subprocess.Popen(cmd, cwd=cwd, env=env, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, text=True,
                         start_new_session=True)
    # cost oracle: while the transfer runs, sample the peak summed RSS of its
    # whole process group (set per-worker by one_run via _tls.measure).
    measure = getattr(_tls, "measure", False)
    stop = peak = poller = None
    if measure:
        peak = [0]
        stop = threading.Event()
        pgid = os.getpgid(p.pid)

        def _poll():
            while not stop.is_set():
                peak[0] = max(peak[0], _group_rss(pgid))
                stop.wait(0.03)

        poller = threading.Thread(target=_poll, daemon=True)
        poller.start()
    try:
        try:
            out, err = p.communicate(timeout=timeout or None)
            rc = p.returncode
        except subprocess.TimeoutExpired:
            try:
                os.killpg(os.getpgid(p.pid), signal.SIGKILL)
            except OSError:
                pass  # group already gone
            out, err = p.communicate()
            rc = 124
            err = "[abdiff: TIMEOUT]\n" + (err or "")
    finally:
        # Always stop the sampler, even when communicate() raises something
        # other than TimeoutExpired (e.g. a decode error); otherwise the
        # poller would keep scanning /proc for the rest of the run.
        if poller is not None:
            stop.set()
            poller.join(timeout=1)
            _tls.rss = peak[0] or None
    return subprocess.CompletedProcess(cmd, rc, out or "", err or "")
def supports(binary, opt):
    """Return True if ``binary`` accepts option ``opt``.

    Probes with ``binary opt --version`` and treats an "unknown option" /
    "unrecognized option" / "no such option" message on stderr (case
    insensitive) as a rejection. Results are memoised per (binary, opt); the
    probe itself runs outside the lock.
    """
    cache_key = (binary, opt)
    with _supports_lock:
        try:
            return _supports_cache[cache_key]
        except KeyError:
            pass
    probe = sh([binary, opt, "--version"])
    complaint = (probe.stderr or "").lower()
    rejected = False
    for marker in ("unknown option", "unrecognized option", "no such option"):
        if marker in complaint:
            rejected = True
            break
    accepted = not rejected
    with _supports_lock:
        _supports_cache[cache_key] = accepted
    return accepted
# ---------------------------------------------------------------------------
# tree snapshot + comparison
# Substrings that mark a line as an error message. NOTE(review): the consumer
# is outside this chunk -- presumably matched against rsync's output; confirm.
ERR_MARKERS = ("rsync error", "failed to open", "rsync: ", "Invalid argument",
               "No such file", "Operation not permitted", "cannot ")
# Intentional, documented behaviour-change refusals (A errors where B didn't),
# recorded as ALLOW rather than a silent regression. Each entry is
# (substring-in-A's-stderr, human note). Populate as deliberate behaviour changes
# between the two builds are identified.
ALLOWLIST = []
def _xattrs(path):
try:
names = sorted(os.listxattr(path, follow_symlinks=False))
except (OSError, AttributeError):
return {}
out = {}
for n in names:
if n.startswith("system.posix_acl_"):
continue # captured via getfacl
try:
out[n] = os.getxattr(path, n, follow_symlinks=False).hex()
except OSError:
out[n] = "?"
return out
def _acl(path, is_dir):
    """Return the ACL of ``path`` as sorted ``getfacl -pcEn`` lines, or None.

    Blank lines and '#' comment lines are dropped and the rest sorted and
    joined with newlines. None is returned when getfacl is not installed,
    exits non-zero, or prints no entries. ``is_dir`` is accepted but unused.
    """
    if not shutil.which("getfacl"):
        return None
    res = sh(["getfacl", "-pcEn", path])
    if res.returncode != 0:
        return None
    entries = []
    for line in res.stdout.splitlines():
        if line and not line.startswith("#"):
            entries.append(line)
    entries.sort()
    return "\n".join(entries) or None
def snapshot(root: Path):
    """Map rel-path -> attribute dict for every entry under root (root itself
    excluded). Symlinks/specials are recorded, never followed.

    Every entry carries mode, uid, gid, whole-second mtime and a "type" code
    (d, l, f, p, s, b, c or ?). Regular files add size, blocks, linked,
    hardlink (only when nlink > 1) and a sha256 of the content; symlinks add
    target; devices add rdev. "xattr" and "acl" are added when non-empty.
    An entry whose lstat fails is recorded as {"type": "GONE"}. Returns {}
    when root does not exist.
    """
    root = Path(root)
    snap = {}
    inode_of = {} # (dev,ino) -> first rel path, for hardlink grouping
    if not root.exists():
        return snap
    # Explicit stack instead of recursion; directories are pushed as found.
    stack = [root]
    while stack:
        d = stack.pop()
        try:
            # sorted by name so the walk order is deterministic
            entries = sorted(os.scandir(d), key=lambda e: e.name)
        except OSError:
            continue  # unreadable directory: its contents are not recorded
        for e in entries:
            p = Path(e.path)
            rel = str(p.relative_to(root))
            try:
                st = os.lstat(p)
            except OSError:
                snap[rel] = {"type": "GONE"}
                continue
            m = st.st_mode
            a = {
                "mode": stat.S_IMODE(m),
                "uid": st.st_uid, "gid": st.st_gid,
                "mtime": int(st.st_mtime),  # truncated to whole seconds
            }
            if stat.S_ISDIR(m):
                a["type"] = "d"
                stack.append(p)
            elif stat.S_ISLNK(m):
                a["type"] = "l"
                a["target"] = os.readlink(p)
            elif stat.S_ISREG(m):
                a["type"] = "f"
                a["size"] = st.st_size
                a["blocks"] = st.st_blocks # sparseness
                # "is this file hardlinked at all" (link-dest / -H) -- a robust
                # boolean; raw nlink counts are contaminated when A and B share a
                # --link-dest basis dir, but "copied(1) vs linked(>1)" still
                # catches a real link-dest/hardlink regression.
                a["linked"] = st.st_nlink > 1
                if st.st_nlink > 1:
                    key = (st.st_dev, st.st_ino)
                    # every member of a link group records the first rel path
                    # seen for that inode, so groups compare by membership
                    a["hardlink"] = inode_of.setdefault(key, rel)
                h = hashlib.sha256()
                try:
                    with open(p, "rb") as fh:
                        # hash in 1 MiB chunks
                        for chunk in iter(lambda: fh.read(1 << 20), b""):
                            h.update(chunk)
                    a["sha"] = h.hexdigest()
                except OSError as ex:
                    # unreadable content: record the errno instead of a digest
                    a["sha"] = f"ERR:{ex.errno}"
            elif stat.S_ISFIFO(m):
                a["type"] = "p"
            elif stat.S_ISSOCK(m):
                a["type"] = "s"
            elif stat.S_ISBLK(m) or stat.S_ISCHR(m):
                a["type"] = "b" if stat.S_ISBLK(m) else "c"
                a["rdev"] = (os.major(st.st_rdev), os.minor(st.st_rdev))
            else:
                a["type"] = "?"
            xa = _xattrs(p)
            if xa:
                a["xattr"] = xa
            ac = _acl(p, stat.S_ISDIR(m))
            if ac:
                a["acl"] = ac
            snap[rel] = a
    return snap
# which attrs are meaningful depends on the options used; keep it simple and
# compare everything, but let callers ignore mtime when -t isn't in play, or
# for a type whose times rsync intentionally leaves unmanaged (-O dirs, -J
# symlinks) -> those dest mtimes are creation-time and differ between runs.
def diff_snapshots(sa, sb, ignore_mtime=False, ignore_mtime_types=()):
    """Return human-readable difference lines between two snapshot() maps.

    ``sa`` is the tree made by A (under test), ``sb`` by B (baseline). Paths
    present on one side only are reported once; for paths on both sides each
    differing attribute gets a line. "mtime" is skipped when ``ignore_mtime``
    is set or when A's entry type is listed in ``ignore_mtime_types``.
    An empty list means the snapshots match.
    """
    report = []
    for path in sorted(sa.keys() | sb.keys()):
        left = sa.get(path)
        right = sb.get(path)
        if left is None:
            report.append(f" only in B(baseline): {path} ({right.get('type')})")
        elif right is None:
            report.append(f" only in A(under-test): {path} ({left.get('type')})")
        else:
            for attr in sorted(left.keys() | right.keys()):
                if attr == "mtime" and (
                        ignore_mtime
                        or left.get("type") in ignore_mtime_types):
                    continue
                va = left.get(attr)
                vb = right.get(attr)
                if va != vb:
                    report.append(f" {path}: {attr} A={va!r} B={vb!r}")
    return report
# ---------------------------------------------------------------------------
# running a transfer with one binary
def run_xfer(binary, workdir, opts, src_args, dest, cwd=None, pre=None):
    """Run ``binary --stats -i <opts> <src_args> <dest>`` locally.

    The command runs in ``cwd`` (falling back to ``workdir``). ``pre`` is
    accepted but not used here. Returns the tuple produced by _parse_out:
    (rc, stderr, literal_data, itemize, stdout).
    """
    run_dir = cwd if cwd else workdir
    command = [binary, "--stats", "-i"]
    command.extend(opts)
    command.extend(src_args)
    command.append(dest)
    return _parse_out(sh(command, cwd=run_dir))
def _parse_out(r):
literal = None
for ln in r.stdout.splitlines():
if ln.startswith("Literal data:"):
literal = ln.split(":", 1)[1].strip()
itemize = "\n".join(sorted(
ln for ln in r.stdout.splitlines()
if len(ln) > 11 and ln[1] in "fdLDS" and ln[0] in "<>ch.*"
and ln[11:].strip() != "./")) # bench-dependent top-dir time line
return r.returncode, (r.stderr or "").strip(), literal, itemize, (r.stdout or "")
_NOISE = re.compile(r'bytes/sec|speedup is|^sent .*received |^total size is|'
r'^Number of |^Total |^Literal data:|^Matched data:|'
r'^File list |^total:|^created |^deleting ')
def _norm_out(text, wd, dest):
"""Normalised stdout for A/B compare: canonicalise the workdir/dest paths and
drop bench-variant stats lines (keeps itemize/listing/warning lines)."""
text = text.replace(str(dest), "DEST").replace(str(wd), "WD")
return "\n".join(ln for ln in text.splitlines() if ln and not _NOISE.search(ln))
def _norm_err(text, wd, dest):
"""Normalised stderr TEXT for A/B compare: canonicalise paths, strip the
version-dependent role tag ([sender=3.4.x], [client=VERSION]) and at-FILE(LINE)
source location (line numbers shift between versions) so only the MESSAGE is
compared."""
text = text.replace(str(dest), "DEST").replace(str(wd), "WD")
text = re.sub(r'\[(?:sender|receiver|generator|client|server'
r'|Sender|Receiver|Generator|Client|Server)=[^\]]*\]',
'[ROLE]', text)
text = re.sub(r' at [\w./-]+\(\d+\)', ' at LOC', text)
return text.strip()
def _wait_port(port, timeout=10.0):
import socket as _sock
end = time.time() + timeout
while time.time() < end:
try:
_sock.create_connection(("127.0.0.1", port), 0.3).close()
return True
except OSError:
time.sleep(0.05)
return False
def run_daemon_xfer(binary, wd, module_path, opts, src_args, port, chroot="no"):
    """PUSH ``src_args`` into a writable [m] module served by ``binary --daemon``.

    The daemon is reached over a PRIVATE STDIO PIPE (RSYNC_CONNECT_PROG), not
    a TCP port, so there is no port-bind/startup race. ``port`` only
    uniquifies the per-invocation config filename; ``module_path`` is created
    if missing. Returns the _parse_out tuple
    (rc, stderr, literal, itemize, stdout).
    """
    workdir = Path(wd)
    conf_file = workdir / f"rsyncd_{port}.conf"
    Path(module_path).mkdir(parents=True, exist_ok=True)
    config = (
        f"use chroot = {chroot}\n"
        f"[m]\n path = {module_path}\n read only = no\n"
        f" hosts allow = 127.0.0.1\n")
    conf_file.write_text(config)
    child_env = dict(os.environ)
    child_env["RSYNC_CONNECT_PROG"] = f"{binary} --config={conf_file} --daemon"
    command = [binary, "--stats", "-i"]
    command += list(opts)
    command += list(src_args)
    command.append("rsync://localhost/m/")
    return _parse_out(sh(command, cwd=str(wd), env=child_env))
def run_daemon_pull(binary, wd, served, opts, localdest, port, chroot="no"):
    """PULL from a read-only [m] module (the daemon SENDER side) into ``localdest``.

    ``served`` is the directory exported as [m]; the daemon is reached over a
    private stdio pipe (RSYNC_CONNECT_PROG). Both directories are created if
    missing and ``port`` only names the config file. Returns the _parse_out
    tuple (rc, stderr, literal, itemize, stdout).
    """
    conf_file = Path(wd) / f"rsyncd_{port}.conf"
    for needed in (served, localdest):
        Path(needed).mkdir(parents=True, exist_ok=True)
    config = (
        f"use chroot = {chroot}\n"
        f"[m]\n path = {served}\n read only = yes\n"
        f" hosts allow = 127.0.0.1\n")
    conf_file.write_text(config)
    child_env = dict(os.environ)
    child_env["RSYNC_CONNECT_PROG"] = f"{binary} --config={conf_file} --daemon"
    command = [binary, "--stats", "-i"]
    command += list(opts)
    command += ["rsync://localhost/m/", str(localdest) + "/"]
    return _parse_out(sh(command, cwd=str(wd), env=child_env))
# sibling helper scripts live in support/ (abdiff.py itself lives in testsuite/),
# so support/ is resolved relative to this file's parent directory.
_SUPPORT = Path(__file__).resolve().parent.parent / "support"
_LSH = str(_SUPPORT / "lsh.sh")  # remote-shell stand-in used by run_ssh_xfer
def run_ssh_xfer(binary, wd, opts, src_args, dest):
    """PUSH ``src_args`` to ``lh:<dest>/`` over a remote-shell split.

    support/lsh.sh plays the remote shell (host 'lh' = no chdir) and the
    remote side runs the same binary via --rsync-path, so real client+server
    processes and the wire protocol are exercised, unlike a both-paths-local
    copy. Returns the _parse_out tuple.
    """
    remote_shell = f"sh {_LSH}"
    command = [binary, "--stats", "-i", "-e", remote_shell,
               f"--rsync-path={binary}"]
    command.extend(opts)
    command.extend(src_args)
    command.append(f"lh:{dest}/")
    return _parse_out(sh(command, cwd=wd))
# --- rrsync lane: route the remote side through the restricted rrsync wrapper --
_RRSH = str(_SUPPORT / "rrsh.sh")  # remote-shell stand-in used by the rrsync runners
_RRSYNC_SRC = _SUPPORT / "rrsync"  # the in-tree rrsync script
def _patch_rrsync(binary, rrsync_src, wd):
"""A copy of `rrsync_src` (the version's rrsync wrapper) whose RSYNC points at
`binary`, so the rrsync-launched server is the binary under test. rrsync is
SHIPPED PER VERSION, so A and B use their OWN rrsync (the regressions live in
the script, not just the binary) -- keyed per (binary, src) to keep them
distinct in a shared workdir."""
key = abs(hash((binary, str(rrsync_src)))) % 1000000
dst = Path(wd) / f"rrsync-{key}"
if not dst.exists():
txt = Path(rrsync_src).read_text()
txt = re.sub(r"^RSYNC = '[^']*'", f"RSYNC = {binary!r}", txt, count=1,
flags=re.M)
dst.write_text(txt)
dst.chmod(0o755)
return dst
def run_rrsync_push(binary, rrsync_src, wd, opts, src_args, dest):
    """PUSH ``src_args`` through ``rrsync <restricted>``.

    The dest's parent is the restricted root and the client writes into the
    <dest-name>/ subdir, so rrsync's subdir restrictions and option/path
    validation are exercised. Returns the _parse_out tuple.
    """
    target = Path(dest)
    wrapper = _patch_rrsync(binary, rrsync_src, wd)
    restricted_root = target.parent
    restricted_root.mkdir(parents=True, exist_ok=True)
    remote_shell = f"sh {_RRSH} {wrapper} {restricted_root}"
    command = [binary, "--stats", "-i", "-e", remote_shell]
    command.extend(opts)
    command.extend(src_args)
    command.append(f"lh:{target.name}/")
    return _parse_out(sh(command, cwd=str(wd)))
def run_rrsync_pull(binary, rrsync_src, wd, served, opts, localdest):
    """PULL through ``rrsync <restricted>`` (rrsync's --sender side).

    The served dir's parent is the restricted root; the client reads the
    <served-name>/ subdir into ``localdest`` (created if missing). Returns
    the _parse_out tuple.
    """
    exported = Path(served)
    wrapper = _patch_rrsync(binary, rrsync_src, wd)
    Path(localdest).mkdir(parents=True, exist_ok=True)
    remote_shell = f"sh {_RRSH} {wrapper} {exported.parent}"
    command = [binary, "--stats", "-i", "-e", remote_shell]
    command.extend(opts)
    command.append(f"lh:{exported.name}/")
    command.append(str(localdest) + "/")
    return _parse_out(sh(command, cwd=str(wd)))
# --- real-TCP-daemon lane: a genuine `rsync --daemon` on a bound port ----------
_port_lock = threading.Lock()
_next_port = [40000]
def _alloc_port():
"""A free localhost TCP port, bind-probed under a lock so concurrent workers
don't collide (the real-daemon path needs a real port, unlike the stdio one)."""
import socket as _s
with _port_lock:
for _ in range(4000):
p = _next_port[0]
_next_port[0] = 40000 if p >= 60000 else p + 1
s = _s.socket(_s.AF_INET, _s.SOCK_STREAM)
try:
s.setsockopt(_s.SOL_SOCKET, _s.SO_REUSEADDR, 1)
s.bind(("127.0.0.1", p))
return p
except OSError:
continue
finally:
s.close()
return _next_port[0]
def _tcp_daemon(binary, wd, module_path, opts, src_args, localdest=None,
                pull=False, chroot="no", auth=False):
    """Run one transfer against a real ``binary --daemon`` on a bound TCP port.

    The client talks to rsync://127.0.0.1:PORT/m/, exercising the genuine
    socket path / greeting / handshake (and, with ``auth``, the
    challenge-response) that the stdio-pipe daemon lane bypasses.

    With ``pull`` the module is read-only and is pulled into ``localdest``;
    otherwise ``src_args`` are pushed into the writable module at
    ``module_path``. Returns the _parse_out tuple, or
    (99, "tcp daemon failed to start", None, "", "") if the port never opens.
    The daemon is always terminated (and reaped) before returning.
    """
    port = _alloc_port()
    conf = Path(wd) / f"tcpd_{port}.conf"
    Path(module_path).mkdir(parents=True, exist_ok=True)
    authlines = ""
    env = dict(os.environ)
    if auth:
        sp = Path(wd) / f"tcpd_{port}.secrets"
        sp.write_text("abuser:abpass\n")
        sp.chmod(0o600)
        authlines = f" auth users = abuser\n secrets file = {sp}\n"
        env["RSYNC_PASSWORD"] = "abpass"
    conf.write_text(
        f"use chroot = {chroot}\nport = {port}\n"
        f"log file = {wd}/tcpd_{port}.log\npid file = {wd}/tcpd_{port}.pid\n"
        f"[m]\n path = {module_path}\n read only = {'yes' if pull else 'no'}\n"
        f" hosts allow = 127.0.0.1\n{authlines}")
    proc = subprocess.Popen(
        [binary, "--daemon", "--no-detach", f"--config={conf}",
         f"--port={port}", "--address=127.0.0.1"],
        stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
        start_new_session=True)
    try:
        if not _wait_port(port):
            return (99, "tcp daemon failed to start", None, "", "")
        user = "abuser@" if auth else ""
        url = f"rsync://{user}127.0.0.1:{port}/m/"
        if pull:
            Path(localdest).mkdir(parents=True, exist_ok=True)
            argv = [binary, "--stats", "-i", *opts, url, str(localdest) + "/"]
        else:
            argv = [binary, "--stats", "-i", *opts, *src_args, url]
        return _parse_out(sh(argv, cwd=str(wd), env=env))
    finally:
        proc.terminate()
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            proc.kill()
            # reap the killed daemon so it doesn't linger as a zombie
            proc.wait()
# ---------------------------------------------------------------------------
# fixtures
def _write(p: Path, data: bytes):
p.parent.mkdir(parents=True, exist_ok=True)
p.write_bytes(data)
def build_kitchen(src: Path):
    """Populate ``src`` with a benign 'kitchen-sink' tree.

    Contents: regular/empty/large/sparse files, nested and empty dirs,
    in-tree + dangling symlinks, a dir-symlink, a hardlink pair, odd modes
    (setuid file, setgid dir) and a user.* xattr. The hardlink and the xattr
    are best effort. No attacker paths.
    """
    src.mkdir(parents=True, exist_ok=True)
    regular_files = (
        ("empty", b""),
        ("small.txt", b"hello world\n"),
        ("data.bin", bytes((i * 7) & 0xFF for i in range(200000))),
        ("dir/a.txt", b"a" * 100),
        ("dir/sub/b.txt", b"b" * 100),
    )
    for rel, payload in regular_files:
        _write(src / rel, payload)
    (src / "emptydir").mkdir(exist_ok=True)

    # sparse file: hole + data
    with open(src / "sparse.bin", "wb") as sparse:
        sparse.seek(1 << 20)
        sparse.write(b"END")

    # hardlink pair
    _write(src / "hl_a", b"hardlinked\n")
    try:
        os.link(src / "hl_a", src / "hl_b")
    except OSError:
        pass

    # symlinks: in-tree relative, dir symlink, dangling
    for target, name in (("small.txt", "rel_link"),
                         ("dir", "dir_link"),
                         ("nonexistent", "dangling")):
        os.symlink(target, src / name)

    # odd modes: setuid file, setgid dir
    for rel, mode in (("small.txt", 0o4755), ("dir", 0o2775)):
        os.chmod(src / rel, mode)

    # xattr (best effort)
    try:
        os.setxattr(src / "data.bin", "user.abtest", b"v1")
    except OSError:
        pass
def build_relfile(src: Path):
    """Minimal nested fixture for path-shape tests: sub/deep/file plus a
    sibling symlink sub/deep/link -> file."""
    deep = src / "sub/deep"
    _write(deep / "file", b"relative content\n")
    os.symlink("file", deep / "link")
# ---------------------------------------------------------------------------
# scenarios
class Scenario:
    """One A/B transfer scenario: fixture builder, rsync options, source
    arguments, destination handling and the transport to run it over."""

    def __init__(self, name, setup, opts, src_args, dest="destX/",
                 cwd_is_workdir=True, pre_dest=None, abspath=False,
                 dest_prep=None, snap_dest=None, dest_arg=None, daemon=None,
                 ssh=False, rrsync=None):
        self.name = name
        self.setup = setup # fn(src_dir)
        self.opts = opts # list[str]
        self.src_args = src_args # fn(workdir)->list[str] OR list[str]
        self.dest = dest
        self.cwd_is_workdir = cwd_is_workdir
        self.pre_dest = pre_dest # fn(dest_dir) to pre-populate (delete/update)
        self.abspath = abspath
        # dest_prep(dest_path): create the dest specially (e.g. as a symlink to a
        # real dir) instead of letting rsync create it. snap_dest(dest_path)->Path
        # picks what to snapshot (e.g. the symlink's real target).
        self.dest_prep = dest_prep
        self.snap_dest = snap_dest
        # dest_arg(dest_base)->str: the actual rsync destination argument (e.g.
        # write THROUGH an in-tree dir-symlink: dest_base/link/). Default is
        # dest_base + "/".
        self.dest_arg = dest_arg
        # daemon: None for a local transfer, or {"chroot": "no"|"yes"} to PUSH
        # src_args into a [m] daemon module whose path is the dest dir.
        self.daemon = daemon
        # ssh: True to PUSH over a remote-shell split (support/lsh.sh, host "lh")
        # -- separate client+server processes, real protocol + arg passing.
        self.ssh = ssh
        # rrsync: None, or {"pull": bool} to route through the restricted rrsync
        # wrapper (support/rrsync) as an sshd forced-command would -- exercises
        # rrsync's own option/path validation. ssh/daemon-style transport.
        # Settable via the constructor like daemon/ssh; defaults to None.
        self.rrsync = rrsync
def _liftable(scn):
"""A benign push-into-dest scenario whose transport can be swapped for free:
no dest_prep/dest_arg (those need local dest-path semantics), not already a
daemon/ssh scenario, and a list src_args that ends by pushing src/ -> dest."""
return (not scn.dest_prep and not scn.dest_arg and not scn.abspath
and scn.daemon is None and not scn.ssh and scn.rrsync is None
and isinstance(scn.src_args, list) and scn.src_args
and scn.src_args[-1] in ("src/", "src"))
def _clone_transport(scn, mode):
import copy
c = copy.copy(scn)
c.name = f"{scn.name}@{mode}"
if mode == "ssh":
c.ssh = True
elif mode == "daemon":
c.daemon = {"chroot": "no"}
return c
def lift_transports(scns, modes=("ssh", "daemon")):
    """Make transport an ORTHOGONAL axis.

    Every scenario is kept; each liftable one is followed immediately by a
    clone per entry of ``modes`` (ssh and a daemon module by default). This is
    where the daemon/ssh-only regression family hides -- a feature broken
    only over the wire is invisible to a local-only sweep.
    """
    def _with_clones(scn):
        yield scn
        if _liftable(scn):
            for mode in modes:
                yield _clone_transport(scn, mode)

    return [item for scn in scns for item in _with_clones(scn)]
def options_sweep():
    """-a plus one option at a time, over the kitchen-sink, relative trailing
    slash source -> dest. The bread-and-butter single-option regression check.

    Each scenario is named "opt:" followed by its option list joined with
    "+", e.g. "opt:-a+--sparse".
    """
    variants = [
        ["-a"], ["-aH"], ["-aHS"], ["-a", "--sparse"], ["-a", "--inplace"],
        ["-a", "-A"], ["-a", "-X"], ["-a", "-AX"], ["-a", "-U"], ["-a", "-N"],
        ["-a", "-l"], ["-a", "-L"], ["-a", "-k"], ["-a", "-K"],
        ["-a", "--copy-unsafe-links"], ["-a", "--safe-links"],
        ["-a", "--munge-links"], ["-a", "-z"], ["-a", "--compress-choice=zstd"],
        ["-a", "--compress-choice=zlib"], ["-a", "-c"],
        ["-a", "--checksum-choice=md5"], ["-a", "-W"], ["-a", "--no-whole-file"],
        ["-a", "-O"], ["-a", "-J"], ["-a", "--numeric-ids"], ["-a", "-E"],
        ["-a", "--no-inc-recursive"], ["-a", "--fake-super"],
        ["-a", "--chmod=u+rwx"], ["-rlptD"], ["-rtz"],
        ["-a", "-B", "1024"], ["-a", "--max-size=1000"], ["-a", "--min-size=50"],
        ["-a", "--exclude=*.bin"], ["-a", "-C"], ["-a", "--prune-empty-dirs"],
    ]
    # The unused `base` and `nm` locals were dropped; `nm` was never passed to
    # Scenario, so the names below are exactly what they were before.
    return [Scenario("opt:" + "+".join(v), build_kitchen, v, ["src/"], "dest/")
            for v in variants]
def pathshape_sweep():
    """The --relative class: same content under many source-path shapes.

    Each scenario uses the build_relfile fixture and differs only in options
    and in how the source is spelled (relative/absolute, file/dir, with or
    without -R, a symlink as the source, several sources).
    """
    def absfile(wd):
        return [str(Path(wd) / "src/sub/deep/file")]

    def absdir(wd):
        return [str(Path(wd) / "src") + "/"]

    shapes = [
        ("path:rel-dir-slash", ["-a"], ["src/"]),
        ("path:rel-dir-noslash", ["-a"], ["src"]),
        ("path:rel-file", ["-a"], ["src/sub/deep/file"]),
        ("path:abs-file", ["-a"], absfile),
        ("path:abs-dir", ["-a"], absdir),
        ("path:R-rel-file", ["-aR"], ["src/sub/deep/file"]),
        ("path:R-abs-file", ["-aR"], absfile),
        ("path:R-dot", ["-aR"], ["./src/sub/deep/file"]),
        ("path:R-rel-dir", ["-aR"], ["src/sub/"]),
        ("path:R-noimplied", ["-aR", "--no-implied-dirs"],
         ["src/sub/deep/file"]),
        ("path:multi-src", ["-a"],
         ["src/sub/deep/file", "src/sub/deep/link"]),
        ("path:link-as-src", ["-a"], ["src/sub/deep/link"]),
        ("path:L-link-as-src", ["-aL"], ["src/sub/deep/link"]),
    ]
    return [Scenario(name, build_relfile, opts, sources, "dest/")
            for name, opts, sources in shapes]
T_OLD = 1000000000 # fixed timestamps so pre-state is identical for A and B
T_NEW = 1700000000
def _ut(p, t=T_NEW):
os.utime(p, (t, t))
def build_recvtree(src: Path):
    """Small, interpretable source tree for receiver/stateful scenarios.

    Creates file1.txt, dir/file2.txt, big.bin, a symlink slink -> file1.txt
    and a best-effort hardlink pair hl1/hl2, then stamps the regular files,
    dir/ and src itself with T_NEW so the pre-state is identical for A and B.
    """
    _write(src / "file1.txt", b"NEW content line\n" * 3)
    _write(src / "dir/file2.txt", b"data2\n")
    _write(src / "big.bin", bytes((i * 3) & 0xFF for i in range(60000)))
    os.symlink("file1.txt", src / "slink")
    _write(src / "hl1", b"hard\n")
    stamped = ["file1.txt", "dir/file2.txt", "big.bin", "hl1"]
    try:
        os.link(src / "hl1", src / "hl2")
    except OSError:
        # Hardlinks unavailable: hl2 does not exist, so it must not be
        # stamped below (os.utime would raise FileNotFoundError).
        pass
    else:
        stamped.append("hl2")
    for f in stamped:
        _ut(src / f)
    # directories last: creating entries above bumped their mtimes
    _ut(src / "dir")
    _ut(src)
def setup_with_basis(src: Path):
    """Build ``src`` plus an identical sibling basis/ directory (for
    --link-dest/--copy-dest/etc.)."""
    for tree in (src, src.parent / "basis"):
        build_recvtree(tree)
def stale_dest(dest: Path):
    """Pre-populate ``dest`` as an OLDER state of the build_recvtree tree.

    file1.txt and big.bin differ and carry the older T_OLD mtime,
    obsolete.txt is an extra file to be removed, and dir/file2.txt is
    already current (T_NEW).
    """
    def _put(rel, payload, when):
        target = dest / rel
        _write(target, payload)
        _ut(target, when)

    _put("file1.txt", b"OLD content\n", T_OLD)
    _put("dir/file2.txt", b"data2\n", T_NEW)
    _ut(dest / "dir")
    _put("big.bin", bytes((i * 5) & 0xFF for i in range(60000)), T_OLD)
    _put("obsolete.txt", b"remove me\n", T_OLD)
# ===========================================================================
# Domain-knowledge-driven scenario generation.
# "Edges of interest": equivalence-class boundary representatives, not volume
# (empty-dir vs 1-file matters; 10 vs 11 doesn't; mode 0 vs 0400 vs 0200 matters;
# 100 files of one mode don't). Each option is modelled by its precondition (the
# dest/src state that makes it active) and, for options taking a dir, whether the
# aux location sits INSIDE or OUTSIDE the tree.
# ===========================================================================
# interesting permission edges (no-perm / read / write / exec / special bits);
# the last three are setuid, setgid and sticky.
MODES = [0o000, 0o400, 0o200, 0o644, 0o755, 0o4755, 0o2755, 0o1777]
# size edges around rsync's block boundary (BLOCK_SIZE 700): empty/1B/under/at/over/multi
SIZES = [0, 1, 699, 700, 701, 100003]
def _mk_reg(p, n, mode=0o644, t=T_NEW, fill=7):
    """Create regular file ``p`` of ``n`` bytes with a fixed mode and mtime.

    Byte i of the content is ``(i * fill) & 0xFF``, so two files of equal
    size but different ``fill`` differ in content. Parent directories are
    created as needed; the mode is applied before the timestamp.
    """
    p.parent.mkdir(parents=True, exist_ok=True)
    payload = bytes((i * fill) & 0xFF for i in range(n))
    with open(p, "wb") as out:
        out.write(payload)
    os.chmod(p, mode)
    _ut(p, t)
def _mk_sparse(p, hole=1 << 20, tail=b"end"):
    """Create ``p`` by seeking ``hole`` bytes and writing ``tail`` there, then
    stamp it with the default _ut time. Parent directories are created."""
    p.parent.mkdir(parents=True, exist_ok=True)
    with open(p, "wb") as out:
        out.seek(hole)  # skip ahead without writing
        out.write(tail)
    _ut(p)
def _mk_fifo(p):
p.parent.mkdir(parents=True, exist_ok=True)
os.mkfifo(p)
def _mk_sock(p):
import socket as _s
p.parent.mkdir(parents=True, exist_ok=True)
s = _s.socket(_s.AF_UNIX)
try:
s.bind(str(p))
finally:
s.close()
def mode_sweep():
    """One representative file per interesting permission edge, plain -a, plus
    one -a --chmod scenario. Boundary values, not 100 files of one mode.

    Each per-mode fixture holds a 64-byte file "f" with that mode and a
    directory "d" that is setgid (2755) or sticky (1777) when the file mode
    is, and 0755 otherwise.
    """
    special_dir_modes = (0o2755, 0o1777)

    def _fixture_for(file_mode):
        dir_mode = file_mode if file_mode in special_dir_modes else 0o755

        def setup(src):
            _mk_reg(src / "f", 64, mode=file_mode)
            os.mkdir(src / "d")
            os.chmod(src / "d", dir_mode)
            _ut(src / "d")
        return setup

    scenarios = [
        Scenario(f"mode:{m:04o}", _fixture_for(m), ["-a"], ["src/"], "dest/")
        for m in MODES
    ]
    scenarios.append(
        Scenario("mode:chmod-Dg-Fo", lambda src: _mk_reg(src / "f", 8),
                 ["-a", "--chmod=D2755,F644"], ["src/"], "dest/"))
    return scenarios
def size_sweep():
    """One file per size edge around the block boundary, each run plain and
    with --inplace --no-whole-file (delta path), followed by two sparse-file
    scenarios (-aS and -aS --inplace)."""
    def _sized(n):
        return lambda src: _mk_reg(src / "f", n)

    def _sparse(src):
        _mk_sparse(src / "sp.bin")

    scenarios = []
    for n in SIZES:
        scenarios.extend((
            Scenario(f"size:{n}", _sized(n), ["-a"], ["src/"], "dest/"),
            Scenario(f"size:{n}+inplace", _sized(n),
                     ["-a", "--inplace", "--no-whole-file"], ["src/"], "dest/"),
        ))
    scenarios.extend((
        Scenario("size:sparse", _sparse, ["-aS"], ["src/"], "dest/"),
        Scenario("size:sparse-inplace", _sparse,
                 ["-aS", "--inplace"], ["src/"], "dest/"),
    ))
    return scenarios
def filetype_sweep():
    """One representative per file type/symlink shape, each with the option(s)
    that actually exercise it. Specials/devices are in priv_sweep (root).

    Every fixture includes a 16-byte "anchor" file; all scenarios push
    src/ -> dest/.
    """
    def base(src):
        _mk_reg(src / "anchor", 16)

    def f_emptydir(src):
        base(src)
        os.makedirs(src / "empty")
        _ut(src / "empty")

    def f_intree(src):
        base(src)
        _mk_reg(src / "real", 20)
        os.symlink("real", src / "lnk")  # in-tree relative

    def f_dirlink(src):
        base(src)
        _mk_reg(src / "rd/inner", 20)
        os.symlink("rd", src / "dl")  # symlink to a dir

    def f_abslink(src):
        base(src)
        os.symlink("/etc/hostname", src / "abs")  # out-of-tree absolute

    def f_dangling(src):
        base(src)
        os.symlink("nonexistent", src / "dead")

    def f_chain(src):
        base(src)
        _mk_reg(src / "real", 20)
        os.symlink("real", src / "l1")
        os.symlink("l1", src / "l2")  # symlink chain

    def f_hardlinks(src):
        _mk_reg(src / "a", 40)
        for twin in ("b", "c"):
            os.link(src / "a", src / twin)
        base(src)

    def f_fifo(src):
        base(src)
        _mk_fifo(src / "pipe")

    def f_sock(src):
        base(src)
        _mk_sock(src / "sock")

    # (scenario name, fixture, rsync options)
    table = [
        ("ft:emptydir", f_emptydir, ["-a"]),
        ("ft:intree-l", f_intree, ["-a"]),
        ("ft:intree-L", f_intree, ["-aL"]),
        ("ft:dirlink-l", f_dirlink, ["-a"]),
        ("ft:dirlink-k", f_dirlink, ["-ak"]),
        ("ft:dirlink-L", f_dirlink, ["-aL"]),
        ("ft:abslink-l", f_abslink, ["-a"]),
        ("ft:abslink-L", f_abslink, ["-aL"]),
        ("ft:abslink-safe", f_abslink, ["-a", "--safe-links"]),
        ("ft:abslink-munge", f_abslink, ["-a", "--munge-links"]),
        ("ft:dangling", f_dangling, ["-a"]),
        ("ft:chain-l", f_chain, ["-a"]),
        ("ft:chain-L", f_chain, ["-aL"]),
        ("ft:hardlinks", f_hardlinks, ["-aH"]),
        ("ft:fifo", f_fifo, ["-a"]),
        ("ft:sock", f_sock, ["-a"]),
    ]
    return [Scenario(name, fixture, opts, ["src/"], "dest/")
            for name, fixture, opts in table]
# --- preconditions: dest/src state that makes an option actually ACTIVE -------
def _pc_tree(src):
    """Two-level fixture: f (100 bytes) and dir/g (50 bytes), with dir/ and
    src stamped afterwards so their mtimes are fixed too."""
    for rel, size in (("f", 100), ("dir/g", 50)):
        _mk_reg(src / rel, size)
    for directory in (src / "dir", src):
        _ut(directory)
def _setup_samemeta(src):
    """Source side of the same-metadata pair: f (100 bytes) and keep
    (20 bytes), both at T_NEW with fill 7."""
    for name, size in (("f", 100), ("keep", 20)):
        _mk_reg(src / name, size, t=T_NEW, fill=7)


def _pre_samemeta(dest):  # same size+mtime, DIFFERENT content
    """Dest side of the same-metadata pair: same names, sizes and mtime as
    _setup_samemeta but fill 200, so only the content differs."""
    for name, size in (("f", 100), ("keep", 20)):
        _mk_reg(dest / name, size, t=T_NEW, fill=200)
def _setup_older(src):
    """Source with a single 100-byte file f carrying the OLD timestamp."""
    source_file = src / "f"
    _mk_reg(source_file, 100, t=T_OLD, fill=7)


def _pre_newer(dest):  # dest newer + different (for -u)
    """Dest whose f is newer (T_NEW) and different (60 bytes, fill 200)."""
    dest_file = dest / "f"
    _mk_reg(dest_file, 60, t=T_NEW, fill=200)
def _setup_mixed(src):  # an existing file + a new file
    """Source with two 30-byte files: "exist" (also present in the dest made
    by _pre_existing) and "newfile" (source only)."""
    for name in ("exist", "newfile"):
        _mk_reg(src / name, 30, t=T_NEW, fill=7)


def _pre_existing(dest):  # only "exist" present (older, different)
    """Dest holding only "exist": older (T_OLD) and different (99 bytes,
    fill 200)."""
    stale = dest / "exist"
    _mk_reg(stale, 99, t=T_OLD, fill=200)
def _setup_sizes(src):
    """Source with one 50-byte file ("small") and one 5000-byte file ("big")."""
    for name, size in (("small", 50), ("big", 5000)):
        _mk_reg(src / name, size)
def _setup_emptydirs(src):
    """Create an empty nested dir chain ("empty/sub") next to a populated "keep/f".

    _ut is then applied to "keep", "empty/sub" and "empty", in that order.
    """
    nested = src / "empty/sub"
    os.makedirs(nested)
    _mk_reg(src / "keep/f", 10)
    for directory in (src / "keep", nested, src / "empty"):
        _ut(directory)
def _setup_append(src):
    """Create src/"f": 200 bytes, t=T_NEW, fill=7 (the full file for --append)."""
    full_file = src / "f"
    _mk_reg(full_file, 200, t=T_NEW, fill=7)
def _pre_append(dest):
    """Pre-populate dest/"f" as a shorter prefix (for --append).

    Writes 80 bytes of the pattern (i * 7) & 0xFF and then applies
    _ut(..., T_OLD) to the file.

    dest is created first if missing: this hook writes with a plain open(),
    which (unlike the sibling hooks that go through _mk_reg / os.makedirs)
    would otherwise raise FileNotFoundError on a not-yet-existing dest.
    """
    os.makedirs(dest, exist_ok=True)
    target = dest / "f"
    with open(target, "wb") as fh:
        fh.write(bytes((i * 7) & 0xFF for i in range(80)))
    _ut(target, T_OLD)
def _setup_kdest(src):
    """Source for the keep-dirlinks-dest scenarios: "dir/f1", "dir/f2" (10 each) and "top" (5)."""
    for rel, size in (("dir/f1", 10), ("dir/f2", 10), ("top", 5)):
        _mk_reg(src / rel, size)
def _prep_kdest(dest):
    """Give dest a real directory "realdir" plus a "dir" -> "realdir" symlink.

    Safe to call repeatedly: the directories use exist_ok and the symlink is
    only created when "dir" is not already a symlink.
    """
    dest.mkdir(parents=True, exist_ok=True)
    target = dest / "realdir"
    target.mkdir(exist_ok=True)
    link = dest / "dir"
    if link.is_symlink():
        return
    os.symlink("realdir", link)
def selection_sweep():
    """File-selection / timestamp / size options at the boundary that makes the
    comparison non-trivial (same size+mtime but different content, newer dest,
    existing-vs-new, size limits).

    Returns the scenarios after passing them through lift_transports().
    """
    src_arg = "src/"
    # Scenarios that need a pre-populated destination: (name, setup, opts, pre_dest).
    with_pre = [
        ("sel:checksum", _setup_samemeta, ["-a", "-c"], _pre_samemeta),
        ("sel:ignore-times", _setup_samemeta, ["-a", "-I"], _pre_samemeta),
        ("sel:size-only", _setup_samemeta, ["-a", "--size-only"], _pre_samemeta),
        ("sel:quickcheck", _setup_samemeta, ["-a"], _pre_samemeta),
        ("sel:update", _setup_older, ["-a", "-u"], _pre_newer),
        ("sel:modify-window", _setup_samemeta, ["-a", "--modify-window=2"], _pre_samemeta),
        ("sel:existing", _setup_mixed, ["-a", "--existing"], _pre_existing),
        ("sel:ignore-existing", _setup_mixed, ["-a", "--ignore-existing"], _pre_existing),
    ]
    # Scenarios with no pre_dest hook: (name, setup, opts).
    without_pre = [
        ("sel:max-size", _setup_sizes, ["-a", "--max-size=1000"]),
        ("sel:min-size", _setup_sizes, ["-a", "--min-size=1000"]),
        ("sel:times-only", _pc_tree, ["-rlpt"]),
        ("sel:atimes", _pc_tree, ["-a", "--atimes"]),
        ("sel:crtimes", _pc_tree, ["-a", "--crtimes"]),
        ("sel:open-noatime", _pc_tree, ["-a", "--open-noatime"]),
    ]
    scenarios = [Scenario(name, setup, opts, [src_arg], "dest/", pre_dest=pre)
                 for name, setup, opts, pre in with_pre]
    scenarios.extend(Scenario(name, setup, opts, [src_arg], "dest/")
                     for name, setup, opts in without_pre)
    return lift_transports(scenarios)
def behavior_sweep():
    """Behaviour options at their active preconditions (overwrite/backup/delete/
    inplace/append/keep-dirlinks-dest/prune-empty/mkpath/dirs).

    Each row is (name, setup, opts, extra Scenario keyword arguments).
    """
    stale = {"pre_dest": stale_dest}
    none = {}
    rows = [
        ("beh:backup", build_recvtree, ["-ab"], stale),
        ("beh:delete", build_recvtree, ["-a", "--delete"], stale),
        ("beh:delete-before", build_recvtree, ["-a", "--delete-before"], stale),
        ("beh:delete-after", build_recvtree, ["-a", "--delete-after"], stale),
        ("beh:delete-delay", build_recvtree, ["-a", "--delete-delay"], stale),
        ("beh:delete-excluded", build_recvtree,
         ["-a", "--delete", "--delete-excluded", "--exclude=obsolete.txt"], stale),
        ("beh:prune-empty", _setup_emptydirs, ["-a", "-m"], none),
        ("beh:inplace", build_recvtree, ["-a", "--inplace"], stale),
        ("beh:append", _setup_append, ["-a", "--append"], {"pre_dest": _pre_append}),
        ("beh:numeric-ids", build_recvtree, ["-a", "--numeric-ids"], none),
        ("beh:delay-updates", build_recvtree, ["-a", "--delay-updates"], stale),
        ("beh:dirs", _pc_tree, ["-dlpt"], none),
        ("beh:mkpath", _pc_tree, ["-a", "--mkpath"],
         {"dest_arg": lambda d: str(d) + "/made/sub/"}),
        ("beh:keep-dirlinks-dest", _setup_kdest, ["-aK"],
         {"dest_prep": _prep_kdest, "snap_dest": lambda d: d / "realdir"}),
    ]
    return [Scenario(name, setup, opts, ["src/"], "dest/", **extra)
            for name, setup, opts, extra in rows]
def _auxp(dest, where, name):
    """Return an ABSOLUTE aux path INSIDE the dest tree, or OUTSIDE it.

    dest:  destination directory (str or Path); may be relative.
    where: "inside" -> <dest>/<name>; anything else -> a sibling of dest
           named aux_<name> (i.e. under the workdir).
    name:  leaf name of the aux directory.

    The result is anchored with os.path.abspath so it means the same thing to
    this process and to an rsync started with a different cwd.  A relative
    --backup-dir/--temp-dir/--partial-dir is resolved by rsync against the
    destination, which would silently turn the "outside" case into an
    "inside" one whenever dest is relative.  For an already-absolute dest the
    returned string is unchanged.
    """
    base = Path(os.path.abspath(dest))
    if where == "inside":
        return str(base / name)
    return str(base.parent / f"aux_{name}")
def placement_sweep():
    """Options that take a DIR/path, each with the aux location INSIDE vs OUTSIDE
    the destination tree -- the edge most likely to expose confinement/escape
    regressions. backup/temp/partial-dir + link/compare/copy-dest.

    Option lists are built lazily by lambdas taking (wd, dest).  Loop variables
    are bound as lambda/def defaults so each scenario keeps its own value.
    Returns the scenarios after passing them through lift_transports().
    """
    s = []
    for where in ("inside", "outside"):
        s.append(Scenario(f"place:backup-dir-{where}", build_recvtree,
                          (lambda wd, dest, w=where: ["-ab", f"--backup-dir={_auxp(dest, w, 'bak')}"]),
                          ["src/"], "dest/", pre_dest=stale_dest))

        # temp-dir must exist -> pre-create dest (empty) + the temp dir
        def _prep_temp(dest, w=where):
            dest.mkdir(parents=True, exist_ok=True)
            os.makedirs(_auxp(dest, w, "tmp"), exist_ok=True)

        s.append(Scenario(f"place:temp-dir-{where}", build_recvtree,
                          (lambda wd, dest, w=where: ["-a", f"--temp-dir={_auxp(dest, w, 'tmp')}"]),
                          ["src/"], "dest/", dest_prep=_prep_temp))
        s.append(Scenario(f"place:partial-dir-{where}", build_recvtree,
                          (lambda wd, dest, w=where: ["-a", "--partial", f"--partial-dir={_auxp(dest, w, 'part')}"]),
                          ["src/"], "dest/"))
    # alt-dest basis (a prior identical copy at wd/basis): absolute vs relative path
    for opt in ("link-dest", "compare-dest", "copy-dest"):
        # wd may itself be relative (rsync runs with the workdir as its cwd), so
        # anchor it here: otherwise the "-abs" variant is just another relative
        # path that rsync resolves against the destination and fails to find.
        s.append(Scenario(f"place:{opt}-abs", setup_with_basis,
                          (lambda wd, dest, o=opt: ["-a", f"--{o}={os.path.join(os.path.abspath(wd), 'basis')}"]),
                          ["src/"], "dest/"))
        s.append(Scenario(f"place:{opt}-rel", setup_with_basis,
                          (lambda wd, dest, o=opt: ["-a", f"--{o}=../basis"]), ["src/"], "dest/"))
    return lift_transports(s)
def wire_sweep():
    """Protocol / wire / algorithm options (checksum & compress choice, old/
    secluded args, iconv, odd block sizes).

    Every scenario uses build_recvtree as its fixture, "-a" as the base option
    and transfers "src/" into "dest/".
    """
    extras = [
        ("cc-md5", ["--checksum-choice=md5"]),
        ("cc-md4", ["--checksum-choice=md4"]),
        ("cc-xxh64", ["--checksum-choice=xxh64"]),
        ("zc-zstd", ["-z", "--compress-choice=zstd"]),
        ("zc-zlib", ["-z", "--compress-choice=zlib"]),
        ("zc-zlibx", ["-z", "--compress-choice=zlibx"]),
        ("old-args", ["--old-args"]),
        ("secluded-args", ["-s"]),
        ("iconv", ["--iconv=utf8,latin1"]),
        ("block-1024", ["-B", "1024"]),
        ("block-999", ["-B", "999"]),
    ]
    return [Scenario(f"wire:{label}", build_recvtree, ["-a"] + extra, ["src/"], "dest/")
            for label, extra in extras]
# module-level file-type fixtures (also used by the pairwise sweep)
def _ft_intree(src):
    """File-type fixture: regular file "real", symlink "lnk" -> "real", plus "anchor"."""
    target = src / "real"
    _mk_reg(target, 20)
    os.symlink("real", src / "lnk")
    _mk_reg(src / "anchor", 10)
def _ft_dirlink(src):
    """File-type fixture: directory "rd" (holding "inner"), symlink "dl" -> "rd", plus "anchor".

    _ut is applied to "rd" once everything is in place.
    """
    real_dir = src / "rd"
    _mk_reg(real_dir / "inner", 20)
    os.symlink("rd", src / "dl")
    _mk_reg(src / "anchor", 10)
    _ut(real_dir)
def _ft_hardlinks(src):
    """File-type fixture: file "a" with hard links "b" and "c", plus "anchor"."""
    original = src / "a"
    _mk_reg(original, 40)
    for alias in ("b", "c"):
        os.link(original, src / alias)
    _mk_reg(src / "anchor", 10)
def _ft_sparse(src):
    """File-type fixture: "sp.bin" created by _mk_sparse, plus a regular "anchor"."""
    sparse_file = src / "sp.bin"
    _mk_sparse(sparse_file)
    _mk_reg(src / "anchor", 10)
def pairwise_sweep():
    """Guided pairwise: curated feature interactions (domain knowledge about where
    two options collide), plus an auto option x file-type covering set.

    The auto set is named "pair:<tag>x<filetype>", where <tag> is the option
    with leading dashes and any "=value" suffix removed.
    """
    snap_real = lambda d: d / "realdir"
    curated = [
        Scenario("pair:delete+dirlink-dest", _setup_kdest, ["-aK", "--delete"],
                 ["src/"], "dest/", dest_prep=_prep_kdest, snap_dest=snap_real),
        Scenario("pair:backup-inside+delete", build_recvtree,
                 (lambda wd, dest: ["-ab", "--delete", f"--backup-dir={_auxp(dest, 'inside', 'bak')}"]),
                 ["src/"], "dest/", pre_dest=stale_dest),
        Scenario("pair:inplace+sparse", _ft_sparse,
                 ["-aS", "--inplace", "--no-whole-file"], ["src/"], "dest/"),
        Scenario("pair:link-dest+hardlinks", setup_with_basis,
                 (lambda wd, dest: ["-aH", f"--link-dest={wd}/basis"]), ["src/"], "dest/"),
        Scenario("pair:copy-links+dirlink", _ft_dirlink, ["-aL"], ["src/"], "dest/"),
        Scenario("pair:keepdirlinks+delete-excluded", _setup_kdest,
                 ["-aK", "--delete", "--delete-excluded", "--exclude=top"],
                 ["src/"], "dest/", dest_prep=_prep_kdest, snap_dest=snap_real),
        Scenario("pair:sparse+whole-file", _ft_sparse, ["-aS", "-W"], ["src/"], "dest/"),
        Scenario("pair:partial-inside+delete", build_recvtree,
                 (lambda wd, dest: ["-a", "--delete", "--partial",
                                    f"--partial-dir={_auxp(dest, 'inside', 'part')}"]),
                 ["src/"], "dest/", pre_dest=stale_dest),
    ]
    # auto option x file-type covering set (each option relevant to several types)
    file_types = [("intree", _ft_intree), ("dirlink", _ft_dirlink),
                  ("hardlinks", _ft_hardlinks), ("sparse", _ft_sparse)]
    options = ["-c", "-z", "-b", "--inplace", "-H", "-L", "-k", "--checksum-choice=md5"]
    auto = [
        Scenario("pair:{}x{}".format(option.lstrip("-").split("=")[0], ft_name),
                 ft_setup, ["-a", option], ["src/"], "dest/")
        for ft_name, ft_setup in file_types
        for option in options
    ]
    return curated + auto
def recv_sweep():
    """Receiver/generator + stateful scenarios: existing-dest update/delete,
    backup, dest-variants (link/compare/copy-dest), inplace, temp-dir. These
    exercise the receiver's existing-dest / basis / temp-dir paths that the
    single-transfer-into-empty-dest sweeps don't reach.

    Returns the scenarios after passing them through lift_transports().
    """
    s = [
        Scenario("recv:update", build_recvtree, ["-a"], ["src/"], "dest/",
                 pre_dest=stale_dest),
        Scenario("recv:update-W", build_recvtree, ["-a", "-W"], ["src/"], "dest/",
                 pre_dest=stale_dest),
        Scenario("recv:update-delete", build_recvtree, ["-a", "--delete"],
                 ["src/"], "dest/", pre_dest=stale_dest),
        Scenario("recv:delete-during", build_recvtree, ["-a", "--delete-during"],
                 ["src/"], "dest/", pre_dest=stale_dest),
        Scenario("recv:delete-after", build_recvtree, ["-a", "--delete-after"],
                 ["src/"], "dest/", pre_dest=stale_dest),
        Scenario("recv:delete-delay", build_recvtree, ["-a", "--delete-delay"],
                 ["src/"], "dest/", pre_dest=stale_dest),
        Scenario("recv:backup", build_recvtree, ["-a", "-b"], ["src/"], "dest/",
                 pre_dest=stale_dest),
        Scenario("recv:backup-dir", build_recvtree, ["-a", "-b", "--backup-dir=bak"],
                 ["src/"], "dest/", pre_dest=stale_dest),
        Scenario("recv:backup-suffix", build_recvtree, ["-a", "-b", "--suffix=.old"],
                 ["src/"], "dest/", pre_dest=stale_dest),
        Scenario("recv:inplace", build_recvtree, ["-a", "--inplace", "--no-whole-file"],
                 ["src/"], "dest/", pre_dest=stale_dest),
        Scenario("recv:delay-updates", build_recvtree, ["-a", "--delay-updates"],
                 ["src/"], "dest/", pre_dest=stale_dest),
        # --temp-dir=tmpd is relative, so the directory is pre-created inside
        # the (stale) destination.
        Scenario("recv:temp-dir", build_recvtree, ["-a", "--temp-dir=tmpd"],
                 ["src/"], "dest/",
                 pre_dest=lambda d: (stale_dest(d), (d / "tmpd").mkdir(exist_ok=True))),
        Scenario("recv:partial-dir", build_recvtree, ["-a", "--partial-dir=.part"],
                 ["src/"], "dest/", pre_dest=stale_dest),
        Scenario("recv:fuzzy", build_recvtree, ["-a", "--fuzzy"], ["src/"], "dest/",
                 pre_dest=stale_dest),
        # dest-variant basis lookups (basis = identical sibling dir)
        Scenario("recv:link-dest-rel", setup_with_basis, ["-a", "--link-dest=../basis"],
                 ["src/"], "dest/"),
        # wd may be relative (rsync runs with the workdir as its cwd); anchor it
        # so this really is the ABSOLUTE-path variant and not a second relative
        # one that rsync resolves against the destination.
        Scenario("recv:link-dest-abs", setup_with_basis, ["-a"],
                 lambda wd: ["--link-dest=" + os.path.join(os.path.abspath(wd), "basis"), "src/"],
                 "dest/"),
        Scenario("recv:compare-dest-rel", setup_with_basis,
                 ["-a", "--compare-dest=../basis"], ["src/"], "dest/"),
        Scenario("recv:copy-dest-rel", setup_with_basis,
                 ["-a", "--copy-dest=../basis"], ["src/"], "dest/"),
    ]
    return lift_transports(s)
def destshape_sweep():
    """Destination path shapes (symlinked dest dir, --mkpath) that stress the
    receiver's destination-path handling.

    Only these two shapes are emitted; a "parent routed through a symlink"
    shape is not part of this sweep.
    """
    def real_of(dest: Path):
        # Sibling "<dest>_real": the directory the dest symlink points at.
        return Path(str(dest) + "_real")

    def symlinked_dest(dest: Path):
        real = real_of(dest)
        real.mkdir(parents=True, exist_ok=True)
        if not dest.is_symlink():
            os.symlink(real.name, dest)  # dest -> dest_X_real (in-tree)

    return [
        Scenario("dest:symlinked-dir", build_recvtree, ["-a"], ["src/"], "dest/",
                 dest_prep=symlinked_dest, snap_dest=real_of),
        Scenario("dest:mkpath", build_recvtree, ["-a", "--mkpath"], ["src/"],
                 "dest/new/deep/", snap_dest=lambda d: d),
    ]
def name_sweep():
    """Unusual but benign filenames (arg-handling / secluded-args).

    One scenario, "name:weird": each listed name becomes a file holding
    "content <index>\\n", plus a leading-dash file and a symlink to "a space".
    """
    names = ["a space", "two spaces", "café_ünïcode", "semi;colon",
             "dollar$sign", "paren(s)", "quote'name", "amp&and", "back\\slash",
             "newline\nname", "tab\tname", "trailing ", "leaddash"]

    def setup(src: Path):
        payloads = [(name, f"content {idx}\n".encode())
                    for idx, name in enumerate(names)]
        for name, data in payloads:
            _write(src / name, data)
        # a leading-dash file (separate so it can't be mistaken for an option)
        _write(src / "-leadingdash.txt", b"dash\n")
        os.symlink("a space", src / "link to spaced")

    weird = Scenario("name:weird", setup, ["-a"], ["src/"], "dest/")
    return [weird]
def filesfrom_sweep():
    """--files-from with relative & absolute name lists, and --from0.

    The list files live in the workdir, which is also rsync's cwd, so the
    --files-from arg is a BARE filename (cwd-relative) -- referencing it via
    str(wd) double-resolves and silently fails when --workdir is relative, which
    makes the whole scenario a vacuous pass. The absolute list uses src.resolve()
    so the "/" transfer-root case works regardless of workdir.

    Returns the scenarios after passing them through lift_transports().
    """
    def setup(src: Path):
        build_recvtree(src)
        workdir = src.parent
        root = src.resolve()
        relative_names = ("file1.txt", "dir/file2.txt", "slink")
        (workdir / "list_rel.txt").write_text(
            "".join(f"{name}\n" for name in relative_names))
        (workdir / "list_abs.txt").write_text(
            f"{root}/file1.txt\n{root}/dir/file2.txt\n")
        (workdir / "list0.txt").write_bytes(b"file1.txt\0dir/file2.txt\0")

    # (name, opts, list file, transfer root)
    rows = [
        ("ff:rel", ["-a"], "list_rel.txt", "src/"),
        ("ff:rel-R", ["-aR"], "list_rel.txt", "src/"),
        ("ff:abs", ["-a"], "list_abs.txt", "/"),
        ("ff:from0", ["-a", "--from0"], "list0.txt", "src/"),
    ]
    scenarios = [
        Scenario(name, setup, opts, [f"--files-from={listing}", root], "dest/")
        for name, opts, listing, root in rows
    ]
    return lift_transports(scenarios)
def build_privtree(src: Path):
    """Root-only fixture: owned files, special perms, FIFO, devices. Falls back
    gracefully to what the euid can create.

    Starts from build_recvtree(src), sets setuid on "file1.txt" and setgid on
    "dir", then tries a FIFO.  Device nodes and the chown are attempted only
    when running as euid 0; every OSError along the way is ignored.
    """
    build_recvtree(src)
    for rel, mode in (("file1.txt", 0o4755),   # setuid
                      ("dir", 0o2755)):        # setgid
        os.chmod(src / rel, mode)
    try:
        os.mkfifo(src / "fifo")
    except OSError:
        pass
    if os.geteuid() != 0:
        return
    # One try covers both nodes: if the first mknod fails the second is skipped.
    try:
        for node, kind, devno in (("chardev", stat.S_IFCHR, (1, 3)),
                                  ("blockdev", stat.S_IFBLK, (7, 0))):
            os.mknod(src / node, kind | 0o644, os.makedev(*devno))
    except OSError:
        pass
    # chown a file to a different uid/gid if any exists
    try:
        os.chown(src / "dir/file2.txt", 1, 1)
    except OSError:
        pass
def priv_sweep():
    """Run as root (sudo): owner/group, devices/specials, fake-super.

    All scenarios share the build_privtree fixture and transfer "src/" into
    "dest/"; only the option list varies.
    """
    variants = [
        ("archive", ["-a"]),
        ("devices", ["-aD"]),
        ("HD", ["-aHD"]),
        ("numeric-ids", ["-a", "--numeric-ids"]),
        ("fake-super", ["-a", "--fake-super"]),
        ("specials", ["-a", "--specials"]),
        ("acls-xattrs", ["-aAX"]),
    ]
    return [Scenario(f"priv:{label}", build_privtree, opts, ["src/"], "dest/")
            for label, opts in variants]
def intree_sweep():
    """Traverse an IN-TREE dir-symlink as a path component (source via a
    dir-symlink, dest through a dir-symlink, keep-dirlinks, alt-basis via a
    dir-symlink) -- a path-handling shape that varies across builds/platforms."""
    def _dir_with_link(dest: Path, target: str, link: str):
        # dest/<target>/ as a real directory and dest/<link> -> <target>;
        # the symlink is only created when it is not already there.
        dest.mkdir(parents=True, exist_ok=True)
        (dest / target).mkdir(exist_ok=True)
        link_path = dest / link
        if not link_path.is_symlink():
            os.symlink(target, link_path)

    def src_dirlink(src: Path):
        _write(src / "real/f1", b"in real\n")
        _write(src / "real/sub/f2", b"deep\n")
        os.symlink("real", src / "link")

    def dirtree(src: Path):
        for rel, data in (("dir/f1", b"one\n"), ("dir/f2", b"two\n"),
                          ("top.txt", b"top\n")):
            _write(src / rel, data)

    def dest_dirlink_prep(dest: Path):
        _dir_with_link(dest, "real", "link")

    def keepdir_prep(dest: Path):
        _dir_with_link(dest, "realdir", "dir")

    def basis_dirlink(src: Path):
        # Two identical trees (src and wd/realbasis) plus wd/basislink -> realbasis.
        workdir = src.parent
        build_recvtree(src)
        build_recvtree(workdir / "realbasis")
        os.symlink("realbasis", workdir / "basislink")

    scenarios = [
        Scenario("intree:src-via-dirlink", src_dirlink, ["-a"], ["src/link/"], "dest/"),
        Scenario("intree:src-dirlink-noslash", src_dirlink, ["-a"], ["src/link"], "dest/"),
        Scenario("intree:dest-via-dirlink", build_recvtree, ["-a"], ["src/"], "dest/",
                 dest_prep=dest_dirlink_prep,
                 dest_arg=lambda d: str(d / "link") + "/",
                 snap_dest=lambda d: d / "real"),
        Scenario("intree:keep-dirlinks", dirtree, ["-aK"], ["src/"], "dest/",
                 dest_prep=keepdir_prep, snap_dest=lambda d: d / "realdir"),
        Scenario("intree:link-dest-dirlink", basis_dirlink, ["-a"],
                 lambda wd: ["--link-dest=" + str(Path(wd) / "basislink"), "src/"],
                 "dest/"),
        Scenario("intree:compare-dest-dirlink", basis_dirlink, ["-a"],
                 lambda wd: ["--compare-dest=" + str(Path(wd) / "basislink"), "src/"],
                 "dest/"),
    ]
    return scenarios
def intree2_sweep():
    """More in-tree dir-symlink traversal: -k/copy-dirlinks on the source,
    source files under a symlinked PARENT, and -K update/delete through a
    symlinked dest dir."""
    def src_with_dirlink(src: Path):
        for rel, data in (("realdir/a", b"aa\n"), ("realdir/b", b"bb\n"),
                          ("top.txt", b"top\n")):
            _write(src / rel, data)
        os.symlink("realdir", src / "dl")

    def src_symlink_parent(src: Path):
        _write(src / "real/sub/file", b"under symlinked parent\n")
        os.symlink("real", src / "link")

    def kupd_prep(dest: Path):
        # symlinked dest dir whose real target holds an OLDER file + an extra
        dest.mkdir(parents=True, exist_ok=True)
        real = dest / "realdir"
        real.mkdir(exist_ok=True)
        for leaf, data in (("f1", b"OLD\n"), ("extra", b"extra\n")):
            _write(real / leaf, data)
            _ut(real / leaf, T_OLD)
        link = dest / "dir"
        if not link.is_symlink():
            os.symlink("realdir", link)

    def dirtree2(src: Path):
        _write(src / "dir/f1", b"new1\n")
        _write(src / "dir/f2", b"new2\n")
        for rel in ("dir/f1", "dir/f2", "dir"):
            _ut(src / rel)

    snap_real = lambda d: d / "realdir"
    return [
        Scenario("intree2:copy-dirlinks-k", src_with_dirlink, ["-a", "-k"],
                 ["src/"], "dest/"),
        Scenario("intree2:copy-links-L", src_with_dirlink, ["-a", "-L"],
                 ["src/"], "dest/"),
        Scenario("intree2:src-symlink-parent", src_symlink_parent, ["-a"],
                 ["src/link/sub/file"], "dest/"),
        Scenario("intree2:src-symlink-parent-R", src_symlink_parent, ["-aR"],
                 ["src/link/sub/file"], "dest/"),
        Scenario("intree2:src-symlink-parent-dir", src_symlink_parent, ["-a"],
                 ["src/link/sub/"], "dest/"),
        Scenario("intree2:K-update", dirtree2, ["-aK"], ["src/"], "dest/",
                 dest_prep=kupd_prep, snap_dest=snap_real),
        Scenario("intree2:K-delete", dirtree2, ["-aK", "--delete"], ["src/"],
                 "dest/", dest_prep=kupd_prep, snap_dest=snap_real),
    ]
def proto_sweep():
    """Older protocol versions (negotiation / wire-format regressions).

    Crosses protocols 29/30/31 with five option sets over the build_kitchen
    fixture.  The scenario tag joins the non-"-a" options (leading dashes
    removed) with "+", falling back to "a" when only "-a" is given.
    """
    option_sets = (["-a"], ["-aH"], ["-aHS"], ["-az"],
                   ["-a", "--no-inc-recursive"])

    def label(options):
        extras = [o.lstrip("-") for o in options if o != "-a"]
        return "+".join(extras) or "a"

    return [
        Scenario(f"proto{version}:{label(options)}", build_kitchen,
                 options + [f"--protocol={version}"], ["src/"], "dest/")
        for version in (29, 30, 31)
        for options in option_sets
    ]
def combo_sweep():
    """Pairs of options over a stale dest (so update/backup/inplace actually
    fire) -- non-symlink option-interaction regressions.

    Draws from the module-level _COMBO_FLAGS pool (the same one the triple and
    quadruple sweeps use) instead of a private copy of the list, so the three
    sweeps cannot drift apart.  The name lookup happens at call time, so the
    constant being defined later in the file is fine.
    """
    import itertools
    return [Scenario(f"combo:{x},{y}", build_recvtree, ["-a", x, y],
                     ["src/"], "dest/", pre_dest=stale_dest)
            for x, y in itertools.combinations(_COMBO_FLAGS, 2)]
def scale_sweep():
    """Content scale: many small files, deep nesting, a large file."""
    def many(src: Path):
        # 500 small files spread over 12 directories d0..d11.
        for idx in range(500):
            bucket = src / f"d{idx % 12}"
            _write(bucket / f"f{idx:04d}", f"file {idx}\n".encode())

    def deep(src: Path):
        # d0/d1/.../d39/leaf, plus one file at the top.
        bottom = src.joinpath(*(f"d{level}" for level in range(40)))
        _write(bottom / "leaf", b"deep\n")
        _write(src / "shallow", b"s\n")

    def big(src: Path):
        # 3,000,000 bytes of the repeating pattern (i * 7) & 0xFF.
        payload = bytes((i * 7) & 0xFF for i in range(3_000_000))
        _write(src / "big.bin", payload)
        _write(src / "small", b"x\n")

    rows = [
        ("scale:many", many, ["-a"]),
        ("scale:many-H", many, ["-aH"]),
        ("scale:deep", deep, ["-a"]),
        ("scale:big", big, ["-a"]),
        ("scale:big-inplace", big, ["-a", "--inplace", "--no-whole-file"]),
        ("scale:big-z", big, ["-az"]),
    ]
    return [Scenario(name, setup, opts, ["src/"], "dest/")
            for name, setup, opts in rows]
def _daemon_scns(chroot):
    """PUSH scenarios to a [m] daemon module (the daemon receiver path).

    chroot is stored as the "chroot" entry of the daemon dict shared by every
    scenario; the name prefix is "daemonchroot" when it equals "yes" and
    "daemon" otherwise.  No destination argument is passed to Scenario here.
    """
    daemon_cfg = {"chroot": chroot}
    prefix = "daemon"
    if chroot == "yes":
        prefix = "daemonchroot"

    def kt(src: Path):
        for rel, data in (("dir/f1", b"n1\n"), ("dir/f2", b"n2\n"), ("top", b"t\n")):
            _write(src / rel, data)

    def kprep(dest: Path):
        dest.mkdir(parents=True, exist_ok=True)
        (dest / "realdir").mkdir(exist_ok=True)
        link = dest / "dir"
        if not link.is_symlink():
            os.symlink("realdir", link)

    return [
        Scenario(f"{prefix}:push", build_recvtree, ["-a"], ["src/"], daemon=daemon_cfg),
        Scenario(f"{prefix}:push-H", build_recvtree, ["-aH"], ["src/"], daemon=daemon_cfg),
        Scenario(f"{prefix}:push-X", build_recvtree, ["-aX"], ["src/"], daemon=daemon_cfg),
        Scenario(f"{prefix}:push-update", build_recvtree, ["-a"], ["src/"],
                 pre_dest=stale_dest, daemon=daemon_cfg),
        Scenario(f"{prefix}:push-delete", build_recvtree, ["-a", "--delete"],
                 ["src/"], pre_dest=stale_dest, daemon=daemon_cfg),
        Scenario(f"{prefix}:push-K-symlinkdir", kt, ["-aK"], ["src/"],
                 dest_prep=kprep, snap_dest=lambda d: d / "realdir", daemon=daemon_cfg),
    ]
def daemon_sweep():
    """Daemon push scenarios with the daemon's chroot setting "no"."""
    scenarios = _daemon_scns("no")
    return scenarios
def daemonchroot_sweep():
    """use chroot = yes (root-only): the daemon chroots into the module path."""
    scenarios = _daemon_scns("yes")
    return scenarios
def gaps_sweep():
    """Scenarios observable mainly via the stdout/stderr/itemize signals rather
    than the dest tree: --list-only listings, --dry-run plans (incl.
    --mkpath+--dry-run file-to-file), and type-change updates."""
    def t_tree(src):
        _mk_reg(src / "f", 100)
        _mk_reg(src / "dir/g", 50)
        os.symlink("f", src / "l")
        _ut(src / "dir")

    def t_one(src):
        _mk_reg(src / "file", 50)

    def t_typesrc(src):
        # src: x is a FILE
        for name, size in (("x", 30), ("keep", 10)):
            _mk_reg(src / name, size)

    def pre_typedir(dest):
        # dest: x is a DIR (different type)
        as_dir = dest / "x"
        os.makedirs(as_dir)
        _mk_reg(as_dir / "inner", 5)
        _ut(as_dir)

    listing = Scenario("gap:list-only", t_tree, ["--list-only", "-a"], ["src/"], "dest/")
    dry = Scenario("gap:dry-run", t_tree, ["-ai", "--dry-run"], ["src/"], "dest/")
    mkpath_dry = Scenario("gap:mkpath-dryrun-f2f", t_one, ["-ai", "--mkpath", "--dry-run"],
                          ["src/file"], "dest/", dest_arg=lambda d: str(d) + "/newdir/file")
    dry_delete = Scenario("gap:dry-run-delete", build_recvtree, ["-ai", "--dry-run", "--delete"],
                          ["src/"], "dest/", pre_dest=stale_dest)
    type_changes = [
        Scenario(f"gap:typechange-{flag}", t_typesrc, ["-a", f"--{flag}"], ["src/"], "dest/",
                 pre_dest=pre_typedir)
        for flag in ("force", "delete")
    ]
    return [listing, dry, mkpath_dry, dry_delete] + type_changes
def misc_sweep():
    """Genuinely-untested subsystems: filter/include-exclude rules, per-dir merge,
    CVS-exclude, fuzzy, write-batch, xattr, -R dot-anchoring -- looking for NEW
    root-cause families beyond the daemon-symlink cluster."""
    def t_filter(src):
        _mk_reg(src / "keep.txt", 10)
        _mk_reg(src / "skip.log", 10)
        _mk_reg(src / "sub/keep2.txt", 10)
        _mk_reg(src / "sub/skip2.log", 10)
        _ut(src / "sub")

    def t_dirmerge(src):
        _mk_reg(src / "a.txt", 10)
        _mk_reg(src / "b.log", 10)
        _write(src / ".rsync-filter", b"- *.log\n")
        _ut(src / ".rsync-filter")

    def t_cvs(src):
        _mk_reg(src / "keep", 10)
        _mk_reg(src / "core", 10)
        _mk_reg(src / "obj.o", 10)

    def t_fuzzy(src):
        _mk_reg(src / "file.txt", 5000, t=T_NEW)

    def pre_fuzzy(dest):
        _mk_reg(dest / "file.txt.bak", 5000, t=T_OLD, fill=7)

    def t_xattr(src):
        _mk_reg(src / "f", 10)
        # Best effort: the file is still transferred without the attribute.
        # OSError covers filesystems/users that refuse user.* xattrs;
        # AttributeError covers Python builds without os.setxattr (it is
        # only provided on Linux), which would otherwise abort the setup.
        try:
            os.setxattr(src / "f", "user.test", b"val")
        except (AttributeError, OSError):
            pass

    def t_reldot(src):
        _mk_reg(src / "sub/deep/f", 10)
        _ut(src / "sub/deep")
        _ut(src / "sub")

    return [
        Scenario("misc:exclude", t_filter, ["-a", "--exclude=*.log"], ["src/"], "dest/"),
        Scenario("misc:filter-rule", t_filter, ["-a", "-f", "- *.log"], ["src/"], "dest/"),
        Scenario("misc:filter-incl", t_filter,
                 ["-a", "-f", "+ */", "-f", "+ *.txt", "-f", "- *"], ["src/"], "dest/"),
        Scenario("misc:dirmerge-F", t_dirmerge, ["-a", "-F"], ["src/"], "dest/"),
        Scenario("misc:cvs-C", t_cvs, ["-a", "-C"], ["src/"], "dest/"),
        Scenario("misc:fuzzy", t_fuzzy, ["-a", "--fuzzy"], ["src/"], "dest/", pre_dest=pre_fuzzy),
        Scenario("misc:write-batch", build_recvtree,
                 (lambda wd, dest: ["-a", f"--write-batch={wd}/batch"]), ["src/"], "dest/"),
        Scenario("misc:xattr", t_xattr, ["-aX"], ["src/"], "dest/"),
        Scenario("misc:relative-dot", t_reldot, ["-aR"], ["src/./sub/deep/f"], "dest/"),
    ]
def daemon_sym_sweep():
    """PUSH symlink/dirlink/keep-dirlinks scenarios to a daemon module -- the
    daemon receiver/keep-dirlinks path. Enumerates the family: -K through a
    symlinked dest dir (plain/nested/update/delete), and -L/-k/-l/--safe-links/
    --munge-links/--copy-unsafe-links of in-tree and out-of-tree symlinks pushed
    into a module."""
    daemon_cfg = {"chroot": "no"}

    def _link_dir(dest):
        # dest/dir -> realdir, created only when not already a symlink.
        link = dest / "dir"
        if not link.is_symlink():
            os.symlink("realdir", link)

    def kt(src):
        for rel, size in (("dir/f1", 10), ("dir/f2", 10), ("top", 5)):
            _mk_reg(src / rel, size)

    def kt_nested(src):
        _mk_reg(src / "dir/sub/f", 10)
        _mk_reg(src / "top", 5)

    def kprep(dest):
        dest.mkdir(parents=True, exist_ok=True)
        (dest / "realdir").mkdir(exist_ok=True)
        _link_dir(dest)

    def kprep_nested(dest):
        dest.mkdir(parents=True, exist_ok=True)
        (dest / "realdir" / "sub").mkdir(parents=True, exist_ok=True)
        _link_dir(dest)

    def kprep_update(dest):
        kprep(dest)
        _mk_reg(dest / "realdir" / "f1", 3, t=T_OLD)

    def kprep_delete(dest):
        kprep(dest)
        _mk_reg(dest / "realdir" / "extra", 3)

    def src_dirlink(src):
        _mk_reg(src / "rd/inner", 10)
        os.symlink("rd", src / "dl")
        _mk_reg(src / "anchor", 5)
        _ut(src / "rd")

    def src_abslink(src):
        os.symlink("/etc/hostname", src / "abs")
        _mk_reg(src / "anchor", 5)

    rd = lambda d: d / "realdir"
    # -K through a symlinked dest dir: (name, setup, opts, dest_prep)
    keep_rows = [
        ("dsym:K-dirlink", kt, ["-aK"], kprep),
        ("dsym:K-nested", kt_nested, ["-aK"], kprep_nested),
        ("dsym:K-update", kt, ["-aK"], kprep_update),
        ("dsym:K-delete", kt, ["-aK", "--delete"], kprep_delete),
    ]
    # symlinks in the pushed source: (name, setup, opts)
    link_rows = [
        ("dsym:L-src-dirlink", src_dirlink, ["-aL"]),
        ("dsym:k-src-dirlink", src_dirlink, ["-ak"]),
        ("dsym:l-src", src_dirlink, ["-al"]),
        ("dsym:safe-links", src_abslink, ["-a", "--safe-links"]),
        ("dsym:munge", src_abslink, ["-a", "--munge-links"]),
        ("dsym:copy-unsafe", src_abslink, ["-a", "--copy-unsafe-links"]),
    ]
    scenarios = [
        Scenario(name, setup, opts, ["src/"], "dest/",
                 dest_prep=prep, snap_dest=rd, daemon=daemon_cfg)
        for name, setup, opts, prep in keep_rows
    ]
    scenarios.extend(
        Scenario(name, setup, opts, ["src/"], "dest/", daemon=daemon_cfg)
        for name, setup, opts in link_rows
    )
    return scenarios
def daemon_escape_sweep():
    """Daemon following symlinks that point OUTSIDE the module (absolute, or ../
    escape), via -L / --copy-links / --copy-unsafe-links / --safe-links, on both
    the sender (pull) and receiver (push) side -- the daemon symlink-safety
    behaviour."""
    pull_cfg = {"chroot": "no", "pull": True}
    push_cfg = {"chroot": "no"}

    def s_abs(src):
        os.symlink("/etc/hostname", src / "abslnk")
        _mk_reg(src / "anchor", 5)

    def s_escape(src):
        # "secret" lives outside the module (wd/secret); "esc" reaches it via ../
        _mk_reg(src.parent / "secret", 7)
        os.symlink("../secret", src / "esc")
        _mk_reg(src / "anchor", 5)

    def s_filelink(src):
        _mk_reg(src / "real", 10)
        os.symlink("real", src / "fl")  # in-tree symlink to a FILE
        _mk_reg(src / "anchor", 5)

    def s_absdir(src):
        outside = src.parent / "outdir"  # small out-of-module dir (wd/outdir)
        for leaf in ("x", "y"):
            _mk_reg(outside / leaf, 8)
        os.symlink(str(outside), src / "extdir")  # absolute symlink to out-of-module DIR
        _mk_reg(src / "anchor", 5)

    def build(rows, cfg):
        return [Scenario(name, setup, opts, ["src/"], "dest/", daemon=cfg)
                for name, setup, opts in rows]

    pull_rows = [
        ("dpull:L-abs", s_abs, ["-aL"]),
        ("dpull:L-escape", s_escape, ["-aL"]),
        ("dpull:copyunsafe-escape", s_escape, ["-a", "--copy-unsafe-links"]),
        ("dpull:L-filelink", s_filelink, ["-aL"]),
        ("dpull:safe-escape", s_escape, ["-a", "--safe-links"]),
        ("dpull:copylinks-abs", s_abs, ["-a", "--copy-links"]),
        ("dpull:L-absdir", s_absdir, ["-aL"]),
        ("dpull:k-absdir", s_absdir, ["-ak"]),
        ("dpull:copydirlinks-absdir", s_absdir, ["-a", "--copy-dirlinks"]),
        ("dpull:copyunsafe-absdir", s_absdir, ["-a", "--copy-unsafe-links"]),
    ]
    push_rows = [
        ("dpush:L-abs", s_abs, ["-aL"]),
        ("dpush:L-escape", s_escape, ["-aL"]),
        ("dpush:copyunsafe-abs", s_abs, ["-a", "--copy-unsafe-links"]),
        ("dpush:copyunsafe-escape", s_escape, ["-a", "--copy-unsafe-links"]),
        ("dpush:L-filelink", s_filelink, ["-aL"]),
    ]
    return build(pull_rows, pull_cfg) + build(push_rows, push_cfg)
def daemon_pull_sym_sweep():
    """PULL symlink/dirlink scenarios FROM a daemon module (the daemon SENDER
    side -- untested until now). Served source contains the symlinks; the client
    pulls with -L/-k/-l/-K/--safe-links/--munge-links/--copy-unsafe-links."""
    daemon_cfg = {"chroot": "no", "pull": True}

    def served_dirlink(src):
        _mk_reg(src / "rd/inner", 10)
        os.symlink("rd", src / "dl")
        _mk_reg(src / "anchor", 5)
        _ut(src / "rd")

    def served_intree(src):
        _mk_reg(src / "real", 10)
        os.symlink("real", src / "lnk")
        _mk_reg(src / "anchor", 5)

    def served_abslink(src):
        os.symlink("/etc/hostname", src / "abs")
        _mk_reg(src / "anchor", 5)

    def served_dir(src):
        for rel, size in (("dir/f1", 10), ("dir/f2", 10), ("top", 5)):
            _mk_reg(src / rel, size)

    def kprep(dest):
        dest.mkdir(parents=True, exist_ok=True)
        (dest / "realdir").mkdir(exist_ok=True)
        link = dest / "dir"
        if not link.is_symlink():
            os.symlink("realdir", link)

    # (name suffix, setup, opts, extra Scenario keyword arguments)
    plain = {}
    rows = [
        ("plain", build_recvtree, ["-a"], plain),
        ("L-dirlink", served_dirlink, ["-aL"], plain),
        ("k-dirlink", served_dirlink, ["-ak"], plain),
        ("l", served_dirlink, ["-al"], plain),
        ("L-intree", served_intree, ["-aL"], plain),
        ("safe-links", served_abslink, ["-a", "--safe-links"], plain),
        ("munge", served_abslink, ["-a", "--munge-links"], plain),
        ("copy-unsafe", served_abslink, ["-a", "--copy-unsafe-links"], plain),
        ("K-dest-dirlink", served_dir, ["-aK"],
         {"dest_prep": kprep, "snap_dest": lambda d: d / "realdir"}),
        ("hardlinks", build_recvtree, ["-aH"], plain),
    ]
    return [
        Scenario(f"dpull:{suffix}", setup, opts, ["src/"], "dest/",
                 daemon=daemon_cfg, **extra)
        for suffix, setup, opts, extra in rows
    ]
# Pool of 16 single options that combo3_sweep / combo4_sweep combine (each
# combination is run together with "-a").
_COMBO_FLAGS = [
    "-H", "-S", "--inplace", "-z",
    "-c", "-b", "-O", "-J",
    "--numeric-ids", "-A", "-X", "-E",
    "--no-whole-file", "-I", "--size-only", "-u",
]
def combo3_sweep():
    """Option TRIPLES over a stale dest -- deeper interaction coverage.

    One scenario per 3-element combination of _COMBO_FLAGS, each run with
    "-a" plus the three flags.
    """
    import itertools
    scenarios = []
    for triple in itertools.combinations(_COMBO_FLAGS, 3):
        x, y, z = triple
        scenarios.append(Scenario(f"combo3:{x},{y},{z}", build_recvtree,
                                  ["-a", x, y, z], ["src/"], "dest/",
                                  pre_dest=stale_dest))
    return scenarios
def combo4_sweep():
    """Option QUADRUPLES over a stale dest (C(16,4)=1820).

    One scenario per 4-element combination of _COMBO_FLAGS, each run with
    "-a" plus the four flags.
    """
    import itertools
    scenarios = []
    for quad in itertools.combinations(_COMBO_FLAGS, 4):
        w, x, y, z = quad
        scenarios.append(Scenario(f"combo4:{w},{x},{y},{z}", build_recvtree,
                                  ["-a", w, x, y, z], ["src/"], "dest/",
                                  pre_dest=stale_dest))
    return scenarios
def ssh_sweep():
    """PUSH over a remote-shell split (support/lsh.sh) -- exercises the real
    client+server processes / protocol / arg passing, and confirms whether the
    in-tree-symlink regressions also manifest over the wire.

    Every scenario is created with ssh=True and without a destination argument.
    """
    def kt(src: Path):
        for rel, data in (("dir/f1", b"n1\n"), ("dir/f2", b"n2\n"), ("top", b"t\n")):
            _write(src / rel, data)

    def kprep(dest: Path):
        dest.mkdir(parents=True, exist_ok=True)
        (dest / "realdir").mkdir(exist_ok=True)
        link = dest / "dir"
        if not link.is_symlink():
            os.symlink("realdir", link)

    def sym_parent(src: Path):
        _write(src / "real/sub/file", b"under symlinked parent\n")
        os.symlink("real", src / "link")

    # Plain pushes of the recv tree into an empty destination: (suffix, opts).
    fresh = [Scenario(f"ssh:{suffix}", build_recvtree, opts, ["src/"], ssh=True)
             for suffix, opts in (("push", ["-a"]), ("push-H", ["-aH"]),
                                  ("push-X", ["-aX"]), ("push-z", ["-az"]))]
    # Pushes over a stale destination.
    updates = [Scenario(f"ssh:{suffix}", build_recvtree, opts, ["src/"],
                        pre_dest=stale_dest, ssh=True)
               for suffix, opts in (("push-update", ["-a"]),
                                    ("push-delete", ["-a", "--delete"]))]
    special = [
        Scenario("ssh:push-protect-args", build_recvtree, ["-a", "-s"], ["src/"],
                 ssh=True),
        Scenario("ssh:K-symlinkdir", kt, ["-aK"], ["src/"], dest_prep=kprep,
                 snap_dest=lambda d: d / "realdir", ssh=True),
        Scenario("ssh:R-symlink-parent", sym_parent, ["-aR"],
                 ["src/link/sub/file"], ssh=True),
    ]
    return fresh + updates + special
def redo_sweep():
    """Return the resume / redo scenarios.

    A partial or corrupted prior destination forces the delta + verify +
    resume path (inplace / append-verify / partial-dir, the latter both
    relative AND absolute). This is where the 'failed verification, update
    discarded' loop and the discard-path NULL-deref live. All but the
    absolute-partial-dir case are lifted across transports.
    """
    def big(src: Path):
        _mk_reg(src / "f", 120000, t=T_NEW, fill=7)
        _mk_reg(src / "keep", 200, t=T_NEW, fill=3)

    def pre_truncated(dest: Path):
        # a shorter, older prefix of f -> the transfer has to extend it
        _mk_reg(dest / "f", 40000, t=T_OLD, fill=7)

    def pre_corrupt(dest: Path):
        # same size, WRONG content -> delta + verify
        _mk_reg(dest / "f", 120000, t=T_OLD, fill=200)

    def pre_abs_partial(dest: Path):
        # corrupt dest plus a stale leftover in an ABSOLUTE partial-dir
        # (delta-resume shape); the dir is a sibling named "<dest>_part"
        pre_corrupt(dest)
        leftover_dir = dest.with_name(dest.name + "_part")
        leftover_dir.mkdir(parents=True, exist_ok=True)
        _mk_reg(leftover_dir / "f", 60000, t=T_OLD, fill=7)

    def abs_part(wd, dest):
        # callable opts: the partial-dir path depends on this run's dest
        target = Path(dest)
        return ["-a", "--no-whole-file", "--partial",
                f"--partial-dir={target.parent}/{target.name}_part"]

    liftable = [
        ("redo:inplace-corrupt", ["-a", "--inplace", "--no-whole-file"], pre_corrupt),
        ("redo:append-verify", ["-a", "--append-verify"], pre_truncated),
        ("redo:append", ["-a", "--append"], pre_truncated),
        ("redo:partialdir-rel",
         ["-a", "--no-whole-file", "--partial", "--partial-dir=.part"], pre_corrupt),
        ("redo:checksum-corrupt", ["-a", "-c", "--no-whole-file"], pre_corrupt),
    ]
    # resume path over the wire too
    scenarios = lift_transports([
        Scenario(name, big, opts, ["src/"], "dest/", pre_dest=prep)
        for name, opts, prep in liftable
    ])
    # absolute partial-dir uses callable opts (not liftable) -> keep local
    scenarios.append(Scenario("redo:partialdir-abs", big, abs_part, ["src/"],
                              "dest/", pre_dest=pre_abs_partial))
    return scenarios
def typetrans_sweep():
    """Return the type-transition scenarios, lifted across transports.

    The pre-existing dest entry `x` has a DIFFERENT type than the source `x`
    (file/dir/symlink/fifo), crossed with the selection options that decide
    whether it gets replaced. Generalises 'update skips a file of a different
    type'; the stale-dest sweeps only ever vary content, never type.
    """
    def src_file(src: Path):
        _mk_reg(src / "x", 60, t=T_NEW)
        _mk_reg(src / "keep", 10, t=T_NEW)

    def src_dir(src: Path):
        _mk_reg(src / "x/inner", 30, t=T_NEW)
        _mk_reg(src / "keep", 10, t=T_NEW)
        _ut(src / "x")

    def src_link(src: Path):
        _mk_reg(src / "real", 30, t=T_NEW)
        os.symlink("real", src / "x")
        _mk_reg(src / "keep", 10, t=T_NEW)

    # Dest entries are pinned NEWER than the source so the -u update-skip path
    # is really exercised across the type change, and so special-file mtimes
    # are deterministic (unpinned fifos/symlinks would carry wall-clock
    # creation time -> spurious A/B mtime diffs).
    T_NEWER = T_NEW + 10**7
    pinned = (T_NEWER, T_NEWER)

    def d_dir(dest: Path):
        os.makedirs(dest / "x")
        _mk_reg(dest / "x/old", 5, t=T_NEWER)
        _ut(dest / "x", T_NEWER)

    def d_file(dest: Path):
        _mk_reg(dest / "x", 99, t=T_NEWER, fill=200)

    def d_link(dest: Path):
        os.symlink("keep", dest / "x")
        os.utime(dest / "x", pinned, follow_symlinks=False)

    def d_fifo(dest: Path):
        _mk_fifo(dest / "x")
        os.utime(dest / "x", pinned)

    srcs = [("file", src_file), ("dir", src_dir), ("link", src_link)]
    dests = [("Ddir", d_dir), ("Dfile", d_file), ("Dlink", d_link), ("Dfifo", d_fifo)]
    opts = [("plain", ["-a"]), ("update", ["-a", "-u"]),
            ("existing", ["-a", "--existing"]),
            ("ignore-existing", ["-a", "--ignore-existing"]),
            ("force", ["-a", "--force"]), ("delete", ["-a", "--delete"])]

    scenarios = []
    for src_name, src_setup in srcs:
        for dest_name, dest_setup in dests:
            # same-type pairs (file/Dfile, dir/Ddir, link/Dlink) are not a
            # transition; the check does not depend on the option, so it is
            # made once per pair
            if dest_name == "D" + src_name:
                continue
            scenarios.extend(
                Scenario(f"type:{src_name}-vs-{dest_name}-{opt_name}", src_setup,
                         opt_list, ["src/"], "dest/", pre_dest=dest_setup)
                for opt_name, opt_list in opts)
    return lift_transports(scenarios)
def tsprec_sweep():
    """Return the timestamp-precision scenarios.

    Source mtimes sit at sub-second (nanosecond) boundaries and are crossed
    with the time-handling options. This reaches the nsec validate/convert
    path that integer-second fixtures never hit. Regressions surface through
    rc/error/content/itemize -- the snapshot records whole-second mtimes, so
    this is a code-path exerciser.
    """
    NS = [0, 1, 4999, 500000000, 999999999, 999999001]

    def mk(src: Path, nsec):
        # one file at top level and one inside d/, both stamped T_NEW + nsec
        stamp = T_NEW * 10**9 + nsec
        _mk_reg(src / "f", 100)
        os.utime(src / "f", ns=(stamp, stamp))
        os.makedirs(src / "d", exist_ok=True)
        _mk_reg(src / "d/g", 50)
        os.utime(src / "d/g", ns=(stamp, stamp))

    def at(nsec):
        # bind nsec now, so each scenario keeps its own value
        def setup(src):
            mk(src, nsec)
        return setup

    scenarios = [Scenario(f"ts:nsec{nsec}", at(nsec), ["-a"], ["src/"], "dest/")
                 for nsec in NS]
    for name, nsec, flag in (("ts:modwindow", 999999999, "--modify-window=1"),
                             ("ts:atimes", 123456789, "--atimes"),
                             ("ts:crtimes", 123456789, "--crtimes")):
        scenarios.append(Scenario(name, at(nsec), ["-a", flag], ["src/"], "dest/"))
    return scenarios
def bigscale_sweep():
    """Return the scale-escalation scenarios, sized by SCALE_N (--scale).

    Many empty dirs / many files / nested dirs. Meant to pair with the --cost
    peak-RSS oracle: a per-entry allocation/footprint regression only shows
    up at scale and is invisible to the functional outcome alone.
    """
    n = max(1, SCALE_N)

    def emptydirs(src: Path):
        for name in (f"d{i:07d}" for i in range(n)):
            (src / name).mkdir()

    def manyfiles(src: Path):
        # files are spread over 64 bucket directories b00..b63
        for i in range(n):
            bucket = f"b{i % 64:02d}"
            _write(src / bucket / f"f{i:07d}", b"x\n")

    def deepdirs(src: Path):
        # three levels: a00..a49 / b00..b49 / one leaf directory per index
        for i in range(n):
            leaf = src / f"a{i % 50:02d}" / f"b{(i // 50) % 50:02d}" / f"c{i:07d}"
            leaf.mkdir(parents=True)

    table = [
        ("scale:emptydirs", emptydirs, ["-a"]),
        ("scale:emptydirs-nir", emptydirs, ["-a", "--no-inc-recursive"]),
        ("scale:manyfiles", manyfiles, ["-a"]),
        ("scale:deepdirs", deepdirs, ["-a"]),
    ]
    return [Scenario(name, setup, opts, ["src/"], "dest/")
            for name, setup, opts in table]
def rrsync_sweep():
    """Return the rrsync-lane scenarios.

    The remote side goes through the restricted rrsync wrapper
    (subdir-restricted, so its option/path validation is exercised) -- a
    subsystem the other lanes never drive. Covers `-a` and an `-rlpt`
    control, push and pull. rrsync ships per version, so each build is paired
    with its own rrsync via --rrsync-a/--rrsync-b.
    """
    def tree(src: Path):
        for rel, size in (("dir/f1", 50), ("dir/f2", 50), ("top", 20)):
            _mk_reg(src / rel, size)
        os.symlink("top", src / "lnk")
        _ut(src / "dir")

    def abslink(src: Path):
        _mk_reg(src / "anchor", 10)
        os.symlink("/etc/hostname", src / "abs")

    def mk(name, setup, opts, pull=False, pre_dest=None):
        # the rrsync attribute is what routes the scenario onto this lane
        scenario = Scenario(name, setup, opts, ["src/"], "dest/", pre_dest=pre_dest)
        scenario.rrsync = {"pull": pull}
        return scenario

    recvtree = build_recvtree
    pushes = [
        mk("rr:push-a", recvtree, ["-a"]),           # D-bundled -> subdir deny decides
        mk("rr:push-rlpt", tree, ["-rlpt"]),         # no-D control: should transfer
        mk("rr:push-rlptD", tree, ["-rlptD"]),       # explicit D into restricted subdir
        mk("rr:push-update", recvtree, ["-a"], pre_dest=stale_dest),
        mk("rr:push-copy-unsafe", abslink, ["-rlpt", "--copy-unsafe-links"]),
    ]
    pulls = [
        mk("rr:pull-a", recvtree, ["-a"], pull=True),
        mk("rr:pull-rlpt", tree, ["-rlpt"], pull=True),
        mk("rr:pull-symlink", tree, ["-rlpt"], pull=True),
    ]
    return pushes + pulls
def tcpdaemon_sweep():
    """Return the real-TCP-daemon scenarios.

    A genuine `rsync --daemon` on a bound port (greeting / handshake / socket
    path) plus an auth variant (challenge-response) -- the daemon code the
    stdio-pipe lane bypasses. Push + pull, with and without auth.
    """
    def mk(name, opts, pull=False, auth=False, pre_dest=None):
        # daemon["tcp"] selects the bound-port lane when the scenario is run
        scenario = Scenario(name, build_recvtree, opts, ["src/"], "dest/",
                            pre_dest=pre_dest)
        scenario.daemon = {"tcp": True, "pull": pull, "auth": auth}
        return scenario

    table = [
        ("tcp:push", ["-a"], {}),
        ("tcp:push-H", ["-aH"], {}),
        ("tcp:push-X", ["-aX"], {}),
        ("tcp:push-update", ["-a"], {"pre_dest": stale_dest}),
        ("tcp:push-delete", ["-a", "--delete"], {"pre_dest": stale_dest}),
        ("tcp:pull", ["-a"], {"pull": True}),
        ("tcp:pull-H", ["-aH"], {"pull": True}),
        ("tcp:push-auth", ["-a"], {"auth": True}),
        ("tcp:pull-auth", ["-a"], {"pull": True, "auth": True}),
    ]
    return [mk(name, opts, **extra) for name, opts, extra in table]
# Sweep name (as accepted by --sweep) -> function returning that sweep's scenarios.
SWEEPS = {
    "options": options_sweep,
    "pathshape": pathshape_sweep,
    "recv": recv_sweep,
    "destshape": destshape_sweep,
    "name": name_sweep,
    "filesfrom": filesfrom_sweep,
    "intree": intree_sweep,
    "intree2": intree2_sweep,
    "proto": proto_sweep,
    "combo": combo_sweep,
    "combo3": combo3_sweep,
    "combo4": combo4_sweep,
    "scale": scale_sweep,
    "ssh": ssh_sweep,
    "daemon": daemon_sweep,
    "daemonchroot": daemonchroot_sweep,
    "mode": mode_sweep,
    "size": size_sweep,
    "filetype": filetype_sweep,
    "selection": selection_sweep,
    "behavior": behavior_sweep,
    "placement": placement_sweep,
    "wire": wire_sweep,
    "pairwise": pairwise_sweep,
    "daemonsym": daemon_sym_sweep,
    "daemonpull": daemon_pull_sym_sweep,
    "daemonesc": daemon_escape_sweep,
    "misc": misc_sweep,
    "gaps": gaps_sweep,
    "redo": redo_sweep,
    "typetrans": typetrans_sweep,
    "tsprec": tsprec_sweep,
    "bigscale": bigscale_sweep,
    "rrsync": rrsync_sweep,
    "tcpdaemon": tcpdaemon_sweep,
    "priv": priv_sweep,
}
# "all" leaves out the root-only sweeps (priv, daemonchroot) and the very large
# combo4; run those explicitly. Parallelism (-j) makes the broad benign set
# (incl. the daemon symlink/escape + misc/gaps sweeps) affordable by default;
# combo4 stays out of a single pass (the --loop ladder reaches order 4 anyway).
# bigscale also stays out of the default pass (heavy + meant for --cost runs);
# reach it via --sweep bigscale --cost --scale N.
ALL_SWEEPS = [
    "options", "pathshape", "recv", "destshape", "name", "filesfrom",
    "intree", "intree2", "proto", "combo", "combo3", "scale", "ssh",
    "daemon", "mode", "size", "filetype", "selection", "behavior",
    "placement", "wire", "pairwise", "daemonsym", "daemonpull",
    "daemonesc", "misc", "gaps", "redo", "typetrans", "tsprec",
    "rrsync", "tcpdaemon",
]
def _compare(a, b, has_times=True, ign_types=(), incl_item=True, incl_lit=True,
             incl_out=False, incl_err=False):
    """Return the list of issue strings for one run result vs another.

    Used for A-vs-B and for the per-binary stability check. An empty list
    means the two results are indistinguishable on the requested signals.

    a, b       -- result dicts; reads the keys "rc", "err", "lit", "snap",
                  "item" and, optionally, "errn" / "out".
    has_times  -- when False, snapshot mtimes are ignored altogether.
    ign_types  -- entry types whose mtime diff_snapshots() should ignore.
    incl_item / incl_lit / incl_out / incl_err
               -- include the itemize / Literal-data / normalised-stdout /
                  normalised-stderr-text signal. Callers drop these when the
                  signal is itself nondeterministic, or for content-only checks.

    The exit code, the "stderr contains an error marker" flag and the
    snapshot diff are always compared.
    """
    issues = []
    if a["rc"] != b["rc"]:
        issues.append(f" exit: A={a['rc']} B={b['rc']}")
    a_err = any(m in a["err"] for m in ERR_MARKERS)
    b_err = any(m in b["err"] for m in ERR_MARKERS)
    if a_err != b_err:
        # Show BOTH excerpts: when only B carries the error marker, A's stderr
        # alone (possibly empty) says nothing about what actually differed.
        issues.append(f" stderr-error: A={a_err!r} B={b_err!r}"
                      f"\n A:{a['err'][:300]}\n B:{b['err'][:300]}")
    if incl_lit and a["lit"] != b["lit"]:
        issues.append(f" Literal-data: A={a['lit']} B={b['lit']}")
    issues += diff_snapshots(a["snap"], b["snap"], ignore_mtime=not has_times,
                             ignore_mtime_types=ign_types)
    if incl_item and a["item"] != b["item"]:
        issues.append(" itemize differs:\n A=" + repr(a["item"])
                      + "\n B=" + repr(b["item"]))
    if incl_err and a.get("errn", "") != b.get("errn", ""):
        issues.append(" stderr-text differs:\n A:" + a.get("errn", "")[:300]
                      + "\n B:" + b.get("errn", "")[:300])
    if incl_out and a.get("out", "") != b.get("out", ""):
        issues.append(" stdout differs:\n A:" + a.get("out", "")[:300]
                      + "\n B:" + b.get("out", "")[:300])
    return issues
def run_scenario(scn: Scenario, workroot: Path):
    """Run one scenario with both binaries and classify the outcome.

    The source tree is built once under workroot/<sanitised scenario name>/src
    and each binary transfers it into its own dest_<tag><rep> directory.

    Returns a (status, detail) tuple where status is one of:
      "SKIP"    -- a binary lacks one of the long options (detail: reason string)
      "FLAKY"   -- a binary's own repeats disagree on content (detail: issue list)
      "OK"      -- A and B indistinguishable (detail: empty list)
      "ALLOW"   -- the divergence matches an ALLOWLIST entry (detail: note lines)
      "ABETTER" -- A succeeds where B fails (detail: note lines)
      "DIFF"    -- regression candidate (detail: issue list)

    The work directory is removed unless KEEP is set or issues were found.
    """
    wd = workroot / scn.name.replace("/", "_").replace(":", "_").replace("+", "_")
    if wd.exists():
        shutil.rmtree(wd, ignore_errors=True)

    # opts may be a list, or a callable(wd, dest)->list for placement options
    # whose aux dir is workdir/dest-relative (dest differs per A/B tag). Resolve
    # a representative copy (dest_A) for flag inspection; resolve per-tag below.
    def resolve_opts(dest):
        return scn.opts(str(wd), str(dest)) if callable(scn.opts) else scn.opts

    opts0 = resolve_opts(wd / "dest_A")
    # skip if either binary lacks an option (can't A/B compare)
    for o in opts0:
        flag = o.split("=", 1)[0]
        if flag.startswith("--"):
            if not (supports(RSYNC_A, flag) and supports(RSYNC_B, flag)):
                return ("SKIP", f"a binary lacks {flag}")

    # build the source ONCE so both binaries see identical input (incl. mtimes)
    src = wd / "src"
    src.mkdir(parents=True, exist_ok=True)
    scn.setup(src)
    src_args = scn.src_args(str(wd)) if callable(scn.src_args) else scn.src_args

    # ignore mtime only when neither -t nor -a is in effect; some dest times are
    # left unmanaged by rsync (-O dirs, -J links, -b backup) -> ignore those.
    # A short-option cluster counts when it carries `t` OR `a`: previously only
    # a cluster containing `t` (or the exact strings -a/-t/-rlptD) matched, so
    # clustered archive options such as -aH / -az / -aX silently dropped the
    # mtime comparison.
    def short_cluster_has(opt, letters):
        return (opt.startswith("-") and not opt.startswith("--")
                and any(ch in opt for ch in letters))

    has_times = any(o in ("--times", "--archive") or short_cluster_has(o, "at")
                    for o in opts0)
    ign_types = _ign(opts0)
    # a "/"-rooted or absolute source (e.g. --files-from with absolute names)
    # makes rsync create IMPLIED parent dirs it has no source time for -> their
    # mtime is wall-clock and differs between the A and B runs. Ignore dir mtimes
    # for those, like -O/backup dirs.
    if isinstance(src_args, list) and any(arg.startswith("/") for arg in src_args):
        ign_types = set(ign_types) | {"d"}

    def one_run(tag, binary, rep):
        """Run `binary` once into dest_<tag><rep>; return the result dict."""
        dest = wd / f"dest_{tag}{rep}"
        optr = resolve_opts(dest)  # per-tag (aux dir is under this tag's dest)
        if scn.dest_prep:
            scn.dest_prep(dest)  # e.g. create dest as a symlink to a real dir
        elif scn.pre_dest:
            dest.mkdir(parents=True, exist_ok=True)
            scn.pre_dest(dest)
        _tls.measure = COST  # have sh() sample peak process-group RSS
        _tls.rss = None
        if scn.daemon is not None:
            # 16 port slots per name bucket: A uses 0-7, B uses 8-15, indexed by rep
            port = (20000 + (abs(hash(scn.name)) % 2000) * 16
                    + (0 if tag == "A" else 8) + rep)
            D = scn.daemon
            if D.get("tcp"):  # real bound TCP port (not the stdio pipe)
                rc, err, lit, item, out = _tcp_daemon(
                    binary, str(wd), str(wd / "src") if D.get("pull") else str(dest),
                    optr, src_args, localdest=str(dest), pull=D.get("pull", False),
                    chroot=D.get("chroot", "no"), auth=D.get("auth", False))
            elif D.get("pull"):
                rc, err, lit, item, out = run_daemon_pull(
                    binary, str(wd), str(wd / "src"), optr, str(dest), port,
                    chroot=D.get("chroot", "no"))
            else:
                rc, err, lit, item, out = run_daemon_xfer(
                    binary, str(wd), str(dest), optr, src_args, port,
                    chroot=D.get("chroot", "no"))
        elif scn.rrsync is not None:
            # each build is paired with its own rrsync; fall back to the in-tree one
            rrs = (RRSYNC_A if tag == "A" else RRSYNC_B) or str(_RRSYNC_SRC)
            if scn.rrsync.get("pull"):
                rc, err, lit, item, out = run_rrsync_pull(
                    binary, rrs, str(wd), str(wd / "src"), optr, str(dest))
            else:
                rc, err, lit, item, out = run_rrsync_push(
                    binary, rrs, str(wd), optr, src_args, str(dest))
        elif scn.ssh:
            rc, err, lit, item, out = run_ssh_xfer(binary, str(wd), optr,
                                                   src_args, str(dest))
        else:
            dest_arg = scn.dest_arg(dest) if scn.dest_arg else str(dest) + "/"
            rc, err, lit, item, out = run_xfer(binary, str(wd), optr, src_args,
                                               dest_arg)
        _tls.measure = False
        snap_target = scn.snap_dest(dest) if scn.snap_dest else dest
        return dict(rc=rc, err=err, lit=lit, item=item, snap=snapshot(snap_target),
                    out=_norm_out(out, wd, dest), errn=_norm_err(err, wd, dest),
                    rss=getattr(_tls, "rss", None))

    # STABILITY GATE: run each binary REPEAT times (cheap), and if a candidate
    # A/B diff appears, ESCALATE to more samples and require the diff to be stable
    # across ALL of them. A binary whose own runs disagree (or a diff that doesn't
    # reproduce) is nondeterministic -> quarantine FLAKY, never a false regression.
    # (A ~50% flake fools 2 repeats too often; escalation makes false DIFFs rare.)
    # CONTENT-level instability within one binary's repeats = real nondeterminism
    # (rc / error / dest content+existence; mtime, itemize, Literal-data excluded
    # -- those carry incidental wall-clock/dir-time variance, not a correctness
    # flake). This is what makes an A/B diff untrustworthy -> quarantine FLAKY.
    def content_unstable(rs):
        """Return the first non-empty content diff of rs[0] vs a later repeat, else None."""
        for other in rs[1:]:
            d = _compare(rs[0], other, has_times=False, ign_types=set(),
                         incl_item=False, incl_lit=False)
            if d:
                return d
        return None

    def attr_stable(rs, k):
        """True when every repeat agrees with the first on result key k."""
        return all(rs[0][k] == r[k] for r in rs[1:])

    base = max(1, REPEAT)
    ra = [one_run("A", RSYNC_A, r) for r in range(base)]
    rb = [one_run("B", RSYNC_B, r) for r in range(base)]
    item_ok = lit_ok = out_ok = err_ok = True
    if base >= 2:
        # escalate sampling when a candidate A/B diff appears (on ANY signal),
        # to confirm stability before trusting it
        if _compare(ra[0], rb[0], has_times, ign_types, incl_out=True, incl_err=True):
            confirm = max(base, 5)
            ra += [one_run("A", RSYNC_A, r) for r in range(base, confirm)]
            rb += [one_run("B", RSYNC_B, r) for r in range(base, confirm)]
        cu = content_unstable(ra) or content_unstable(rb)
        if cu:
            if not KEEP:
                shutil.rmtree(wd, ignore_errors=True)
            return ("FLAKY", [" scenario content is nondeterministic across "
                              "repeats (quarantined, not a regression):"] + cu[:6])
        # itemize / Literal-data / stdout / stderr-text are kept as A/B signals
        # only if each is stable per binary (else incidental run-variance noise)
        item_ok = attr_stable(ra, "item") and attr_stable(rb, "item")
        lit_ok = attr_stable(ra, "lit") and attr_stable(rb, "lit")
        out_ok = attr_stable(ra, "out") and attr_stable(rb, "out")
        err_ok = attr_stable(ra, "errn") and attr_stable(rb, "errn")
    a, b = ra[0], rb[0]

    # When BOTH builds error on this (often edge) input, neither produced a clean
    # benign transfer; the exact stderr/stdout/itemize/literal wording is low
    # signal (different errno/message for the same failure). A real regression --
    # A worse than B -- still shows in the exit code and the dest tree, which stay
    # compared. Drop the free-text/itemize signals in that case.
    both_failed = (any(m in a["err"] for m in ERR_MARKERS)
                   and any(m in b["err"] for m in ERR_MARKERS))
    issues = _compare(a, b, has_times, ign_types,
                      incl_item=item_ok and not both_failed,
                      incl_lit=lit_ok and not both_failed,
                      incl_out=out_ok and not both_failed,
                      incl_err=err_ok and not both_failed)

    # COST oracle: directional peak-RSS blow-up. Require the gap to hold across
    # ALL samples (min A vs max B) so run-to-run RSS noise can't trip it; only a
    # gross ratio + absolute floor counts -- a real resource regression, not jitter.
    if COST:
        arss = [r["rss"] for r in ra if r.get("rss")]
        brss = [r["rss"] for r in rb if r.get("rss")]
        if arss and brss:
            amin, bmax = min(arss), max(brss)
            if amin > 3 * bmax and (amin - bmax) > 64 * 1024 * 1024:
                issues.append(f" peak-RSS blow-up: A>={amin // (1<<20)}MB "
                              f"B<={bmax // (1<<20)}MB ({amin / bmax:.1f}x)")

    if not (KEEP or issues):
        shutil.rmtree(wd, ignore_errors=True)
    if not issues:
        return ("OK", issues)

    # Classify: if A's only divergence is an INTENTIONAL refusal that B did not
    # do (an entry in ALLOWLIST), mark ALLOW -- a documented behaviour change,
    # recorded separately, not a silent regression.
    for sub, note in ALLOWLIST:
        if sub in a["err"] and sub not in b["err"]:
            return ("ALLOW", [f" intentional behaviour change: {note}",
                              f" A:{a['err'][:200]}"])

    # Direction matters for "regression": only A being WORSE than B counts.
    a_ok = a["rc"] == 0 and not any(m in a["err"] for m in ERR_MARKERS)
    b_ok = b["rc"] == 0 and not any(m in b["err"] for m in ERR_MARKERS)
    if a_ok and not b_ok:
        return ("ABETTER", [" A succeeds where B FAILS -- an "
                            "improvement/behaviour change, not a regression:",
                            f" B:{b['err'][:200]}"])
    return ("DIFF", issues)
# Last port number handed to a daemon started by _daemon1(), which bumps it
# before every launch so successive daemons get distinct ports. Held in a
# one-element list so it can be mutated without a `global` statement.
_mport = [25500]
def _ssh1(cbin, sbin, wd, opts, src, dest, pull):
    """Run one transfer over the lsh remote-shell split and return the parsed result.

    cbin is the local client binary, sbin the binary named by --rsync-path
    for the remote side. With pull=True the remote side (sbin) is the SENDER
    (lh:src -> dest); otherwise the client (cbin) sends (src -> lh:dest).
    The command runs with cwd=wd and always passes --stats and -i.
    """
    remote_shell = ["-e", f"sh {_LSH}", f"--rsync-path={sbin}"]
    if pull:
        endpoints = [f"lh:{src}/", f"{dest}/"]
    else:
        endpoints = [f"{src}/", f"lh:{dest}/"]
    argv = [cbin, "--stats", "-i"] + remote_shell + list(opts) + endpoints
    return _parse_out(sh(argv, cwd=str(wd)))
def _daemon1(cbin, sbin, wd, module, opts, localdir, pull):
    """Run one transfer against a throwaway TCP daemon; return the parsed result.

    sbin is started as `--daemon --no-detach` on 127.0.0.1 with a fresh port
    taken from _mport, serving `module` (created if missing) as the writable
    module "m". cbin then transfers between `localdir` and rsync://.../m/:
    with pull=True the daemon is the SENDER, otherwise the client sends.
    The config, log and pid files are written under wd.

    Returns whatever _parse_out() returns, which callers unpack as five
    values (rc, err, lit, item, out). If the port never comes up, a 5-tuple
    with rc=99 is returned. The daemon is always terminated (killed after a
    5 s grace period) before returning.
    """
    _mport[0] += 1
    port = _mport[0]
    conf = Path(wd) / f"d{port}.conf"
    Path(module).mkdir(parents=True, exist_ok=True)
    conf.write_text(f"use chroot = no\nport = {port}\nlog file = {wd}/d{port}.log\n"
                    f"pid file = {wd}/d{port}.pid\n[m]\n path = {module}\n"
                    f" read only = no\n hosts allow = 127.0.0.1\n")
    proc = subprocess.Popen([sbin, "--daemon", "--no-detach", f"--config={conf}",
                             f"--port={port}", "--address=127.0.0.1"],
                            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    try:
        if not _wait_port(port):
            # Must have the same five fields as the success path: every caller
            # unpacks five values, so the old 4-tuple raised ValueError there
            # instead of reporting the failed start.
            return (99, "daemon failed to start", None, "", "")
        url = f"rsync://127.0.0.1:{port}/m/"
        if pull:  # daemon (sbin) is the SENDER
            argv = [cbin, "--stats", "-i", *opts, url, f"{localdir}/"]
        else:  # client (cbin) is the SENDER
            argv = [cbin, "--stats", "-i", *opts, f"{localdir}/", url]
        return _parse_out(sh(argv, cwd=str(wd)))
    finally:
        proc.terminate()
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()  # reap the killed child so it does not linger as a zombie
def run_matrix(workroot, logf):
    """Run the cross-build role matrix and return the number of candidates.

    Mixed A<->B over the wire, both directions (push/pull => which build is
    sender vs receiver) and both transports (ssh/daemon). Each config's dest
    is compared to the PURE-B baseline for the same fixture; an A-involved
    config that is WORSE than pure-B is a regression candidate
    (protocol/interop or role-specific). Candidates are printed and appended
    to logf.
    """
    A, B = RSYNC_A, RSYNC_B
    fixtures = [("basic", build_recvtree, ["-a"]),
                ("H", build_recvtree, ["-aH"]),
                ("X", build_recvtree, ["-aX"]),
                ("z", build_recvtree, ["-az"]),
                ("c", build_recvtree, ["-ac"])]
    # (tag, client binary, server binary) for every A-involved pairing
    pairings = [("Ac_As", A, A), ("Ac_Bs", A, B), ("Bc_As", B, A)]
    lanes = [(t, d) for t in ("ssh", "daemon") for d in ("push", "pull")]

    def clean(rc, err):
        # success = zero exit status and no error marker in stderr
        return rc == 0 and not any(m in err for m in ERR_MARKERS)

    def one(cbin, sbin, wd, tag, transport, direction, opts):
        source = Path(wd) / "src"
        dest = Path(wd) / f"d_{tag}"
        pulling = direction == "pull"
        if transport == "ssh":
            result = _ssh1(cbin, sbin, wd, opts, source, dest, pull=pulling)
        elif pulling:
            # the daemon serves the source tree, the client pulls into dest
            result = _daemon1(cbin, sbin, wd, source, opts, dest, pull=True)
        else:
            # the daemon module IS the destination, the client pushes src
            result = _daemon1(cbin, sbin, wd, dest, opts, source, pull=False)
        rc, err, _lit, _item, _out = result
        return rc, err, snapshot(dest)

    candidates = 0
    for name, setup, opts in fixtures:
        for transport, direction in lanes:
            wd = workroot / f"mx_{name}_{transport}_{direction}"
            shutil.rmtree(wd, ignore_errors=True)
            (wd / "src").mkdir(parents=True)
            setup(wd / "src")
            brc, berr, bsnap = one(B, B, wd, "base", transport, direction, opts)
            bok = clean(brc, berr)
            for tag, cv, sv in pairings:
                rc, err, snap = one(cv, sv, wd, tag, transport, direction, opts)
                ok = clean(rc, err)
                diffs = diff_snapshots(snap, bsnap)
                label = f"matrix:{name}/{transport}/{direction}/{tag}"
                status_differs = ok != bok
                if not diffs and not status_differs:
                    print(f"OK {label}")
                    continue
                if ok and not bok:
                    print(f"BETTER {label} (A cfg ok; pure-B failed)")
                    continue
                candidates += 1
                kind = "exit/err" if status_differs else "tree"
                print(f"DIFF {label} [{kind}; client={cv.split('/')[-1]} "
                      f"server={sv.split('/')[-1]}]")
                detail = []
                if status_differs:
                    detail.append(f" exit/err: cfg ok={ok} (rc={rc}) base ok={bok}")
                    detail.append(f" err:{err[:200]}")
                detail += diffs[:8]
                for ln in detail:
                    print(ln)
                logf.write(f"\nMATRIX-DIFF {label} opts:{' '.join(opts)} "
                           f"vs pure-B *** REGRESSION CANDIDATE ***\n")
                for ln in detail:
                    logf.write(ln + "\n")
                logf.flush()
    print(f"\n=== matrix: {candidates} cross-version regression candidates ===")
    return candidates
def _fx_hardlinks(src):
    """Fixture: six small files, six hard links onto the first three, one symlink."""
    count = 6
    for idx in range(count):
        _write(src / f"f{idx}", f"data{idx % 3}\n".encode())
    for idx in range(count):
        target = src / f"f{idx % 3}"
        try:
            os.link(target, src / f"hl{idx}")
        except OSError:
            # best-effort: a failed link is skipped, the fixture is still built
            continue
    os.symlink("f0", src / "sl")
def _fx_weird(src):
    """Fixture: one tiny file per awkward name (space, non-ASCII, shell
    metacharacters, leading dash, embedded tab)."""
    awkward = ("a space", "café", "semi;colon", "dollar$x", "paren(s)", "-dash",
               "tab\tt")
    for name in awkward:
        _write(src / name, b"x\n")
def _fx_deep(src):
    """Fixture: a leaf file under 25 nested directories (d0/d1/.../d24), plus
    one top-level file."""
    bottom = src.joinpath(*(f"d{level}" for level in range(25)))
    _write(bottom / "leaf", b"deep\n")
    _write(src / "top", b"t\n")
def _fx_sparse(src):
    """Fixture: sp.bin holds only b"end" written at offset 2 MiB (nothing is
    written before it), plus a small regular file."""
    gap = 2 << 20
    with open(src / "sp.bin", "wb") as fh:
        fh.seek(gap)
        fh.write(b"end")
    _write(src / "reg", b"r\n")
def _fx_many(src):
    """Fixture: 200 tiny files spread round-robin over directories d0..d7."""
    for idx in range(200):
        bucket = src / f"d{idx % 8}"
        _write(bucket / f"f{idx:03d}", f"{idx}\n".encode())
# (name, setup) pairs the fuzzer and the --loop random stream pick a source
# tree from; setup is called with the source directory to populate.
FUZZ_FIXTURES = [
    ("recvtree", build_recvtree),
    ("kitchen", build_kitchen),
    ("hardlinks", _fx_hardlinks),
    ("weird", _fx_weird),
    ("deep", _fx_deep),
    ("sparse", _fx_sparse),
    ("many", _fx_many),
]
# Flags the fuzzer samples from (on top of -a). Order is part of the seeded
# sampling, so keep it stable.
FUZZ_FLAGS = [
    "-H", "-S", "--inplace", "-z", "-c", "-b", "-O", "-J",
    "--numeric-ids", "-A", "-X", "-E", "--no-whole-file", "-I",
    "--size-only", "-u", "-k", "-K", "-L", "--copy-unsafe-links",
    "--safe-links", "--munge-links", "--delete", "--existing",
    "--ignore-existing", "--max-size=100000", "--compress-choice=zstd",
    "--no-inc-recursive", "--checksum-choice=md5",
]
def _perturb(src, dest):
    """Make dest an older/partial copy of src (so update/backup/delete/-u/-I bite).

    dest starts as a copy of src (symlinks kept as symlinks). The first
    regular file in sorted path order is then overwritten with older content
    and back-dated, and an extra back-dated `_obsolete` file is added.
    """
    shutil.copytree(src, dest, symlinks=True)
    old = (T_OLD, T_OLD)
    regular_files = (p for p in sorted(dest.rglob("*"))
                     if p.is_file() and not p.is_symlink())
    victim = next(regular_files, None)
    if victim is not None:
        victim.write_bytes(b"OLDER CONTENT\n")
        os.utime(victim, old)
    extra = dest / "_obsolete"
    extra.write_text("x\n")
    os.utime(extra, old)
def _fuzz_run(sndr, rcvr, transport, direction, wd, tag, opts, src):
    """Run one fuzz config into wd/d_<tag>; return (rc, stderr, dest snapshot).

    dest is first seeded as a perturbed copy of src. On a push the sender
    build is the client; on a pull the receiver build is the client and the
    sender build is the remote/daemon side. Any transport other than "ssh"
    uses the daemon lane.
    """
    dest = wd / f"d_{tag}"
    _perturb(src, dest)
    pushing = direction == "push"
    if transport == "ssh":
        client, server = (sndr, rcvr) if pushing else (rcvr, sndr)
        result = _ssh1(client, server, wd, opts, src, dest, pull=not pushing)
    elif pushing:
        # daemon lane: the module is the destination
        result = _daemon1(sndr, rcvr, wd, dest, opts, src, pull=False)
    else:
        # daemon lane: the module is the source
        result = _daemon1(rcvr, sndr, wd, src, opts, dest, pull=True)
    rc, err, _, _, _ = result
    return rc, err, snapshot(dest)
def run_fuzz(workroot, logf, n, seed):
    """Stochastic differential fuzzer; return the number of distinct candidates.

    Each of the n iterations draws a random fixture x option subset (0-4 of
    FUZZ_FLAGS on top of -a) x transport x direction x build pair from a
    random.Random(seed) stream, runs it, and compares it to the pure-B
    baseline for the same config. Configs using a long option either binary
    lacks are skipped. Candidates are de-duplicated by
    (fixture, flags, transport, direction, exit-vs-tree), printed, and
    appended to logf.

    A harness exception during a run is not an rsync regression: the config
    is skipped, but it is now reported on stdout and its work directory is
    cleaned up (it used to be swallowed silently).
    """
    import random
    rnd = random.Random(seed)
    A, B = RSYNC_A, RSYNC_B
    seen = set()
    nreg = nrun = nerr = 0

    def discard(wd):
        # honour --keep: only remove the work dir when KEEP is off
        if not KEEP:
            shutil.rmtree(wd, ignore_errors=True)

    print(f"fuzz: {n} iterations, seed={seed}")
    for i in range(n):
        # NOTE: the order of these draws defines what a given seed reproduces
        fxname, fx = rnd.choice(FUZZ_FIXTURES)
        flags = sorted(rnd.sample(FUZZ_FLAGS, rnd.randint(0, 4)))
        opts = ["-a"] + flags
        transport = rnd.choice(["ssh", "daemon"])
        direction = rnd.choice(["push", "pull"])
        sndr, rcvr = rnd.choice([(A, A), (A, B), (B, A)])
        # skip option unsupported by either binary
        if any(o.startswith("--") and not (supports(A, o.split("=")[0])
                                           and supports(B, o.split("=")[0]))
               for o in opts):
            continue
        wd = workroot / f"fz{i}"
        shutil.rmtree(wd, ignore_errors=True)
        (wd / "src").mkdir(parents=True)
        fx(wd / "src")
        try:
            brc, berr, bsnap = _fuzz_run(B, B, transport, direction, wd, "base",
                                         opts, wd / "src")
            crc, cerr, csnap = _fuzz_run(sndr, rcvr, transport, direction, wd,
                                         "cfg", opts, wd / "src")
        except Exception as ex:
            # best-effort: keep fuzzing, but leave a trace of what was skipped
            nerr += 1
            print(f"ERROR fuzz:{fxname}/{transport}/{direction}/"
                  f"[{' '.join(opts)}] harness exception, config skipped: {ex!r}")
            discard(wd)
            continue
        nrun += 1
        bok = brc == 0 and not any(m in berr for m in ERR_MARKERS)
        cok = crc == 0 and not any(m in cerr for m in ERR_MARKERS)
        diffs = diff_snapshots(csnap, bsnap, ignore_mtime_types=_ign(opts))
        if not diffs and cok == bok:
            discard(wd)
            continue
        if cok and not bok:  # config better than pure-B
            discard(wd)
            continue
        which = f"sndr={'A' if sndr==A else 'B'} rcvr={'A' if rcvr==A else 'B'}"
        sig = (fxname, tuple(flags), transport, direction,
               "exit" if cok != bok else "tree")
        if sig in seen:
            discard(wd)
            continue
        seen.add(sig)
        nreg += 1
        label = f"fuzz:{fxname}/{transport}/{direction}/[{' '.join(opts)}]/{which}"
        print(f"DIFF {label}")
        detail = ([f" exit: cfg rc={crc}(ok={cok}) base rc={brc}(ok={bok})",
                   f" err:{cerr[:200]}"] if cok != bok else []) + diffs[:8]
        for ln in detail:
            print(ln)
        logf.write(f"\nFUZZ-DIFF {label} *** REGRESSION CANDIDATE ***\n")
        for ln in detail:
            logf.write(ln + "\n")
        logf.flush()
    print(f"\n=== fuzz: {nrun} configs run, {nreg} distinct regression candidates ===")
    if nerr:
        print(f"=== fuzz: {nerr} configs skipped on harness errors ===")
    return nreg
def _ign(opts):
    """Return the set of entry types ("d" dirs, "l" links) whose mtime should
    be ignored for this option list.

    A short option counts whether given alone or inside a single-dash
    cluster (e.g. the O in "-aO"); long options are matched by name/prefix.
    """
    def short(letter):
        # single-dash argument (not "--long") that contains the letter
        return any(o[:1] == "-" and o[1:2] != "-" and letter in o for o in opts)

    ignored = set()
    if "--omit-dir-times" in opts or short("O"):
        ignored.add("d")
    if "--omit-link-times" in opts or short("J"):
        ignored.add("l")
    if "--backup" in opts or short("b"):
        ignored.add("d")
    # aux/implied dirs (temp/partial/backup, and --mkpath's created parents) sit
    # in the dest tree but get no source time -> their mtime is wall-clock and
    # differs between the A and B runs.
    aux_prefixes = ("--temp-dir", "--partial-dir", "--backup-dir",
                    "--partial", "--mkpath")
    if any(o.startswith(aux_prefixes) for o in opts):
        ignored.add("d")
    return ignored
# ---------------------------------------------------------------------------
# --loop: infinite scenario generators (random novel combos + systematic ladder)
# Flag pool for the random stream: the combo set plus extra flags worth
# randomizing (symlink / selection / wire).
_RAND_FLAGS = _COMBO_FLAGS + [
    "-k", "-K", "-L", "-l",
    "--copy-unsafe-links", "--safe-links", "--munge-links",
    "--delete", "--existing", "--ignore-existing",
    "--no-inc-recursive", "--compress-choice=zstd", "--checksum-choice=md5",
    "--sparse", "--fuzzy",
]
def _random_scenarios(rnd, seen):
    """Yield an endless stream of randomized benign A/B scenarios.

    Each draw is a random fixture x a random subset of 2-6 _RAND_FLAGS, with
    a stale destination on roughly half of the recvtree draws. Signatures
    already in `seen` are skipped; new ones are added to it.
    """
    serial = 0
    while True:
        fxname, fx = rnd.choice(FUZZ_FIXTURES)
        width = rnd.randint(2, 6)
        flags = tuple(sorted(rnd.sample(_RAND_FLAGS, width)))
        # short-circuit on purpose: rnd.random() is only consumed for recvtree
        stale = fxname == "recvtree" and rnd.random() < 0.5
        sig = ("rand", fxname, flags, stale)
        if sig not in seen:
            seen.add(sig)
            serial += 1
            stem = "_".join(f.lstrip("-") for f in flags)
            suffix = "+stale" if stale else ""
            yield Scenario(f"rand{serial}:{fxname}:{stem}{suffix}", fx,
                           ["-a", *flags], ["src/"], "dest/",
                           pre_dest=(stale_dest if stale else None))
def _systematic_combos(seen):
    """Yield an endless stream walking the option-combination ladder.

    Orders 2, 3, 4, ... of _COMBO_FLAGS over a stale dest, skipping
    signatures already in `seen`. Once the top order is exhausted, every
    "sys" signature is dropped from `seen` and the ladder restarts at 2.
    """
    order = 2
    while True:
        for combo in itertools.combinations(_COMBO_FLAGS, order):
            sig = ("sys", combo)
            if sig not in seen:
                seen.add(sig)
                yield Scenario(f"sys{order}:" + ",".join(combo), build_recvtree,
                               ["-a", *combo], ["src/"], "dest/",
                               pre_dest=stale_dest)
        order += 1
        if order > len(_COMBO_FLAGS):
            # exhausted every order -> forget the ladder's signatures, restart
            spent = [s for s in seen if s and s[0] == "sys"]
            seen.difference_update(spent)
            order = 2
def _mixed_scenarios(rnd, seen):
    """Yield scenarios alternately from the random and the systematic stream,
    so a parallel pool runs ~half of each. Both streams share `seen`."""
    streams = (_random_scenarios(rnd, seen), _systematic_combos(seen))
    while True:
        for stream in streams:
            yield next(stream)
class _Tee:
    """Write-only fan-out: write() and flush() are forwarded to every wrapped
    file, in the order given. Lets matrix/fuzz diffs land in both the curated
    findings log and the full per-run log."""

    def __init__(self, *files):
        # tuple of file-like sinks; each only needs write() and flush()
        self._f = files

    def write(self, s):
        for sink in self._f:
            sink.write(s)

    def flush(self):
        for sink in self._f:
            sink.flush()
def _version_banner(binary):
    """Return the first line of ``binary --version``, or ``"?"``.

    ``"?"`` is returned when the command exits non-zero or prints nothing, so
    building the startup banner never fails on an empty/failed version probe.
    """
    res = sh([binary, "--version"])
    if res.returncode != 0:
        return "?"
    lines = res.stdout.splitlines()
    return lines[0] if lines else "?"


def main():
    """Command-line entry point.

    Parses the options, publishes them into the module-level settings
    (RSYNC_A/B, RRSYNC_A/B, KEEP, REPEAT, CMD_TIMEOUT, COST, SCALE_N) and then
    runs one of three modes:

    * --matrix / --fuzz N: delegate to run_matrix() / run_fuzz(), teeing their
      output into the findings file and a timestamped per-run log.
    * finite sweep (default): run every selected scenario through a thread
      pool, recording each result as it completes.
    * --loop: run the selected scenarios, then keep the pool fed from
      _mixed_scenarios() until --timelimit expires or Ctrl-C.

    Returns the process exit status: 0 when nothing suspicious was found
    (or after --list), 1 otherwise.  In sweep/loop mode "suspicious" means at
    least one DIFF, TIMEOUT or ERROR.
    """
    global RSYNC_A, RSYNC_B, RRSYNC_A, RRSYNC_B, KEEP, REPEAT, CMD_TIMEOUT, COST, SCALE_N
    ap = argparse.ArgumentParser()
    ap.add_argument("--rsync-a", default="./rsync")
    ap.add_argument("--rsync-b", default="old_versions/rsync_3.4.1")
    ap.add_argument("--rrsync-a", default=None,
                    help="rrsync wrapper script paired with A for the rrsync lane "
                         "(default: in-tree support/rrsync)")
    ap.add_argument("--rrsync-b", default=None,
                    help="rrsync wrapper paired with B (e.g. a baseline version's "
                         "rrsync); rrsync regressions live in the script, so this "
                         "must match B's version to A/B the rrsync lane")
    ap.add_argument("--sweep", default="all",
                    choices=["options", "pathshape", "recv", "destshape",
                             "name", "filesfrom", "intree", "intree2", "proto",
                             "combo", "combo3", "combo4", "scale", "ssh",
                             "daemon", "daemonchroot", "mode", "size",
                             "filetype", "selection", "behavior", "placement",
                             "wire", "pairwise", "daemonsym", "daemonpull",
                             "daemonesc", "misc", "gaps", "redo", "typetrans",
                             "tsprec", "bigscale", "rrsync", "tcpdaemon",
                             "priv", "all"])
    ap.add_argument("--workdir", default="/tmp/abdiff")
    ap.add_argument("--findings", default="abdiff-findings.txt")
    ap.add_argument("--only", default=None, help="run only scenarios containing this substring")
    ap.add_argument("--keep", action="store_true")
    ap.add_argument("--list", action="store_true")
    ap.add_argument("--matrix", action="store_true",
                    help="run the cross-build role matrix (mixed A<->B, "
                         "push+pull, ssh+daemon) instead of the sweeps")
    ap.add_argument("--fuzz", type=int, default=0, metavar="N",
                    help="stochastic differential fuzzer: N random "
                         "fixture/option/transport/direction/version-pair configs")
    ap.add_argument("--seed", type=int, default=1, help="fuzz RNG seed")
    ap.add_argument("--repeat", type=int, default=REPEAT, metavar="N",
                    help="stability gate: run each binary N times per scenario; "
                         "scenarios whose own runs disagree are quarantined FLAKY "
                         "(default 2; use 1 to disable, 3+ to catch rarer flakes)")
    ap.add_argument("-j", "--jobs", type=int, default=20, metavar="N",
                    help="run N scenarios in parallel (default 20)")
    ap.add_argument("--loop", action="store_true",
                    help="after the fixed sweeps, keep generating new randomized + "
                         "higher-order-combo scenarios indefinitely (Ctrl-C to stop)")
    ap.add_argument("--cmd-timeout", type=int, default=CMD_TIMEOUT, metavar="SECS",
                    help="per-rsync wall-clock timeout; 0 disables (default 120)")
    ap.add_argument("--log-dir", default=".",
                    help="directory for the full per-run abdiff-log_<TIME>.txt "
                         "(default: current directory)")
    ap.add_argument("--cost", action="store_true",
                    help="also compare peak process-group RSS (resource oracle); "
                         "flags only gross, stable A-worse blow-ups. Pair with "
                         "--sweep bigscale --scale N.")
    ap.add_argument("--scale", type=int, default=SCALE_N, metavar="N",
                    help=f"element count for the bigscale fixtures (default {SCALE_N}; "
                         "use e.g. 100000 with --cost to surface footprint regressions)")
    ap.add_argument("--timelimit", type=float, default=0, metavar="SECS",
                    help="stop after SECS seconds (0 = no limit); in --loop it ends "
                         "the loop, in a finite sweep it stops queuing new scenarios. "
                         "In-flight scenarios finish, then a summary is written.")
    ap.add_argument("--root-extra", dest="root_extra", action="store_true",
                    default=None,
                    help="with --sweep all, also include the root-only sweeps "
                         "(priv, daemonchroot); auto-on when running as root")
    args = ap.parse_args()

    # Publish the parsed options into the module-level settings.
    RSYNC_A = os.path.abspath(args.rsync_a)
    RSYNC_B = os.path.abspath(args.rsync_b)
    RRSYNC_A = os.path.abspath(args.rrsync_a) if args.rrsync_a else None
    RRSYNC_B = os.path.abspath(args.rrsync_b) if args.rrsync_b else None
    KEEP = args.keep
    REPEAT = args.repeat
    CMD_TIMEOUT = args.cmd_timeout
    COST = args.cost
    SCALE_N = args.scale

    # ---- matrix / fuzz mode -------------------------------------------------
    if args.matrix or args.fuzz:
        # Resolve --workdir exactly like the sweep path below does, so a
        # relative path cannot be interpreted against a changed cwd.
        workroot = Path(os.path.abspath(args.workdir))
        workroot.mkdir(parents=True, exist_ok=True)
        fp = Path(args.findings)
        fp.parent.mkdir(parents=True, exist_ok=True)
        ld = Path(args.log_dir)
        ld.mkdir(parents=True, exist_ok=True)
        runlog_path = ld / f"abdiff-log_{time.strftime('%Y%m%d_%H%M%S')}.txt"
        mode = "FUZZ" if args.fuzz else "MATRIX"
        header = (f"\n===== abdiff {mode} {time.strftime('%Y-%m-%d %H:%M:%S')} "
                  f"A={RSYNC_A} B={RSYNC_B} =====\n")
        # `with` guarantees both logs are closed even if the run raises.
        with open(fp, "a", buffering=1) as logf, \
                open(runlog_path, "a", buffering=1) as runlog:
            logf.write(header)
            runlog.write(header)
            print(f"A (under test): {RSYNC_A}\nB (baseline): {RSYNC_B}\n"
                  f"full log: {runlog_path}\n")
            tee = _Tee(logf, runlog)
            n = (run_fuzz(workroot, tee, args.fuzz, args.seed) if args.fuzz
                 else run_matrix(workroot, tee))
        return 1 if n else 0

    # ---- scenario selection -------------------------------------------------
    sweeps = ALL_SWEEPS if args.sweep == "all" else [args.sweep]
    # as root (or --root-extra), fold the root-only sweeps into an "all" run so a
    # privileged session exercises owner/device/specials/fake-super + chroot daemon.
    root_extra = args.root_extra if args.root_extra is not None else (os.geteuid() == 0)
    if args.sweep == "all" and root_extra:
        sweeps = sweeps + ["priv", "daemonchroot"]
    # With --cost, make sure the bigscale fixtures are part of the run, but never
    # queue them twice (e.g. `--sweep bigscale --cost`).
    # NOTE(review): confirm --cost is meant to add bigscale for every --sweep
    # value and not only for the root "all" run.
    if COST and "bigscale" not in sweeps:
        sweeps = sweeps + ["bigscale"]
    scns = []
    for s in sweeps:
        scns += SWEEPS[s]()
    if args.only:
        scns = [s for s in scns if args.only in s.name]
    if args.list:
        for s in scns:
            print(s.name, s.opts)
        return 0

    print(f"A (under test): {RSYNC_A} ({_version_banner(RSYNC_A)})")
    print(f"B (baseline): {RSYNC_B} ({_version_banner(RSYNC_B)})")
    # absolute: scenarios run rsync with cwd=workdir and also pass dest/aux paths
    # under it, so a relative --workdir would double-resolve (rsync writes to
    # wd/wd/...) and every scenario would go silently vacuous.
    workroot = Path(os.path.abspath(args.workdir))
    workroot.mkdir(parents=True, exist_ok=True)

    # Curated, cross-run findings log: open once, write a run header, and flush
    # each anomaly the moment it is found so the log is a live record mid-run.
    fp = Path(args.findings)
    fp.parent.mkdir(parents=True, exist_ok=True)
    logf = open(fp, "a", buffering=1)  # line-buffered
    logf.write(f"\n===== abdiff run {time.strftime('%Y-%m-%d %H:%M:%S')} "
               f"sweep={args.sweep} A={RSYNC_A} B={RSYNC_B} jobs={args.jobs}"
               f"{' loop' if args.loop else ''} =====\n")
    logf.flush()
    # Per-run findings log: fresh timestamped file in the current dir (or
    # --log-dir). Holds ONLY findings (DIFF/TIMEOUT/ERROR/FLAKY/ALLOW/BETTER) --
    # no OK/SKIP noise; stdout shows a live "test N" counter instead.
    ld = Path(args.log_dir)
    ld.mkdir(parents=True, exist_ok=True)
    runlog_path = ld / f"abdiff-log_{time.strftime('%Y%m%d_%H%M%S')}.txt"
    runlog = open(runlog_path, "a", buffering=1)
    runlog.write(f"# abdiff findings {time.strftime('%Y-%m-%d %H:%M:%S')}\n"
                 f"# A={RSYNC_A}\n# B={RSYNC_B}\n"
                 f"# sweep={args.sweep} jobs={args.jobs} repeat={REPEAT} "
                 f"loop={args.loop} cmd_timeout={CMD_TIMEOUT}\n")
    runlog.flush()
    print(f"findings log: {runlog_path}\njobs: {args.jobs}"
          f"{' (loop: Ctrl-C to stop)' if args.loop else ''}")

    def ostr(s):  # opts may be callable (placement scenarios)
        return ' '.join(s.opts) if not callable(s.opts) else '(dynamic)'

    LABEL = {"OK": "OK ", "SKIP": "SKIP ", "FLAKY": "FLAKY", "ABETTER": "BETTER",
             "ALLOW": "ALLOW", "DIFF": "DIFF ", "TIMEOUT": "TIME ", "ERROR": "ERROR"}
    counts = {k: 0 for k in LABEL}
    done = [0]  # one-element list so the closures below can mutate it
    rec_lock = threading.Lock()

    def progress(total=None):
        # single-line live counter, rewritten in place via the trailing "\r"
        sys.stdout.write(f"test {done[0]}{('/' + str(total)) if total else ''}"
                         f"{(' ' + str(counts['DIFF']) + ' DIFF') if counts['DIFF'] else ''}\r")
        sys.stdout.flush()

    def record(s, status, info, total=None):
        # Tally one finished scenario; print and log it unless it is OK/SKIP.
        if isinstance(info, str):
            info = [info]
        # a subprocess timeout surfaces as a DIFF whose err carries the marker;
        # promote it to its own TIMEOUT class so it's easy to triage.
        if status == "DIFF" and any("[abdiff: TIMEOUT]" in ln for ln in info):
            status = "TIMEOUT"
        with rec_lock:
            counts[status] = counts.get(status, 0) + 1
            done[0] += 1
            if status not in ("OK", "SKIP"):  # a finding: print it + log it
                sys.stdout.write("\r")  # clear the progress line
                print(f"{LABEL.get(status, status)} {s.name} [#{done[0]}]")
                for line in info:
                    print(line)
                suffix = (" *** REGRESSION CANDIDATE ***"
                          if status in ("DIFF", "TIMEOUT", "ERROR") else "")
                for f in (runlog, logf):
                    f.write(f"\n{status} {s.name} opts: {ostr(s)}{suffix}\n")
                    for line in info:
                        f.write(line + "\n")
                    f.flush()
            progress(total)

    def worker(s):
        try:
            return run_scenario(s, workroot)
        except Exception as ex:  # never let one scenario kill the pool
            return ("ERROR", [f" exception: {ex!r}"])

    def summary_line(prefix):
        return (f"{prefix} {counts['OK']} OK, {counts['SKIP']} skipped, "
                f"{counts['FLAKY']} FLAKY, {counts['ALLOW']} ALLOW (intentional), "
                f"{counts['ABETTER']} BETTER (A>B), {counts['TIMEOUT']} TIMEOUT, "
                f"{counts['ERROR']} ERROR, {counts['DIFF']} DIFF "
                f"(regression candidates) ===")

    def exit_code():
        # DIFF, TIMEOUT and ERROR are all tagged as regression candidates by
        # record(), so any of them makes the run fail -- in both modes.
        return 1 if (counts['DIFF'] or counts['TIMEOUT'] or counts['ERROR']) else 0

    deadline = (time.time() + args.timelimit) if args.timelimit else None
    if deadline:
        print(f"time limit: {args.timelimit:.0f}s\n")

    # ---- finite sweep -------------------------------------------------------
    if not args.loop:
        total = len(scns)
        print(f"scenarios: {total}\n")
        with ThreadPoolExecutor(max_workers=args.jobs) as ex:
            futs = {ex.submit(worker, s): s for s in scns}
            expired = False
            for fut in as_completed(futs):
                if fut.cancelled():
                    # dropped by the time limit below; nothing to record
                    continue
                status, info = fut.result()
                record(futs[fut], status, info, total=total)
                if not expired and deadline and time.time() > deadline:
                    expired = True
                    print("\n[time limit reached -- cancelling pending scenarios]")
                    # cancel() only affects scenarios that have not started;
                    # keep iterating so the in-flight ones are still recorded
                    # and counted in the summary / exit status.
                    for p in futs:
                        p.cancel()
        summary = summary_line("===")
        print("\n" + summary)
        runlog.write("\n" + summary + "\n")
        logf.write(summary + "\n")
        runlog.close()
        logf.close()
        return exit_code()

    # ---- --loop -------------------------------------------------------------
    # --loop: run the fixed sweeps first, then an endless mixed stream of new
    # randomized + systematic-combo scenarios, keeping the pool full.
    rnd = random.Random(args.seed)
    seen = set()
    mixed = _mixed_scenarios(rnd, seen)
    fixed = iter(scns)

    def next_scn():
        try:
            return next(fixed)
        except StopIteration:
            return next(mixed)

    ex = ThreadPoolExecutor(max_workers=args.jobs)
    inflight = {}

    def submit_one():
        s = next_scn()
        inflight[ex.submit(worker, s)] = s

    try:
        # prime the pool with twice as many scenarios as workers
        for _ in range(args.jobs * 2):
            submit_one()
        while True:
            fdone, _pending = wait(list(inflight), return_when=FIRST_COMPLETED)
            for fut in fdone:
                s = inflight.pop(fut)
                status, info = fut.result()
                record(s, status, info)
                # refill one-for-one until the deadline; afterwards just drain
                if not (deadline and time.time() > deadline):
                    submit_one()
            if deadline and time.time() > deadline and not inflight:
                print("\n[time limit reached -- writing summary]")
                break
    except KeyboardInterrupt:
        print("\n[interrupted -- cancelling pending, writing summary]")
        ex.shutdown(wait=False, cancel_futures=True)
    finally:
        summary = summary_line("=== loop stopped:")
        print("\n" + summary)
        runlog.write("\n" + summary + "\n")
        runlog.flush()
        logf.write(summary + "\n")
        logf.flush()
        runlog.close()
        logf.close()
    return exit_code()
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())