diff --git a/support/rrsh.sh b/support/rrsh.sh
new file mode 100755
index 00000000..83025f2f
--- /dev/null
+++ b/support/rrsh.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+# abdiff helper: a "remote shell" that emulates an sshd forced-command of
+# `rrsync DIR`. rsync invokes a remote shell as:
+#     RSH-COMMAND [ssh-opts] HOST SERVER-COMMAND...
+# so when used as -e "sh rrsh.sh RRSYNC DIR" rsync calls us as:
+#     sh rrsh.sh RRSYNC DIR [opts] lh rsync --server ...
+# We hand the server command to rrsync via SSH_ORIGINAL_COMMAND (exactly as
+# sshd would) and exec the restricted wrapper, so abdiff can A/B the rrsync
+# path itself. Only the pretend hosts "lh"/"localhost" are accepted.
+#
+# Arguments: $1 = rrsync program to exec, $2 = directory handed to it; the
+# remaining words are [ssh-opts] HOST SERVER-COMMAND... as described above.
+# Malformed arguments exit with status 2 and a message on stderr.
+
+die() { printf 'rrsh.sh: %s\n' "$*" >&2; exit 2; }
+
+# Check the count up front: a failed `shift 2` is handled differently by
+# different shells (some abort, some carry on without shifting).
+[ $# -ge 2 ] || die "usage: rrsh.sh RRSYNC DIR [ssh-opts] HOST COMMAND..."
+RRSYNC="$1"; DIR="$2"; shift 2
+
+host_seen=no
+while [ $# -gt 0 ]; do
+    case "$1" in
+        -l) # -l takes a value; skip both words
+            [ $# -ge 2 ] || die "option -l requires an argument"
+            shift 2 ;;
+        lh|localhost) host_seen=yes; shift; break ;;
+        -*) shift ;;  # every other option is treated as taking no value
+        *) die "unsupported host '$1' (only lh and localhost are accepted)" ;;
+    esac
+done
+[ "$host_seen" = yes ] || die "missing host (expected lh or localhost)"
+
+# "$*" joins the remaining words with spaces (default IFS) into one string.
+SSH_ORIGINAL_COMMAND="$*"
+export SSH_ORIGINAL_COMMAND
+exec "$RRSYNC" "$DIR"
diff --git a/testsuite/README.md b/testsuite/README.md
index 5c15f5de..4883a5e8 100644
--- a/testsuite/README.md
+++ b/testsuite/README.md
@@ -184,3 +184,53 @@ Each target must be provisioned with the build toolchain its workflow installs
 (autoconf, automake, a C compiler, perl, a python3 markdown module such as
 cmarkgfm or commonmark unless the flags pass `--disable-md2man`, and the dev
 libraries its configure flags enable). A missing piece shows up as `BUILD-FAIL`.
+
+## Differential regression hunting (abdiff.py)
+
+`testsuite/abdiff.py` is a developer tool — **not** a `*_test.py`, so `runtests.py`
+ignores it. It hunts *regressions* by running the **same benign transfer** with
+two rsync binaries (`A` = the build under test, `B` = a baseline) and comparing
+the OUTCOME. The oracle is: for a benign input, a correctness/behaviour change
+between the builds must be **invisible**, so A and B must produce an identical
+result. Any divergence is a regression candidate to investigate and, if real,
+minimize into a `*_test.py`.
+ +It compares exit code, stderr (error markers + normalised text), `--stats` +"Literal data", the destination tree (content + full metadata: mode/uid/gid/ +mtime/size/symlink target/xattrs/ACLs/hardlink grouping), the `--itemize` list, +and — with `--cost` — peak process-group RSS (a resource-regression oracle that +functional comparison misses). A **stability gate** runs each binary several +times and escalates on a candidate diff; nondeterministic scenarios are +quarantined `FLAKY`, never reported as regressions. + +Run it from the build directory (so `./rsync` and `old_versions/` resolve): + +```sh +testsuite/abdiff.py # default: ./rsync vs old_versions/rsync_3.4.1 +testsuite/abdiff.py --sweep all -j5 # broad single pass, 5-way parallel +testsuite/abdiff.py --loop --timelimit 3600 --cost # hunt for an hour, resource oracle on +testsuite/abdiff.py --list --sweep all # list scenarios without running +``` + +Each finding is classed `DIFF` (regression candidate), `ALLOW` (an intentional, +documented behaviour change listed in the tool's allowlist), `BETTER` (A succeeds +where B fails), `FLAKY`, or `TIMEOUT`. Findings are printed and appended to a +per-run `abdiff-log_