From e6cb8788f8175d008ff5cfae414a6f6ec258c35d Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Sat, 13 Jun 2026 16:41:49 +1000 Subject: [PATCH] testsuite: add gating valgrind memcheck workflow + suppressions Add a .github/workflows/valgrind.yml that runs the full suite under valgrind in a 2x2 matrix (user/root x pipe/tcp transport) and gates on memory errors. It uses --leak-check=no: rsync intentionally leaves file-list/socket/option memory unfreed at exit, so a leak check is inherently noisy; the gate flags uninitialised reads, invalid read/write, bad frees and uninit syscall params instead. Add testsuite/valgrind.supp covering the known-benign reports (rwrite strlcpy over-read on a non-NUL-terminated peer message, atomic_create/ delete_item st_mode read under fakeroot, libfakeroot msgsnd padding, plus popt/xxhash leaks for manual --leak-check audits). runtests.py --valgrind now loads it automatically. --- .github/workflows/valgrind.yml | 96 ++++++++++++++++++++++++++++++++++ runtests.py | 4 ++ testsuite/valgrind.supp | 77 +++++++++++++++++++++++++++ 3 files changed, 177 insertions(+) create mode 100644 .github/workflows/valgrind.yml create mode 100644 testsuite/valgrind.supp diff --git a/.github/workflows/valgrind.yml b/.github/workflows/valgrind.yml new file mode 100644 index 00000000..036581c8 --- /dev/null +++ b/.github/workflows/valgrind.yml @@ -0,0 +1,96 @@ +name: Valgrind memcheck + +on: + push: + branches: [ master ] + paths-ignore: + - '.github/workflows/*.yml' + - '!.github/workflows/valgrind.yml' + pull_request: + branches: [ master ] + paths-ignore: + - '.github/workflows/*.yml' + - '!.github/workflows/valgrind.yml' + schedule: + - cron: '17 4 * * *' + workflow_dispatch: + +jobs: + memcheck: + runs-on: ubuntu-latest + timeout-minutes: 120 + strategy: + fail-fast: false + matrix: + privilege: [ user, root ] + transport: [ pipe, tcp ] + name: memcheck (${{ matrix.privilege }}, ${{ matrix.transport }}) + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: prep + run: | + sudo apt-get update + sudo apt-get install -y valgrind acl libacl1-dev attr libattr1-dev \ + liblz4-dev libzstd-dev libxxhash-dev python3-cmarkgfm openssl + echo "/usr/local/bin" >>"$GITHUB_PATH" + - name: configure + run: ./configure --with-rrsync --enable-debug + - name: make + run: make + - name: info + run: ./rsync --version + + # Run the whole suite under valgrind. We gate on memory *errors* (uninit + # reads, invalid read/write, bad frees, uninit syscall params), not leaks: + # rsync deliberately leaves file-list/socket/option memory unfreed at exit + # (short-lived process; the OS reclaims), so --leak-check=no avoids a sea of + # by-design "definitely lost" reports. Functional pass/fail is covered by + # the other workflows, so the suite is allowed to finish regardless of + # per-test results; the scan step below is the gate. --error-exitcode=0 + # keeps valgrind from perturbing test exit codes; the bundled + # testsuite/valgrind.supp silences known-benign reports. + - name: run testsuite under valgrind + run: | + SUDO= + [ "${{ matrix.privilege }}" = root ] && SUDO="sudo -E" + TCP= + [ "${{ matrix.transport }}" = tcp ] && TCP="--use-tcp" + $SUDO ./runtests.py --valgrind \ + --valgrind-opts="--leak-check=no --error-exitcode=0" \ + $TCP -j8 --preserve-scratch || true + + - name: scan for unsuppressed valgrind errors + run: | + sudo chown -R "$USER" testtmp 2>/dev/null || true + mapfile -t logs < <(find testtmp -name 'valgrind.*.log' 2>/dev/null) + if [ "${#logs[@]}" -eq 0 ]; then + echo "::error::no valgrind logs were produced -- the suite did not run" + exit 1 + fi + echo "scanned ${#logs[@]} valgrind log(s)" + bad=() + for f in "${logs[@]}"; do + grep -qE 'ERROR SUMMARY: [1-9][0-9]* errors' "$f" && bad+=("$f") + done + if [ "${#bad[@]}" -ne 0 ]; then + echo "::error::valgrind reported unsuppressed errors in ${#bad[@]} run(s)" + for f in "${bad[@]}"; do + echo "===== $f =====" + sed 's/==[0-9]*== //' "$f" | grep -A18 \ + -E 'depends on uninitialised|points to uninitialised|Invalid (read|write|free)|lost in loss record|Mismatched free' \ + | head -60 + done + exit 1 + fi + echo "valgrind clean: no unsuppressed errors" + + - name: upload valgrind logs on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: valgrind-logs-${{ matrix.privilege }}-${{ matrix.transport }} + path: testtmp/**/valgrind.*.log + if-no-files-found: ignore + retention-days: 7 diff --git a/runtests.py b/runtests.py index 574da01b..2c7a7f9b 100755 --- a/runtests.py +++ b/runtests.py @@ -268,6 +268,10 @@ def build_rsync_cmd(rsync_bin, args, scratchbase): if args.valgrind: vlog = os.path.join(scratchbase, 'valgrind.%p.log') vopts = f'--log-file={vlog}' + supp = os.path.join(os.path.dirname(os.path.abspath(__file__)), + 'testsuite', 'valgrind.supp') + if os.path.exists(supp): + vopts += f' --suppressions={supp}' if args.valgrind_opts: vopts += ' ' + args.valgrind_opts parts.append(f'valgrind {vopts}') diff --git a/testsuite/valgrind.supp b/testsuite/valgrind.supp new file mode 100644 index 00000000..b0cf91ea --- /dev/null +++ b/testsuite/valgrind.supp @@ -0,0 +1,77 @@ +# Valgrind suppressions for the rsync test suite. +# +# Every stanza here is a known-benign report, not a defect in rsync's own +# logic. They are suppressed so that a --valgrind run fails only on a *real* +# error. Pass to valgrind with --suppressions=testsuite/valgrind.supp +# (runtests.py --valgrind does this automatically). +# +# The valgrind CI gate runs with --leak-check=no and so checks memory *errors* +# only: rsync intentionally leaves file-list/socket/option memory unfreed at +# exit, which makes a full leak check inherently noisy. The two Memcheck:Leak +# stanzas below therefore only matter for a manual leak audit (--leak-check= +# full); they cover the two leaks that are not rsync's own at-exit slack. + +# popt alias strings are strdup'd while parsing arguments and only reclaimed +# at process exit. One-time, bounded, unreachable-at-exit; bundled popt. +{ + rsync-popt-alias-leak + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:my_alloc + fun:my_strdup + fun:popt_unalias + fun:parse_arguments + fun:main +} + +# rwrite() strlcpy()s a peer log message. read_a_msg() hands it exactly +# msg_bytes valid bytes with no terminating NUL; the copy is bounded to len, +# but strlcpy over-reads past the message to compute its (discarded) return +# length. Harmless read of one+ uninitialised stack byte. +{ + rsync-rwrite-strlcpy-peer-msg + Memcheck:Cond + fun:strlcpy + fun:strlcpy + fun:rwrite +} + +# atomic_create()/delete_item() test sxp->st.st_mode when replacing an +# existing item. Under fakeroot the faked stat overlay leaves mode bits that +# valgrind treats as uninitialised. Pre-existing hard-link code; fakeroot +# artifact, not an rsync read of uninitialised memory. +{ + rsync-atomic-create-stmode + Memcheck:Cond + fun:atomic_create + fun:recv_generator +} +{ + rsync-delete-item-stmode + Memcheck:Cond + fun:delete_item + fun:atomic_create +} + +# libxxhash returns an internally-aligned XXH3 state whose pointer is offset +# from the malloc base, so valgrind sees only an interior pointer and reports +# the one-time checksum state as "possibly lost". Alignment artifact. +{ + xxhash-createstate-aligned + Memcheck:Leak + match-leak-kinds: possible + fun:malloc + fun:XXH3_createState +} + +# libfakeroot's own SysV message padding in send_fakem(); the uninitialised +# bytes are inside libfakeroot's mtext buffer, not rsync memory. +{ + fakeroot-msgsnd-padding + Memcheck:Param + msgsnd(msgp->mtext) + fun:msgsnd + ... + obj:*libfakeroot* +}