From e6cb8788f8175d008ff5cfae414a6f6ec258c35d Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <andrew@tridgell.net>
Date: Sat, 13 Jun 2026 16:41:49 +1000
Subject: [PATCH] testsuite: add gating valgrind memcheck workflow +
 suppressions

Add a .github/workflows/valgrind.yml that runs the full suite under
valgrind in a 2x2 matrix (user/root x pipe/tcp transport) and gates on
memory errors. It uses --leak-check=no: rsync intentionally leaves
file-list/socket/option memory unfreed at exit, so a leak check is
inherently noisy; the gate flags uninitialised reads, invalid
read/write, bad frees and uninit syscall params instead.

Add testsuite/valgrind.supp covering the known-benign reports (rwrite
strlcpy over-read on a non-NUL-terminated peer message, atomic_create/
delete_item st_mode read under fakeroot, libfakeroot msgsnd padding,
plus popt/xxhash leaks for manual --leak-check audits). runtests.py
--valgrind now loads it automatically.
---
 .github/workflows/valgrind.yml | 96 ++++++++++++++++++++++++++++++++++
 runtests.py                    |  4 ++
 testsuite/valgrind.supp        | 77 +++++++++++++++++++++++++++
 3 files changed, 177 insertions(+)
 create mode 100644 .github/workflows/valgrind.yml
 create mode 100644 testsuite/valgrind.supp

diff --git a/.github/workflows/valgrind.yml b/.github/workflows/valgrind.yml
new file mode 100644
index 00000000..036581c8
--- /dev/null
+++ b/.github/workflows/valgrind.yml
@@ -0,0 +1,96 @@
+name: Valgrind memcheck
+
+on:
+  push:
+    branches: [ master ]
+    paths-ignore:
+      - '.github/workflows/*.yml'
+      - '!.github/workflows/valgrind.yml'
+  pull_request:
+    branches: [ master ]
+    paths-ignore:
+      - '.github/workflows/*.yml'
+      - '!.github/workflows/valgrind.yml'
+  schedule:
+    - cron: '17 4 * * *'
+  workflow_dispatch:
+
+jobs:
+  memcheck:
+    runs-on: ubuntu-latest
+    timeout-minutes: 120
+    strategy:
+      fail-fast: false
+      matrix:
+        privilege: [ user, root ]
+        transport: [ pipe, tcp ]
+    name: memcheck (${{ matrix.privilege }}, ${{ matrix.transport }})
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+    - name: prep
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y valgrind acl libacl1-dev attr libattr1-dev \
+          liblz4-dev libzstd-dev libxxhash-dev python3-cmarkgfm openssl
+        echo "/usr/local/bin" >>"$GITHUB_PATH"
+    - name: configure
+      run: ./configure --with-rrsync --enable-debug
+    - name: make
+      run: make
+    - name: info
+      run: ./rsync --version
+
+    # Run the whole suite under valgrind.  We gate on memory *errors* (uninit
+    # reads, invalid read/write, bad frees, uninit syscall params), not leaks:
+    # rsync deliberately leaves file-list/socket/option memory unfreed at exit
+    # (short-lived process; the OS reclaims), so --leak-check=no avoids a sea of
+    # by-design "definitely lost" reports.  Functional pass/fail is covered by
+    # the other workflows, so the suite is allowed to finish regardless of
+    # per-test results; the scan step below is the gate.  --error-exitcode=0
+    # keeps valgrind from perturbing test exit codes; the bundled
+    # testsuite/valgrind.supp silences known-benign reports.
+    - name: run testsuite under valgrind
+      run: |
+        SUDO=
+        [ "${{ matrix.privilege }}" = root ] && SUDO="sudo -E"
+        TCP=
+        [ "${{ matrix.transport }}" = tcp ] && TCP="--use-tcp"
+        $SUDO ./runtests.py --valgrind \
+          --valgrind-opts="--leak-check=no --error-exitcode=0" \
+          $TCP -j8 --preserve-scratch || true
+
+    - name: scan for unsuppressed valgrind errors
+      run: |
+        sudo chown -R "$USER" testtmp 2>/dev/null || true
+        mapfile -t logs < <(find testtmp -name 'valgrind.*.log' 2>/dev/null)
+        if [ "${#logs[@]}" -eq 0 ]; then
+          echo "::error::no valgrind logs were produced -- the suite did not run"
+          exit 1
+        fi
+        echo "scanned ${#logs[@]} valgrind log(s)"
+        bad=()
+        for f in "${logs[@]}"; do
+          grep -qE 'ERROR SUMMARY: [1-9][0-9]* errors' "$f" && bad+=("$f")
+        done
+        if [ "${#bad[@]}" -ne 0 ]; then
+          echo "::error::valgrind reported unsuppressed errors in ${#bad[@]} run(s)"
+          for f in "${bad[@]}"; do
+            echo "===== $f ====="
+            sed 's/==[0-9]*== //' "$f" | grep -A18 \
+              -E 'depends on uninitialised|points to uninitialised|Invalid (read|write|free)|lost in loss record|Mismatched free' \
+              | head -60
+          done
+          exit 1
+        fi
+        echo "valgrind clean: no unsuppressed errors"
+
+    - name: upload valgrind logs on failure
+      if: failure()
+      uses: actions/upload-artifact@v4
+      with:
+        name: valgrind-logs-${{ matrix.privilege }}-${{ matrix.transport }}
+        path: testtmp/**/valgrind.*.log
+        if-no-files-found: ignore
+        retention-days: 7
diff --git a/runtests.py b/runtests.py
index 574da01b..2c7a7f9b 100755
--- a/runtests.py
+++ b/runtests.py
@@ -268,6 +268,10 @@ def build_rsync_cmd(rsync_bin, args, scratchbase):
     if args.valgrind:
         vlog = os.path.join(scratchbase, 'valgrind.%p.log')
         vopts = f'--log-file={vlog}'
+        supp = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                            'testsuite', 'valgrind.supp')
+        if os.path.exists(supp):
+            vopts += f' --suppressions={supp}'
         if args.valgrind_opts:
             vopts += ' ' + args.valgrind_opts
         parts.append(f'valgrind {vopts}')
diff --git a/testsuite/valgrind.supp b/testsuite/valgrind.supp
new file mode 100644
index 00000000..b0cf91ea
--- /dev/null
+++ b/testsuite/valgrind.supp
@@ -0,0 +1,77 @@
+# Valgrind suppressions for the rsync test suite.
+#
+# Every stanza here is a known-benign report, not a defect in rsync's own
+# logic.  They are suppressed so that a --valgrind run fails only on a *real*
+# error.  Pass to valgrind with --suppressions=testsuite/valgrind.supp
+# (runtests.py --valgrind does this automatically).
+#
+# The valgrind CI gate runs with --leak-check=no and so checks memory *errors*
+# only: rsync intentionally leaves file-list/socket/option memory unfreed at
+# exit, which makes a full leak check inherently noisy.  The two Memcheck:Leak
+# stanzas below therefore only matter for a manual leak audit (--leak-check=
+# full); they cover the two leaks that are not rsync's own at-exit slack.
+
+# popt alias strings are strdup'd while parsing arguments and only reclaimed
+# at process exit.  One-time, bounded, unreachable-at-exit; bundled popt.
+{
+   rsync-popt-alias-leak
+   Memcheck:Leak
+   match-leak-kinds: definite
+   fun:malloc
+   fun:my_alloc
+   fun:my_strdup
+   fun:popt_unalias
+   fun:parse_arguments
+   fun:main
+}
+
+# rwrite() strlcpy()s a peer log message.  read_a_msg() hands it exactly
+# msg_bytes valid bytes with no terminating NUL; the copy is bounded to len,
+# but strlcpy over-reads past the message to compute its (discarded) return
+# length.  Harmless read of one+ uninitialised stack byte.
+{
+   rsync-rwrite-strlcpy-peer-msg
+   Memcheck:Cond
+   fun:strlcpy
+   fun:strlcpy
+   fun:rwrite
+}
+
+# atomic_create()/delete_item() test sxp->st.st_mode when replacing an
+# existing item.  Under fakeroot the faked stat overlay leaves mode bits that
+# valgrind treats as uninitialised.  Pre-existing hard-link code; fakeroot
+# artifact, not an rsync read of uninitialised memory.
+{
+   rsync-atomic-create-stmode
+   Memcheck:Cond
+   fun:atomic_create
+   fun:recv_generator
+}
+{
+   rsync-delete-item-stmode
+   Memcheck:Cond
+   fun:delete_item
+   fun:atomic_create
+}
+
+# libxxhash returns an internally-aligned XXH3 state whose pointer is offset
+# from the malloc base, so valgrind sees only an interior pointer and reports
+# the one-time checksum state as "possibly lost".  Alignment artifact.
+{
+   xxhash-createstate-aligned
+   Memcheck:Leak
+   match-leak-kinds: possible
+   fun:malloc
+   fun:XXH3_createState
+}
+
+# libfakeroot's own SysV message padding in send_fakem(); the uninitialised
+# bytes are inside libfakeroot's mtext buffer, not rsync memory.
+{
+   fakeroot-msgsnd-padding
+   Memcheck:Param
+   msgsnd(msgp->mtext)
+   fun:msgsnd
+   ...
+   obj:*libfakeroot*
+}