mirror of
https://github.com/RsyncProject/rsync.git
synced 2026-06-07 21:58:06 -04:00
Fix --preallocate --sparse to actually produce sparse files
rsync.1 says combining --preallocate with --sparse yields sparse blocks
wherever the filesystem can punch holes, but since 2019 (commit c2da3809,
"keep file-size 0 when possible") it has silently left the file fully
allocated. Two problems, both rooted in that commit switching --preallocate /
--inplace to fallocate(FALLOC_FL_KEEP_SIZE):
* do_fallocate() then returned 0 instead of the reserved length, so the
receiver's preallocated_len was 0 and write_sparse() always lseek'd over
null runs instead of punching them (and the over-preallocation trim in
receiver.c never fired either).
* more fundamentally, KEEP_SIZE leaves the file size at 0 while data is
written incrementally, so the FALLOC_FL_PUNCH_HOLE call lands on blocks
beyond EOF and is a silent no-op -- the reserved blocks are never freed.
Fix both: don't request KEEP_SIZE when --sparse is also active, so the file is
preallocated at full size and the punch lands within it; and return the
reserved length from do_fallocate() so preallocated_len drives the punch
decision and the over-allocation trim. --preallocate without --sparse keeps
the KEEP_SIZE (file-size-0) behaviour. t_stub.c gains a sparse_files stub since
do_fallocate now references it and the test helpers link syscall.o.
preallocate_test.py now asserts via st_blocks (where the filesystem can punch
holes) that --preallocate --sparse ends up sparse, guarding the regression.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
18
syscall.c
18
syscall.c
@@ -47,6 +47,7 @@ extern int read_only;
|
||||
extern int list_only;
|
||||
extern int inplace;
|
||||
extern int preallocate_files;
|
||||
extern int sparse_files;
|
||||
extern int preserve_perms;
|
||||
extern int preserve_executability;
|
||||
extern int open_noatime;
|
||||
@@ -1525,7 +1526,13 @@ int do_utime(const char *path, STRUCT_STAT *stp)
|
||||
|
||||
OFF_T do_fallocate(int fd, OFF_T offset, OFF_T length)
|
||||
{
|
||||
int opts = inplace || preallocate_files ? DO_FALLOC_OPTIONS : 0;
|
||||
/* FALLOC_FL_KEEP_SIZE lets --preallocate/--inplace keep the file size at 0
|
||||
* until data is written, but a later hole-punch (for --sparse) can only
|
||||
* deallocate blocks that lie within the file's size -- with KEEP_SIZE the
|
||||
* reserved blocks sit beyond EOF and the punch silently does nothing,
|
||||
* leaving the file fully allocated. So when holes will also be punched,
|
||||
* preallocate at full size instead (write_sparse then punches the nulls). */
|
||||
int opts = (inplace || preallocate_files) && sparse_files <= 0 ? DO_FALLOC_OPTIONS : 0;
|
||||
int ret;
|
||||
RETURN_ERROR_IF(dry_run, 0);
|
||||
RETURN_ERROR_IF_RO_OR_LO;
|
||||
@@ -1550,7 +1557,14 @@ OFF_T do_fallocate(int fd, OFF_T offset, OFF_T length)
|
||||
return length;
|
||||
return st.st_blocks * S_BLKSIZE;
|
||||
}
|
||||
return 0;
|
||||
/* With FALLOC_FL_KEEP_SIZE the blocks for [0, length) are reserved even
|
||||
* though the file size stays put. Return that reserved length (not 0) so
|
||||
* the caller's preallocated_len is meaningful: write_sparse() needs it to
|
||||
* choose do_punch_hole() over a plain lseek() when turning a null run into
|
||||
* a hole, and the receiver uses it to trim any over-preallocation. (A
|
||||
* stray 0 here, from 2019's switch to KEEP_SIZE, is why --preallocate
|
||||
* --sparse stopped producing sparse files.) */
|
||||
return length;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
1
t_stub.c
1
t_stub.c
@@ -27,6 +27,7 @@ int am_daemon = 0;
|
||||
int am_chrooted = 0;
|
||||
int modify_window = 0;
|
||||
int preallocate_files = 0;
|
||||
int sparse_files = 0;
|
||||
int protect_args = 0;
|
||||
int module_id = -1;
|
||||
int relative_paths = 0;
|
||||
|
||||
@@ -3,21 +3,28 @@
|
||||
do_fallocate (--preallocate) and do_punch_hole (sparse writes).
|
||||
|
||||
These are receiver-side file operations the resolver restructure also touches.
|
||||
Where the filesystem lacks fallocate/punch-hole the calls warn and the transfer
|
||||
still completes, so the content assertions hold regardless; the coverage is
|
||||
gained wherever the kernel supports them.
|
||||
Content must survive everywhere; in addition, where the filesystem stores holes,
|
||||
--preallocate --sparse must end up sparse (st_blocks below the apparent size).
|
||||
That is a regression guard: do_fallocate() must report the preallocated length
|
||||
so write_sparse() punches holes in the reserved extent instead of lseek'ing over
|
||||
it -- a stray 0 there silently left the file fully allocated.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
from rsyncfns import (
|
||||
FROMDIR, TODIR,
|
||||
assert_same, make_data_file, makepath, rmtree, run_rsync, test_skipped,
|
||||
assert_same, make_data_file, makepath, rmtree, run_rsync, test_fail,
|
||||
test_skipped,
|
||||
)
|
||||
|
||||
src = FROMDIR
|
||||
deep = os.path.join('d1', 'd2', 'd3', 'f')
|
||||
|
||||
|
||||
def allocated(path):
|
||||
return os.stat(path).st_blocks * 512
|
||||
|
||||
# --preallocate needs fallocate/posix_fallocate, and do_punch_hole needs
|
||||
# FALLOC_FL_PUNCH_HOLE -- both Linux (and Cygwin) features. macOS, the *BSDs and
|
||||
# Solaris build without preallocation and reject the option outright ("prealloc-
|
||||
@@ -31,6 +38,40 @@ if run_rsync('-a', '--preallocate', f'{src}/', f'{TODIR}/',
|
||||
check=False, capture_output=True).returncode != 0:
|
||||
test_skipped("--preallocate not supported on this platform")
|
||||
|
||||
def fs_can_punch_holes():
|
||||
"""True only where the kernel can deallocate blocks via FALLOC_FL_PUNCH_HOLE
|
||||
-- the mechanism do_punch_hole uses for --sparse. A filesystem may report
|
||||
seek-based sparseness yet still keep every block on a punch (e.g. where
|
||||
rsync's punch falls back to writing zeros), so probe the real capability and
|
||||
assert the hole only where it actually frees blocks."""
|
||||
import ctypes
|
||||
import ctypes.util
|
||||
KEEP_SIZE, PUNCH_HOLE = 0x01, 0x02
|
||||
p = src / 'punch-probe'
|
||||
fd = -1
|
||||
try:
|
||||
libc = ctypes.CDLL(ctypes.util.find_library('c') or 'libc.so.6',
|
||||
use_errno=True)
|
||||
libc.fallocate.argtypes = [ctypes.c_int, ctypes.c_int,
|
||||
ctypes.c_longlong, ctypes.c_longlong]
|
||||
fd = os.open(p, os.O_CREAT | os.O_RDWR | os.O_TRUNC, 0o644)
|
||||
os.write(fd, b'\xff' * 65536)
|
||||
before = os.fstat(fd).st_blocks
|
||||
ret = libc.fallocate(fd, PUNCH_HOLE | KEEP_SIZE, 0, 65536)
|
||||
return ret == 0 and os.fstat(fd).st_blocks < before
|
||||
except (OSError, AttributeError, ValueError):
|
||||
return False
|
||||
finally:
|
||||
if fd >= 0:
|
||||
os.close(fd)
|
||||
try:
|
||||
os.unlink(p)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
can_punch = fs_can_punch_holes()
|
||||
|
||||
|
||||
def seed_plain(size=1_000_000):
|
||||
rmtree(src)
|
||||
@@ -55,9 +96,17 @@ run_rsync('-a', '--preallocate', f'{src}/', f'{TODIR}/')
|
||||
assert_same(TODIR / deep, src / deep, label='--preallocate content')
|
||||
|
||||
# --- --preallocate --sparse on a holey file: do_fallocate + do_punch_hole ---
|
||||
# rsync.1 promises sparse blocks for this combination where the FS supports
|
||||
# holes. Assert it: do_fallocate reserves the whole extent, then the zero run
|
||||
# must be punched back out (st_blocks well below the apparent size).
|
||||
seed_holey()
|
||||
run_rsync('-a', '--preallocate', '--sparse', f'{src}/', f'{TODIR}/')
|
||||
assert_same(TODIR / deep, src / deep, label='--preallocate --sparse content')
|
||||
if can_punch and allocated(TODIR / deep) >= os.path.getsize(TODIR / deep):
|
||||
test_fail(f"--preallocate --sparse left the file fully allocated "
|
||||
f"(allocated {allocated(TODIR / deep)} for a "
|
||||
f"{os.path.getsize(TODIR / deep)}-byte file); the preallocated "
|
||||
"extent's zero run was not punched into a hole")
|
||||
|
||||
# --- --inplace --sparse update that introduces a zero run: do_punch_hole ----
|
||||
# (sparse_end's updating_basis_or_equiv branch punches the hole in place.)
|
||||
|
||||
Reference in New Issue
Block a user