diff --git a/bootstrap.sh b/bootstrap.sh
index 713677a..5c3fd4c 100755
--- a/bootstrap.sh
+++ b/bootstrap.sh
@@ -18,7 +18,7 @@ pacman-key --init
 pacman --sync --refresh --noconfirm --sysupgrade
 pacman --sync --refresh --noconfirm mkosi git base-devel ukify vim cpio tree \
     rsync btrfs-progs s3cmd dosfstools qemu-img erofs-utils squashfs-tools go openssh \
-    compsize
+    compsize duperemove
 
 # Use mkosi from git to not have to wait for releases when things break.
 # OTOH things may break in git. So which version is used may change over time.
diff --git a/btrfs-send-receive.sh b/btrfs-send-receive.sh
index 4dedd60..bf80595 100755
--- a/btrfs-send-receive.sh
+++ b/btrfs-send-receive.sh
@@ -25,7 +25,7 @@ cleanup() {
 trap cleanup INT TERM EXIT
 cleanup
 
-mount -o remount,compress-force=zstd:8 "$PWD"
+mount -o remount,compress-force=zstd:15 "$PWD"
 btrfs property set . compression zstd
 btrfs balance start --force -mconvert=single -dconvert=single .
 
@@ -44,4 +44,22 @@ btrfs subvolume snapshot -r "$EXPORT.live" "@live"
 btrfs subvolume delete "$EXPORT.live"
 rm -f "$OUTPUT_ABS.btrfs.live"
 
+# Finally let's condense the data.
+btrfs filesystem usage .
+## Use duperemove to deduplicate files.
+## I would also love to use bees here as it works on extents but we don't know when it is done :( https://github.com/Zygo/bees/issues/279
+duperemove -dr .
+## Balance the filesystem with ever increasing usage thresholds to maximize space efficiency.
+btrfs balance start --force -mconvert=single -dconvert=single .
+btrfs balance start --force -dusage=16 .
+btrfs balance start --force -dusage=32 .
+btrfs balance start --force -dusage=64 .
+## And to finish things off we shrink the filesystem to the minimum size.
+./btrfs-shrink.py
+mv btrfs.json "$(dirname "$OUTPUT_ABS")/btrfs.json"
+## Sync changes to disk.
+btrfs filesystem sync .
+# Final report.
+btrfs filesystem usage .
+
 ln -svf "@$OUTPUT" "$ID"
diff --git a/btrfs-shrink.py b/btrfs-shrink.py
new file mode 100755
index 0000000..bb77be0
--- /dev/null
+++ b/btrfs-shrink.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+
+# SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
+# SPDX-FileCopyrightText: 2024 Harald Sitter
+
+# Shrink btrfs. It's a bit awkward because we don't really have a reliable way
+# to calculate how much space we actually need. So we first chop off a dynamic
+# portion but leave a bit of a buffer behind. Then we keep resizing until the
+# resize starts failing.
+
+import json
+import subprocess
+from subprocess import check_output
+
+# Headroom left on top of the allocated block groups by the initial resize.
+# We'll shrink from there in smaller steps.
+BUFFER_SIZE = 512 * 1024 * 1024
+
+# Step of the incremental shrink. With compression one extent is at most
+# 128KiB as per btrfs documentation.
+EXTENT_SIZE = 128 * 1024
+
+
+def main():
+    """Shrink the btrfs at the current directory; record the size in btrfs.json."""
+    out = check_output(["btrfs", "--format", "json", "filesystem", "df", "."])
+    df = json.loads(out)["filesystem-df"]
+
+    size = sum(block_group["total"] for block_group in df) + BUFFER_SIZE
+
+    # This resize must not fail silently: every later step is relative to it,
+    # so the size recorded below would no longer match the real filesystem.
+    subprocess.run(["btrfs", "filesystem", "resize", str(size), "."], check=True)
+
+    while True:
+        try:
+            subprocess.run(
+                ["btrfs", "filesystem", "resize", f"-{EXTENT_SIZE}", "."],
+                stdout=subprocess.DEVNULL,
+                stdin=subprocess.DEVNULL,
+                check=True,
+            )
+        except subprocess.CalledProcessError as e:
+            # The expected way out: btrfs refuses to shrink any further.
+            print(e)
+            break
+        size -= EXTENT_SIZE
+
+    with open("btrfs.json", "w", encoding="utf-8") as file:
+        json.dump({"size": size}, file)
+
+
+if __name__ == "__main__":
+    main()