From 26f629068793026446c17ac57108a32db00ce7ec Mon Sep 17 00:00:00 2001
From: Harald Sitter
Date: Tue, 5 Nov 2024 19:46:58 +0100
Subject: [PATCH] shrink btrfs to the actually necessary size

this doesn't yet give any gains because partition resizing also needs
figuring out, but it's a start. specifically we now deduplicate files,
balance extents and shrink the actual filesystem as much as possible.

in a second step we could then chop off the end of the partition table
to actually reduce the image size. except it is more difficult than one
might think. perhaps we should construct a new table entirely instead.
something to figure out
---
 bootstrap.sh          |  2 +-
 btrfs-send-receive.sh | 20 +++++++++++++++++++-
 btrfs-shrink.py       | 37 +++++++++++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 2 deletions(-)
 create mode 100755 btrfs-shrink.py

diff --git a/bootstrap.sh b/bootstrap.sh
index 713677a..5c3fd4c 100755
--- a/bootstrap.sh
+++ b/bootstrap.sh
@@ -18,7 +18,7 @@ pacman-key --init
 pacman --sync --refresh --noconfirm --sysupgrade
 pacman --sync --refresh --noconfirm mkosi git base-devel ukify vim cpio tree \
     rsync btrfs-progs s3cmd dosfstools qemu-img erofs-utils squashfs-tools go openssh \
-    compsize
+    compsize duperemove
 
 # Use mkosi from git to not have to wait for releases when things break.
 # OTOH things may break in git. So which version is used may change over time.
diff --git a/btrfs-send-receive.sh b/btrfs-send-receive.sh
index 4dedd60..bf80595 100755
--- a/btrfs-send-receive.sh
+++ b/btrfs-send-receive.sh
@@ -25,7 +25,7 @@ cleanup() {
 trap cleanup INT TERM EXIT
 cleanup
 
-mount -o remount,compress-force=zstd:8 "$PWD"
+mount -o remount,compress-force=zstd:15 "$PWD"
 btrfs property set . compression zstd
 btrfs balance start --force -mconvert=single -dconvert=single .
 
@@ -44,4 +44,22 @@ btrfs subvolume snapshot -r "$EXPORT.live" "@live"
 btrfs subvolume delete "$EXPORT.live"
 rm -f "$OUTPUT_ABS.btrfs.live"
 
+# Finally let's condense the data.
+btrfs filesystem usage .
+## Use duperemove to deduplicate files.
+## I would also love to use bees here as it works on extents but we don't know when it is done :( https://github.com/Zygo/bees/issues/279
+duperemove -dr .
+## Balance the filesystem with ever increasing chunk sizes to maximize space efficiency.
+btrfs balance start --force -mconvert=single -dconvert=single .
+btrfs balance start --force -dusage=16 .
+btrfs balance start --force -dusage=32 .
+btrfs balance start --force -dusage=64 .
+## And to finish things off we shrink the filesystem to the minimum size.
+./btrfs-shrink.py
+mv btrfs.json "$(dirname "$OUTPUT_ABS")/btrfs.json"
+## Sync changes to disk.
+btrfs filesystem sync .
+# Final report.
+btrfs filesystem usage .
+
 ln -svf "@$OUTPUT" "$ID"
diff --git a/btrfs-shrink.py b/btrfs-shrink.py
new file mode 100755
index 0000000..bb77be0
--- /dev/null
+++ b/btrfs-shrink.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+
+# SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
+# SPDX-FileCopyrightText: 2024 Harald Sitter
+
+# Shrink btrfs. It's a bit awkward because we don't really have a reliable way
+# to calculate how much space we actually need. So we first chop off a dynamic
+# portion but leave a bit of a buffer behind. Then we keep resizing until the
+# resize starts failing. The final size in bytes is written to btrfs.json.
+
+import json
+import subprocess
+
+out = subprocess.check_output(["btrfs", "--format", "json", "filesystem", "df", "."])
+# Start from the space that is allocated to block groups right now.
+size = sum(block_group["total"] for block_group in json.loads(out)["filesystem-df"])
+
+# Give 512MiB of buffer space. We'll shrink from there in smaller steps.
+size += 512 * 1024 * 1024
+
+# This must not fail silently: `size` is only correct (and only safe to record
+# in btrfs.json) if the filesystem really was resized to it.
+subprocess.run(["btrfs", "filesystem", "resize", str(size), "."], check=True)
+
+# With compression one extent is always 128KiB as per btrfs documentation.
+extent_size = 128 * 1024
+while True:
+    try:
+        subprocess.run(["btrfs", "filesystem", "resize", f"-{extent_size}", "."], stdout=subprocess.DEVNULL, stdin=subprocess.DEVNULL, check=True)
+    except subprocess.CalledProcessError as e:
+        # The filesystem refuses to get any smaller, so we are done.
+        print(e)
+        break
+    size -= extent_size
+
+with open("btrfs.json", "w") as file:
+    json.dump({"size": size}, file)