diff --git a/app/flatpak-builtins-build-update-repo.c b/app/flatpak-builtins-build-update-repo.c index a2c5081b..c56fa2d4 100644 --- a/app/flatpak-builtins-build-update-repo.c +++ b/app/flatpak-builtins-build-update-repo.c @@ -32,6 +32,7 @@ #include "flatpak-builtins.h" #include "flatpak-utils-base-private.h" #include "flatpak-builtins-utils.h" +#include "flatpak-prune-private.h" static char *opt_title; static char *opt_comment; @@ -51,6 +52,7 @@ static char *opt_generate_delta_ref; static char *opt_gpg_homedir; static char **opt_gpg_key_ids; static gboolean opt_prune; +static gboolean opt_prune_dry_run; static gboolean opt_generate_deltas; static gboolean opt_no_update_appstream; static gboolean opt_no_update_summary; @@ -86,6 +88,7 @@ static GOptionEntry options[] = { { "static-delta-jobs", 0, 0, G_OPTION_ARG_INT, &opt_static_delta_jobs, N_("Max parallel jobs when creating deltas (default: NUMCPUs)"), N_("NUM-JOBS") }, { "static-delta-ignore-ref", 0, 0, G_OPTION_ARG_STRING_ARRAY, &opt_static_delta_ignore_refs, N_("Don't create deltas matching refs"), N_("PATTERN") }, { "prune", 0, 0, G_OPTION_ARG_NONE, &opt_prune, N_("Prune unused objects"), NULL }, + { "prune-dry-run", 0, 0, G_OPTION_ARG_NONE, &opt_prune_dry_run, N_("Prune but don't actually remove anything"), NULL }, { "prune-depth", 0, 0, G_OPTION_ARG_INT, &opt_prune_depth, N_("Only traverse DEPTH parents for each commit (default: -1=infinite)"), N_("DEPTH") }, { "generate-static-delta-from", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_STRING, &opt_generate_delta_from, NULL, NULL }, { "generate-static-delta-to", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_STRING, &opt_generate_delta_to, NULL, NULL }, @@ -636,15 +639,18 @@ flatpak_builtin_build_update_repo (int argc, char **argv, return FALSE; } - if (opt_prune) + if (opt_prune || opt_prune_dry_run) { gint n_objects_total; gint n_objects_pruned; guint64 objsize_total; g_autofree char *formatted_freed_size = NULL; - g_print ("Pruning old commits\n"); - if 
(!ostree_repo_prune (repo, OSTREE_REPO_PRUNE_FLAGS_REFS_ONLY, opt_prune_depth, + if (opt_prune_dry_run) + g_print ("Pruning old commits (dry-run)\n"); + else + g_print ("Pruning old commits\n"); + if (!flatpak_repo_prune (repo, opt_prune_depth, opt_prune_dry_run, &n_objects_total, &n_objects_pruned, &objsize_total, cancellable, error)) return FALSE; diff --git a/common/Makefile.am.inc b/common/Makefile.am.inc index 5e12e9e8..d15da089 100644 --- a/common/Makefile.am.inc +++ b/common/Makefile.am.inc @@ -167,6 +167,8 @@ libflatpak_common_la_SOURCES = \ common/flatpak-utils-http.c \ common/flatpak-utils-private.h \ common/flatpak-utils.c \ + common/flatpak-prune.c \ + common/flatpak-prune-private.h \ common/flatpak-zstd-decompressor.c \ common/flatpak-zstd-decompressor-private.h \ common/valgrind-private.h \ diff --git a/common/flatpak-prune-private.h b/common/flatpak-prune-private.h new file mode 100644 index 00000000..94ed3ffd --- /dev/null +++ b/common/flatpak-prune-private.h @@ -0,0 +1,35 @@ +/* + * Copyright © 2021 Red Hat, Inc + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . 
+ * + * Authors: + * Alexander Larsson + */ + +#ifndef __FLATPAK_PRUNE_H__ +#define __FLATPAK_PRUNE_H__ + +#include "flatpak-utils-private.h" + +gboolean flatpak_repo_prune (OstreeRepo *repo, + int depth, + gboolean dry_run, + int *out_objects_total, + int *out_objects_pruned, + guint64 *out_pruned_object_size_total, + GCancellable *cancellable, + GError **error); + +#endif /* __FLATPAK_PRUNE_H__ */ diff --git a/common/flatpak-prune.c b/common/flatpak-prune.c new file mode 100644 index 00000000..c9983fa6 --- /dev/null +++ b/common/flatpak-prune.c @@ -0,0 +1,829 @@ +/* + * Copyright © 2021 Red Hat, Inc + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + * + * Authors: + * Alexander Larsson + */ + +#include "config.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "flatpak-error.h" +#include "flatpak-prune-private.h" +#include "flatpak-variant-impl-private.h" +#include "libglnx/libglnx.h" +#include "valgrind-private.h" + +/* This is a custom implementation of ostree-prune that caches the + * traversal for better performance on larger repos. It also merges the list-object + * and prune operation to avoid allocating a lot of memory for the list of all + * objects in the repo. + * + * Locking strategy: + * + * Ostree supports three kinds of approaches to handling parallel access to + * the repo. 
+ * + * EXCLUSIVE LOCK: + * All global operations that modify the repo state take an exclusive lock on the + * repo which means no other repo-modifying operation is allowed in parallel. This + * is currently only done for pruning and summary generation. Prune for instance + * is global; it traverses from a set of root commits and assumes that everything + * that isn't reachable can be deleted, which is not compatible with adding a + * new commit that doesn't have a root commit yet. + * NOTE: Whenever objects are deleted we always hold an exclusive lock. + * + * SHARED LOCKS: + * Operations that do local modifications take a shared lock. This means we can + * have multiple such operations in parallel with each other, but not in parallel + * with an exclusive lock. The typical operation that does this is the commit. + * During a commit we don't add to the transaction objects that already exist + * in the repo, so we rely on them not disappearing because then when we finally + * move the new objects into the repo that would produce a repo that has a broken + * object reference. There is nothing that prohibits two parallel commits to the + * same branch, and doing that could cause one of the commits to be lost in the + * branch history. However, the repo as a whole will always end up valid. + * + * NOTHING: + * Operations that are purely read-only and can either succeed or + * not as a whole do nothing to protect against parallelism. Typical examples + * are checkouts or pulls from a remote client. If such an operation is started + * nothing protects the repo from removing (by e.g. prune) objects from the repo + * that will be necessary to complete the operation. However, such an issue will + * be detected by the operation. + * + * Given the above the standard approach for locking during prune should be to take + * an exclusive lock during the entire operation. 
However, the initial scan of the + * reachable objects of a repo can take a very long time, and blocking any new + * commits during this is not a great idea. So, to avoid this the prune operation + * does two scans of the reachable commits. One with a shared lock and then again + * with an exclusive lock. The second scan will be faster because it can ignore + * all the commits we scanned with the shared lock held, meaning we spend less + * time with an exclusive lock (during which no new commits can be added to the repo). + * + * Upgrading the shared lock to an exclusive lock is deadlock prone, as two prune + * operations could be holding the shared lock and both blocking forever to get the + * exclusive lock, so we release the lock between the phases. This means there is + * a small chance that some objects were deleted between the two phases. However, that + * will only cause the prune operation to over-estimate what objects are reachable, so + * it can never cause it to delete reachable objects. 
+ */
+
+/* Initialize @dfd_iter on the directory @path (relative to @dfd), tolerating
+ * a directory that does not exist.
+ *
+ * On success *@out_exists says whether the directory was found; when it is
+ * FALSE, @dfd_iter has not been initialized and must not be iterated. Any
+ * errno other than ENOENT from opening the directory is reported via @error.
+ */
+static gboolean
+ot_dfd_iter_init_allow_noent (int                 dfd,
+                              const char         *path,
+                              GLnxDirFdIterator  *dfd_iter,
+                              gboolean           *out_exists,
+                              GError            **error)
+{
+  glnx_autofd int fd = glnx_opendirat_with_errno (dfd, path, TRUE);
+  if (fd < 0)
+    {
+      if (errno != ENOENT)
+        return glnx_throw_errno_prefix (error, "opendirat");
+      *out_exists = FALSE;
+      return TRUE;
+    }
+  if (!glnx_dirfd_iterator_init_take_fd (&fd, dfd_iter, error))
+    return FALSE;
+  *out_exists = TRUE;
+  return TRUE;
+}
+
+/* Object name helpers */
+
+/* GHashFunc for object names stored as GVariants: mixes the string hash of
+ * the checksum with the object type.
+ */
+static guint
+_ostree_object_name_hash (gconstpointer a)
+{
+  VarObjectNameRef ref = var_object_name_from_gvariant ((GVariant *)a);
+
+  return g_str_hash (var_object_name_get_checksum (ref)) + (guint)var_object_name_get_objtype (ref);
+}
+
+/* GEqualFunc matching _ostree_object_name_hash(): two object names are equal
+ * when both the checksum string and the object type are the same.
+ */
+static gboolean
+_ostree_object_name_equal (gconstpointer a,
+                           gconstpointer b)
+{
+  VarObjectNameRef ref_a = var_object_name_from_gvariant ((GVariant *)a);
+  /* Must be built from @b; building it from @a makes every pair compare equal. */
+  VarObjectNameRef ref_b = var_object_name_from_gvariant ((GVariant *)b);
+
+  return
+    g_str_equal (var_object_name_get_checksum (ref_a), var_object_name_get_checksum (ref_b)) &&
+    var_object_name_get_objtype (ref_a) == var_object_name_get_objtype (ref_b);
+}
+
+/* Create an empty hash table keyed by object-name GVariants (hashed and
+ * compared with the helpers above). No key destroy notify is installed;
+ * values are released with g_variant_unref().
+ */
+static GHashTable *
+reachable_commits_new (void)
+{
+  return g_hash_table_new_full (_ostree_object_name_hash, _ostree_object_name_equal,
+                                NULL, (GDestroyNotify)g_variant_unref);
+}
+
+/* Wrapper to handle flock vs OFD locking based on GLnxLockFile
+ *
+ * @flags uses the flock(2) vocabulary: LOCK_SH or LOCK_EX, optionally ORed
+ * with LOCK_NB. Returns FALSE (leaving errno from the failing call) when the
+ * lock could not be taken.
+ */
+static gboolean
+do_repo_lock (int fd,
+              int flags)
+{
+  int res;
+
+#ifdef F_OFD_SETLK
+  struct flock fl = {
+    .l_type = (flags & ~LOCK_NB) == LOCK_EX ? F_WRLCK : F_RDLCK,
+    .l_whence = SEEK_SET,
+    .l_start = 0,
+    .l_len = 0,
+  };
+
+  res = TEMP_FAILURE_RETRY (fcntl (fd, (flags & LOCK_NB) ? F_OFD_SETLK : F_OFD_SETLKW, &fl));
+#else
+  /* No OFD locks at build time: pretend the call failed with EINVAL so the
+   * flock() fallback below is taken. */
+  res = -1;
+  errno = EINVAL;
+#endif
+
+  /* Fallback to flock when OFD locks not available */
+  if (res < 0)
+    {
+      if (errno == EINVAL)
+        res = TEMP_FAILURE_RETRY (flock (fd, flags));
+      if (res < 0)
+        return FALSE;
+    }
+
+  return TRUE;
+}
+
+/* Open (creating if needed) the ".lock" file in the repo directory and lock
+ * it according to @flags (see do_repo_lock()).
+ *
+ * On success the locked file descriptor is handed to the caller in
+ * *@out_lock_fd; closing it is what the callers in this file rely on to drop
+ * the lock. @cancellable is currently unused.
+ */
+static gboolean
+get_repo_lock (OstreeRepo    *repo,
+               int            flags,
+               int           *out_lock_fd,
+               GCancellable  *cancellable,
+               GError       **error)
+{
+  glnx_autofd int lock_fd = -1;
+
+  /* This re-implements a simpler (non-stacking) version of the ostree repo lock, as
+     the API for that is not yet available. When it is (see https://github.com/ostreedev/ostree/pull/2341)
+     this should be removed.
+     Note: This also doesn't respect the locking config options, it always locks and it always blocks.
+  */
+
+  lock_fd = TEMP_FAILURE_RETRY (openat (ostree_repo_get_dfd (repo), ".lock",
+                                        O_CREAT | O_RDWR | O_CLOEXEC, 0660));
+  if (lock_fd < 0)
+    return glnx_throw_errno_prefix (error,
+                                    "Opening lock file %s/.lock failed",
+                                    flatpak_file_get_path_cached (ostree_repo_get_path (repo)));
+
+  if (!do_repo_lock (lock_fd, flags))
+    return glnx_throw_errno_prefix (error, "Locking repo failed (%s)", (flags & LOCK_EX) != 0 ? "exclusive" : "shared");
+
+  *out_lock_fd = glnx_steal_fd (&lock_fd);
+  return TRUE;
+}
+
+#define _LOOSE_PATH_MAX (256)
+
+/* Format the repo-relative path of the ".commitmeta2" side file for @commit
+ * into @path_buf: "objects/<first two chars>/<rest>.commitmeta2".
+ * @commit must be at least two characters long.
+ */
+static inline void
+get_extra_commitmeta_path (const char *commit,
+                           char       *path_buf,
+                           gsize       path_buf_len)
+{
+  snprintf (path_buf, path_buf_len,
+            "objects/%c%c/%s.commitmeta2",
+            commit[0], commit[1], commit + 2);
+}
+
+/* Load the extra commit metadata ("a{sv}") stored next to @commit.
+ *
+ * A missing file is not an error: the function returns TRUE and sets
+ * *@out_variant to NULL. Otherwise *@out_variant receives a new reference.
+ */
+static gboolean
+load_extra_commitmeta (OstreeRepo    *repo,
+                       const char    *commit,
+                       GVariant     **out_variant,
+                       GCancellable  *cancellable,
+                       GError       **error)
+{
+  char loose_path_buf[_LOOSE_PATH_MAX];
+  glnx_autofd int fd = -1;
+  g_autoptr(GVariant) ret_variant = NULL;
+  g_autoptr(GError) temp_error = NULL;
+
+  get_extra_commitmeta_path (commit, loose_path_buf, sizeof (loose_path_buf));
+
+  if (!glnx_openat_rdonly (ostree_repo_get_dfd (repo), loose_path_buf, FALSE, &fd, &temp_error) &&
+      !g_error_matches (temp_error, G_IO_ERROR, G_IO_ERROR_NOT_FOUND))
+    {
+      /* Ownership moves to the caller; steal it so the g_autoptr cleanup
+       * does not free the error we just handed out. */
+      g_propagate_error (error, g_steal_pointer (&temp_error));
+      return FALSE;
+    }
+
+  /* fd is still -1 when the file simply does not exist */
+  if (fd != -1)
+    {
+      g_autoptr(GBytes) content = glnx_fd_readall_bytes (fd, cancellable, error);
+      if (!content)
+        return FALSE;
+      ret_variant = g_variant_ref_sink (g_variant_new_from_bytes (G_VARIANT_TYPE ("a{sv}"), content, TRUE));
+    }
+
+  *out_variant = g_steal_pointer (&ret_variant);
+  return TRUE;
+}
+
+/* Write the serialized form of @variant as the extra commit metadata file
+ * for @commit, replacing any existing file.
+ */
+static gboolean
+save_extra_commitmeta (OstreeRepo    *repo,
+                       const char    *commit,
+                       GVariant      *variant,
+                       GCancellable  *cancellable,
+                       GError       **error)
+{
+  char loose_path_buf[_LOOSE_PATH_MAX];
+
+  get_extra_commitmeta_path (commit, loose_path_buf, sizeof (loose_path_buf));
+
+  if (!glnx_file_replace_contents_at (ostree_repo_get_dfd (repo), loose_path_buf,
+                                      g_variant_get_data (variant),
+                                      g_variant_get_size (variant),
+                                      GLNX_FILE_REPLACE_DATASYNC_NEW,
+                                      cancellable, error))
+    return FALSE;
+
+  return TRUE;
+}
+
+/* Best-effort removal of the extra commit metadata file for @commit.
+ * Always returns TRUE; @cancellable and @error are unused.
+ */
+static gboolean
+remove_extra_commitmeta (OstreeRepo    *repo,
+                         const char    *commit,
+                         GCancellable  *cancellable,
+                         GError       **error)
+{
+  char loose_path_buf[_LOOSE_PATH_MAX];
+
+  get_extra_commitmeta_path (commit, loose_path_buf, sizeof (loose_path_buf));
+
+  /* Ignore errors */
+  (void) unlinkat (ostree_repo_get_dfd (repo), loose_path_buf, 0);
+
+  return TRUE;
+}
+
+
+
+/* Traverse parent commits starting at commit_checksum, and
+ * up to maxdepth parents (-1 for unlimited).
+ *
+ * Every commit found is added (as a newly allocated string) to
+ * @inout_checksums. The walk stops silently at the first commit that is not
+ * present in the repo.
+ *
+ * This doesn't do any locking, so need something else to have an exclusive lock
+ * on the repo to avoid races with other processes modifying the repo.
+ */
+static gboolean
+traverse_commit_parents_unlocked (OstreeRepo    *repo,
+                                  const char    *commit_checksum,
+                                  int            maxdepth,
+                                  GHashTable    *inout_checksums,
+                                  GCancellable  *cancellable,
+                                  GError       **error)
+{
+  /* Owns the parent checksum that commit_checksum points at after the first step */
+  g_autofree char *tmp_checksum = NULL;
+
+  while (TRUE)
+    {
+      g_autoptr(GVariant) commit = NULL;
+
+      if (!ostree_repo_load_variant_if_exists (repo, OSTREE_OBJECT_TYPE_COMMIT,
+                                               commit_checksum, &commit,
+                                               error))
+        return FALSE;
+
+      /* Just return if the parent isn't found; we do expect most
+       * people to have partial repositories.
+       */
+      if (commit == NULL)
+        break;
+
+      g_hash_table_add (inout_checksums, g_strdup (commit_checksum));
+
+      gboolean recurse = FALSE;
+      if (maxdepth == -1 || maxdepth > 0)
+        {
+          /* commit_checksum was already copied into the table above, so the
+           * previous parent string can be released before fetching the next. */
+          g_free (tmp_checksum);
+          tmp_checksum = ostree_commit_get_parent (commit);
+          if (tmp_checksum)
+            {
+              commit_checksum = tmp_checksum;
+              if (maxdepth > 0)
+                maxdepth -= 1;
+              recurse = TRUE;
+            }
+        }
+      if (!recurse)
+        break;
+    }
+
+  return TRUE;
+}
+
+/* We need to keep track of possibly a lot of object names (flathub has > 16 million objects atm),
+ * so the list of reachable objectnames need to be very compact. To handle this we use a fixed
+ * size array to reference the object names. The first 32 bytes is the checksum in raw form and
+ * the final byte is the object type.
+ */
+
+#define FLATPAK_OSTREE_OBJECT_NAME_LEN (32 + 1)
+typedef guint8 FlatpakOstreeObjectName[FLATPAK_OSTREE_OBJECT_NAME_LEN];
+
+#define FLATPAK_OSTREE_OBJECT_NAME_ELEMENT_TYPE "(yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy)" /* 32 + 1 bytes, is a fixed type */
+
+/* Fill @name from a hex @checksum string and an object type: bytes 0..31
+ * receive the raw checksum, byte 32 the object type (which must fit a byte).
+ */
+static void
+flatpak_ostree_object_name_serialize (FlatpakOstreeObjectName *name,
+                                      const char              *checksum,
+                                      OstreeObjectType         objtype)
+{
+  ostree_checksum_inplace_to_bytes (checksum, &(*name)[0]);
+  g_assert (objtype < 255);
+  (*name)[32] = (guint8) objtype;
+}
+
+/* memcmp()-style ordering over the full 33 bytes of two object names. */
+static gint
+flatpak_ostree_name_compare (const FlatpakOstreeObjectName *name_a,
+                             const FlatpakOstreeObjectName *name_b)
+{
+  return memcmp (name_a, name_b, sizeof (FlatpakOstreeObjectName));
+}
+
+/* GHashFunc for FlatpakOstreeObjectName pointers. */
+static guint
+flatpak_ostree_object_name_hash (gconstpointer a)
+{
+  const FlatpakOstreeObjectName *name = a;
+  const guint8 *data = &(*name)[0];
+
+  /* The checksum is essentially all random, so any 4 bytes of it should
+     be a good hash value. However, we avoid using the first ones, because
+     those are the ones that will be first compared on a hash collision,
+     so if they were always the same that would waste 4 comparisons.
+
+     Each byte is widened to guint before shifting: a plain guint8 is
+     promoted to (signed) int, and shifting a value >= 0x80 left by 24
+     would overflow into the sign bit. */
+  return
+    (guint) data[32] |
+    (guint) data[31] << 8 |
+    (guint) data[30] << 16 |
+    (guint) data[29] << 24;
+}
+
+/* GEqualFunc for FlatpakOstreeObjectName pointers: byte-wise equality. */
+static gboolean
+flatpak_ostree_object_name_equal (gconstpointer a,
+                                  gconstpointer b)
+{
+  const FlatpakOstreeObjectName *name_a = a;
+  const FlatpakOstreeObjectName *name_b = b;
+
+  return flatpak_ostree_name_compare (name_a, name_b) == 0;
+}
+
+/* This is a container for allocating FlatpakOstreeObjectNames in chunks without relocations so
+ * that the resulting pointers are stable and can be stored in e.g. a hashtable.
+ * Storing the names in chunks like this means we avoid fragmentation and overhead related to
+ * each individual name which is important as we can have millions of object names in a repo.
+ */
+
+#define BAG_CHUNK_SIZE 1985 /* nr of objects per chunk in bag, makes chunk fit in 64k with some spare for overhead */
+typedef struct {
+  FlatpakOstreeObjectName *current_chunk; /* chunk being filled, NULL when a fresh one is needed */
+  gsize current_chunk_used;               /* how many slots of current_chunk are taken */
+  GSList *chunks;                         /* every chunk ever allocated, for freeing */
+  GHashTable *hash;                       /* (element-type FlatpakOstreeObjectName) set of stored names */
+} FlatpakOstreeObjectNameBag;
+
+/* Allocate an empty bag. The hash set borrows pointers into the chunks, so
+ * it installs no destroy notifies of its own.
+ */
+static FlatpakOstreeObjectNameBag *
+object_name_bag_new (void)
+{
+  FlatpakOstreeObjectNameBag *self = g_new0 (FlatpakOstreeObjectNameBag, 1);
+
+  self->hash = g_hash_table_new (flatpak_ostree_object_name_hash,
+                                 flatpak_ostree_object_name_equal);
+  return self;
+}
+
+/* Release the hash set, all chunks and the bag itself. */
+static void
+object_name_bag_free (FlatpakOstreeObjectNameBag *bag)
+{
+  g_hash_table_unref (bag->hash);
+  g_slist_free_full (bag->chunks, g_free);
+  g_free (bag);
+}
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC (FlatpakOstreeObjectNameBag, object_name_bag_free)
+
+/* TRUE when a name byte-equal to @name has been inserted into @bag. */
+static gboolean
+object_name_bag_contains (FlatpakOstreeObjectNameBag    *bag,
+                          const FlatpakOstreeObjectName *name)
+{
+  return g_hash_table_contains (bag->hash, name);
+}
+
+/* Copy @name into the bag unless an equal name is already stored. */
+static void
+object_name_bag_insert (FlatpakOstreeObjectNameBag    *bag,
+                        const FlatpakOstreeObjectName *name)
+{
+  FlatpakOstreeObjectName *chunk;
+  FlatpakOstreeObjectName *slot;
+
+  if (object_name_bag_contains (bag, name))
+    return;
+
+  /* Start a new chunk lazily, only when there is something to put in it */
+  chunk = bag->current_chunk;
+  if (chunk == NULL)
+    {
+      chunk = g_new (FlatpakOstreeObjectName, BAG_CHUNK_SIZE);
+      bag->current_chunk = chunk;
+      bag->current_chunk_used = 0;
+      bag->chunks = g_slist_prepend (bag->chunks, chunk);
+    }
+
+  /* Claim the next free slot and store a private copy of the name there */
+  slot = chunk + bag->current_chunk_used;
+  bag->current_chunk_used += 1;
+  memcpy (slot, name, sizeof (FlatpakOstreeObjectName));
+  g_hash_table_add (bag->hash, slot);
+
+  /* A full chunk is retired; the next insert allocates a new one */
+  if (bag->current_chunk_used == BAG_CHUNK_SIZE)
+    bag->current_chunk = NULL;
+}
+
+/* Find all reachable commit objects starting from any ref in the repo
+ * optionally limiting the number of parent commits.
+ *
+ * This doesn't do any locking, so need something else to have an exclusive lock
+ * on the repo to avoid races with other processes modifying the repo.
+ *
+ * @depth is passed on unchanged as the parent-traversal limit for every ref
+ * (-1 for unlimited). Every object reachable from the commits found is added
+ * to @reachable; commits whose own name is already in @reachable are skipped.
+ * For non-partial commits the computed reachable set is cached in the
+ * commit's extra metadata under the "xa.reachable" key and reused later.
+ */
+static gboolean
+traverse_reachable_refs_unlocked (OstreeRepo                  *repo,
+                                  int                          depth,
+                                  FlatpakOstreeObjectNameBag  *reachable,
+                                  GCancellable                *cancellable,
+                                  GError                     **error)
+{
+  g_autoptr(GHashTable) all_refs = NULL;  /* (element-type utf8 utf8) */
+  g_autoptr(GHashTable) all_collection_refs = NULL;  /* (element-type OstreeChecksumRef utf8) */
+  g_autoptr(GHashTable) checksums = NULL;  /* (element-type const char *) */
+
+  checksums = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
+
+  /* Get all commits up to depth from the regular refs */
+  if (!ostree_repo_list_refs (repo, NULL, &all_refs,
+                              cancellable, error))
+    return FALSE;
+
+  GLNX_HASH_TABLE_FOREACH_V (all_refs, const char*, checksum)
+    {
+      if (!traverse_commit_parents_unlocked (repo, checksum, depth, checksums, cancellable, error))
+        return FALSE;
+    }
+
+  /* Get all commits up to depth from the collection refs */
+  if (!ostree_repo_list_collection_refs (repo, NULL, &all_collection_refs,
+                                         OSTREE_REPO_LIST_REFS_EXT_EXCLUDE_REMOTES, cancellable, error))
+    return FALSE;
+
+  GLNX_HASH_TABLE_FOREACH_V (all_collection_refs, const char*, checksum)
+    {
+      if (!traverse_commit_parents_unlocked (repo, checksum, depth, checksums, cancellable, error))
+        return FALSE;
+    }
+
+  /* Find reachable objects from each commit checksum */
+  GLNX_HASH_TABLE_FOREACH_V (checksums, const char*, checksum)
+    {
+      g_autoptr(GVariant) extra_commitmeta = NULL;
+      g_autoptr(GVariant) commit_reachable = NULL;
+      FlatpakOstreeObjectName commit_name;
+
+      /* Early bail-out if we already scanned this commit in the first phase (or via some other branch) */
+      flatpak_ostree_object_name_serialize (&commit_name, checksum, OSTREE_OBJECT_TYPE_COMMIT);
+      if (object_name_bag_contains (reachable, &commit_name))
+        continue;
+
+      flatpak_debug2 ("Finding objects to keep for commit %s", checksum);
+
+      if (!load_extra_commitmeta (repo, checksum, &extra_commitmeta, cancellable, error))
+        return FALSE;
+
+      /* Reuse the cached reachable set if a previous run stored one */
+      if (extra_commitmeta)
+        commit_reachable = g_variant_lookup_value (extra_commitmeta, "xa.reachable", G_VARIANT_TYPE ("a" FLATPAK_OSTREE_OBJECT_NAME_ELEMENT_TYPE));
+
+      if (commit_reachable == NULL)
+        {
+          g_autoptr(GHashTable) commit_reachable_ht = reachable_commits_new ();
+          g_autoptr(GVariant) new_extra_commitmeta = NULL;
+          g_autofree FlatpakOstreeObjectName *commit_reachable_raw = NULL;
+          FlatpakOstreeObjectName *next_commit_reachable_raw;
+          g_auto(GVariantDict) extra_commitmeta_builder = FLATPAK_VARIANT_BUILDER_INITIALIZER;
+          OstreeRepoCommitState commitstate = 0;
+          g_autoptr(GError) local_error = NULL;
+
+          /* Only the commit state is wanted here; a missing commit is tolerated */
+          if (!ostree_repo_load_commit (repo, checksum, NULL, &commitstate, &local_error) &&
+              !g_error_matches (local_error, G_IO_ERROR, G_IO_ERROR_NOT_FOUND))
+            {
+              g_propagate_error (error, g_steal_pointer (&local_error));
+              return FALSE;
+            }
+
+          if (!ostree_repo_traverse_commit_union (repo, checksum, 0, commit_reachable_ht,
+                                                  cancellable, error))
+            return FALSE;
+
+          /* Flatten the traversal result into the compact 33-byte form */
+          commit_reachable_raw = g_new (FlatpakOstreeObjectName, g_hash_table_size (commit_reachable_ht));
+
+          next_commit_reachable_raw = &commit_reachable_raw[0];
+          GLNX_HASH_TABLE_FOREACH_V (commit_reachable_ht, GVariant *, reachable_commit)
+            {
+              VarObjectNameRef ref = var_object_name_from_gvariant ((GVariant *)reachable_commit);
+
+              flatpak_ostree_object_name_serialize (next_commit_reachable_raw,
+                                                    var_object_name_get_checksum (ref),
+                                                    var_object_name_get_objtype (ref));
+              next_commit_reachable_raw++;
+            }
+
+          commit_reachable = g_variant_ref_sink (g_variant_new_fixed_array (G_VARIANT_TYPE (FLATPAK_OSTREE_OBJECT_NAME_ELEMENT_TYPE),
+                                                                            commit_reachable_raw,
+                                                                            g_hash_table_size (commit_reachable_ht),
+                                                                            sizeof(FlatpakOstreeObjectName)));
+
+          /* Don't save the reachable set for later reuse if the commit is partial, as it may not be complete */
+          if ((commitstate & OSTREE_REPO_COMMIT_STATE_PARTIAL) == 0)
+            {
+              g_variant_dict_init (&extra_commitmeta_builder, extra_commitmeta);
+              g_variant_dict_insert_value (&extra_commitmeta_builder, "xa.reachable", commit_reachable);
+
+              new_extra_commitmeta = g_variant_ref_sink (g_variant_dict_end (&extra_commitmeta_builder));
+              if (!save_extra_commitmeta (repo, checksum, new_extra_commitmeta, cancellable, error))
+                return FALSE;
+            }
+        }
+
+      {
+        gsize n_reachable, i;
+        const FlatpakOstreeObjectName *reachable_objects =
+          g_variant_get_fixed_array (commit_reachable, &n_reachable,
+                                     sizeof(FlatpakOstreeObjectName));
+
+        for (i = 0; i < n_reachable; i++)
+          object_name_bag_insert (reachable, &reachable_objects[i]);
+      }
+    }
+
+  return TRUE;
+}
+
+/* State shared by the loose-object pruning pass */
+typedef struct {
+  OstreeRepo *repo;
+  FlatpakOstreeObjectNameBag *reachable; /* objects that must be kept */
+  gboolean dont_prune;                   /* dry run: count, but delete nothing */
+  guint n_reachable;                     /* loose objects found in @reachable */
+  guint n_unreachable;                   /* loose objects not found in @reachable */
+  guint64 freed_bytes;                   /* summed storage size of unreachable objects */
+} OtPruneData;
+
+/* Account for one unreachable loose object and, unless this is a dry run,
+ * delete it. For commits the extra commit metadata file is removed and
+ * ostree_repo_mark_commit_partial() is called with FALSE before the delete.
+ */
+static gboolean
+prune_loose_object (OtPruneData       *data,
+                    const char        *checksum,
+                    OstreeObjectType   objtype,
+                    GCancellable      *cancellable,
+                    GError           **error)
+{
+  guint64 storage_size = 0;
+
+  flatpak_debug2 ("Pruning unneeded object %s.%s", checksum,
+                  ostree_object_type_to_string (objtype));
+
+  if (!ostree_repo_query_object_storage_size (data->repo, objtype, checksum,
+                                              &storage_size, cancellable, error))
+    return FALSE;
+
+  data->freed_bytes += storage_size;
+  data->n_unreachable++;
+
+  if (!data->dont_prune)
+    {
+      if (objtype == OSTREE_OBJECT_TYPE_COMMIT)
+        {
+          if (!remove_extra_commitmeta (data->repo, checksum, cancellable, error))
+            return FALSE;
+
+          if (!ostree_repo_mark_commit_partial (data->repo, checksum, FALSE, error))
+            return FALSE;
+        }
+
+      if (!ostree_repo_delete_object (data->repo, objtype, checksum,
+                                      cancellable, error))
+        return FALSE;
+    }
+
+  return TRUE;
+}
+
+/* Scan one "objects/XX" directory (@prefix, relative to @dfd) and prune every
+ * loose object in it that is not in data->reachable. A missing directory is
+ * not an error.
+ */
+static gboolean
+prune_unreachable_loose_objects_at (OstreeRepo    *self,
+                                    OtPruneData   *data,
+                                    int            dfd,
+                                    const char    *prefix,
+                                    GCancellable  *cancellable,
+                                    GError       **error)
+{
+
+  g_auto(GLnxDirFdIterator) dfd_iter = { 0, };
+  gboolean exists;
+  if (!ot_dfd_iter_init_allow_noent (dfd, prefix, &dfd_iter, &exists, error))
+    return FALSE;
+  /* Note early return */
+  if (!exists)
+    return TRUE;
+
+  while (TRUE)
+    {
+      struct dirent *dent;
+      FlatpakOstreeObjectName key;
+
+      if (!glnx_dirfd_iterator_next_dent (&dfd_iter, &dent, cancellable, error))
+        return FALSE;
+      if (dent == NULL)
+        break;
+
+      const char *name = dent->d_name;
+      if (strcmp (name, ".") == 0 ||
+          strcmp (name, "..") == 0)
+        continue;
+
+      const char *dot = strrchr (name, '.');
+      if (!dot)
+        continue;
+
+      OstreeObjectType objtype;
+
+      if (strcmp (dot, ".filez") == 0)
+        objtype = OSTREE_OBJECT_TYPE_FILE;
+      else if (strcmp (dot, ".dirtree") == 0)
+        objtype = OSTREE_OBJECT_TYPE_DIR_TREE;
+      else if (strcmp (dot, ".dirmeta") == 0)
+        objtype = OSTREE_OBJECT_TYPE_DIR_META;
+      else if (strcmp (dot, ".commit") == 0)
+        objtype = OSTREE_OBJECT_TYPE_COMMIT;
+      else /* No need to handle payload links, they don't happen in archive repos and we call the ostree prune for all other repos */
+        continue;
+
+      /* The file name must hold the remaining 62 hex chars of the checksum */
+      if ((dot - name) != 62)
+        continue;
+
+      char buf[OSTREE_SHA256_STRING_LEN+1];
+
+      /* Rebuild the full checksum: the two chars after "objects/" in the
+       * prefix, followed by the 62 chars from the file name. */
+      memcpy (buf, prefix+8, 2);
+      memcpy (buf + 2, name, 62);
+      buf[sizeof(buf)-1] = '\0';
+
+      flatpak_ostree_object_name_serialize (&key, buf, objtype);
+      if (object_name_bag_contains (data->reachable, &key))
+        {
+          data->n_reachable++;
+          continue;
+        }
+
+      if (!prune_loose_object (data, buf, objtype, cancellable, error))
+        return FALSE;
+    }
+
+  return TRUE;
+}
+
+/* Run prune_unreachable_loose_objects_at() over all 256 "objects/00" ..
+ * "objects/ff" directories of the repo.
+ */
+static gboolean
+prune_unreachable_loose_objects (OstreeRepo    *self,
+                                 OtPruneData   *data,
+                                 GCancellable  *cancellable,
+                                 GError       **error)
+{
+  static const gchar hexchars[] = "0123456789abcdef";
+  int dfd = ostree_repo_get_dfd (self);
+
+  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
+
+  for (guint c = 0; c < 256; c++)
+    {
+      char buf[] = "objects/XX";
+      buf[8] = hexchars[c >> 4];
+      buf[9] = hexchars[c & 0xF];
+
+      if (!prune_unreachable_loose_objects_at (self, data, dfd, buf, cancellable, error))
+        return FALSE;
+    }
+
+  return TRUE;
+}
+
+/* Prune objects of @repo that are not reachable from any ref.
+ *
+ * @depth: how many parents of each ref's commit to keep (-1 for unlimited).
+ * @dry_run: when TRUE nothing is deleted, only the statistics are computed.
+ * @out_objects_total: receives reachable + unreachable loose object count.
+ * @out_objects_pruned: receives the number of unreachable objects.
+ * @out_pruned_object_size_total: receives their summed storage size.
+ *
+ * Repos that are not in archive mode are handed to ostree_repo_prune().
+ * For archive repos the reachable set is computed twice, first under a
+ * shared and then under an exclusive repo lock, and the loose objects are
+ * pruned while the exclusive lock is held. Static deltas are pruned last,
+ * outside the lock, and only when not in dry-run mode.
+ *
+ * Returns: TRUE on success, FALSE with @error set otherwise.
+ */
+gboolean
+flatpak_repo_prune (OstreeRepo    *repo,
+                    int            depth,
+                    gboolean       dry_run,
+                    int           *out_objects_total,
+                    int           *out_objects_pruned,
+                    guint64       *out_pruned_object_size_total,
+                    GCancellable  *cancellable,
+                    GError       **error)
+{
+  g_autoptr(FlatpakOstreeObjectNameBag) reachable = object_name_bag_new ();
+  OtPruneData data = { 0, };
+  g_autoptr(GTimer) timer = NULL;
+
+  /* This version only handles archive repos, if called for something else call ostree */
+  if (ostree_repo_get_mode (repo) != OSTREE_REPO_MODE_ARCHIVE)
+    {
+      OstreeRepoPruneFlags flags = OSTREE_REPO_PRUNE_FLAGS_REFS_ONLY;
+      if (dry_run)
+        flags |= OSTREE_REPO_PRUNE_FLAGS_NO_PRUNE;
+
+      return ostree_repo_prune (repo, flags, depth,
+                                out_objects_total, out_objects_pruned, out_pruned_object_size_total,
+                                cancellable, error);
+    }
+
+  /* A single timer is reused for every phase below (restarted with
+   * g_timer_start); allocating a new one per phase would leak the earlier
+   * ones, since the g_autoptr only frees the last value assigned. */
+  timer = g_timer_new ();
+
+  {
+    /* shared lock in this region, see locking strategy above */
+    glnx_autofd int lock_fd = -1;
+
+    if (!get_repo_lock (repo, LOCK_SH, &lock_fd, cancellable, error))
+      return FALSE;
+
+    g_debug ("Finding reachable objects, unlocked (depth=%d)", depth);
+    g_timer_start (timer);
+
+    if (!traverse_reachable_refs_unlocked (repo, depth, reachable, cancellable, error))
+      return FALSE;
+
+    g_timer_stop (timer);
+    g_debug ("Elapsed time: %.1f sec",  g_timer_elapsed (timer, NULL));
+  }
+
+  {
+    /* exclusive lock in this region, see locking strategy above */
+    glnx_autofd int lock_fd = -1;
+
+    if (!get_repo_lock (repo, LOCK_EX, &lock_fd, cancellable, error))
+      return FALSE;
+
+    g_debug ("Finding reachable objects, locked (depth=%d)", depth);
+    g_timer_start (timer);
+
+    /* Second scan: commits already recorded by the first scan are skipped */
+    if (!traverse_reachable_refs_unlocked (repo, depth, reachable, cancellable, error))
+      return FALSE;
+
+    data.repo = repo;
+    data.reachable = reachable;
+    data.dont_prune = dry_run;
+
+    g_timer_stop (timer);
+    g_debug ("Elapsed time: %.1f sec",  g_timer_elapsed (timer, NULL));
+
+    g_debug ("Pruning unreachable objects");
+    g_timer_start (timer);
+
+    if (!prune_unreachable_loose_objects (repo, &data, cancellable, error))
+      return FALSE;
+
+    g_timer_stop (timer);
+    g_debug ("Elapsed time: %.1f sec",  g_timer_elapsed (timer, NULL));
+  }
+
+  /* Prune static deltas outside lock to avoid conflict with its exclusive lock */
+  if (!dry_run)
+    {
+      g_debug ("Pruning static deltas");
+      g_timer_start (timer);
+
+      if (!ostree_repo_prune_static_deltas (repo, NULL, cancellable, error))
+        return FALSE;
+
+      g_timer_stop (timer);
+      g_debug ("Elapsed time: %.1f sec",  g_timer_elapsed (timer, NULL));
+    }
+
+  *out_objects_total = data.n_reachable + data.n_unreachable;
+  *out_objects_pruned = data.n_unreachable;
+  *out_pruned_object_size_total = data.freed_bytes;
+  return TRUE;
+}
+