portal: Make the SpawnStarted code more robust

An interesting side effect of #3770 was that the portal would loop forever, waiting for a process to come up every 100ms. This isn't really ideal; of course, *ideally* nothing would hang, but in practice this can happen in unusual cases, and spamming the logs every 100ms when it does isn't terribly ideal. Now, if the process is not running after around 2 seconds, the repeat timer is changed to a full second. This isn't perfect, but it would help prevent bizarre problems becoming even more problematic. (cherry picked from commit 6d3b30dc9a)
2026-04-03 06:47:19 -04:00 · 2020-08-02 02:47:40 -05:00
parent f4c3ea5e97
commit a3b562eaa8
1 changed files with 35 additions and 9 deletions
--- a/portal/flatpak-portal.c
+++ b/portal/flatpak-portal.c
@@ -60,6 +60,9 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC (PortalFlatpakUpdateMonitorSkeleton, g_object_unre

 #define IDLE_TIMEOUT_SECS 10 * 60

+/* Should be roughly 2 seconds */
+#define CHILD_STATUS_CHECK_ATTEMPTS 20
+
 static GHashTable *client_pid_data_hash = NULL;
 static GDBusConnection *session_bus = NULL;
 static GNetworkMonitor *network_monitor = NULL;
@@ -266,7 +269,7 @@ typedef struct
 {
  FlatpakInstance *instance;
  guint            pid;
-  guint            timeout_index;
+  guint            attempt;
 } BwrapinfoWatcherData;

 static void
@@ -345,7 +348,10 @@ check_child_pid_status (void *user_data)
 {
  /* Stores a sequence of the time interval to use until the child PID is checked again.
     In general from testing, bwrapinfo is never ready before 25ms have passed at minimum,
-     thus 25ms is the first interval, doubling until a max interval of 100ms is reached. */
+     thus 25ms is the first interval, doubling until a max interval of 100ms is reached.
+
+     In addition, if the program is not available after 100ms for an extended period of time,
+     the timeout is further increased to a full second. */
  static gint timeouts[] = {25, 50, 100};

  g_autoptr(GVariant) signal_variant = NULL;
@@ -370,15 +376,35 @@ check_child_pid_status (void *user_data)
  child_pid = flatpak_instance_get_child_pid (data->instance);
  if (child_pid == 0)
    {
-      g_debug ("Failed to read child PID, trying again in %d ms", timeouts[data->timeout_index]);
+      gint timeout;
+      gboolean readd_timer = FALSE;

-      if (data->timeout_index == G_N_ELEMENTS (timeouts))
-        /* Already at the max timeout, no need to change. */
-        return G_SOURCE_CONTINUE;
+      if (data->attempt >= CHILD_STATUS_CHECK_ATTEMPTS)
+        /* If too many attempts, use a 1 second timeout */
+        timeout = 1000;
+      else
+        timeout = timeouts[MIN (data->attempt, G_N_ELEMENTS (timeouts) - 1)];

-      /* Re-add with the next timeout to use. */
-      g_timeout_add (timeouts[data->timeout_index++], check_child_pid_status, g_steal_pointer (&data));
-      return G_SOURCE_REMOVE;
+      g_debug ("Failed to read child PID, trying again in %d ms", timeout);
+
+      /* The timer source only needs to be re-added if the timeout has changed,
+          which won't happen while staying on the 100 or 1000ms timeouts.
+
+          This test must happen *before* the attempt counter is incremented, since the
+          attempt counter represents the *current* timeout. */
+      readd_timer = data->attempt <= G_N_ELEMENTS (timeouts) || data->attempt == CHILD_STATUS_CHECK_ATTEMPTS;
+      data->attempt++;
+
+      /* Make sure the data isn't destroyed */
+      data = NULL;
+
+      if (readd_timer)
+        {
+          g_timeout_add (timeout, check_child_pid_status, user_data);
+          return G_SOURCE_REMOVE;
+        }
+
+      return G_SOURCE_CONTINUE;
    }

  /* Only send the child PID if it's exposed */