From 20e078d2d7e3f6543fac079f96339f849071c565 Mon Sep 17 00:00:00 2001 From: James Rich <2199651+jamesarich@users.noreply.github.com> Date: Wed, 22 Apr 2026 16:22:42 -0500 Subject: [PATCH] fix(ble): cleanup races discovered while reviewing #5207 (#5221) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../meshtastic/core/ble/KableBleConnection.kt | 24 +++++++++++++++---- .../core/network/radio/BleRadioTransport.kt | 14 ++++++++++- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/core/ble/src/commonMain/kotlin/org/meshtastic/core/ble/KableBleConnection.kt b/core/ble/src/commonMain/kotlin/org/meshtastic/core/ble/KableBleConnection.kt index bfc963fc4..a3808d3e6 100644 --- a/core/ble/src/commonMain/kotlin/org/meshtastic/core/ble/KableBleConnection.kt +++ b/core/ble/src/commonMain/kotlin/org/meshtastic/core/ble/KableBleConnection.kt @@ -139,10 +139,17 @@ class KableBleConnection(private val scope: CoroutineScope) : BleConnection { meshtasticDevice.advertisement?.let { adv -> Peripheral(adv) { commonConfig() } } ?: createPeripheral(device.address) { commonConfig() } - cleanUpPeripheral(device.address) - peripheral = p - - ActiveBleConnection.active = ActiveConnection(p, device.address) + // Install ownership of the new peripheral atomically. Cancellation between + // peripheral construction and field assignment would strand `p` (Kable allocates + // a per-peripheral scope + Bluetooth-state observer eagerly), so the cleanup, + // assignment, and ActiveBleConnection update must complete as a single unit. + // _deviceFlow.emit() is intentionally outside this block — making it + // non-cancellable could hang teardown on a slow collector. + withContext(NonCancellable) { + cleanUpPeripheral(device.address) + peripheral = p + ActiveBleConnection.active = ActiveConnection(p, device.address) + } _deviceFlow.emit(device) @@ -212,11 +219,18 @@ class KableBleConnection(private val scope: CoroutineScope) : BleConnection { stateJob?.cancel() stateJob = null + // Capture the peripheral we own before clearing it so we can identity-check + // ActiveBleConnection below. A stale disconnect from an earlier connection + // attempt's exception handler must not clobber a newer connection that has + // already installed itself as active. + val owned = peripheral safeClosePeripheral("disconnect") peripheral = null connectionScope = null - ActiveBleConnection.active = null + if (owned != null && ActiveBleConnection.active?.peripheral === owned) { + ActiveBleConnection.active = null + } _deviceFlow.emit(null) } diff --git a/core/network/src/commonMain/kotlin/org/meshtastic/core/network/radio/BleRadioTransport.kt b/core/network/src/commonMain/kotlin/org/meshtastic/core/network/radio/BleRadioTransport.kt index 40adf41e2..9f1b530a8 100644 --- a/core/network/src/commonMain/kotlin/org/meshtastic/core/network/radio/BleRadioTransport.kt +++ b/core/network/src/commonMain/kotlin/org/meshtastic/core/network/radio/BleRadioTransport.kt @@ -103,9 +103,17 @@ class BleRadioTransport( internal val address: String, ) : RadioTransport { + // Detached cleanup scope for last-ditch GATT teardown from the exception handler. + // Must NOT be a child of `scope`: when an uncaught exception fires in connectionScope, + // upper layers often tear down `scope` immediately. Launching cleanup on `scope` then + // races the cancellation and may never start, leaking BluetoothGatt (status 133 on + // the next reconnect). This scope is cancelled in close() once our own disconnect + // has completed and the safety net is no longer needed. + private val cleanupScope: CoroutineScope = CoroutineScope(SupervisorJob() + scope.coroutineContext.minusKey(Job)) + private val exceptionHandler = CoroutineExceptionHandler { _, throwable -> Logger.w(throwable) { "[$address] Uncaught exception in connectionScope" } - scope.launch { + cleanupScope.launch { try { bleConnection.disconnect() } catch (e: Exception) { @@ -427,6 +435,10 @@ class BleRadioTransport( Logger.w(e) { "[$address] Failed to disconnect in close()" } } } + // Our own disconnect succeeded — the exception-handler safety net is no longer + // needed. Cancel the detached cleanup scope so it doesn't outlive us in tests + // or process lifetime. + cleanupScope.cancel("close() called") } private fun dispatchPacket(packet: ByteArray) {