diff --git a/core/ble/src/commonMain/kotlin/org/meshtastic/core/ble/MeshtasticBleDevice.kt b/core/ble/src/commonMain/kotlin/org/meshtastic/core/ble/MeshtasticBleDevice.kt index 177fa95db..0e2cb7140 100644 --- a/core/ble/src/commonMain/kotlin/org/meshtastic/core/ble/MeshtasticBleDevice.kt +++ b/core/ble/src/commonMain/kotlin/org/meshtastic/core/ble/MeshtasticBleDevice.kt @@ -28,9 +28,10 @@ import kotlinx.coroutines.flow.asStateFlow * When created from a live BLE scan, [advertisement] is populated and used for optimal peripheral construction via * `Peripheral(advertisement)` with a direct (non-autoConnect) connection attempt. * - * Bonded-only devices (address only, [advertisement] null) can still be constructed — e.g. for display in the device - * list — but [BleRadioTransport.findDevice] requires a fresh advertisement before connecting and will throw - * [RadioNotConnectedException] rather than returning a stale handle. + * Bonded-only devices (address only, [advertisement] null) can still be constructed for display in the device list and + * as a bounded fallback when fresh scans miss. In that fallback, Kable creates an address-only peripheral so Android + * can wait for the device to advertise through its `autoConnect` path while the transport keeps the connection attempt + * bounded. * * @param address The device's MAC address (or platform identifier string). * @param name The device's display name, if known. diff --git a/core/network/src/commonMain/kotlin/org/meshtastic/core/network/radio/BleRadioTransport.kt b/core/network/src/commonMain/kotlin/org/meshtastic/core/network/radio/BleRadioTransport.kt index a9130e4bd..eb5ed92f2 100644 --- a/core/network/src/commonMain/kotlin/org/meshtastic/core/network/radio/BleRadioTransport.kt +++ b/core/network/src/commonMain/kotlin/org/meshtastic/core/network/radio/BleRadioTransport.kt @@ -78,20 +78,19 @@ private val CONNECTION_TIMEOUT = 15.seconds */ private val HEARTBEAT_DRAIN_DELAY = 200.milliseconds -private val TARGETED_SCAN_TIMEOUT = 2.seconds +internal val TARGETED_SCAN_TIMEOUT = 2.seconds /** * Bounded scan duration used by both discovery paths in [findDevice]: * - Bonded escalation: after the 2s [TARGETED_SCAN_TIMEOUT] misses the low-duty advertisement window, this is the - * single bounded retry before declaring the bonded device unreachable. + * single bounded retry before falling back to the bonded handle. * - Non-bonded retry: each of the [SCAN_RETRY_COUNT] attempts uses this duration. * - * 5s covers multiple advertising intervals for typical BLE power-save slots (~1–2s each) while keeping the total bonded - * scan window bounded (~7s scan, ~10s total attempt including the existing reconnect settle delay) so - * [BleReconnectPolicy] iterates on its normal schedule instead of parking on [CONNECTION_TIMEOUT] (15s) via an - * `autoConnect=true` hang on a stale handle. + * 5s covers multiple advertising intervals for typical BLE power-save slots (~1–2s each). If the bounded bonded scan + * window still misses, [findDevice] falls back to the bonded handle and [attemptConnection] keeps that patient + * `autoConnect` path bounded through [CONNECTION_TIMEOUT]. */ -private val SCAN_TIMEOUT = 5.seconds +internal val SCAN_TIMEOUT = 5.seconds private val GATT_CLEANUP_TIMEOUT = 5.seconds /** @@ -246,18 +245,21 @@ class BleRadioTransport( } // Escalation: radio may be in a low-duty advertising slot. Try one bounded SCAN_TIMEOUT scan. - Logger.i { "[${address.anonymize()}] Targeted scan missed; escalating to bounded scan before giving up" } + Logger.i { "[${address.anonymize()}] Targeted scan missed; escalating to bounded scan before fallback" } scanForFreshDevice(SCAN_TIMEOUT)?.let { Logger.i { "[${address.anonymize()}] Fresh advertisement found during escalated scan" } return it } - // Never fall back to the stale bonded handle — that path forces autoConnect=true and hangs for - // CONNECTION_TIMEOUT (15s) before throwing. Bounded scan window (~7s scan, ~10s total attempt - // including the existing reconnect settle delay) lets the reconnect policy iterate on its - // normal schedule instead of parking on the autoConnect hang. - Logger.w { "[${address.anonymize()}] No fresh advertisement within $SCAN_TIMEOUT; reporting unreachable" } - throw RadioNotConnectedException("Bonded device is not advertising") + // If both scans miss, fall back to the bonded handle. Bonded-only devices have no fresh advertisement, so + // Kable uses autoConnect=true and Android can patiently wait for the device to advertise again. + // This remains bounded by CONNECTION_TIMEOUT in connectAndAwait(), after which BleReconnectPolicy owns + // retry/backoff. + Logger.w { + "[${address.anonymize()}] No fresh advertisement within $SCAN_TIMEOUT; " + + "falling back to bonded handle for bounded autoConnect" + } + return bondedDevice } // Non-bonded path: preserve existing retry behavior (SCAN_RETRY_COUNT attempts at SCAN_TIMEOUT). @@ -279,8 +281,7 @@ class BleRadioTransport( * * One scan attempt only — no retry, no backoff. Both bonded and non-bonded paths in [findDevice] share this * primitive so retry policy stays centralized: - * - Bonded: escalated from [TARGETED_SCAN_TIMEOUT] to [SCAN_TIMEOUT] before [findDevice] declares the device - * unreachable. + * - Bonded: escalated from [TARGETED_SCAN_TIMEOUT] to [SCAN_TIMEOUT] before [findDevice] returns the bonded handle. * - Non-bonded: [SCAN_RETRY_COUNT] attempts at [SCAN_TIMEOUT] with [SCAN_RETRY_DELAY] between attempts. * * The outer [withTimeoutOrNull] is binding: the scanner receives [timeout] as a hint, but this coroutine resumes on diff --git a/core/network/src/commonTest/kotlin/org/meshtastic/core/network/radio/BleRadioTransportReconnectCrashTest.kt b/core/network/src/commonTest/kotlin/org/meshtastic/core/network/radio/BleRadioTransportReconnectCrashTest.kt index 50b924c95..abd13e7ed 100644 --- a/core/network/src/commonTest/kotlin/org/meshtastic/core/network/radio/BleRadioTransportReconnectCrashTest.kt +++ b/core/network/src/commonTest/kotlin/org/meshtastic/core/network/radio/BleRadioTransportReconnectCrashTest.kt @@ -59,6 +59,15 @@ import kotlin.test.assertEquals import kotlin.test.assertTrue import kotlin.time.Duration +private const val BONDED_FALLBACK_CONNECT_BUFFER_MILLIS = 1_000L +private val targetedScanTimeoutMillis = TARGETED_SCAN_TIMEOUT.inWholeMilliseconds +private val escalatedScanTimeoutMillis = SCAN_TIMEOUT.inWholeMilliseconds +private val bondedReconnectWindowMillis = + BleReconnectPolicy.DEFAULT_SETTLE_DELAY.inWholeMilliseconds + + targetedScanTimeoutMillis + + escalatedScanTimeoutMillis + + BONDED_FALLBACK_CONNECT_BUFFER_MILLIS + /** * Tests covering the BLE reconnect crash fixes in [BleRadioTransport]: * 1. **CancellationException / GATT 133 fix**: [discoverServicesAndSetupCharacteristics] previously had a bare `catch @@ -1037,8 +1046,8 @@ class BleRadioTransportReconnectCrashTest { * advertising window. The escalation path (TARGETED_SCAN_TIMEOUT → SCAN_TIMEOUT) must surface a freshly-scanned * device. * - * Sequenced scanner: call 0 (2s targeted) returns empty; call 1 (5s escalated) emits a fresh `FakeBleDevice` for - * the bonded address. The transport must connect using the fresh scanned device, never the stale bonded handle. + * Sequenced scanner: call 0 (targeted) returns empty; call 1 (escalated) emits a fresh `FakeBleDevice` for the + * bonded address. The transport must connect using the fresh scanned device, never the stale bonded handle. */ @Test fun `bonded device missed by targeted scan is found by escalated scan`() = runTest { @@ -1048,7 +1057,7 @@ class BleRadioTransportReconnectCrashTest { val freshDevice = FakeBleDevice(address = address, name = "Fresh Scan Result") val sequencedScanner = SequencedBleScanner().apply { - responses += { flow { awaitCancellation() } } // call 0: targeted scan misses (runs until 2s timeout) + responses += { flow { awaitCancellation() } } // call 0: targeted scan misses responses += { flow { emit(freshDevice) } } // call 1: escalated scan emits fresh device } @@ -1069,22 +1078,21 @@ class BleRadioTransportReconnectCrashTest { try { bleTransport.start() - // 2s targeted scan + immediate escalated emit + connect/handshake settle. - advanceTimeBy(8_000L) + advanceTimeBy(bondedReconnectWindowMillis) assertTrue( sequencedScanner.callCount >= 2, "Expected at least 2 scan calls (targeted + escalated); got ${sequencedScanner.callCount}", ) assertEquals( - 2_000L, + targetedScanTimeoutMillis, sequencedScanner.calls[0].timeout.inWholeMilliseconds, - "First scan must be targeted (2s)", + "First scan must use TARGETED_SCAN_TIMEOUT", ) assertEquals( - 5_000L, + escalatedScanTimeoutMillis, sequencedScanner.calls[1].timeout.inWholeMilliseconds, - "Second scan must be escalated (5s)", + "Second scan must use SCAN_TIMEOUT", ) assertEquals(SERVICE_UUID, sequencedScanner.calls[0].serviceUuid, "First scan must include service UUID") assertEquals(address, sequencedScanner.calls[0].address, "First scan must target selected address") @@ -1104,24 +1112,27 @@ class BleRadioTransportReconnectCrashTest { } /** - * Critical regression: when the bonded device is not advertising at all, [findDevice] must throw - * [RadioNotConnectedException] after the bounded scan window (~7s scan, ~10s total attempt including the existing - * reconnect settle delay) and must not fall back to the stale bonded handle. + * Regression: when the bonded device misses both fresh scan windows, [findDevice] must fall back to the bonded + * handle so Android's autoConnect path can patiently wait for the radio to advertise again. * - * Before the fix, the stale handle forced `autoConnect=true` and hung for [CONNECTION_TIMEOUT] (15s) before the - * reconnect loop could iterate. This test proves `connectAndAwait` is never invoked against the stale handle. + * The fallback remains bounded by [CONNECTION_TIMEOUT] in `connectAndAwait`, after which the reconnect policy owns + * the next retry/backoff iteration. */ @Test - fun `bonded device never advertising results in zero connect calls`() = runTest { + fun `bonded device never advertising falls back to bounded autoConnect`() = runTest { val bondedDevice = FakeBleDevice(address = address, name = "Bonded Handle") bluetoothRepository.bond(bondedDevice) val sequencedScanner = SequencedBleScanner().apply { - responses += { flow { awaitCancellation() } } // call 0: targeted scan misses (runs until 2s timeout) - responses += { flow { awaitCancellation() } } // call 1: escalated scan misses (runs until 5s timeout) + responses += { flow { awaitCancellation() } } // call 0: targeted scan misses + responses += { flow { awaitCancellation() } } // call 1: escalated scan misses } + // Keep profile setup stable so the test can inspect the connected device. + connection.service.addCharacteristic(FROMNUM_CHARACTERISTIC) + connection.service.addCharacteristic(FROMRADIO_CHARACTERISTIC) + val bleTransport = BleRadioTransport( scope = this, @@ -1134,21 +1145,27 @@ class BleRadioTransportReconnectCrashTest { try { bleTransport.start() - // 2s targeted + 5s escalated + reconnect backoff + settle delay before second iteration. - // With awaitCancellation(), scans now suspend for their full timeout durations, so we - // must cover the full reconnect iteration window (settle + targeted + escalated + backoff). - advanceTimeBy(20_000L) + advanceTimeBy(bondedReconnectWindowMillis) - // Proves the stale bonded handle was not used: no connectAndAwait call. - assertEquals( - 0, - connection.connectAndAwaitCalls, - "Must never call connectAndAwait when no fresh advertisement was found", - ) assertTrue( sequencedScanner.callCount >= 2, "Expected at least 2 scan calls (targeted + escalated); got ${sequencedScanner.callCount}", ) + assertEquals( + targetedScanTimeoutMillis, + sequencedScanner.calls[0].timeout.inWholeMilliseconds, + "First scan must use TARGETED_SCAN_TIMEOUT", + ) + assertEquals( + escalatedScanTimeoutMillis, + sequencedScanner.calls[1].timeout.inWholeMilliseconds, + "Second scan must use SCAN_TIMEOUT", + ) + assertEquals(1, connection.connectAndAwaitCalls, "Must connect once through the bounded bonded fallback") + assertTrue( + connection.device === bondedDevice, + "Must use the bonded handle after fresh scans miss (got: ${connection.device?.name})", + ) bleTransport.close() } finally { diff --git a/core/network/src/commonTest/kotlin/org/meshtastic/core/network/radio/BleRadioTransportTest.kt b/core/network/src/commonTest/kotlin/org/meshtastic/core/network/radio/BleRadioTransportTest.kt index de2625773..9348af6d0 100644 --- a/core/network/src/commonTest/kotlin/org/meshtastic/core/network/radio/BleRadioTransportTest.kt +++ b/core/network/src/commonTest/kotlin/org/meshtastic/core/network/radio/BleRadioTransportTest.kt @@ -27,6 +27,8 @@ import kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.test.TestScope import kotlinx.coroutines.test.advanceTimeBy import kotlinx.coroutines.test.runTest +import org.meshtastic.core.ble.MeshtasticBleConstants.FROMNUM_CHARACTERISTIC +import org.meshtastic.core.ble.MeshtasticBleConstants.FROMRADIO_CHARACTERISTIC import org.meshtastic.core.ble.MeshtasticBleConstants.SERVICE_UUID import org.meshtastic.core.model.RadioNotConnectedException import org.meshtastic.core.repository.RadioInterfaceService @@ -209,9 +211,11 @@ class BleRadioTransportTest { } @Test - fun `findDevice does not fall back to bonded device when scan finds nothing`() = runTest { + fun `findDevice falls back to bonded device when fresh scans miss`() = runTest { val bondedDevice = FakeBleDevice(address = address, name = "Bonded Device") bluetoothRepository.bond(bondedDevice) + connection.service.addCharacteristic(FROMNUM_CHARACTERISTIC) + connection.service.addCharacteristic(FROMRADIO_CHARACTERISTIC) val bleTransport = BleRadioTransport( @@ -224,12 +228,11 @@ class BleRadioTransportTest { ) bleTransport.start() try { - // 3s settle + 2s targeted + 5s escalated scan = 10s before RadioNotConnectedException. - // The bonded device is never advertising, so findDevice must escalate and throw - // rather than returning the stale bonded handle. + // 3s settle + 2s targeted + 5s escalated scan = 10s before bonded fallback. advanceTimeBy(11_000) - assertEquals(0, connection.connectAndAwaitCalls, "Must not connect when bonded device is not advertising") + assertEquals(1, connection.connectAndAwaitCalls, "Must use bounded bonded fallback after fresh scans miss") + assertEquals("Bonded Device", connection.device?.name) } finally { bleTransport.close() }