mirror of
https://github.com/meshtastic/Meshtastic-Android.git
synced 2026-06-27 15:05:26 -04:00
fix(ble): Restore bounded bonded reconnect fallback (#5960)
This commit is contained in:
@@ -28,9 +28,10 @@ import kotlinx.coroutines.flow.asStateFlow
|
||||
* When created from a live BLE scan, [advertisement] is populated and used for optimal peripheral construction via
|
||||
* `Peripheral(advertisement)` with a direct (non-autoConnect) connection attempt.
|
||||
*
|
||||
* Bonded-only devices (address only, [advertisement] null) can still be constructed — e.g. for display in the device
|
||||
* list — but [BleRadioTransport.findDevice] requires a fresh advertisement before connecting and will throw
|
||||
* [RadioNotConnectedException] rather than returning a stale handle.
|
||||
* Bonded-only devices (address only, [advertisement] null) can still be constructed for display in the device list and
|
||||
* as a bounded fallback when fresh scans miss. In that fallback, Kable creates an address-only peripheral so Android
|
||||
* can wait for the device to advertise through its `autoConnect` path while the transport keeps the connection attempt
|
||||
* bounded.
|
||||
*
|
||||
* @param address The device's MAC address (or platform identifier string).
|
||||
* @param name The device's display name, if known.
|
||||
|
||||
@@ -78,20 +78,19 @@ private val CONNECTION_TIMEOUT = 15.seconds
|
||||
*/
|
||||
private val HEARTBEAT_DRAIN_DELAY = 200.milliseconds
|
||||
|
||||
private val TARGETED_SCAN_TIMEOUT = 2.seconds
|
||||
internal val TARGETED_SCAN_TIMEOUT = 2.seconds
|
||||
|
||||
/**
|
||||
* Bounded scan duration used by both discovery paths in [findDevice]:
|
||||
* - Bonded escalation: after the 2s [TARGETED_SCAN_TIMEOUT] misses the low-duty advertisement window, this is the
|
||||
* single bounded retry before declaring the bonded device unreachable.
|
||||
* single bounded retry before falling back to the bonded handle.
|
||||
* - Non-bonded retry: each of the [SCAN_RETRY_COUNT] attempts uses this duration.
|
||||
*
|
||||
* 5s covers multiple advertising intervals for typical BLE power-save slots (~1–2s each) while keeping the total bonded
|
||||
* scan window bounded (~7s scan, ~10s total attempt including the existing reconnect settle delay) so
|
||||
* [BleReconnectPolicy] iterates on its normal schedule instead of parking on [CONNECTION_TIMEOUT] (15s) via an
|
||||
* `autoConnect=true` hang on a stale handle.
|
||||
* 5s covers multiple advertising intervals for typical BLE power-save slots (~1–2s each). If the bounded bonded scan
|
||||
* window still misses, [findDevice] falls back to the bonded handle and [attemptConnection] keeps that patient
|
||||
* `autoConnect` path bounded through [CONNECTION_TIMEOUT].
|
||||
*/
|
||||
private val SCAN_TIMEOUT = 5.seconds
|
||||
internal val SCAN_TIMEOUT = 5.seconds
|
||||
private val GATT_CLEANUP_TIMEOUT = 5.seconds
|
||||
|
||||
/**
|
||||
@@ -246,18 +245,21 @@ class BleRadioTransport(
|
||||
}
|
||||
|
||||
// Escalation: radio may be in a low-duty advertising slot. Try one bounded SCAN_TIMEOUT scan.
|
||||
Logger.i { "[${address.anonymize()}] Targeted scan missed; escalating to bounded scan before giving up" }
|
||||
Logger.i { "[${address.anonymize()}] Targeted scan missed; escalating to bounded scan before fallback" }
|
||||
scanForFreshDevice(SCAN_TIMEOUT)?.let {
|
||||
Logger.i { "[${address.anonymize()}] Fresh advertisement found during escalated scan" }
|
||||
return it
|
||||
}
|
||||
|
||||
// Never fall back to the stale bonded handle — that path forces autoConnect=true and hangs for
|
||||
// CONNECTION_TIMEOUT (15s) before throwing. Bounded scan window (~7s scan, ~10s total attempt
|
||||
// including the existing reconnect settle delay) lets the reconnect policy iterate on its
|
||||
// normal schedule instead of parking on the autoConnect hang.
|
||||
Logger.w { "[${address.anonymize()}] No fresh advertisement within $SCAN_TIMEOUT; reporting unreachable" }
|
||||
throw RadioNotConnectedException("Bonded device is not advertising")
|
||||
// If both scans miss, fall back to the bonded handle. Bonded-only devices have no fresh advertisement, so
|
||||
// Kable uses autoConnect=true and Android can patiently wait for the device to advertise again.
|
||||
// This remains bounded by CONNECTION_TIMEOUT in connectAndAwait(), after which BleReconnectPolicy owns
|
||||
// retry/backoff.
|
||||
Logger.w {
|
||||
"[${address.anonymize()}] No fresh advertisement within $SCAN_TIMEOUT; " +
|
||||
"falling back to bonded handle for bounded autoConnect"
|
||||
}
|
||||
return bondedDevice
|
||||
}
|
||||
|
||||
// Non-bonded path: preserve existing retry behavior (SCAN_RETRY_COUNT attempts at SCAN_TIMEOUT).
|
||||
@@ -279,8 +281,7 @@ class BleRadioTransport(
|
||||
*
|
||||
* One scan attempt only — no retry, no backoff. Both bonded and non-bonded paths in [findDevice] share this
|
||||
* primitive so retry policy stays centralized:
|
||||
* - Bonded: escalated from [TARGETED_SCAN_TIMEOUT] to [SCAN_TIMEOUT] before [findDevice] declares the device
|
||||
* unreachable.
|
||||
* - Bonded: escalated from [TARGETED_SCAN_TIMEOUT] to [SCAN_TIMEOUT] before [findDevice] returns the bonded handle.
|
||||
* - Non-bonded: [SCAN_RETRY_COUNT] attempts at [SCAN_TIMEOUT] with [SCAN_RETRY_DELAY] between attempts.
|
||||
*
|
||||
* The outer [withTimeoutOrNull] is binding: the scanner receives [timeout] as a hint, but this coroutine resumes on
|
||||
|
||||
@@ -59,6 +59,15 @@ import kotlin.test.assertEquals
|
||||
import kotlin.test.assertTrue
|
||||
import kotlin.time.Duration
|
||||
|
||||
private const val BONDED_FALLBACK_CONNECT_BUFFER_MILLIS = 1_000L
|
||||
private val targetedScanTimeoutMillis = TARGETED_SCAN_TIMEOUT.inWholeMilliseconds
|
||||
private val escalatedScanTimeoutMillis = SCAN_TIMEOUT.inWholeMilliseconds
|
||||
private val bondedReconnectWindowMillis =
|
||||
BleReconnectPolicy.DEFAULT_SETTLE_DELAY.inWholeMilliseconds +
|
||||
targetedScanTimeoutMillis +
|
||||
escalatedScanTimeoutMillis +
|
||||
BONDED_FALLBACK_CONNECT_BUFFER_MILLIS
|
||||
|
||||
/**
|
||||
* Tests covering the BLE reconnect crash fixes in [BleRadioTransport]:
|
||||
* 1. **CancellationException / GATT 133 fix**: [discoverServicesAndSetupCharacteristics] previously had a bare `catch
|
||||
@@ -1037,8 +1046,8 @@ class BleRadioTransportReconnectCrashTest {
|
||||
* advertising window. The escalation path (TARGETED_SCAN_TIMEOUT → SCAN_TIMEOUT) must surface a freshly-scanned
|
||||
* device.
|
||||
*
|
||||
* Sequenced scanner: call 0 (2s targeted) returns empty; call 1 (5s escalated) emits a fresh `FakeBleDevice` for
|
||||
* the bonded address. The transport must connect using the fresh scanned device, never the stale bonded handle.
|
||||
* Sequenced scanner: call 0 (targeted) returns empty; call 1 (escalated) emits a fresh `FakeBleDevice` for the
|
||||
* bonded address. The transport must connect using the fresh scanned device, never the stale bonded handle.
|
||||
*/
|
||||
@Test
|
||||
fun `bonded device missed by targeted scan is found by escalated scan`() = runTest {
|
||||
@@ -1048,7 +1057,7 @@ class BleRadioTransportReconnectCrashTest {
|
||||
val freshDevice = FakeBleDevice(address = address, name = "Fresh Scan Result")
|
||||
val sequencedScanner =
|
||||
SequencedBleScanner().apply {
|
||||
responses += { flow { awaitCancellation() } } // call 0: targeted scan misses (runs until 2s timeout)
|
||||
responses += { flow { awaitCancellation() } } // call 0: targeted scan misses
|
||||
responses += { flow { emit(freshDevice) } } // call 1: escalated scan emits fresh device
|
||||
}
|
||||
|
||||
@@ -1069,22 +1078,21 @@ class BleRadioTransportReconnectCrashTest {
|
||||
try {
|
||||
bleTransport.start()
|
||||
|
||||
// 2s targeted scan + immediate escalated emit + connect/handshake settle.
|
||||
advanceTimeBy(8_000L)
|
||||
advanceTimeBy(bondedReconnectWindowMillis)
|
||||
|
||||
assertTrue(
|
||||
sequencedScanner.callCount >= 2,
|
||||
"Expected at least 2 scan calls (targeted + escalated); got ${sequencedScanner.callCount}",
|
||||
)
|
||||
assertEquals(
|
||||
2_000L,
|
||||
targetedScanTimeoutMillis,
|
||||
sequencedScanner.calls[0].timeout.inWholeMilliseconds,
|
||||
"First scan must be targeted (2s)",
|
||||
"First scan must use TARGETED_SCAN_TIMEOUT",
|
||||
)
|
||||
assertEquals(
|
||||
5_000L,
|
||||
escalatedScanTimeoutMillis,
|
||||
sequencedScanner.calls[1].timeout.inWholeMilliseconds,
|
||||
"Second scan must be escalated (5s)",
|
||||
"Second scan must use SCAN_TIMEOUT",
|
||||
)
|
||||
assertEquals(SERVICE_UUID, sequencedScanner.calls[0].serviceUuid, "First scan must include service UUID")
|
||||
assertEquals(address, sequencedScanner.calls[0].address, "First scan must target selected address")
|
||||
@@ -1104,24 +1112,27 @@ class BleRadioTransportReconnectCrashTest {
|
||||
}
|
||||
|
||||
/**
|
||||
* Critical regression: when the bonded device is not advertising at all, [findDevice] must throw
|
||||
* [RadioNotConnectedException] after the bounded scan window (~7s scan, ~10s total attempt including the existing
|
||||
* reconnect settle delay) and must not fall back to the stale bonded handle.
|
||||
* Regression: when the bonded device misses both fresh scan windows, [findDevice] must fall back to the bonded
|
||||
* handle so Android's autoConnect path can patiently wait for the radio to advertise again.
|
||||
*
|
||||
* Before the fix, the stale handle forced `autoConnect=true` and hung for [CONNECTION_TIMEOUT] (15s) before the
|
||||
* reconnect loop could iterate. This test proves `connectAndAwait` is never invoked against the stale handle.
|
||||
* The fallback remains bounded by [CONNECTION_TIMEOUT] in `connectAndAwait`, after which the reconnect policy owns
|
||||
* the next retry/backoff iteration.
|
||||
*/
|
||||
@Test
|
||||
fun `bonded device never advertising results in zero connect calls`() = runTest {
|
||||
fun `bonded device never advertising falls back to bounded autoConnect`() = runTest {
|
||||
val bondedDevice = FakeBleDevice(address = address, name = "Bonded Handle")
|
||||
bluetoothRepository.bond(bondedDevice)
|
||||
|
||||
val sequencedScanner =
|
||||
SequencedBleScanner().apply {
|
||||
responses += { flow { awaitCancellation() } } // call 0: targeted scan misses (runs until 2s timeout)
|
||||
responses += { flow { awaitCancellation() } } // call 1: escalated scan misses (runs until 5s timeout)
|
||||
responses += { flow { awaitCancellation() } } // call 0: targeted scan misses
|
||||
responses += { flow { awaitCancellation() } } // call 1: escalated scan misses
|
||||
}
|
||||
|
||||
// Keep profile setup stable so the test can inspect the connected device.
|
||||
connection.service.addCharacteristic(FROMNUM_CHARACTERISTIC)
|
||||
connection.service.addCharacteristic(FROMRADIO_CHARACTERISTIC)
|
||||
|
||||
val bleTransport =
|
||||
BleRadioTransport(
|
||||
scope = this,
|
||||
@@ -1134,21 +1145,27 @@ class BleRadioTransportReconnectCrashTest {
|
||||
try {
|
||||
bleTransport.start()
|
||||
|
||||
// 2s targeted + 5s escalated + reconnect backoff + settle delay before second iteration.
|
||||
// With awaitCancellation(), scans now suspend for their full timeout durations, so we
|
||||
// must cover the full reconnect iteration window (settle + targeted + escalated + backoff).
|
||||
advanceTimeBy(20_000L)
|
||||
advanceTimeBy(bondedReconnectWindowMillis)
|
||||
|
||||
// Proves the stale bonded handle was not used: no connectAndAwait call.
|
||||
assertEquals(
|
||||
0,
|
||||
connection.connectAndAwaitCalls,
|
||||
"Must never call connectAndAwait when no fresh advertisement was found",
|
||||
)
|
||||
assertTrue(
|
||||
sequencedScanner.callCount >= 2,
|
||||
"Expected at least 2 scan calls (targeted + escalated); got ${sequencedScanner.callCount}",
|
||||
)
|
||||
assertEquals(
|
||||
targetedScanTimeoutMillis,
|
||||
sequencedScanner.calls[0].timeout.inWholeMilliseconds,
|
||||
"First scan must use TARGETED_SCAN_TIMEOUT",
|
||||
)
|
||||
assertEquals(
|
||||
escalatedScanTimeoutMillis,
|
||||
sequencedScanner.calls[1].timeout.inWholeMilliseconds,
|
||||
"Second scan must use SCAN_TIMEOUT",
|
||||
)
|
||||
assertEquals(1, connection.connectAndAwaitCalls, "Must connect once through the bounded bonded fallback")
|
||||
assertTrue(
|
||||
connection.device === bondedDevice,
|
||||
"Must use the bonded handle after fresh scans miss (got: ${connection.device?.name})",
|
||||
)
|
||||
|
||||
bleTransport.close()
|
||||
} finally {
|
||||
|
||||
@@ -27,6 +27,8 @@ import kotlinx.coroutines.ExperimentalCoroutinesApi
|
||||
import kotlinx.coroutines.test.TestScope
|
||||
import kotlinx.coroutines.test.advanceTimeBy
|
||||
import kotlinx.coroutines.test.runTest
|
||||
import org.meshtastic.core.ble.MeshtasticBleConstants.FROMNUM_CHARACTERISTIC
|
||||
import org.meshtastic.core.ble.MeshtasticBleConstants.FROMRADIO_CHARACTERISTIC
|
||||
import org.meshtastic.core.ble.MeshtasticBleConstants.SERVICE_UUID
|
||||
import org.meshtastic.core.model.RadioNotConnectedException
|
||||
import org.meshtastic.core.repository.RadioInterfaceService
|
||||
@@ -209,9 +211,11 @@ class BleRadioTransportTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `findDevice does not fall back to bonded device when scan finds nothing`() = runTest {
|
||||
fun `findDevice falls back to bonded device when fresh scans miss`() = runTest {
|
||||
val bondedDevice = FakeBleDevice(address = address, name = "Bonded Device")
|
||||
bluetoothRepository.bond(bondedDevice)
|
||||
connection.service.addCharacteristic(FROMNUM_CHARACTERISTIC)
|
||||
connection.service.addCharacteristic(FROMRADIO_CHARACTERISTIC)
|
||||
|
||||
val bleTransport =
|
||||
BleRadioTransport(
|
||||
@@ -224,12 +228,11 @@ class BleRadioTransportTest {
|
||||
)
|
||||
bleTransport.start()
|
||||
try {
|
||||
// 3s settle + 2s targeted + 5s escalated scan = 10s before RadioNotConnectedException.
|
||||
// The bonded device is never advertising, so findDevice must escalate and throw
|
||||
// rather than returning the stale bonded handle.
|
||||
// 3s settle + 2s targeted + 5s escalated scan = 10s before bonded fallback.
|
||||
advanceTimeBy(11_000)
|
||||
|
||||
assertEquals(0, connection.connectAndAwaitCalls, "Must not connect when bonded device is not advertising")
|
||||
assertEquals(1, connection.connectAndAwaitCalls, "Must use bounded bonded fallback after fresh scans miss")
|
||||
assertEquals("Bonded Device", connection.device?.name)
|
||||
} finally {
|
||||
bleTransport.close()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user