From 3fd6a9fba5d602dc235b772a73a780563e81dad4 Mon Sep 17 00:00:00 2001
From: Ben Meadors
Date: Sat, 18 Apr 2026 08:00:16 -0500
Subject: [PATCH] Fix transmit history file to get removed on factory reset

---
 mcp-server/tests/conftest.py                  | 68 +++++++++++++++++++
 mcp-server/tests/mesh/test_direct_with_ack.py | 51 ++++----------
 src/mesh/NodeDB.cpp                           |  7 ++
 src/mesh/TransmitHistory.cpp                  | 21 ++++++
 src/mesh/TransmitHistory.h                    |  7 ++
 5 files changed, 116 insertions(+), 38 deletions(-)

diff --git a/mcp-server/tests/conftest.py b/mcp-server/tests/conftest.py
index 4597fac46..3d033b9b8 100644
--- a/mcp-server/tests/conftest.py
+++ b/mcp-server/tests/conftest.py
@@ -394,6 +394,74 @@ def hub_devices(hub_profile: dict[str, dict[str, Any]]) -> dict[str, str]:
     return resolved


+def _reset_transmit_history_state(role: str, port: str) -> str:
+    """Wipe `/prefs/transmit_history.dat` + in-memory throttle cache via
+    delete_file_request + reboot. Returns the post-reboot port (nRF52
+    re-enumerates). Best-effort — errors log to stderr + return original
+    port so a flaky start doesn't block the session.
+    """
+    from ._port_discovery import resolve_port_by_role
+
+    try:
+        from meshtastic.protobuf import admin_pb2  # type: ignore[import-untyped]
+        from meshtastic_mcp.connection import connect
+
+        with connect(port=port) as iface:
+            msg = admin_pb2.AdminMessage()
+            msg.delete_file_request = "/prefs/transmit_history.dat"
+            iface.localNode._sendAdmin(msg)
+            time.sleep(1.0)
+            # Reboot clears in-memory cache; otherwise the 5-min auto-flush
+            # rewrites the file with pre-reset timestamps.
+            iface.localNode.reboot(3)
+    except Exception as exc:
+        print(
+            f"[transmit-history-reset] {role} @ {port} clear failed: {exc!r}",
+            file=sys.stderr,
+        )
+        return port
+
+    time.sleep(8.0)
+    try:
+        fresh = resolve_port_by_role(role, timeout_s=45.0)
+    except Exception as exc:
+        print(
+            f"[transmit-history-reset] {role} didn't reappear: {exc!r}",
+            file=sys.stderr,
+        )
+        return port
+    for _ in range(20):
+        try:
+            if info.device_info(port=fresh, timeout_s=5.0).get("my_node_num"):
+                return fresh
+        except Exception:
+            pass  # still booting; fall through to the delay and retry
+        time.sleep(1.5)
+    return fresh
+
+
+@pytest.fixture(scope="session", autouse=True)
+def _session_clear_transmit_history(hub_devices: dict[str, str]) -> None:
+    """Wipe transmit_history.dat on each device at session start.
+
+    Without this, the firmware's per-portnum last-broadcast cache
+    (`src/mesh/TransmitHistory.h`) carries throttle state across sessions
+    and suppresses early broadcasts. Mutates `hub_devices` in place with
+    post-reboot ports since nRF52 re-enumerates.
+    """
+    if not hub_devices:
+        yield
+        return
+    # Iterate over a snapshot — the loop body rewrites hub_devices[role]
+    # with the post-reboot port, so iteration must not depend on the dict
+    # that is being updated.
+    for role, port in list(hub_devices.items()):
+        fresh_port = _reset_transmit_history_state(role, port)
+        if fresh_port != port:
+            hub_devices[role] = fresh_port
+    yield
+
+
 @pytest.fixture(scope="session")
 def baked_mesh(
     hub_devices: dict[str, str],
diff --git a/mcp-server/tests/mesh/test_direct_with_ack.py b/mcp-server/tests/mesh/test_direct_with_ack.py
index 1b3186ac5..6380bf94e 100644
--- a/mcp-server/tests/mesh/test_direct_with_ack.py
+++ b/mcp-server/tests/mesh/test_direct_with_ack.py
@@ -38,41 +38,16 @@ def test_direct_with_ack_roundtrip(

     unique = f"mcp-ack-{tx_role}-to-{rx_role}-{int(time.time())}"

-    # Why the TX interface stays open across the RX wait:
-    # With wantAck=True, meshtastic-python queues the packet and the firmware
-    # retransmits until it sees an ACK from the destination. Closing the
-    # SerialInterface immediately after sendText() races that retry loop —
-    # empirically the packet never reaches RX.
-    #
-    # Why we ping BOTH RX and TX for a fresh NodeInfo before polling:
-    # Directed packets are PKI-encrypted with the destination's public key.
-    # The ENCRYPT path needs TX to hold RX's current pubkey; the DECRYPT
-    # path needs RX to hold TX's current pubkey. After a factory_reset or
-    # reboot, either side's nodeDB entry for the other can still carry
-    # a stale pubkey — directed sends then NAK with Routing.Error=35
-    # (PKI_UNKNOWN_PUBKEY, receiver can't decrypt) or 39
-    # (PKI_SEND_FAIL_PUBLIC_KEY, sender has no pubkey at all). NodeInfo
-    # broadcasts are the sole source of fresh pubkeys and the firmware
-    # rate-limits them to every 10 min. ToRadio.heartbeat(nonce=1)
-    # bypasses that via the 60-s shorterTimeout path
-    # (`src/mesh/PhoneAPI.cpp::handleToRadio` for serial,
-    # `src/mesh/api/PacketAPI.cpp::handlePacket` for TCP/UDP, both
-    # calling `NodeInfoModule::sendOurNodeInfo(..., true)`).
-    #
-    # Earlier revisions of this test only nudged RX — which covers the
-    # common case of a recently-baked RX whose TX doesn't know its new
-    # key yet. But when the OPPOSITE side is the one with stale state
-    # (RX holds an old TX pubkey), the test would silently fail with
-    # err=35 in the firmware log. Bilateral nudge eliminates that blind
-    # spot. Poll TX's nodesByNum for RX's publicKey as a proxy for "the
-    # exchange has propagated"; a matching symmetry on RX's side is
-    # implied by the firmware's NodeInfo-on-receipt update path.
+    # TX iface stays open across the RX wait — sendText+wantAck relies on
+    # the firmware's retransmit loop, which races the SerialInterface close.
+    # Bilateral NodeInfo nudge: directed packets are PKI-encrypted, so BOTH
+    # sides need current pubkeys (err=35/39 otherwise). See
+    # `tests/mesh/_receive.py::nudge_nodeinfo` for the heartbeat-nonce=1
+    # firmware path.
     with ReceiveCollector(rx_port, topic="meshtastic.receive.text") as rx:
         rx.broadcast_nodeinfo_ping()

         with connect(port=tx_port) as tx_iface:
-            # Bilateral warmup: nudge TX to broadcast too, so RX's nodeDB
-            # also gets refreshed with TX's current pubkey.
             nudge_nodeinfo(tx_iface)

             pk_deadline = time.monotonic() + 45.0
@@ -83,10 +58,8 @@ def test_direct_with_ack_roundtrip(
                 user = last_rec.get("user", {})
                 if user.get("publicKey"):
                     break
-                # Re-nudge every 15s in case the first NodeInfo broadcast
-                # was lost to a LoRa collision with concurrent traffic. Both
-                # sides re-broadcast for the same reason they were nudged
-                # initially — stale pubkeys can live on either side.
+                # Re-nudge both sides every 15 s in case a broadcast was
+                # lost to a LoRa collision.
                 if time.monotonic() - last_nudge > 15.0:
                     rx.broadcast_nodeinfo_ping()
                     nudge_nodeinfo(tx_iface)
@@ -98,9 +71,9 @@ def test_direct_with_ack_roundtrip(
                 f"within 45s; nodesByNum entry={last_rec!r}"
             )

-            # Directed send + short retry: at most 2 attempts. Each is
-            # sufficient on its own with fresh keys; the retry is purely
-            # an airtime-collision safety net.
+            # Retry covers LoRa collisions. Re-nudge both sides between
+            # attempts — if RX's cached TX pubkey is stale, just re-sending
+            # the text doesn't heal it; re-broadcasting NodeInfo does.
             got = None
             for _attempt in range(2):
                 packet = tx_iface.sendText(
@@ -115,6 +88,8 @@ def test_direct_with_ack_roundtrip(
                 )
                 if got is not None:
                     break
+                rx.broadcast_nodeinfo_ping()
+                nudge_nodeinfo(tx_iface)
                 time.sleep(5.0)

             assert got is not None, (
diff --git a/src/mesh/NodeDB.cpp b/src/mesh/NodeDB.cpp
index 4b0871566..6e57e89f6 100644
--- a/src/mesh/NodeDB.cpp
+++ b/src/mesh/NodeDB.cpp
@@ -17,6 +17,7 @@
 #include "Router.h"
 #include "SPILock.h"
 #include "SafeFile.h"
+#include "TransmitHistory.h"
 #include "TypeConversions.h"
 #include "error.h"
 #include "main.h"
@@ -509,6 +510,12 @@ bool NodeDB::factoryReset(bool eraseBleBonds)
     }
 #endif
     spiLock->unlock();
+
+    // rmDir above nuked the .dat file, but TransmitHistory's in-memory
+    // cache auto-flushes every 5 min and would resurrect it.
+    if (transmitHistory) {
+        transmitHistory->clear();
+    }
     // second, install default state (this will deal with the duplicate mac address issue)
     installDefaultNodeDatabase();
     installDefaultDeviceState();
diff --git a/src/mesh/TransmitHistory.cpp b/src/mesh/TransmitHistory.cpp
index 33da7d35c..afdcf5285 100644
--- a/src/mesh/TransmitHistory.cpp
+++ b/src/mesh/TransmitHistory.cpp
@@ -255,6 +255,21 @@ bool TransmitHistory::saveToDisk()
     return false;
 }

+void TransmitHistory::clear()
+{
+    history.clear();
+    lastMillis.clear();
+    dirty = false;
+    lastDiskSave = 0; // so the next legit broadcast persists immediately
+
+    spiLock->lock();
+    if (FSCom.exists(FILENAME)) {
+        FSCom.remove(FILENAME);
+    }
+    spiLock->unlock();
+    LOG_INFO("TransmitHistory: cleared in-memory state + on-disk file");
+}
+
 #else
 // No filesystem available — provide stub with in-memory tracking
 TransmitHistory *transmitHistory = nullptr;
@@ -290,4 +305,10 @@ bool TransmitHistory::saveToDisk()
     return true;
 }

+void TransmitHistory::clear()
+{
+    history.clear();
+    lastMillis.clear();
+}
+
 #endif
diff --git a/src/mesh/TransmitHistory.h b/src/mesh/TransmitHistory.h
index 1a79048ea..57e5fb6cc 100644
--- a/src/mesh/TransmitHistory.h
+++ b/src/mesh/TransmitHistory.h
@@ -76,6 +76,13 @@ class TransmitHistory
      */
     bool saveToDisk();

+    /**
+     * Wipe in-memory throttle state + remove the on-disk file. Required
+     * alongside rmDir("/prefs") in factoryReset — otherwise the 5-min
+     * auto-flush resurrects the file from the still-populated maps.
+     */
+    void clear();
+
   private:
     TransmitHistory() = default;
