Fix transmit history file to get removed on factory reset

This commit is contained in:
Ben Meadors
2026-04-18 08:00:16 -05:00
parent d78e7b1ef9
commit 3fd6a9fba5
5 changed files with 115 additions and 38 deletions

View File

@@ -394,6 +394,73 @@ def hub_devices(hub_profile: dict[str, dict[str, Any]]) -> dict[str, str]:
return resolved
def _reset_transmit_history_state(role: str, port: str) -> str:
    """Clear a device's transmit-history throttle state and reboot it.

    Sends an admin ``delete_file_request`` for
    ``/prefs/transmit_history.dat`` and then reboots the node so the
    in-memory throttle cache is dropped as well. Afterwards the device is
    re-discovered by role (nRF52 re-enumerates on reboot) and polled until
    it reports a node number.

    This is best-effort: any failure is printed to stderr and the original
    port is returned, so a flaky start doesn't block the session.

    Args:
        role: Hub role of the device; used to re-resolve its port after
            the reboot and to label log lines.
        port: Serial port the device is reachable on before the reboot.

    Returns:
        The post-reboot port. Falls back to ``port`` when the clear request
        or the re-discovery failed. If the device reappears but never
        reports ``my_node_num`` within the poll budget, the re-discovered
        port is still returned.
    """
    from ._port_discovery import resolve_port_by_role

    try:
        from meshtastic.protobuf import admin_pb2  # type: ignore[import-untyped]

        from meshtastic_mcp.connection import connect

        with connect(port=port) as iface:
            msg = admin_pb2.AdminMessage()
            msg.delete_file_request = "/prefs/transmit_history.dat"
            iface.localNode._sendAdmin(msg)
            # Presumably gives the firmware time to act on the delete
            # before the reboot request goes out.
            time.sleep(1.0)
            # Reboot clears in-memory cache; otherwise the 5-min auto-flush
            # rewrites the file with pre-reset timestamps.
            iface.localNode.reboot(3)
    except Exception as exc:
        print(
            f"[transmit-history-reset] {role} @ {port} clear failed: {exc!r}",
            file=sys.stderr,
        )
        return port

    # Let the scheduled reboot actually happen before looking for the
    # device again.
    time.sleep(8.0)
    try:
        fresh = resolve_port_by_role(role, timeout_s=45.0)
    except Exception as exc:
        print(
            f"[transmit-history-reset] {role} didn't reappear: {exc!r}",
            file=sys.stderr,
        )
        return port

    # Wait for the rebooted node to answer with a node number. The pause
    # applies to EVERY unsuccessful attempt: a reply that merely lacks
    # `my_node_num` must back off too, otherwise the 20 attempts would be
    # spent back-to-back without giving the device time to come up.
    for _ in range(20):
        try:
            if info.device_info(port=fresh, timeout_s=5.0).get("my_node_num"):
                return fresh
        except Exception:
            # Port not usable yet; treat as "not ready" and retry.
            pass
        time.sleep(1.5)
    return fresh
@pytest.fixture(scope="session", autouse=True)
def _session_clear_transmit_history(hub_devices: dict[str, str]) -> None:
    """Reset every hub device's transmit history once, at session start.

    The firmware keeps a per-portnum last-broadcast cache
    (`src/mesh/TransmitHistory.h`); if it survives from a previous session
    its throttle state suppresses early broadcasts. Each device is wiped
    via `_reset_transmit_history_state`, and `hub_devices` is updated in
    place with the post-reboot port because nRF52 re-enumerates.
    """
    if hub_devices:
        # Walk a snapshot of the entries: the loop body rewrites
        # hub_devices[role] whenever a device comes back on a new port.
        for role, old_port in tuple(hub_devices.items()):
            new_port = _reset_transmit_history_state(role, old_port)
            if new_port == old_port:
                continue
            hub_devices[role] = new_port
    yield
@pytest.fixture(scope="session")
def baked_mesh(
hub_devices: dict[str, str],

View File

@@ -38,41 +38,16 @@ def test_direct_with_ack_roundtrip(
unique = f"mcp-ack-{tx_role}-to-{rx_role}-{int(time.time())}"
# Why the TX interface stays open across the RX wait:
# With wantAck=True, meshtastic-python queues the packet and the firmware
# retransmits until it sees an ACK from the destination. Closing the
# SerialInterface immediately after sendText() races that retry loop —
# empirically the packet never reaches RX.
#
# Why we ping BOTH RX and TX for a fresh NodeInfo before polling:
# Directed packets are PKI-encrypted with the destination's public key.
# The ENCRYPT path needs TX to hold RX's current pubkey; the DECRYPT
# path needs RX to hold TX's current pubkey. After a factory_reset or
# reboot, either side's nodeDB entry for the other can still carry
# a stale pubkey — directed sends then NAK with Routing.Error=35
# (PKI_UNKNOWN_PUBKEY, receiver can't decrypt) or 39
# (PKI_SEND_FAIL_PUBLIC_KEY, sender has no pubkey at all). NodeInfo
# broadcasts are the sole source of fresh pubkeys and the firmware
# rate-limits them to every 10 min. ToRadio.heartbeat(nonce=1)
# bypasses that via the 60-s shorterTimeout path
# (`src/mesh/PhoneAPI.cpp::handleToRadio` for serial,
# `src/mesh/api/PacketAPI.cpp::handlePacket` for TCP/UDP, both
# calling `NodeInfoModule::sendOurNodeInfo(..., true)`).
#
# Earlier revisions of this test only nudged RX — which covers the
# common case of a recently-baked RX whose TX doesn't know its new
# key yet. But when the OPPOSITE side is the one with stale state
# (RX holds an old TX pubkey), the test would silently fail with
# err=35 in the firmware log. Bilateral nudge eliminates that blind
# spot. Poll TX's nodesByNum for RX's publicKey as a proxy for "the
# exchange has propagated"; a matching symmetry on RX's side is
# implied by the firmware's NodeInfo-on-receipt update path.
# TX iface stays open across the RX wait — sendText+wantAck relies on
# the firmware's retransmit loop, which races the SerialInterface close.
# Bilateral NodeInfo nudge: directed packets are PKI-encrypted, so BOTH
# sides need current pubkeys (err=35/39 otherwise). See
# `tests/mesh/_receive.py::nudge_nodeinfo` for the heartbeat-nonce=1
# firmware path.
with ReceiveCollector(rx_port, topic="meshtastic.receive.text") as rx:
rx.broadcast_nodeinfo_ping()
with connect(port=tx_port) as tx_iface:
# Bilateral warmup: nudge TX to broadcast too, so RX's nodeDB
# also gets refreshed with TX's current pubkey.
nudge_nodeinfo(tx_iface)
pk_deadline = time.monotonic() + 45.0
@@ -83,10 +58,8 @@ def test_direct_with_ack_roundtrip(
user = last_rec.get("user", {})
if user.get("publicKey"):
break
# Re-nudge every 15s in case the first NodeInfo broadcast
# was lost to a LoRa collision with concurrent traffic. Both
# sides re-broadcast for the same reason they were nudged
# initially — stale pubkeys can live on either side.
# Re-nudge both sides every 15 s in case a broadcast was
# lost to a LoRa collision.
if time.monotonic() - last_nudge > 15.0:
rx.broadcast_nodeinfo_ping()
nudge_nodeinfo(tx_iface)
@@ -98,9 +71,9 @@ def test_direct_with_ack_roundtrip(
f"within 45s; nodesByNum entry={last_rec!r}"
)
# Directed send + short retry: at most 2 attempts. Each is
# sufficient on its own with fresh keys; the retry is purely
# an airtime-collision safety net.
# Retry covers LoRa collisions. Re-nudge both sides between
# attempts — if RX's cached TX pubkey is stale, just re-sending
# the text doesn't heal it; re-broadcasting NodeInfo does.
got = None
for _attempt in range(2):
packet = tx_iface.sendText(
@@ -115,6 +88,8 @@ def test_direct_with_ack_roundtrip(
)
if got is not None:
break
rx.broadcast_nodeinfo_ping()
nudge_nodeinfo(tx_iface)
time.sleep(5.0)
assert got is not None, (

View File

@@ -17,6 +17,7 @@
#include "Router.h"
#include "SPILock.h"
#include "SafeFile.h"
#include "TransmitHistory.h"
#include "TypeConversions.h"
#include "error.h"
#include "main.h"
@@ -509,6 +510,12 @@ bool NodeDB::factoryReset(bool eraseBleBonds)
}
#endif
spiLock->unlock();
// rmDir above nuked the .dat file, but TransmitHistory's in-memory
// cache auto-flushes every 5 min and would resurrect it.
if (transmitHistory) {
transmitHistory->clear();
}
// second, install default state (this will deal with the duplicate mac address issue)
installDefaultNodeDatabase();
installDefaultDeviceState();

View File

@@ -255,6 +255,21 @@ bool TransmitHistory::saveToDisk()
return false;
}
/**
 * Drop all in-memory throttle state and delete the persisted history file.
 *
 * Both halves are needed: if only the file were removed, the periodic
 * auto-flush would write it back from the still-populated maps.
 */
void TransmitHistory::clear()
{
    // In-memory state first, so nothing is left to be flushed back out.
    history.clear();
    lastMillis.clear();
    dirty = false;
    // Zeroed so the next legit broadcast persists immediately.
    lastDiskSave = 0;

    // Filesystem access is serialized through the SPI lock.
    spiLock->lock();
    const bool filePresent = FSCom.exists(FILENAME);
    if (filePresent)
        FSCom.remove(FILENAME);
    spiLock->unlock();

    LOG_INFO("TransmitHistory: cleared in-memory state + on-disk file");
}
#else
// No filesystem available — provide stub with in-memory tracking
TransmitHistory *transmitHistory = nullptr;
@@ -290,4 +305,10 @@ bool TransmitHistory::saveToDisk()
return true;
}
/**
 * Stub for builds without a filesystem: there is no on-disk file to
 * remove, so clearing only empties the in-memory tracking maps.
 */
void TransmitHistory::clear()
{
    lastMillis.clear();
    history.clear();
}
#endif

View File

@@ -76,6 +76,13 @@ class TransmitHistory
*/
bool saveToDisk();
/**
* Wipe in-memory throttle state + remove the on-disk file. Required
* alongside rmDir("/prefs") in factoryReset — otherwise the 5-min
* auto-flush resurrects the file from the still-populated maps.
*/
void clear();
private:
TransmitHistory() = default;