Fix RAK4631 Ethernet gateway API connection loss after W5100S brownout (#9754)

* Fix RAK4631 Ethernet gateway API connection loss after W5100S brownout

PoE power instability can brown out the W5100S while the nRF52 MCU keeps
running, causing all chip registers (MAC, IP, sockets) to revert to
defaults. The firmware had no mechanism to detect or recover from this.

Changes:
- Detect W5100S chip reset by periodically verifying the MAC address
  register in reconnectETH(); on mismatch, perform a full hardware reset
  and re-initialize the Ethernet interface and services
- Add deInitApiServer() for clean API server teardown during recovery
- Add ~APIServerPort destructor to prevent memory leaks
- Switch nRF52 from EthernetServer::available() to accept() to prevent
  the same connected client from being repeatedly re-reported
- Add proactive dead-connection cleanup in APIServerPort::runOnce()
- Add 15-minute TCP idle timeout to close half-open connections that
  consume limited W5100S hardware sockets

Fixes meshtastic/firmware#6970

Made-with: Cursor

* Log actual elapsed idle time instead of constant timeout value

Address Copilot review comment: log millis() - lastContactMsec to show
the real time since last client activity, rather than always logging the
TCP_IDLE_TIMEOUT_MS constant.

Made-with: Cursor

* Update src/mesh/api/ServerAPI.h

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Stop UDP multicast handler during W5100S brownout recovery

After a W5100S chip brownout, the udpHandler isRunning flag stays
true while the underlying socket is dead. Without calling stop(),
the subsequent start() no-ops and multicast is silently broken
after recovery.

Made-with: Cursor

* Address Copilot review: recovery flags and timeout constant

Move ethStartupComplete and ntp_renew reset to immediately after
service teardown, before Ethernet.begin(). Previously, if DHCP
failed, the early return left ethStartupComplete=true, preventing
service re-initialization on subsequent retries.

Replace #define TCP_IDLE_TIMEOUT_MS with static constexpr uint32_t
for type safety and better C++ practice.

Made-with: Cursor

---------

Co-authored-by: Ben Meadors <benmmeadors@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Philip Lykov
2026-03-19 15:37:39 +02:00
committed by Ben Meadors
parent 532a63e541
commit 2aefd386b6
4 changed files with 83 additions and 1 deletions

View File

@@ -1,7 +1,10 @@
#include "ServerAPI.h"
#include "Throttle.h"
#include "configuration.h"
#include <Arduino.h>
// Idle limit for a connected TCP client, in milliseconds (15 minutes).
// ServerAPI::runOnce() compares lastContactMsec against this value and closes
// the connection once the limit has been exceeded.
static constexpr uint32_t TCP_IDLE_TIMEOUT_MS = 15 * 60 * 1000UL;
template <typename T>
ServerAPI<T>::ServerAPI(T &_client) : StreamAPI(&client), concurrency::OSThread("ServerAPI"), client(_client)
{
@@ -28,6 +31,12 @@ template <typename T> bool ServerAPI<T>::checkIsConnected()
template <class T> int32_t ServerAPI<T>::runOnce()
{
if (client.connected()) {
if (lastContactMsec > 0 && !Throttle::isWithinTimespanMs(lastContactMsec, TCP_IDLE_TIMEOUT_MS)) {
LOG_WARN("TCP connection timeout, no data for %lu ms", (unsigned long)(millis() - lastContactMsec));
close();
enabled = false;
return 0;
}
return StreamAPI::runOncePart();
} else {
LOG_INFO("Client dropped connection, suspend API service");
@@ -57,7 +66,7 @@ template <class T, class U> int32_t APIServerPort<T, U>::runOnce()
#else
auto client = U::available();
#endif
#elif defined(ARCH_RP2040)
#elif defined(ARCH_RP2040) || defined(ARCH_NRF52)
auto client = U::accept();
#else
auto client = U::available();

View File

@@ -17,6 +17,15 @@ void initApiServer(int port)
}
}
/**
 * Tear down the API server port.
 *
 * If apiPort is set, the object it points to is deleted and the pointer is
 * cleared; when apiPort is already null the call does nothing, so it is safe
 * to invoke repeatedly.
 */
void deInitApiServer()
{
    // Nothing to release if the server was never created or is already gone.
    if (!apiPort)
        return;

    LOG_INFO("Deinit API server");
    delete apiPort;
    apiPort = nullptr; // avoid leaving a dangling pointer behind
}
ethServerAPI::ethServerAPI(EthernetClient &_client) : ServerAPI(_client)
{
LOG_INFO("Incoming ethernet connection");

View File

@@ -24,4 +24,5 @@ class ethServerPort : public APIServerPort<ethServerAPI, EthernetServer>
};
void initApiServer(int port = SERVER_API_DEFAULT_PORT);
void deInitApiServer();
#endif

View File

@@ -32,6 +32,69 @@ static Periodic *ethEvent;
static int32_t reconnectETH()
{
if (config.network.eth_enabled) {
// Detect W5100S chip reset by verifying the MAC address register.
// PoE power instability can brownout the W5100S while the MCU keeps running,
// causing all chip registers (MAC, IP, sockets) to revert to defaults.
uint8_t currentMac[6];
Ethernet.MACAddress(currentMac);
uint8_t expectedMac[6];
getMacAddr(expectedMac);
expectedMac[0] &= 0xfe;
if (memcmp(currentMac, expectedMac, 6) != 0) {
LOG_WARN("W5100S MAC mismatch (chip reset detected), reinitializing Ethernet");
syslog.disable();
#if !MESHTASTIC_EXCLUDE_SOCKETAPI
deInitApiServer();
#endif
#if HAS_UDP_MULTICAST
if (udpHandler) {
udpHandler->stop();
}
#endif
ethStartupComplete = false;
#ifndef DISABLE_NTP
ntp_renew = 0;
#endif
#ifdef PIN_ETHERNET_RESET
pinMode(PIN_ETHERNET_RESET, OUTPUT);
digitalWrite(PIN_ETHERNET_RESET, LOW);
delay(100);
digitalWrite(PIN_ETHERNET_RESET, HIGH);
delay(100);
#endif
#ifdef RAK11310
ETH_SPI_PORT.setSCK(PIN_SPI0_SCK);
ETH_SPI_PORT.setTX(PIN_SPI0_MOSI);
ETH_SPI_PORT.setRX(PIN_SPI0_MISO);
ETH_SPI_PORT.begin();
#endif
Ethernet.init(ETH_SPI_PORT, PIN_ETHERNET_SS);
int status = 0;
if (config.network.address_mode == meshtastic_Config_NetworkConfig_AddressMode_DHCP) {
status = Ethernet.begin(expectedMac);
} else if (config.network.address_mode == meshtastic_Config_NetworkConfig_AddressMode_STATIC) {
Ethernet.begin(expectedMac, config.network.ipv4_config.ip, config.network.ipv4_config.dns,
config.network.ipv4_config.gateway, config.network.ipv4_config.subnet);
status = 1;
}
if (status == 0) {
LOG_ERROR("Ethernet re-initialization failed, will retry");
return 5000;
}
LOG_INFO("Ethernet reinitialized - IP %u.%u.%u.%u", Ethernet.localIP()[0], Ethernet.localIP()[1],
Ethernet.localIP()[2], Ethernet.localIP()[3]);
}
Ethernet.maintain();
if (!ethStartupComplete) {
// Start web server