fall back to pkarr+DNS discovery when mDNS port is unavailable

Iroh's endpoint.bind() fails wholesale if any configured discovery service
fails to initialize. MdnsDiscovery requires binding UDP :5353, which on
most Linux systems (including TrueNAS) is already owned by avahi-daemon.
Result: endpoint creation errors out with "Service 'mdns' error", the
event loop never starts, command_sender stays None, and protocol
registration fails — so sd-server has no working networking at all.

Make mDNS best-effort: on any error whose message mentions "mdns",
retry endpoint creation with only pkarr + DNS discovery. Local-network
auto-discovery is lost but remote pairing via node ID (which uses n0's
DNS infrastructure, not mDNS) continues to work normally.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
James Pine
2026-04-17 18:39:28 -07:00
parent 452fbf46d7
commit 1086efc8be

View File

@@ -213,40 +213,78 @@ impl NetworkingService {
// - mDNS for local network discovery
// - PkarrPublisher to publish our address to dns.iroh.link (enables remote discovery)
// - DnsDiscovery to resolve other nodes from dns.iroh.link
let endpoint = Endpoint::builder()
.secret_key(secret_key)
.alpns(vec![
PAIRING_ALPN.to_vec(),
FILE_TRANSFER_ALPN.to_vec(),
MESSAGING_ALPN.to_vec(),
SYNC_ALPN.to_vec(),
JOB_ACTIVITY_ALPN.to_vec(),
])
.relay_mode(iroh::RelayMode::Default)
.discovery(MdnsDiscovery::builder())
.discovery(PkarrPublisher::n0_dns())
.discovery(DnsDiscovery::n0_dns())
.bind_addr_v4(std::net::SocketAddrV4::new(
std::net::Ipv4Addr::UNSPECIFIED,
0,
))
.bind_addr_v6(std::net::SocketAddrV6::new(
std::net::Ipv6Addr::UNSPECIFIED,
0,
0,
0,
))
.bind()
.await
.map_err(|e| NetworkingError::Transport(format!("Failed to create endpoint: {}", e)))?;
//
// mDNS is best-effort: on hosts where another service (e.g. avahi-daemon
// on most Linux boxes / TrueNAS) already owns UDP :5353, Iroh's own mDNS
// service can't bind and endpoint creation fails wholesale. Fall back to
// pkarr + DNS-only discovery in that case — remote pairing via node ID
// continues to work, we just lose local-network auto-discovery.
// Builds the (not yet awaited) endpoint bind future. pkarr publishing and DNS
// resolution are always registered; mDNS is appended only when `with_mdns` is
// true, so the same recipe can be reused for the mDNS-less retry.
let build_endpoint = |with_mdns: bool| {
    // Every protocol this endpoint accepts.
    let supported_alpns = vec![
        PAIRING_ALPN.to_vec(),
        FILE_TRANSFER_ALPN.to_vec(),
        MESSAGING_ALPN.to_vec(),
        SYNC_ALPN.to_vec(),
        JOB_ACTIVITY_ALPN.to_vec(),
    ];

    // Unspecified address with port 0 for both address families.
    let any_v4 = std::net::SocketAddrV4::new(std::net::Ipv4Addr::UNSPECIFIED, 0);
    let any_v6 = std::net::SocketAddrV6::new(std::net::Ipv6Addr::UNSPECIFIED, 0, 0, 0);

    // The secret key is cloned because this closure is invoked a second time
    // on the fallback path.
    let base = Endpoint::builder()
        .secret_key(secret_key.clone())
        .alpns(supported_alpns)
        .relay_mode(iroh::RelayMode::Default)
        .discovery(PkarrPublisher::n0_dns())
        .discovery(DnsDiscovery::n0_dns())
        .bind_addr_v4(any_v4)
        .bind_addr_v6(any_v6);

    // mDNS is registered last, and only on request.
    let configured = if with_mdns {
        base.discovery(MdnsDiscovery::builder())
    } else {
        base
    };

    configured.bind()
};
// First attempt includes mDNS. If that fails with an error whose message
// mentions "mdns" (case-insensitive), retry once without it; any other
// failure is returned to the caller as a transport error.
let endpoint = match build_endpoint(true).await {
    Ok(bound) => {
        self.logger
            .info("Endpoint bound successfully with mDNS + pkarr discovery enabled")
            .await;
        bound
    }
    // mDNS-related failure: warn, then fall back to pkarr + DNS only.
    Err(mdns_err) if mdns_err.to_string().to_lowercase().contains("mdns") => {
        self.logger
            .warn(&format!(
                "mDNS discovery unavailable ({}); retrying with pkarr + DNS only. \
                 Local-network auto-discovery is disabled on this host, but remote \
                 pairing via node ID will still work.",
                mdns_err
            ))
            .await;

        let fallback = build_endpoint(false).await.map_err(|retry_err| {
            NetworkingError::Transport(format!("Failed to create endpoint: {}", retry_err))
        })?;

        self.logger
            .info("Endpoint bound successfully without mDNS (pkarr + DNS only)")
            .await;
        fallback
    }
    // Unrelated failure: no retry.
    Err(other) => {
        return Err(NetworkingError::Transport(format!(
            "Failed to create endpoint: {}",
            other
        )));
    }
};
// Store endpoint reference for other methods
self.endpoint = Some(endpoint.clone());
self.logger
.info("Endpoint bound successfully with mDNS + pkarr discovery enabled")
.await;
// Create and start event loop
let event_loop = NetworkingEventLoop::new(
endpoint,