From f5fafd22657a466104c23e756d7f20d489299f4e Mon Sep 17 00:00:00 2001 From: Alex <25013571+alexhb1@users.noreply.github.com> Date: Sat, 9 May 2026 13:19:22 +0100 Subject: [PATCH] Validate IRC DCC offers before download (#964) --- shelfmark/release_sources/irc/client.py | 39 +++++++++-- shelfmark/release_sources/irc/dcc.py | 49 +++++++++++++- shelfmark/release_sources/irc/handler.py | 19 +++++- shelfmark/release_sources/irc/source.py | 7 +- tests/irc/test_client_dcc_security.py | 84 ++++++++++++++++++++++++ tests/irc/test_dcc_security.py | 82 +++++++++++++++++++++++ 6 files changed, 268 insertions(+), 12 deletions(-) create mode 100644 tests/irc/test_client_dcc_security.py create mode 100644 tests/irc/test_dcc_security.py diff --git a/shelfmark/release_sources/irc/client.py b/shelfmark/release_sources/irc/client.py index dac506c..b37e614 100644 --- a/shelfmark/release_sources/irc/client.py +++ b/shelfmark/release_sources/irc/client.py @@ -14,7 +14,7 @@ from typing import TYPE_CHECKING, Self from shelfmark.core.logger import setup_logger -from .dcc import DCCOffer, parse_dcc_send +from .dcc import DCCError, DCCOffer, parse_dcc_send, validate_dcc_endpoint if TYPE_CHECKING: from collections.abc import Iterator @@ -400,6 +400,33 @@ class IRCClient: self.send_notice(sender, f"\x01VERSION {self.version}\x01") logger.debug("Sent VERSION to %s", sender) + @staticmethod + def _sender_nick(msg: IRCMessage) -> str | None: + """Extract the nick from a message prefix.""" + if not msg.prefix: + return None + return msg.prefix.split("!", maxsplit=1)[0] + + def _is_allowed_dcc_sender( + self, + msg: IRCMessage, + expected_senders: set[str] | None, + ) -> bool: + allowed_senders = expected_senders or self.online_servers + if not allowed_senders: + return True + + sender = self._sender_nick(msg) + if sender is None: + logger.warning("Ignoring DCC offer without sender prefix") + return False + + normalized_allowed = {nick.casefold() for nick in allowed_senders} + if sender.casefold() not in normalized_allowed: + logger.warning("Ignoring DCC offer from unexpected sender: %s", sender) + return False + return True + def read_messages(self, *, auto_handle: bool = True) -> Iterator[IRCMessage]: """Read and yield IRC messages, optionally auto-handling PING/VERSION.""" for line in self._recv_lines(): @@ -422,6 +449,7 @@ class IRCClient: timeout: float = 60.0, *, result_type: bool = False, + expected_senders: set[str] | None = None, ) -> DCCOffer | None: """Wait for a DCC SEND offer. Returns None on timeout or no results.""" target_event = IRCEvent.SEARCH_RESULT if result_type else IRCEvent.BOOK_RESULT @@ -433,12 +461,15 @@ class IRCClient: return None if msg.event == target_event: + if not self._is_allowed_dcc_sender(msg, expected_senders): + continue try: offer = parse_dcc_send(msg.raw) + validate_dcc_endpoint(offer) logger.info("Received DCC offer: %s", offer.filename) - except Exception: - logger.exception("Failed to parse DCC") - return None + except DCCError: + logger.exception("Rejected DCC offer") + continue else: return offer diff --git a/shelfmark/release_sources/irc/dcc.py b/shelfmark/release_sources/irc/dcc.py index 4e979b0..d8e2002 100644 --- a/shelfmark/release_sources/irc/dcc.py +++ b/shelfmark/release_sources/irc/dcc.py @@ -7,6 +7,8 @@ import re import socket import struct from dataclasses import dataclass +from ipaddress import ip_address +from pathlib import PureWindowsPath from typing import TYPE_CHECKING from shelfmark.core.logger import setup_logger @@ -59,6 +61,10 @@ class DCCConnectionError(DCCError): """Failed to connect to DCC sender.""" +class DCCSecurityError(DCCError): + """Rejected unsafe DCC offer metadata.""" + + def int_to_ip(ip_int: int) -> str: """Convert 32-bit integer (DCC format) to dotted IP notation.""" packed = struct.pack(">I", ip_int) @@ -76,15 +82,53 @@ def parse_dcc_send(text: str) -> DCCOffer: ip_int = int(match.group(2)) port = int(match.group(3)) size = int(match.group(4)) + try: + ip = int_to_ip(ip_int) + except struct.error as e: + msg = f"Invalid DCC IP integer: {ip_int}" + raise DCCParseError(msg) from e return DCCOffer( - filename=filename, - ip=int_to_ip(ip_int), + filename=safe_dcc_filename(filename), + ip=ip, port=port, size=size, ) +def safe_dcc_filename(filename: str) -> str: + """Return a DCC filename that cannot escape its destination directory.""" + safe_name = filename.strip() + windows_path = PureWindowsPath(safe_name) + if ( + not safe_name + or safe_name in {".", ".."} + or "/" in safe_name + or "\\" in safe_name + or windows_path.drive + ): + msg = f"Rejected unsafe DCC filename: {filename!r}" + raise DCCSecurityError(msg) + return safe_name + + +def validate_dcc_endpoint(offer: DCCOffer) -> None: + """Reject DCC endpoints that can target local/internal network services.""" + if not 1 <= offer.port <= 65535: + msg = f"Rejected invalid DCC port: {offer.port}" + raise DCCSecurityError(msg) + + try: + address = ip_address(offer.ip) + except ValueError as e: + msg = f"Rejected invalid DCC IP address: {offer.ip}" + raise DCCSecurityError(msg) from e + + if not address.is_global: + msg = f"Rejected non-public DCC endpoint: {offer.ip}" + raise DCCSecurityError(msg) + + def download_dcc( offer: DCCOffer, dest_path: Path, @@ -93,6 +137,7 @@ def download_dcc( timeout: float = 30.0, ) -> None: """Download file via DCC protocol to dest_path. Raises DCCError on failure.""" + validate_dcc_endpoint(offer) logger.info("DCC connecting to %s:%s for %s", offer.ip, offer.port, offer.filename) try: diff --git a/shelfmark/release_sources/irc/handler.py b/shelfmark/release_sources/irc/handler.py index 66b6f77..4941039 100644 --- a/shelfmark/release_sources/irc/handler.py +++ b/shelfmark/release_sources/irc/handler.py @@ -12,7 +12,7 @@ from shelfmark.core.logger import setup_logger from shelfmark.release_sources import DownloadHandler, register_handler from .connection_manager import connection_manager -from .dcc import DCCError, download_dcc +from .dcc import DCCError, download_dcc, safe_dcc_filename if TYPE_CHECKING: from collections.abc import Callable @@ -23,6 +23,15 @@ if TYPE_CHECKING: logger = setup_logger(__name__) +def _server_from_download_request(download_request: str) -> str | None: + """Extract the expected IRC bot nick from a release request line.""" + stripped = download_request.strip() + if not stripped.startswith("!"): + return None + server = stripped[1:].split(maxsplit=1)[0] + return server or None + + def _config_text(key: str) -> str: """Read a string config value with whitespace trimmed.""" value = config.get(key, "") @@ -72,6 +81,7 @@ class IRCDownloadHandler(DownloadHandler): """Download a release via IRC DCC. task.task_id contains the IRC request string.""" download_request = task.task_id logger.info("IRC download: %s...", download_request[:60]) + expected_server = _server_from_download_request(download_request) # Get IRC settings server = _config_text("IRC_SERVER") @@ -123,7 +133,8 @@ class IRCDownloadHandler(DownloadHandler): # Phase 3: Wait for DCC offer status_callback("resolving", "Waiting for bot response") - offer = client.wait_for_dcc(timeout=120.0, result_type=False) + wait_kwargs = {"expected_senders": {expected_server}} if expected_server else {} + offer = client.wait_for_dcc(timeout=120.0, result_type=False, **wait_kwargs) if not offer: status_callback("error", "No response from bot") @@ -137,7 +148,9 @@ class IRCDownloadHandler(DownloadHandler): status_callback("downloading", "") # Get file extension from offer filename - ext = Path(offer.filename).suffix.lstrip(".") or task.format or "epub" + ext = ( + Path(safe_dcc_filename(offer.filename)).suffix.lstrip(".") or task.format or "epub" + ) # Stage to temp directory (lazy import to avoid circular import) from shelfmark.download.staging import get_staging_path diff --git a/shelfmark/release_sources/irc/source.py b/shelfmark/release_sources/irc/source.py index f0f6a60..5cac0de 100644 --- a/shelfmark/release_sources/irc/source.py +++ b/shelfmark/release_sources/irc/source.py @@ -30,7 +30,7 @@ from shelfmark.release_sources import ( ) from .connection_manager import connection_manager -from .dcc import DCCError, download_dcc +from .dcc import DCCError, download_dcc, safe_dcc_filename from .parser import SearchResult, extract_results_from_zip, parse_results_file logger = setup_logger(__name__) @@ -227,7 +227,8 @@ class IRCReleaseSource(ReleaseSource): # Wait for results DCC - this is the long wait _emit_status(f"Connected to #{channel} - Waiting for results...", phase="searching") - offer = client.wait_for_dcc(timeout=60.0, result_type=True) + wait_kwargs = {"expected_senders": {search_bot}} if search_bot else {} + offer = client.wait_for_dcc(timeout=60.0, result_type=True, **wait_kwargs) if not offer: logger.info("No search results received") _emit_status("No results found", phase="complete") @@ -247,7 +248,7 @@ class IRCReleaseSource(ReleaseSource): # Download results file _emit_status(f"Connected to #{channel} - Downloading results...", phase="downloading") with tempfile.TemporaryDirectory() as tmpdir: - result_path = Path(tmpdir) / offer.filename + result_path = Path(tmpdir) / safe_dcc_filename(offer.filename) download_dcc(offer, result_path, timeout=30.0) # Parse results diff --git a/tests/irc/test_client_dcc_security.py b/tests/irc/test_client_dcc_security.py new file mode 100644 index 0000000..269dd28 --- /dev/null +++ b/tests/irc/test_client_dcc_security.py @@ -0,0 +1,84 @@ +from ipaddress import IPv4Address + +from shelfmark.release_sources.irc.client import IRCClient, IRCEvent, IRCMessage + + +def _dcc_raw(sender: str, filename: str, ip: str, port: int = 443) -> str: + ip_int = int(IPv4Address(ip)) + return f':{sender}!user@example.test PRIVMSG reader :\x01DCC SEND "{filename}" {ip_int} {port} 1\x01' + + +def test_wait_for_dcc_ignores_unexpected_sender(monkeypatch) -> None: + client = IRCClient(nick="reader", server="irc.example.test", port=6697) + client.online_servers = {"BookBot"} + + messages = [ + IRCMessage( + raw=_dcc_raw("Mallory", "evil.epub", "8.8.8.8"), + prefix="Mallory!user@example.test", + event=IRCEvent.BOOK_RESULT, + ), + IRCMessage( + raw=_dcc_raw("BookBot", "book.epub", "8.8.8.8"), + prefix="BookBot!user@example.test", + event=IRCEvent.BOOK_RESULT, + ), + ] + monkeypatch.setattr(client, "read_messages", lambda: iter(messages)) + + offer = client.wait_for_dcc(timeout=1.0, result_type=False) + + assert offer is not None + assert offer.filename == "book.epub" + + +def test_wait_for_dcc_uses_expected_sender_over_online_server_list(monkeypatch) -> None: + client = IRCClient(nick="reader", server="irc.example.test", port=6697) + client.online_servers = {"OtherBot"} + + messages = [ + IRCMessage( + raw=_dcc_raw("BookBot", "book.epub", "8.8.8.8"), + prefix="BookBot!user@example.test", + event=IRCEvent.BOOK_RESULT, + ) + ] + monkeypatch.setattr(client, "read_messages", lambda: iter(messages)) + + offer = client.wait_for_dcc( + timeout=1.0, + result_type=False, + expected_senders={"BookBot"}, + ) + + assert offer is not None + assert offer.filename == "book.epub" + + +def test_wait_for_dcc_ignores_unsafe_offer_and_keeps_waiting(monkeypatch) -> None: + client = IRCClient(nick="reader", server="irc.example.test", port=6697) + client.online_servers = {"BookBot"} + + messages = [ + IRCMessage( + raw=_dcc_raw("BookBot", "../outside.epub", "8.8.8.8"), + prefix="BookBot!user@example.test", + event=IRCEvent.BOOK_RESULT, + ), + IRCMessage( + raw=_dcc_raw("BookBot", "internal.epub", "127.0.0.1"), + prefix="BookBot!user@example.test", + event=IRCEvent.BOOK_RESULT, + ), + IRCMessage( + raw=_dcc_raw("BookBot", "book.epub", "8.8.8.8"), + prefix="BookBot!user@example.test", + event=IRCEvent.BOOK_RESULT, + ), + ] + monkeypatch.setattr(client, "read_messages", lambda: iter(messages)) + + offer = client.wait_for_dcc(timeout=1.0, result_type=False) + + assert offer is not None + assert offer.filename == "book.epub" diff --git a/tests/irc/test_dcc_security.py b/tests/irc/test_dcc_security.py new file mode 100644 index 0000000..250955a --- /dev/null +++ b/tests/irc/test_dcc_security.py @@ -0,0 +1,82 @@ +import socket + +import pytest + +from shelfmark.release_sources.irc.dcc import ( + DCCOffer, + DCCParseError, + DCCSecurityError, + download_dcc, + parse_dcc_send, + safe_dcc_filename, + validate_dcc_endpoint, +) + + +def test_safe_dcc_filename_allows_plain_filenames() -> None: + assert safe_dcc_filename("results.txt") == "results.txt" + assert safe_dcc_filename("Author - Title.epub") == "Author - Title.epub" + + +@pytest.mark.parametrize( + "filename", + [ + "", + ".", + "..", + "../outside.txt", + "/tmp/outside.txt", + r"..\outside.txt", + r"C:\temp\outside.txt", + ], +) +def test_safe_dcc_filename_rejects_paths(filename: str) -> None: + with pytest.raises(DCCSecurityError): + safe_dcc_filename(filename) + + +@pytest.mark.parametrize( + "ip", + [ + "127.0.0.1", + "10.0.0.1", + "172.16.0.1", + "192.168.1.1", + "169.254.169.254", + "0.0.0.0", + ], +) +def test_validate_dcc_endpoint_rejects_non_public_ips(ip: str) -> None: + with pytest.raises(DCCSecurityError): + validate_dcc_endpoint(DCCOffer(filename="book.epub", ip=ip, port=1234, size=1)) + + +@pytest.mark.parametrize("port", [0, 65536]) +def test_validate_dcc_endpoint_rejects_invalid_ports(port: int) -> None: + with pytest.raises(DCCSecurityError): + validate_dcc_endpoint(DCCOffer(filename="book.epub", ip="8.8.8.8", port=port, size=1)) + + +def test_validate_dcc_endpoint_allows_public_endpoint() -> None: + validate_dcc_endpoint(DCCOffer(filename="book.epub", ip="8.8.8.8", port=443, size=1)) + + +def test_parse_dcc_send_rejects_out_of_range_ip_integer() -> None: + with pytest.raises(DCCParseError): + parse_dcc_send('DCC SEND "book.epub" 999999999999999999 443 1') + + +def test_download_dcc_rejects_private_endpoint_before_socket_connect( + monkeypatch, + tmp_path, +) -> None: + def fail_socket(*_args: object, **_kwargs: object) -> socket.socket: + raise AssertionError("download should reject the endpoint before opening a socket") + + monkeypatch.setattr(socket, "socket", fail_socket) + + with pytest.raises(DCCSecurityError): + download_dcc( + DCCOffer(filename="book.epub", ip="127.0.0.1", port=1234, size=1), + tmp_path / "book.epub", + )