Validate IRC DCC offers before download (#964)

This commit is contained in:
Alex
2026-05-09 13:19:22 +01:00
committed by GitHub
parent fbbff8f715
commit f5fafd2265
6 changed files with 268 additions and 12 deletions

View File

@@ -14,7 +14,7 @@ from typing import TYPE_CHECKING, Self
from shelfmark.core.logger import setup_logger
from .dcc import DCCOffer, parse_dcc_send
from .dcc import DCCError, DCCOffer, parse_dcc_send, validate_dcc_endpoint
if TYPE_CHECKING:
from collections.abc import Iterator
@@ -400,6 +400,33 @@ class IRCClient:
self.send_notice(sender, f"\x01VERSION {self.version}\x01")
logger.debug("Sent VERSION to %s", sender)
@staticmethod
def _sender_nick(msg: IRCMessage) -> str | None:
"""Extract the nick from a message prefix."""
if not msg.prefix:
return None
return msg.prefix.split("!", maxsplit=1)[0]
def _is_allowed_dcc_sender(
self,
msg: IRCMessage,
expected_senders: set[str] | None,
) -> bool:
allowed_senders = expected_senders or self.online_servers
if not allowed_senders:
return True
sender = self._sender_nick(msg)
if sender is None:
logger.warning("Ignoring DCC offer without sender prefix")
return False
normalized_allowed = {nick.casefold() for nick in allowed_senders}
if sender.casefold() not in normalized_allowed:
logger.warning("Ignoring DCC offer from unexpected sender: %s", sender)
return False
return True
def read_messages(self, *, auto_handle: bool = True) -> Iterator[IRCMessage]:
"""Read and yield IRC messages, optionally auto-handling PING/VERSION."""
for line in self._recv_lines():
@@ -422,6 +449,7 @@ class IRCClient:
timeout: float = 60.0,
*,
result_type: bool = False,
expected_senders: set[str] | None = None,
) -> DCCOffer | None:
"""Wait for a DCC SEND offer. Returns None on timeout or no results."""
target_event = IRCEvent.SEARCH_RESULT if result_type else IRCEvent.BOOK_RESULT
@@ -433,12 +461,15 @@ class IRCClient:
return None
if msg.event == target_event:
if not self._is_allowed_dcc_sender(msg, expected_senders):
continue
try:
offer = parse_dcc_send(msg.raw)
validate_dcc_endpoint(offer)
logger.info("Received DCC offer: %s", offer.filename)
except Exception:
logger.exception("Failed to parse DCC")
return None
except DCCError:
logger.exception("Rejected DCC offer")
continue
else:
return offer

View File

@@ -7,6 +7,8 @@ import re
import socket
import struct
from dataclasses import dataclass
from ipaddress import ip_address
from pathlib import PureWindowsPath
from typing import TYPE_CHECKING
from shelfmark.core.logger import setup_logger
@@ -59,6 +61,10 @@ class DCCConnectionError(DCCError):
"""Failed to connect to DCC sender."""
class DCCSecurityError(DCCError):
"""Rejected unsafe DCC offer metadata."""
def int_to_ip(ip_int: int) -> str:
"""Convert 32-bit integer (DCC format) to dotted IP notation."""
packed = struct.pack(">I", ip_int)
@@ -76,15 +82,53 @@ def parse_dcc_send(text: str) -> DCCOffer:
ip_int = int(match.group(2))
port = int(match.group(3))
size = int(match.group(4))
try:
ip = int_to_ip(ip_int)
except struct.error as e:
msg = f"Invalid DCC IP integer: {ip_int}"
raise DCCParseError(msg) from e
return DCCOffer(
filename=filename,
ip=int_to_ip(ip_int),
filename=safe_dcc_filename(filename),
ip=ip,
port=port,
size=size,
)
def safe_dcc_filename(filename: str) -> str:
"""Return a DCC filename that cannot escape its destination directory."""
safe_name = filename.strip()
windows_path = PureWindowsPath(safe_name)
if (
not safe_name
or safe_name in {".", ".."}
or "/" in safe_name
or "\\" in safe_name
or windows_path.drive
):
msg = f"Rejected unsafe DCC filename: {filename!r}"
raise DCCSecurityError(msg)
return safe_name
def validate_dcc_endpoint(offer: DCCOffer) -> None:
"""Reject DCC endpoints that can target local/internal network services."""
if not 1 <= offer.port <= 65535:
msg = f"Rejected invalid DCC port: {offer.port}"
raise DCCSecurityError(msg)
try:
address = ip_address(offer.ip)
except ValueError as e:
msg = f"Rejected invalid DCC IP address: {offer.ip}"
raise DCCSecurityError(msg) from e
if not address.is_global:
msg = f"Rejected non-public DCC endpoint: {offer.ip}"
raise DCCSecurityError(msg)
def download_dcc(
offer: DCCOffer,
dest_path: Path,
@@ -93,6 +137,7 @@ def download_dcc(
timeout: float = 30.0,
) -> None:
"""Download file via DCC protocol to dest_path. Raises DCCError on failure."""
validate_dcc_endpoint(offer)
logger.info("DCC connecting to %s:%s for %s", offer.ip, offer.port, offer.filename)
try:

View File

@@ -12,7 +12,7 @@ from shelfmark.core.logger import setup_logger
from shelfmark.release_sources import DownloadHandler, register_handler
from .connection_manager import connection_manager
from .dcc import DCCError, download_dcc
from .dcc import DCCError, download_dcc, safe_dcc_filename
if TYPE_CHECKING:
from collections.abc import Callable
@@ -23,6 +23,15 @@ if TYPE_CHECKING:
logger = setup_logger(__name__)
def _server_from_download_request(download_request: str) -> str | None:
"""Extract the expected IRC bot nick from a release request line."""
stripped = download_request.strip()
if not stripped.startswith("!"):
return None
server = stripped[1:].split(maxsplit=1)[0]
return server or None
def _config_text(key: str) -> str:
"""Read a string config value with whitespace trimmed."""
value = config.get(key, "")
@@ -72,6 +81,7 @@ class IRCDownloadHandler(DownloadHandler):
"""Download a release via IRC DCC. task.task_id contains the IRC request string."""
download_request = task.task_id
logger.info("IRC download: %s...", download_request[:60])
expected_server = _server_from_download_request(download_request)
# Get IRC settings
server = _config_text("IRC_SERVER")
@@ -123,7 +133,8 @@ class IRCDownloadHandler(DownloadHandler):
# Phase 3: Wait for DCC offer
status_callback("resolving", "Waiting for bot response")
offer = client.wait_for_dcc(timeout=120.0, result_type=False)
wait_kwargs = {"expected_senders": {expected_server}} if expected_server else {}
offer = client.wait_for_dcc(timeout=120.0, result_type=False, **wait_kwargs)
if not offer:
status_callback("error", "No response from bot")
@@ -137,7 +148,9 @@ class IRCDownloadHandler(DownloadHandler):
status_callback("downloading", "")
# Get file extension from offer filename
ext = Path(offer.filename).suffix.lstrip(".") or task.format or "epub"
ext = (
Path(safe_dcc_filename(offer.filename)).suffix.lstrip(".") or task.format or "epub"
)
# Stage to temp directory (lazy import to avoid circular import)
from shelfmark.download.staging import get_staging_path

View File

@@ -30,7 +30,7 @@ from shelfmark.release_sources import (
)
from .connection_manager import connection_manager
from .dcc import DCCError, download_dcc
from .dcc import DCCError, download_dcc, safe_dcc_filename
from .parser import SearchResult, extract_results_from_zip, parse_results_file
logger = setup_logger(__name__)
@@ -227,7 +227,8 @@ class IRCReleaseSource(ReleaseSource):
# Wait for results DCC - this is the long wait
_emit_status(f"Connected to #{channel} - Waiting for results...", phase="searching")
offer = client.wait_for_dcc(timeout=60.0, result_type=True)
wait_kwargs = {"expected_senders": {search_bot}} if search_bot else {}
offer = client.wait_for_dcc(timeout=60.0, result_type=True, **wait_kwargs)
if not offer:
logger.info("No search results received")
_emit_status("No results found", phase="complete")
@@ -247,7 +248,7 @@ class IRCReleaseSource(ReleaseSource):
# Download results file
_emit_status(f"Connected to #{channel} - Downloading results...", phase="downloading")
with tempfile.TemporaryDirectory() as tmpdir:
result_path = Path(tmpdir) / offer.filename
result_path = Path(tmpdir) / safe_dcc_filename(offer.filename)
download_dcc(offer, result_path, timeout=30.0)
# Parse results

View File

@@ -0,0 +1,84 @@
from ipaddress import IPv4Address
from shelfmark.release_sources.irc.client import IRCClient, IRCEvent, IRCMessage
def _dcc_raw(sender: str, filename: str, ip: str, port: int = 443) -> str:
ip_int = int(IPv4Address(ip))
return f':{sender}!user@example.test PRIVMSG reader :\x01DCC SEND "{filename}" {ip_int} {port} 1\x01'
def test_wait_for_dcc_ignores_unexpected_sender(monkeypatch) -> None:
client = IRCClient(nick="reader", server="irc.example.test", port=6697)
client.online_servers = {"BookBot"}
messages = [
IRCMessage(
raw=_dcc_raw("Mallory", "evil.epub", "8.8.8.8"),
prefix="Mallory!user@example.test",
event=IRCEvent.BOOK_RESULT,
),
IRCMessage(
raw=_dcc_raw("BookBot", "book.epub", "8.8.8.8"),
prefix="BookBot!user@example.test",
event=IRCEvent.BOOK_RESULT,
),
]
monkeypatch.setattr(client, "read_messages", lambda: iter(messages))
offer = client.wait_for_dcc(timeout=1.0, result_type=False)
assert offer is not None
assert offer.filename == "book.epub"
def test_wait_for_dcc_uses_expected_sender_over_online_server_list(monkeypatch) -> None:
client = IRCClient(nick="reader", server="irc.example.test", port=6697)
client.online_servers = {"OtherBot"}
messages = [
IRCMessage(
raw=_dcc_raw("BookBot", "book.epub", "8.8.8.8"),
prefix="BookBot!user@example.test",
event=IRCEvent.BOOK_RESULT,
)
]
monkeypatch.setattr(client, "read_messages", lambda: iter(messages))
offer = client.wait_for_dcc(
timeout=1.0,
result_type=False,
expected_senders={"BookBot"},
)
assert offer is not None
assert offer.filename == "book.epub"
def test_wait_for_dcc_ignores_unsafe_offer_and_keeps_waiting(monkeypatch) -> None:
client = IRCClient(nick="reader", server="irc.example.test", port=6697)
client.online_servers = {"BookBot"}
messages = [
IRCMessage(
raw=_dcc_raw("BookBot", "../outside.epub", "8.8.8.8"),
prefix="BookBot!user@example.test",
event=IRCEvent.BOOK_RESULT,
),
IRCMessage(
raw=_dcc_raw("BookBot", "internal.epub", "127.0.0.1"),
prefix="BookBot!user@example.test",
event=IRCEvent.BOOK_RESULT,
),
IRCMessage(
raw=_dcc_raw("BookBot", "book.epub", "8.8.8.8"),
prefix="BookBot!user@example.test",
event=IRCEvent.BOOK_RESULT,
),
]
monkeypatch.setattr(client, "read_messages", lambda: iter(messages))
offer = client.wait_for_dcc(timeout=1.0, result_type=False)
assert offer is not None
assert offer.filename == "book.epub"

View File

@@ -0,0 +1,82 @@
import socket
import pytest
from shelfmark.release_sources.irc.dcc import (
DCCOffer,
DCCParseError,
DCCSecurityError,
download_dcc,
parse_dcc_send,
safe_dcc_filename,
validate_dcc_endpoint,
)
def test_safe_dcc_filename_allows_plain_filenames() -> None:
assert safe_dcc_filename("results.txt") == "results.txt"
assert safe_dcc_filename("Author - Title.epub") == "Author - Title.epub"
@pytest.mark.parametrize(
"filename",
[
"",
".",
"..",
"../outside.txt",
"/tmp/outside.txt",
r"..\outside.txt",
r"C:\temp\outside.txt",
],
)
def test_safe_dcc_filename_rejects_paths(filename: str) -> None:
with pytest.raises(DCCSecurityError):
safe_dcc_filename(filename)
@pytest.mark.parametrize(
"ip",
[
"127.0.0.1",
"10.0.0.1",
"172.16.0.1",
"192.168.1.1",
"169.254.169.254",
"0.0.0.0",
],
)
def test_validate_dcc_endpoint_rejects_non_public_ips(ip: str) -> None:
with pytest.raises(DCCSecurityError):
validate_dcc_endpoint(DCCOffer(filename="book.epub", ip=ip, port=1234, size=1))
@pytest.mark.parametrize("port", [0, 65536])
def test_validate_dcc_endpoint_rejects_invalid_ports(port: int) -> None:
with pytest.raises(DCCSecurityError):
validate_dcc_endpoint(DCCOffer(filename="book.epub", ip="8.8.8.8", port=port, size=1))
def test_validate_dcc_endpoint_allows_public_endpoint() -> None:
validate_dcc_endpoint(DCCOffer(filename="book.epub", ip="8.8.8.8", port=443, size=1))
def test_parse_dcc_send_rejects_out_of_range_ip_integer() -> None:
with pytest.raises(DCCParseError):
parse_dcc_send('DCC SEND "book.epub" 999999999999999999 443 1')
def test_download_dcc_rejects_private_endpoint_before_socket_connect(
monkeypatch,
tmp_path,
) -> None:
def fail_socket(*_args: object, **_kwargs: object) -> socket.socket:
raise AssertionError("download should reject the endpoint before opening a socket")
monkeypatch.setattr(socket, "socket", fail_socket)
with pytest.raises(DCCSecurityError):
download_dcc(
DCCOffer(filename="book.epub", ip="127.0.0.1", port=1234, size=1),
tmp_path / "book.epub",
)