diff --git a/shelfmark/config/settings.py b/shelfmark/config/settings.py index 9340293..0e5076e 100644 --- a/shelfmark/config/settings.py +++ b/shelfmark/config/settings.py @@ -1448,6 +1448,17 @@ def download_source_settings() -> list[SettingsField]: ), default=False, ), + CheckboxField( + key="DIRECT_DOWNLOAD_LANGUAGE_FROM_PATH", + label="Detect Language From Distant Path", + description=( + "When language metadata is missing or unknown, parse the distant path " + "(file path shown in search results) for language tags like [BD FR] or [En]. " + "Also enables local language filtering so lgli files without AA language " + "metadata are not excluded before the distant path can be checked." + ), + default=False, + ), PasswordField( key="AA_DONATOR_KEY", label="Account Donator Key", diff --git a/shelfmark/core/notifications.py b/shelfmark/core/notifications.py index a4b2a1c..fc7a5e2 100644 --- a/shelfmark/core/notifications.py +++ b/shelfmark/core/notifications.py @@ -393,6 +393,41 @@ def _plugin_label(plugin: object, fallback_scheme: str) -> str: return " ".join(parts) +def _apprise_proxy_env() -> dict[str, str]: + """Build proxy env vars from app config so Apprise respects the proxy setting.""" + import os + + from shelfmark.core.config import config as _cfg + + mode = str(_cfg.get("PROXY_MODE", "") or "").lower() + env: dict[str, str] = {} + + if mode == "http": + http = str(_cfg.get("HTTP_PROXY", "") or "").strip() + https = str(_cfg.get("HTTPS_PROXY", "") or "").strip() or http + if http: + env["HTTP_PROXY"] = http + env["http_proxy"] = http + if https: + env["HTTPS_PROXY"] = https + env["https_proxy"] = https + elif mode == "socks5": + socks = str(_cfg.get("SOCKS5_PROXY", "") or "").strip() + if socks: + env["HTTP_PROXY"] = socks + env["http_proxy"] = socks + env["HTTPS_PROXY"] = socks + env["https_proxy"] = socks + + no_proxy = str(_cfg.get("NO_PROXY", "") or "").strip() + if no_proxy and env: + env["NO_PROXY"] = no_proxy + env["no_proxy"] = no_proxy + + # Don't override if the user already set these in the environment directly + return {k: v for k, v in env.items() if not os.environ.get(k)} + + def _dispatch_to_apprise( urls: Iterable[str], *, @@ -400,6 +435,8 @@ def _dispatch_to_apprise( body: str, notify_type: object, ) -> dict[str, Any]: + import os + normalized_urls = _normalize_urls(list(urls)) url_schemes = _extract_url_schemes(normalized_urls) if not normalized_urls: @@ -408,6 +445,11 @@ def _dispatch_to_apprise( if apprise is None: return {"success": False, "message": "Apprise is not installed"} + proxy_env = _apprise_proxy_env() + if proxy_env: + logger.debug("Applying proxy env for Apprise dispatch: %s", list(proxy_env.keys())) + os.environ.update(proxy_env) + valid_urls = 0 invalid_urls = 0 delivered_urls = 0 diff --git a/shelfmark/core/utils.py b/shelfmark/core/utils.py index 319d429..a012bf4 100644 --- a/shelfmark/core/utils.py +++ b/shelfmark/core/utils.py @@ -52,6 +52,13 @@ def normalize_http_url( if scheme: normalized = f"{scheme}://{normalized}" + # Strip query string and fragment — mirrors are used as base URLs for + # constructing search requests; params/fragments on the configured URL + # produce malformed URLs when paths are appended (issue #999). + parsed = urlparse(normalized) + if parsed.query or parsed.fragment: + normalized = parsed._replace(query="", fragment="").geturl() + if strip_trailing_slash: normalized = normalized.rstrip("/") diff --git a/shelfmark/download/clients/rtorrent.py b/shelfmark/download/clients/rtorrent.py index 8088faa..35098c8 100644 --- a/shelfmark/download/clients/rtorrent.py +++ b/shelfmark/download/clients/rtorrent.py @@ -115,6 +115,7 @@ class RTorrentClient(DownloadClient): self._rpc = _create_rtorrent_server_proxy(self._base_url) self._download_dir = config_text(config.get("RTORRENT_DOWNLOAD_DIR", "")) self._label = config_text(config.get("RTORRENT_LABEL", "")) + self._audiobook_label = config_text(config.get("RTORRENT_AUDIOBOOK_LABEL", "")) @staticmethod def is_configured() -> bool: @@ -161,7 +162,11 @@ class RTorrentClient(DownloadClient): commands = [] - label = category or self._label + is_audiobook = kwargs.get("content_type") == "audiobook" + default_label = ( + self._audiobook_label if is_audiobook and self._audiobook_label else self._label + ) + label = category or default_label if label: logger.debug("Setting rTorrent label: %s", label) commands.append(f"d.custom1.set={label}") diff --git a/shelfmark/download/clients/settings.py b/shelfmark/download/clients/settings.py index 67a17b5..21084c2 100644 --- a/shelfmark/download/clients/settings.py +++ b/shelfmark/download/clients/settings.py @@ -748,11 +748,18 @@ def prowlarr_clients_settings() -> list[SettingsField]: TextField( key="RTORRENT_LABEL", label="Book Label", - description="Label to assign to book downloads in rTorrent", + description="Label to assign to ebook downloads in rTorrent", placeholder="cwabd", default="cwabd", show_when={"field": "PROWLARR_TORRENT_CLIENT", "value": "rtorrent"}, ), + TextField( + key="RTORRENT_AUDIOBOOK_LABEL", + label="Audiobook Label", + description="Label to assign to audiobook downloads in rTorrent (falls back to Book Label if not set)", + placeholder="audiobooks", + show_when={"field": "PROWLARR_TORRENT_CLIENT", "value": "rtorrent"}, + ), TextField( key="RTORRENT_DOWNLOAD_DIR", label="Download Directory", diff --git a/shelfmark/download/postprocess/destination.py b/shelfmark/download/postprocess/destination.py index 76c875f..c44add9 100644 --- a/shelfmark/download/postprocess/destination.py +++ b/shelfmark/download/postprocess/destination.py @@ -2,6 +2,7 @@ from __future__ import annotations +import contextlib import uuid from typing import TYPE_CHECKING @@ -40,9 +41,11 @@ def validate_destination( status_callback("error", f"Destination is not a directory: {destination}") return False + created_by_us = False if not destination_exists: try: run_blocking_io(destination.mkdir, parents=True, exist_ok=True) + created_by_us = True except (OSError, PermissionError) as exc: log_path_permission_context("destination_create", destination) logger.warning("Cannot create destination: %s (%s)", destination, exc) @@ -63,6 +66,9 @@ def validate_destination( log_path_permission_context("destination_write_probe", destination) logger.warning("Destination not writable: %s (%s)", destination, exc) status_callback("error", f"Destination not writable: {destination} ({exc})") + if created_by_us: + with contextlib.suppress(OSError): + run_blocking_io(destination.rmdir) return False return True diff --git a/shelfmark/release_sources/audiobookbay/scraper.py b/shelfmark/release_sources/audiobookbay/scraper.py index 56468ba..1faeb4a 100644 --- a/shelfmark/release_sources/audiobookbay/scraper.py +++ b/shelfmark/release_sources/audiobookbay/scraper.py @@ -417,6 +417,18 @@ def extract_magnet_link(details_url: str, hostname: str = "audiobookbay.lu") -> # Clean up info hash (remove whitespace, ensure uppercase) info_hash = re.sub(r"\s+", "", info_hash).upper() + # Validate: SHA1 = 40 hex chars, SHA256 = 64 hex chars + if not re.match(r"^[0-9A-F]{40}$|^[0-9A-F]{64}$", info_hash): + logger.warning("Info Hash invalid (got %r), trying magnet fallback.", info_hash) + # Fallback: search entire page for a complete magnet link (e.g. posted in comments) + magnet_match = re.search(r"magnet:\?xt=urn:btih:([0-9a-fA-F]{40,64})", detail_html) + if magnet_match: + info_hash = magnet_match.group(1).upper() + logger.info("Found hash via magnet fallback: %s", info_hash) + else: + logger.warning("No valid magnet link found on page, giving up.") + return None + # 2. Extract Trackers # Find all containing udp:// or http:// trackers = [] diff --git a/shelfmark/release_sources/audiobookbay/source.py b/shelfmark/release_sources/audiobookbay/source.py index f7ad32b..66700bf 100644 --- a/shelfmark/release_sources/audiobookbay/source.py +++ b/shelfmark/release_sources/audiobookbay/source.py @@ -238,8 +238,8 @@ class AudiobookBaySource(ReleaseSource): exact_phrase=exact_phrase, ) - # For auto-generated queries, fallback to broad matching if exact phrase returns nothing. - if exact_phrase and not results and not plan.manual_query: + # Fallback to broad matching if exact phrase returns nothing (manual or auto query). + if exact_phrase and not results: logger.info( "No exact phrase results, retrying AudiobookBay search without quotes" ) @@ -288,7 +288,7 @@ class AudiobookBaySource(ReleaseSource): size_str = result.get("size") size_bytes = parse_size(size_str) if size_str else None language_raw = result.get("language") - language_code = _map_language(language_raw) if language_raw else None + language_code = _map_language(language_raw) if language_raw else "en" bitrate = result.get("bitrate") bitrate_kbps = _parse_bitrate_to_kbps(bitrate) diff --git a/shelfmark/release_sources/direct_download.py b/shelfmark/release_sources/direct_download.py index 306e064..2d88900 100644 --- a/shelfmark/release_sources/direct_download.py +++ b/shelfmark/release_sources/direct_download.py @@ -3,9 +3,12 @@ import itertools import json import re +import threading import time +import unicodedata from dataclasses import replace from http import HTTPStatus +from pathlib import Path from typing import TYPE_CHECKING, ClassVar, NoReturn, TypedDict from urllib.parse import quote, urlparse @@ -197,6 +200,49 @@ _SOURCE_FAILURE_THRESHOLD = 4 _MIN_VALID_FILE_SIZE = 10 * 1024 _AA_COUNTDOWN_MAX_SECONDS = 300 +# --- Distant-path language detection --- + +_DISTANT_PATH_EXTENSIONS = ( + "epub", + "mobi", + "azw3", + "fb2", + "djvu", + "cbz", + "cbr", + "pdf", + "zip", + "rar", + "m4b", + "mp3", +) +_DISTANT_PATH_EXTENSION_PATTERN = "|".join(re.escape(e) for e in _DISTANT_PATH_EXTENSIONS) +_DISTANT_PATH_PATTERN = re.compile( + rf"(?:[A-Za-z0-9._-]+/)?[A-Za-z]:(?:\\|/)[^\n\r<>\"]+?\.(?:{_DISTANT_PATH_EXTENSION_PATTERN})\b", + re.IGNORECASE, +) +_DISTANT_PATH_FALLBACK_PATTERN = re.compile( + r"(?:[A-Za-z0-9._-]+/)?[A-Za-z]:(?:\\|/)[^\n\r<>\"]+", + re.IGNORECASE, +) +_BRACKETED_LANGUAGE_CODE_PATTERN = re.compile( + r"\[(?:bd[\s._-]*)?([A-Za-z]{2,3})\]", + re.IGNORECASE, +) +_KEYED_LANGUAGE_CODE_PATTERN = re.compile( + r"\b(?:bd|lang(?:uage)?)\s*[:._-]?\s*([A-Za-z]{2,3})\b", + re.IGNORECASE, +) +_LANGUAGE_CODE_TOKEN_PATTERN = re.compile( + r"(?:^|[\s_./\\\-\[(])([A-Za-z]{2,3})(?=$|[\s_./\\\-)\]])" +) +_LANGUAGE_NAME_TOKEN_PATTERN = re.compile(r"[a-z]{4,}(?:-[a-z0-9]+)?") +_LANGUAGE_ALIAS_TO_CODE: dict[str, str] | None = None +_LANGUAGE_ALIAS_LOCK = threading.Lock() +_LANGUAGE_PLACEHOLDERS = frozenset({"", "-", "--", "unknown", "unk", "n/a", "na"}) +# Short codes that appear in common words — require bracket/key context to accept +_AMBIGUOUS_SHORT_LANGUAGE_CODES = frozenset({"de", "en", "it", "la", "no", "or", "is", "in"}) + # Sources that require Cloudflare bypass _CF_BYPASS_REQUIRED = frozenset({"aa-slow-nowait", "aa-slow-wait", "zlib", "welib"}) @@ -204,6 +250,189 @@ _CF_BYPASS_REQUIRED = frozenset({"aa-slow-nowait", "aa-slow-wait", "zlib", "weli _AA_PAGE_SOURCES = frozenset({"aa-slow-nowait", "aa-slow-wait"}) +def _is_language_from_path_enabled() -> bool: + return bool(config.get("DIRECT_DOWNLOAD_LANGUAGE_FROM_PATH", False)) + + +def _normalize_language_token(value: str) -> str: + normalized = value.strip().lower() + for dash in ("‑", "–", "—", "−"): + normalized = normalized.replace(dash, "-") + return normalized + + +def _fold_text(value: str) -> str: + normalized = unicodedata.normalize("NFKD", value) + return "".join(c for c in normalized if not unicodedata.combining(c)).lower() + + +def _language_alias_to_code() -> dict[str, str]: + """Build alias→code map from bundled language metadata (lazy, cached).""" + global _LANGUAGE_ALIAS_TO_CODE + cached = _LANGUAGE_ALIAS_TO_CODE + if cached is not None: + return cached + + with _LANGUAGE_ALIAS_LOCK: + cached = _LANGUAGE_ALIAS_TO_CODE + if cached is not None: + return cached + + mapping: dict[str, str] = {} + data_path = Path(__file__).resolve().parents[2] / "data" / "book-languages.json" + + try: + raw = json.loads(data_path.read_text(encoding="utf-8")) + except OSError, ValueError, TypeError: + _LANGUAGE_ALIAS_TO_CODE = {} + return _LANGUAGE_ALIAS_TO_CODE + + if not isinstance(raw, list): + _LANGUAGE_ALIAS_TO_CODE = {} + return _LANGUAGE_ALIAS_TO_CODE + + for item in raw: + if not isinstance(item, dict): + continue + code = _normalize_language_token(str(item.get("code", ""))) + name = _normalize_language_token(str(item.get("language", ""))) + if not code: + continue + mapping.setdefault(code, code) + mapping.setdefault(code.replace("-", "_"), code) + mapping.setdefault(code.split("-")[0], code) + mapping.setdefault(_fold_text(code), code) + if name: + mapping.setdefault(name, code) + mapping.setdefault(_fold_text(name), code) + + _LANGUAGE_ALIAS_TO_CODE = mapping + return _LANGUAGE_ALIAS_TO_CODE + + +def _extract_distant_path(row: Tag, *, enabled: bool) -> str | None: + """Extract the Windows-style file path from an AA search result row.""" + if not enabled: + return None + + def _normalize_candidate(text: str) -> str: + normalized = re.sub(r"\s*([\\/])\s*", r"\1", text) + normalized = re.sub(r":\s*([\\/])", r":\1", normalized) + return re.sub( + r"\s+\.(epub|mobi|azw3|fb2|djvu|cbz|cbr|pdf|zip|rar|m4b|mp3)\b", + r".\1", + normalized, + flags=re.IGNORECASE, + ) + + candidates = [row.get_text(" ", strip=True)] + for cell in row.find_all("td"): + cell_text = cell.get_text(" ", strip=True) + if cell_text: + candidates.append(cell_text) + + best: str | None = None + for text in candidates: + for match in _DISTANT_PATH_PATTERN.findall(_normalize_candidate(text)): + candidate = match.strip().rstrip(".,;") + if best is None or len(candidate) > len(best): + best = candidate + + if best is not None: + return best + + for text in candidates: + for match in _DISTANT_PATH_FALLBACK_PATTERN.findall(_normalize_candidate(text)): + candidate = match.strip().rstrip(".,;") + if best is None or len(candidate) > len(best): + best = candidate + + return best + + +def _detect_language_from_distant_path(path: str | None) -> str | None: + """Infer a language code from distant-path tags such as [BD FR] or [Fr].""" + if not path: + return None + + aliases = _language_alias_to_code() + if not aliases: + return None + + folded_path = _fold_text(path) + strong_candidates: list[str] = [] + + for code in _BRACKETED_LANGUAGE_CODE_PATTERN.findall(path): + normalized = _normalize_language_token(code) + if normalized in aliases: + strong_candidates.append(aliases[normalized]) + + for code in _KEYED_LANGUAGE_CODE_PATTERN.findall(path): + normalized = _normalize_language_token(code) + if normalized in aliases: + strong_candidates.append(aliases[normalized]) + + non_ambiguous = [c for c in strong_candidates if c not in _AMBIGUOUS_SHORT_LANGUAGE_CODES] + if non_ambiguous: + return non_ambiguous[0] + + for token in _LANGUAGE_NAME_TOKEN_PATTERN.findall(folded_path): + normalized = _normalize_language_token(token) + if normalized in aliases: + candidate = aliases[normalized] + if candidate not in _AMBIGUOUS_SHORT_LANGUAGE_CODES: + return candidate + + if strong_candidates: + return strong_candidates[0] + + for code in _LANGUAGE_CODE_TOKEN_PATTERN.findall(path): + normalized = _normalize_language_token(code) + if normalized in _AMBIGUOUS_SHORT_LANGUAGE_CODES: + continue + if normalized in aliases: + return aliases[normalized] + + return None + + +def _is_missing_or_placeholder_language(language: str | None) -> bool: + if language is None: + return True + return _normalize_language_token(language) in _LANGUAGE_PLACEHOLDERS + + +def _normalize_requested_languages(languages: list[str] | None) -> set[str]: + if not languages: + return set() + aliases = _language_alias_to_code() + normalized: set[str] = set() + for value in languages: + token = _normalize_language_token(str(value)) + if not token or token == "all": # noqa: S105 - "all" is a language sentinel + continue + normalized.add(aliases.get(token, token)) + return normalized + + +def _book_matches_requested_languages(book_language: str | None, requested: set[str]) -> bool: + """Return True when a book's language matches the requested filter. + + Books with unknown/missing language always pass — the server-side &lang= filter + already narrowed the result set, so dropping unlabelled rows hides valid results. + """ + if not requested: + return True + if not book_language: + return True + aliases = _language_alias_to_code() + normalized_book = aliases.get( + _normalize_language_token(book_language), + _normalize_language_token(book_language), + ) + return normalized_book in requested + + def _is_configured_zlib_link(url: str) -> bool: """Return True when a URL belongs to a configured Z-Library mirror.""" from shelfmark.core.mirrors import get_zlib_cookie_domains @@ -360,9 +589,17 @@ def search_books(query: str, filters: SearchFilters) -> list[BrowseRecord]: filters_query = "" - for value in filters.lang or []: - if value and value != "all": - filters_query += f"&lang={quote(value)}" + path_language_enabled = _is_language_from_path_enabled() + requested_langs = _normalize_requested_languages(filters.lang) + + # When path-language inference is on and a language is requested, skip the + # server-side &lang= filter: lgli files often have no AA language metadata + # and would be excluded before we can infer language from the distant path. + # Local filtering below handles the narrowing instead. + if not (path_language_enabled and requested_langs): + for value in filters.lang or []: + if value and value != "all": + filters_query += f"&lang={quote(value)}" if filters.sort and filters.sort != "relevance": filters_query += f"&sort={quote(filters.sort)}" @@ -417,6 +654,9 @@ def search_books(query: str, filters: SearchFilters) -> list[BrowseRecord]: if book: books.append(book) + if path_language_enabled and requested_langs: + books = [b for b in books if _book_matches_requested_languages(b.language, requested_langs)] + supported_formats = _get_supported_formats() books.sort( @@ -470,10 +710,23 @@ def _parse_search_result_row(row: Tag) -> BrowseRecord | None: if not record_id: return None + path_language_enabled = _is_language_from_path_enabled() + distant_path = _extract_distant_path(row, enabled=path_language_enabled) + preview_img = cells[0].find("img") preview = _get_attr(preview_img, "src") if isinstance(preview_img, Tag) else None - title = _first_stripped_text(cells[1].find("span")) + title_span = cells[1].find("span") + if isinstance(title_span, Tag): + # AA nests related-edition spans inside the main title span — take only direct text. + direct = " ".join( + str(c).strip() + for c in title_span.children + if isinstance(c, NavigableString) and str(c).strip() + ).strip() + title = direct or _first_stripped_text(title_span) + else: + title = None author = _first_stripped_text(cells[2].find("span")) publisher = _first_stripped_text(cells[3].find("span")) year = _first_stripped_text(cells[4].find("span")) @@ -482,18 +735,19 @@ def _parse_search_result_row(row: Tag) -> BrowseRecord | None: file_format = _first_stripped_text(cells[9].find("span")) size = _first_stripped_text(cells[10].find("span")) - if ( - title is None - or author is None - or publisher is None - or year is None - or language is None - or content is None - or file_format is None - or size is None - ): + # Only title and format are truly required — lgli rows often have sparse metadata + if title is None or file_format is None: return None + # Skip entries where the title is a catalog format descriptor, not a real title + # e.g. "Book/Online Audio", "Print book" — lgli metadata pollution + if title and "/" in title and len(title) < 40 and not any(c.isdigit() for c in title): + return None + + if path_language_enabled and _is_missing_or_placeholder_language(language): + detected = _detect_language_from_distant_path(distant_path) + language = detected or "unknown" + return BrowseRecord( id=record_id, title=title, @@ -506,6 +760,7 @@ def _parse_search_result_row(row: Tag) -> BrowseRecord | None: content=content.lower() if content else None, format=file_format.lower() if file_format else None, size=size, + download_path=distant_path, ) except (AttributeError, IndexError, KeyError, TypeError) as e: logger.error_trace(f"Error parsing search result row: {e}") @@ -1228,6 +1483,9 @@ def _get_download_url( return downloader.get_absolute_url(link, url) +_AA_COUNTDOWN_MAX_RETRIES = 3 + + def _extract_slow_download_url( soup: BeautifulSoup, link: str, @@ -1236,6 +1494,7 @@ def _extract_slow_download_url( status_callback: Callable[[str, str | None], None] | None, selector: network.AAMirrorSelector, source_context: str | None = None, + _countdown_attempts: int = 0, ) -> str: """Extract download URL from AA slow download pages.""" html_str = str(soup) @@ -1300,6 +1559,14 @@ def _extract_slow_download_url( countdown_seconds = _extract_countdown_seconds(soup, html_str) if countdown_seconds > 0: + if _countdown_attempts >= _AA_COUNTDOWN_MAX_RETRIES: + logger.warning( + "Countdown retry limit (%s) reached for %s, giving up", + _AA_COUNTDOWN_MAX_RETRIES, + title, + ) + return "" + max_countdown_seconds = 600 sleep_time = min(countdown_seconds, max_countdown_seconds) if countdown_seconds > max_countdown_seconds: @@ -1308,7 +1575,13 @@ def _extract_slow_download_url( countdown_seconds, max_countdown_seconds, ) - logger.info("AA waitlist: %ss for %s", sleep_time, title) + logger.info( + "AA waitlist: %ss for %s (attempt %s/%s)", + sleep_time, + title, + _countdown_attempts + 1, + _AA_COUNTDOWN_MAX_RETRIES, + ) # Live countdown with status updates for remaining in range(sleep_time, 0, -1): @@ -1329,8 +1602,21 @@ def _extract_slow_download_url( if status_callback and source_context: status_callback("resolving", f"{source_context} - Fetching") - return _get_download_url( - link, title, cancel_flag, status_callback, selector, source_context + html = downloader.html_get_page( + link, selector=selector, cancel_flag=cancel_flag, status_callback=status_callback + ) + if not html: + return "" + new_soup = BeautifulSoup(_html_response_text(html), "html.parser") + return _extract_slow_download_url( + new_soup, + link, + title, + cancel_flag, + status_callback, + selector, + source_context, + _countdown_attempts + 1, ) link_texts = [a.get_text(strip=True)[:50] for a in soup.find_all("a", href=True)[:10]] @@ -1645,7 +1931,6 @@ class DirectDownloadSource(ReleaseSource): except Exception: logger.exception("Search error") - logger.info("Found %s releases via title+author", len(all_results)) return [_browse_record_to_release(record) for record in all_results] def is_available(self) -> bool: diff --git a/src/frontend/src/App.tsx b/src/frontend/src/App.tsx index 367d440..a1990ee 100644 --- a/src/frontend/src/App.tsx +++ b/src/frontend/src/App.tsx @@ -1491,7 +1491,7 @@ function App() { const handleCancel = async (id: string) => { try { await cancelDownload(id); - await fetchStatus(); + await Promise.all([fetchStatus(), refreshActivitySnapshot()]); } catch (error) { console.error('Cancel failed:', error); showToast('Failed to cancel/clear download', 'error'); diff --git a/tests/audiobookbay/test_scraper.py b/tests/audiobookbay/test_scraper.py index c3370e8..074b1bc 100644 --- a/tests/audiobookbay/test_scraper.py +++ b/tests/audiobookbay/test_scraper.py @@ -58,7 +58,7 @@ SAMPLE_DETAIL_HTML = """ - + @@ -83,7 +83,7 @@ DETAIL_HTML_NO_TRACKERS = """
Info HashABC123DEF456GHI789JKL012MNO345PQR678STUABC123DEF456789012345678901234567890ABCD
Tracker 1
- +
Info HashABC123DEF456GHI789JKL012MNO345PQR678STUABC123DEF456789012345678901234567890ABCD
@@ -360,7 +360,7 @@ class TestExtractMagnetLink: assert magnet_link is not None assert magnet_link.startswith("magnet:?xt=urn:btih:") - assert "ABC123DEF456GHI789JKL012MNO345PQR678STU" in magnet_link + assert "ABC123DEF456789012345678901234567890ABCD" in magnet_link assert "udp%3A//tracker.openbittorrent.com%3A80" in magnet_link assert "http%3A//tracker.example.com%3A8080" in magnet_link assert mock_html_get.call_count == 2 @@ -395,7 +395,7 @@ class TestExtractMagnetLink: assert magnet_link is not None assert magnet_link.startswith("magnet:?xt=urn:btih:") - assert "ABC123DEF456GHI789JKL012MNO345PQR678STU" in magnet_link + assert "ABC123DEF456789012345678901234567890ABCD" in magnet_link # Should contain default trackers assert "udp%3A//tracker.openbittorrent.com%3A80" in magnet_link @@ -441,7 +441,7 @@ class TestExtractMagnetLink: - +
Info HashABC 123 DEF 456ABC 123 DEF 456 789 012 345 678 901 234 567 890 ABC D
diff --git a/tests/config/test_download_settings.py b/tests/config/test_download_settings.py index 556b1a1..f294371 100644 --- a/tests/config/test_download_settings.py +++ b/tests/config/test_download_settings.py @@ -368,6 +368,20 @@ def test_download_source_settings_include_direct_download_toggle(): assert "Add your own mirror URLs" in toggle_field.description +def test_download_source_settings_include_distant_path_language_toggle(): + from shelfmark.config.settings import download_source_settings + + fields = download_source_settings() + toggle_field = next( + field + for field in fields + if getattr(field, "key", None) == "DIRECT_DOWNLOAD_LANGUAGE_FROM_PATH" + ) + + assert toggle_field.default is False + assert "distant path" in toggle_field.description.lower() + + def test_fast_source_options_lock_entries_without_mirror_or_donator_requirements(monkeypatch): from shelfmark.config.settings import _get_fast_source_options diff --git a/tests/core/test_notifications.py b/tests/core/test_notifications.py index 50d084c..95edede 100644 --- a/tests/core/test_notifications.py +++ b/tests/core/test_notifications.py @@ -597,3 +597,72 @@ def test_resolve_user_routes_expands_multiselect_event_rows(monkeypatch): {"event": "request_fulfilled", "url": "ntfys://ntfy.sh/user-main"}, {"event": "all", "url": "ntfys://ntfy.sh/user-all"}, ] + + +class TestAppriseProxyEnv: + """Regression tests for issue #956 — proxy settings ignored for notifications.""" + + def _patch_config(self, monkeypatch, values): + from shelfmark.core import config as config_module + + def _fake_get(key, default="", **_kwargs): + return values.get(key, default) + + monkeypatch.setattr(config_module.config, "get", _fake_get) + + def test_http_proxy_mode_injects_proxy_env(self, monkeypatch): + self._patch_config( + monkeypatch, + { + "PROXY_MODE": "http", + "HTTP_PROXY": "http://proxy.example.com:8080", + "HTTPS_PROXY": "", + "NO_PROXY": "", + }, + ) + monkeypatch.delenv("HTTP_PROXY", raising=False) + monkeypatch.delenv("HTTPS_PROXY", raising=False) + + result = notifications_module._apprise_proxy_env() + + assert result["HTTP_PROXY"] == "http://proxy.example.com:8080" + assert result["HTTPS_PROXY"] == "http://proxy.example.com:8080" + + def test_socks5_proxy_mode_injects_socks_env(self, monkeypatch): + self._patch_config( + monkeypatch, + { + "PROXY_MODE": "socks5", + "SOCKS5_PROXY": "socks5://proxy.example.com:1080", + "NO_PROXY": "", + }, + ) + monkeypatch.delenv("HTTP_PROXY", raising=False) + monkeypatch.delenv("HTTPS_PROXY", raising=False) + + result = notifications_module._apprise_proxy_env() + + assert result["HTTP_PROXY"] == "socks5://proxy.example.com:1080" + assert result["HTTPS_PROXY"] == "socks5://proxy.example.com:1080" + + def test_no_proxy_mode_returns_empty_dict(self, monkeypatch): + self._patch_config(monkeypatch, {"PROXY_MODE": ""}) + + result = notifications_module._apprise_proxy_env() + + assert result == {} + + def test_does_not_override_already_set_env_vars(self, monkeypatch): + self._patch_config( + monkeypatch, + { + "PROXY_MODE": "http", + "HTTP_PROXY": "http://new-proxy.example.com:8080", + "NO_PROXY": "", + }, + ) + monkeypatch.setenv("HTTP_PROXY", "http://existing-proxy.example.com:3128") + + result = notifications_module._apprise_proxy_env() + + assert "HTTP_PROXY" not in result diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index 8869a7e..e901820 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -5,6 +5,31 @@ import types import xmlrpc.client as stdlib_xmlrpc_client from shelfmark.core import utils +from shelfmark.core.utils import normalize_http_url + + +class TestNormalizeHttpUrlQueryStripping: + """Regression tests for issue #999 — mirror URLs with query params/fragments.""" + + def test_strips_query_string_from_configured_url(self) -> None: + result = normalize_http_url("http://mirror.example.com/search?token=abc123") + assert result == "http://mirror.example.com/search" + + def test_strips_fragment_from_configured_url(self) -> None: + result = normalize_http_url("http://mirror.example.com/search#section") + assert result == "http://mirror.example.com/search" + + def test_strips_both_query_and_fragment(self) -> None: + result = normalize_http_url("https://mirror.example.com/path?key=val&x=1#top") + assert result == "https://mirror.example.com/path" + + def test_plain_url_unchanged(self) -> None: + result = normalize_http_url("http://mirror.example.com/search") + assert result == "http://mirror.example.com/search" + + def test_trailing_slash_still_stripped_after_query_removal(self) -> None: + result = normalize_http_url("http://mirror.example.com/?token=x") + assert result == "http://mirror.example.com" def test_get_hardened_xmlrpc_client_tolerates_patch_runtime_error(monkeypatch) -> None: diff --git a/tests/direct_download/test_search_queries.py b/tests/direct_download/test_search_queries.py index 8bd3652..fe106e5 100644 --- a/tests/direct_download/test_search_queries.py +++ b/tests/direct_download/test_search_queries.py @@ -182,3 +182,170 @@ class TestDirectDownloadSearchQueries: ("mistborn custom query", ["en"], ["epub"]), ("mistborn custom query", None, ["epub"]), ] + + +# --- Distant-path language detection tests --- + + +def _patch_path_language(monkeypatch, enabled: bool = True): + import shelfmark.release_sources.direct_download as dd + + original_get = dd.config.get + + def _fake_get(key: str, default=None, user_id=None): + del user_id + if key == "DIRECT_DOWNLOAD_LANGUAGE_FROM_PATH": + return enabled + return original_get(key, default) + + monkeypatch.setattr(dd.config, "get", _fake_get) + return dd + + +def _row_from_html(html: str): + from bs4 import BeautifulSoup + + return BeautifulSoup(html, "html.parser").find("tr") + + +def _make_row(distant_path: str, language: str = "", record_id: str = "rec-1") -> str: + return rf""" + + + A Book Title + Author Name + Publisher + 2024 + - + - + {language} + fiction + epub + 1 mb + {distant_path} + + """ + + +def test_detects_bracketed_language_from_distant_path(monkeypatch): + dd = _patch_path_language(monkeypatch) + row = _row_from_html(_make_row(r"lgli/N:\comics1\emule\2021.08.01\[BD FR] Scrameustache.cbz")) + record = dd._parse_search_result_row(row) + assert record is not None + assert record.language == "fr" + assert record.download_path is not None + + +def test_detects_mixed_case_bracketed_language(monkeypatch): + dd = _patch_path_language(monkeypatch) + row = _row_from_html(_make_row(r"lgli/V:\comics\_0DAY3\[Fr]\BDs [Fr]\!Pdf\S\Book.pdf")) + record = dd._parse_search_result_row(row) + assert record is not None + assert record.language == "fr" + + +def test_overrides_unknown_language_with_path_detection(monkeypatch): + dd = _patch_path_language(monkeypatch) + row = _row_from_html(_make_row(r"lgli/V:\comics\_0DAY3\[Fr]\Book.pdf", language="unknown")) + record = dd._parse_search_result_row(row) + assert record is not None + assert record.language == "fr" + + +def test_sets_unknown_when_path_has_no_language(monkeypatch): + dd = _patch_path_language(monkeypatch) + row = _row_from_html(_make_row(r"lgli/N:\comics1\emule\NoLanguageHere.epub")) + record = dd._parse_search_result_row(row) + assert record is not None + assert record.language == "unknown" + + +def test_avoids_en_false_positive_when_french_present(monkeypatch): + dd = _patch_path_language(monkeypatch) + row = _row_from_html( + _make_row(r"lgli/V:\comics\_0DAY2\Stripboeken Frans - BD en Français\[BD Fr] Book.cbr") + ) + record = dd._parse_search_result_row(row) + assert record is not None + assert record.language == "fr" + + +def test_keeps_row_with_missing_language_when_toggle_disabled(monkeypatch): + dd = _patch_path_language(monkeypatch, enabled=False) + row = _row_from_html(_make_row(r"lgli/N:\comics1\[BD FR] Scrameustache.cbz")) + record = dd._parse_search_result_row(row) + assert record is not None + assert record.language is None + + +def test_keeps_sparse_lgli_row(monkeypatch): + """lgli rows missing author/publisher/year must not be dropped.""" + dd = _patch_path_language(monkeypatch) + html = r""" + + + Gos - 1978 - Le scrameustache T06.cbz + + Comic book + cbz + 17.4MB + lgli/N:\comics1\ftp\[BD.FR] French Comics\Book.cbz + + """ + record = dd._parse_search_result_row(_row_from_html(html)) + assert record is not None + assert record.id == "sparse-1" + assert record.language == "fr" + assert record.author is None + + +def test_search_books_filters_locally_when_path_language_enabled(monkeypatch): + dd = _patch_path_language(monkeypatch) + monkeypatch.setattr(dd.network, "get_aa_base_url", lambda: "https://mirror.example") + monkeypatch.setattr(dd.network, "AAMirrorSelector", lambda: object()) + + captured_url: dict[str, str] = {} + + def _fake_html_get_page(url: str, selector, allow_bypasser_fallback=False): + del selector, allow_bypasser_fallback + captured_url["url"] = url + return r""" + + + + + + + + + + + + + + + + + + + +
Livre FRAuteurEditeur2025--fictionpdf2 mblgli/V:\comics\_0DAY3\[Fr]\Book FR.pdf
Book ENAuthorPublisher2025--fictionpdf2 mblgli/V:\comics\_0DAY3\[En]\Book EN.pdf
+ """ + + monkeypatch.setattr(dd.downloader, "html_get_page", _fake_html_get_page) + + records = dd.search_books("demo", SearchFilters(lang=["fr"], format=["pdf"])) + + assert "&lang=" not in captured_url["url"] + assert len(records) == 1 + assert records[0].id == "rec-fr" + assert records[0].language == "fr" + + +def test_book_matches_requested_languages_logic(): + import shelfmark.release_sources.direct_download as dd + + assert dd._book_matches_requested_languages(None, {"fr"}) is True + assert dd._book_matches_requested_languages(None, set()) is True + assert dd._book_matches_requested_languages("en", {"fr"}) is False + assert dd._book_matches_requested_languages("fr", {"fr"}) is True diff --git a/tests/prowlarr/test_rtorrent_client.py b/tests/prowlarr/test_rtorrent_client.py index b4160ff..eee954c 100644 --- a/tests/prowlarr/test_rtorrent_client.py +++ b/tests/prowlarr/test_rtorrent_client.py @@ -349,6 +349,105 @@ class TestRTorrentClientAddDownload: assert "RPC Error" in str(excinfo.value) +class TestRTorrentClientAudiobookLabel: + """Regression tests for issue #1025 — rTorrent audiobook label selection.""" + + def _make_client(self, monkeypatch, config_values): + monkeypatch.setattr( + "shelfmark.download.clients.rtorrent.config.get", + make_config_getter(config_values), + ) + mock_rpc = MagicMock() + mock_xmlrpc = create_mock_xmlrpc_module() + mock_xmlrpc.ServerProxy.return_value = mock_rpc + + mock_torrent_info = MagicMock() + mock_torrent_info.torrent_data = None + mock_torrent_info.magnet_url = "magnet:?xt=urn:btih:abc123" + mock_torrent_info.info_hash = "abc123" + mock_torrent_info.is_magnet = True + + return mock_rpc, mock_xmlrpc, mock_torrent_info + + def test_uses_audiobook_label_when_content_type_is_audiobook(self, monkeypatch): + config_values = { + "RTORRENT_URL": "http://localhost:8080/RPC2", + "RTORRENT_LABEL": "books", + "RTORRENT_AUDIOBOOK_LABEL": "audiobooks", + "RTORRENT_DOWNLOAD_DIR": "/downloads", + } + mock_rpc, mock_xmlrpc, mock_torrent_info = self._make_client(monkeypatch, config_values) + + with patch.dict("sys.modules", {"xmlrpc.client": mock_xmlrpc}): + with patch( + "shelfmark.download.clients.torrent_utils.extract_torrent_info", + return_value=mock_torrent_info, + ): + if "shelfmark.download.clients.rtorrent" in sys.modules: + del sys.modules["shelfmark.download.clients.rtorrent"] + from shelfmark.download.clients.rtorrent import RTorrentClient + + client = RTorrentClient() + client.add_download( + "magnet:?xt=urn:btih:abc123", "Test Audiobook", content_type="audiobook" + ) + + args = mock_rpc.load.start.call_args[0] + assert "d.custom1.set=audiobooks" in args[2] + assert "d.custom1.set=books" not in args[2] + + def test_falls_back_to_book_label_when_audiobook_label_not_set(self, monkeypatch): + config_values = { + "RTORRENT_URL": "http://localhost:8080/RPC2", + "RTORRENT_LABEL": "books", + "RTORRENT_AUDIOBOOK_LABEL": "", + "RTORRENT_DOWNLOAD_DIR": "/downloads", + } + mock_rpc, mock_xmlrpc, mock_torrent_info = self._make_client(monkeypatch, config_values) + + with patch.dict("sys.modules", {"xmlrpc.client": mock_xmlrpc}): + with patch( + "shelfmark.download.clients.torrent_utils.extract_torrent_info", + return_value=mock_torrent_info, + ): + if "shelfmark.download.clients.rtorrent" in sys.modules: + del sys.modules["shelfmark.download.clients.rtorrent"] + from shelfmark.download.clients.rtorrent import RTorrentClient + + client = RTorrentClient() + client.add_download( + "magnet:?xt=urn:btih:abc123", "Test Audiobook", content_type="audiobook" + ) + + args = mock_rpc.load.start.call_args[0] + assert "d.custom1.set=books" in args[2] + + def test_uses_book_label_for_non_audiobook_content(self, monkeypatch): + config_values = { + "RTORRENT_URL": "http://localhost:8080/RPC2", + "RTORRENT_LABEL": "books", + "RTORRENT_AUDIOBOOK_LABEL": "audiobooks", + "RTORRENT_DOWNLOAD_DIR": "/downloads", + } + mock_rpc, mock_xmlrpc, mock_torrent_info = self._make_client(monkeypatch, config_values) + + with patch.dict("sys.modules", {"xmlrpc.client": mock_xmlrpc}): + with patch( + "shelfmark.download.clients.torrent_utils.extract_torrent_info", + return_value=mock_torrent_info, + ): + if "shelfmark.download.clients.rtorrent" in sys.modules: + del sys.modules["shelfmark.download.clients.rtorrent"] + from shelfmark.download.clients.rtorrent import RTorrentClient + + client = RTorrentClient() + client.add_download("magnet:?xt=urn:btih:abc123", "Test Book") + + args = mock_rpc.load.start.call_args[0] + assert "d.custom1.set=books" in args[2] + assert "d.custom1.set=audiobooks" not in args[2] + + class TestRTorrentClientGetStatus: """Tests for RTorrentClient.get_status().""" diff --git a/tests/prowlarr/test_transmission_client.py b/tests/prowlarr/test_transmission_client.py index d9ae897..3a80fda 100644 --- a/tests/prowlarr/test_transmission_client.py +++ b/tests/prowlarr/test_transmission_client.py @@ -383,6 +383,42 @@ class TestTransmissionClientGetStatus: assert status.complete is True assert "/downloads/Test Torrent" in status.file_path + def test_get_status_stopped_treated_as_complete(self, monkeypatch): + """Regression: torrents stopped after seeding ratio/idle limit must show complete.""" + config_values = { + "TRANSMISSION_URL": "http://localhost:9091", + "TRANSMISSION_USERNAME": "admin", + "TRANSMISSION_PASSWORD": "password", + "TRANSMISSION_CATEGORY": "test", + } + monkeypatch.setattr( + "shelfmark.download.clients.transmission.config.get", + make_config_getter(config_values), + ) + + mock_torrent = MockTorrent( + percent_done=1.0, + status="stopped", + download_dir="/downloads", + ) + mock_client_instance = MagicMock() + mock_client_instance.get_torrent.return_value = mock_torrent + + mock_transmission_rpc = create_mock_transmission_rpc_module() + mock_transmission_rpc.Client.return_value = mock_client_instance + + with patch.dict("sys.modules", {"transmission_rpc": mock_transmission_rpc}): + if "shelfmark.download.clients.transmission" in sys.modules: + del sys.modules["shelfmark.download.clients.transmission"] + + from shelfmark.download.clients.transmission import TransmissionClient + + client = TransmissionClient() + status = client.get_status("abc123") + + assert status.complete is True + assert status.progress == 100.0 + def test_get_status_not_found(self, monkeypatch): """Test status for non-existent torrent.""" config_values = { diff --git a/tor.sh b/tor.sh index a5c223a..60df653 100644 --- a/tor.sh +++ b/tor.sh @@ -304,15 +304,20 @@ rotation_monitor() { echo "[*] Circuit rotation #$rotation_count at $(date)" # Test DNS resolution through Tor + dns_ok=true if ! timeout 10 nslookup google.com 127.0.0.1 > /dev/null 2>&1; then echo "[!] $(date): DNS resolution slow/failing, rotating circuits..." pkill -HUP tor || true sleep 10 + dns_ok=false fi # Proactively rotate circuits every 5 minutes to keep them fresh - echo "[*] $(date): Proactive circuit rotation..." - pkill -HUP tor || true + # Skip if we already rotated for DNS failure this cycle + if $dns_ok; then + echo "[*] $(date): Proactive circuit rotation..." + pkill -HUP tor || true + fi # Verify Tor is still responsive after rotation sleep 5