From 2b5983d2018e6463964da24472dbfeb5724a3930 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 22 Dec 2025 20:07:36 +0000 Subject: [PATCH] Settings UI enhancements - Source priority controls, default sort, caching controls (#353) Also: Adjusted Welib/Zlib/Libgen URLs to be dynamically generated via hash. Fixed Zlib downloads and user agent flow. AA URLS are now fetched lazily if another source is prioritised. --- .../bypass/internal_bypasser.py | 46 +- cwa_book_downloader/config/env.py | 10 +- cwa_book_downloader/config/settings.py | 283 +++++++++-- cwa_book_downloader/core/cache.py | 35 +- cwa_book_downloader/core/image_cache.py | 120 ++++- cwa_book_downloader/core/models.py | 58 ++- cwa_book_downloader/core/settings_registry.py | 38 +- cwa_book_downloader/download/http.py | 55 ++- cwa_book_downloader/main.py | 6 + .../metadata_providers/__init__.py | 25 + .../metadata_providers/hardcover.py | 29 +- .../metadata_providers/openlibrary.py | 27 +- .../release_sources/direct_download.py | 462 +++++++++++------- src/frontend/src/App.tsx | 28 +- .../src/components/resultsViews/ListView.tsx | 8 +- .../components/settings/SettingsContent.tsx | 17 +- .../src/components/settings/SettingsModal.tsx | 15 +- .../settings/fields/OrderableListField.tsx | 326 ++++++++++++ .../src/components/settings/fields/index.ts | 1 + src/frontend/src/types/index.ts | 4 + src/frontend/src/types/settings.ts | 23 + tor.sh | 111 ++--- 22 files changed, 1321 insertions(+), 406 deletions(-) create mode 100644 src/frontend/src/components/settings/fields/OrderableListField.tsx diff --git a/cwa_book_downloader/bypass/internal_bypasser.py b/cwa_book_downloader/bypass/internal_bypasser.py index 95ed356..b0d74e8 100644 --- a/cwa_book_downloader/bypass/internal_bypasser.py +++ b/cwa_book_downloader/bypass/internal_bypasser.py @@ -74,6 +74,9 @@ _dns_rotation_lock = threading.Lock() _cf_cookies: dict[str, dict] = {} _cf_cookies_lock = threading.Lock() +# User-Agent storage - Cloudflare ties cf_clearance to the UA that solved the challenge +_cf_user_agents: dict[str, str] = {} + # Protection cookie names we care about (Cloudflare and DDoS-Guard) CF_COOKIE_NAMES = {'cf_clearance', '__cf_bm', 'cf_chl_2', 'cf_chl_prog'} DDG_COOKIE_NAMES = {'__ddg1_', '__ddg2_', '__ddg5_', '__ddg8_', '__ddg9_', '__ddg10_', '__ddgid_', '__ddgmark_', 'ddg_last_challenge'} @@ -118,8 +121,20 @@ def _extract_cookies_from_driver(driver, url: str) -> None: } if cookies_found: + # Extract User-Agent - Cloudflare ties cf_clearance to the UA + try: + user_agent = driver.execute_script("return navigator.userAgent") + except Exception: + user_agent = None + with _cf_cookies_lock: _cf_cookies[base_domain] = cookies_found + if user_agent: + _cf_user_agents[base_domain] = user_agent + logger.debug(f"Stored UA for {base_domain}: {user_agent[:60]}...") + else: + logger.debug(f"No UA captured for {base_domain}") + cookie_type = "all" if extract_all else "protection" logger.debug(f"Extracted {len(cookies_found)} {cookie_type} cookies for {base_domain}") @@ -158,14 +173,25 @@ def has_valid_cf_cookies(domain: str) -> bool: return bool(get_cf_cookies_for_domain(domain)) +def get_cf_user_agent_for_domain(domain: str) -> Optional[str]: + """Get the User-Agent that was used during bypass for a domain.""" + if not domain: + return None + base_domain = '.'.join(domain.split('.')[-2:]) if '.' in domain else domain + with _cf_cookies_lock: + return _cf_user_agents.get(base_domain) + + def clear_cf_cookies(domain: str = None) -> None: - """Clear stored Cloudflare cookies. If domain is None, clear all.""" + """Clear stored Cloudflare cookies and User-Agent. If domain is None, clear all.""" with _cf_cookies_lock: if domain: base_domain = '.'.join(domain.split('.')[-2:]) if '.' in domain else domain _cf_cookies.pop(base_domain, None) + _cf_user_agents.pop(base_domain, None) else: _cf_cookies.clear() + _cf_user_agents.clear() def _reset_pyautogui_display_state(): @@ -484,7 +510,7 @@ def _bypass(sb, max_retries: Optional[int] = None, cancel_flag: Optional[Event] def _get_chromium_args(): """Build Chrome arguments dynamically, pre-resolving hostnames via Python's DNS. - + Instead of trying to configure Chrome's DNS (which is unreliable), we pre-resolve AA hostnames using Python's patched socket (which uses DoH/custom DNS) and pass the resolved IPs directly to Chrome via --host-resolver-rules. This bypasses @@ -988,11 +1014,17 @@ def get_bypassed_page(url: str, selector: Optional[network.AAMirrorSelector] = N # Before using Chrome, check if cookies are available (from a previous bypass) # This helps concurrent downloads avoid unnecessary Chrome usage parsed = urlparse(attempt_url) - cookies = get_cf_cookies_for_domain(parsed.hostname or "") + hostname = parsed.hostname or "" + cookies = get_cf_cookies_for_domain(hostname) if cookies: try: + # Use stored UA - Cloudflare ties cf_clearance to the UA that solved the challenge + headers = {} + stored_ua = get_cf_user_agent_for_domain(hostname) + if stored_ua: + headers['User-Agent'] = stored_ua logger.debug(f"Trying request with cached cookies before Chrome: {attempt_url}") - response = requests.get(attempt_url, cookies=cookies, proxies=_get_proxies(), timeout=(5, 10)) + response = requests.get(attempt_url, cookies=cookies, headers=headers, proxies=_get_proxies(), timeout=(5, 10)) if response.status_code == 200: logger.debug(f"Cached cookies worked, skipped Chrome bypass") return response.text @@ -1016,7 +1048,7 @@ def get_bypassed_page(url: str, selector: Optional[network.AAMirrorSelector] = N raise logger.debug(f"Cloudflare Bypasser response length: {len(response_html)}") - if response_html.strip() != "": - return response_html - else: + if response_html.strip() == "": raise requests.exceptions.RequestException("Failed to bypass Cloudflare") + + return response_html diff --git a/cwa_book_downloader/config/env.py b/cwa_book_downloader/config/env.py index 3190d73..b76ddb2 100644 --- a/cwa_book_downloader/config/env.py +++ b/cwa_book_downloader/config/env.py @@ -60,8 +60,11 @@ DEBUG = string_to_bool(os.getenv("DEBUG", "false")) # Comma-separated values: aa-fast, aa-slow-nowait, aa-slow-wait, libgen, zlib, welib _DEBUG_SKIP_SOURCES_RAW = os.getenv("DEBUG_SKIP_SOURCES", "").strip().lower() DEBUG_SKIP_SOURCES = set(s.strip() for s in _DEBUG_SKIP_SOURCES_RAW.split(",") if s.strip()) -PRIORITIZE_WELIB = string_to_bool(os.getenv("PRIORITIZE_WELIB", "false")) -ALLOW_USE_WELIB = string_to_bool(os.getenv("ALLOW_USE_WELIB", "true")) + +# Legacy welib settings - replaced by SOURCE_PRIORITY OrderableListField +# Kept for migration: if set, used to build initial SOURCE_PRIORITY config +_LEGACY_PRIORITIZE_WELIB = string_to_bool(os.getenv("PRIORITIZE_WELIB", "false")) +_LEGACY_ALLOW_USE_WELIB = string_to_bool(os.getenv("ALLOW_USE_WELIB", "true")) # Version information from Docker build BUILD_VERSION = os.getenv("BUILD_VERSION", "N/A") @@ -99,8 +102,7 @@ if USING_TOR: HTTP_PROXY = "" HTTPS_PROXY = "" -# Check if this is the Tor variant (has tor binary installed) -# Only the Tor variant image includes the tor binary +# Detect Tor variant (has tor binary installed) TOR_VARIANT_AVAILABLE = shutil.which("tor") is not None # Calibre-Web URL for navigation button diff --git a/cwa_book_downloader/config/settings.py b/cwa_book_downloader/config/settings.py index 7ae50b7..4fc3dfd 100644 --- a/cwa_book_downloader/config/settings.py +++ b/cwa_book_downloader/config/settings.py @@ -48,10 +48,7 @@ logger.debug(f"STAT TMP_DIR: {os.stat(env.TMP_DIR)}") logger.debug(f"STAT INGEST_DIR: {os.stat(env.INGEST_DIR)}") logger.debug(f"CROSS_FILE_SYSTEM: {CROSS_FILE_SYSTEM}") -# Network settings - DNS configuration is managed by network.py -# These are placeholder values that will be set when network.init() is called -# The authoritative DNS state lives in network.py and is configured via set_dns_provider() -# Actual DNS provider is determined from config singleton (settings UI) or ENV var +# DNS placeholders - actual values set by network.init() from config/ENV CUSTOM_DNS: list[str] = [] DOH_SERVER: str = "" @@ -116,6 +113,7 @@ from cwa_book_downloader.core.settings_registry import ( CheckboxField, SelectField, MultiSelectField, + OrderableListField, HeadingField, ActionButton, ) @@ -136,7 +134,17 @@ register_group( ) -# Build format options from supported formats +# Anna's Archive sort options (for Direct mode) +_AA_SORT_OPTIONS = [ + {"value": "relevance", "label": "Most relevant"}, + {"value": "newest", "label": "Newest (publication year)"}, + {"value": "oldest", "label": "Oldest (publication year)"}, + {"value": "largest", "label": "Largest (filesize)"}, + {"value": "smallest", "label": "Smallest (filesize)"}, + {"value": "newest_added", "label": "Newest (open sourced)"}, + {"value": "oldest_added", "label": "Oldest (open sourced)"}, +] + _FORMAT_OPTIONS = [ {"value": "epub", "label": "EPUB"}, {"value": "mobi", "label": "MOBI"}, @@ -181,7 +189,6 @@ def _get_release_source_options(): for source in list_available_sources() ] -# Build language options from supported languages _LANGUAGE_OPTIONS = [{"value": lang["code"], "label": lang["language"]} for lang in _SUPPORTED_BOOK_LANGUAGE] @@ -208,6 +215,27 @@ def _clear_covers_cache(current_values: dict) -> dict: } +def _clear_metadata_cache(current_values: dict) -> dict: + """Clear the in-memory metadata cache.""" + try: + from cwa_book_downloader.core.cache import get_metadata_cache + + cache = get_metadata_cache() + stats_before = cache.stats() + cache.clear() + + return { + "success": True, + "message": f"Cleared {stats_before['size']} cached entries.", + } + except Exception as e: + logger.error(f"Failed to clear metadata cache: {e}") + return { + "success": False, + "message": f"Failed to clear cache: {str(e)}", + } + + @register_settings("general", "General", icon="settings", order=0) def general_settings(): """Core application settings.""" @@ -230,6 +258,15 @@ def general_settings(): ], default="direct", ), + SelectField( + key="AA_DEFAULT_SORT", + label="Default Sort Order", + description="Default sort order for Anna's Archive search results.", + options=_AA_SORT_OPTIONS, + default="relevance", + env_supported=False, # UI-only setting + show_when={"field": "SEARCH_MODE", "value": "direct"}, + ), SelectField( key="METADATA_PROVIDER", label="Metadata Provider for Universal Search", @@ -247,6 +284,12 @@ def general_settings(): env_supported=False, # UI-only setting, not configurable via ENV show_when={"field": "SEARCH_MODE", "value": "universal"}, ), + TextField( + key="CALIBRE_WEB_URL", + label="Book Management App URL", + description="Adds a navigation button to your book manager instance (Calibre-Web Automated, Booklore, etc).", + placeholder="http://calibre-web:8083", + ), MultiSelectField( key="SUPPORTED_FORMATS", label="Supported Formats", @@ -261,35 +304,6 @@ def general_settings(): options=_LANGUAGE_OPTIONS, default=["en"], ), - CheckboxField( - key="USE_BOOK_TITLE", - label="Use Book Title as Filename", - description="Save files using book title instead of ID. May cause issues with special characters.", - default=False, - ), - TextField( - key="CALIBRE_WEB_URL", - label="Book Management App URL", - description="Adds a navigation button to your book manager instance (Calibre-Web Automated, Booklore, etc).", - placeholder="http://calibre-web:8083", - ), - NumberField( - key="MAX_CONCURRENT_DOWNLOADS", - label="Max Concurrent Downloads", - description="Maximum number of simultaneous downloads.", - default=3, - min_value=1, - max_value=10, - requires_restart=True, - ), - NumberField( - key="STATUS_TIMEOUT", - label="Status Timeout (seconds)", - description="How long to keep completed/failed downloads in the queue display.", - default=3600, - min_value=60, - max_value=86400, - ), ] @@ -398,70 +412,200 @@ def network_settings(): ] -@register_settings("ingest_directories", "Ingest Directories", icon="folder", order=5) -def ingest_directory_settings(): - """Configure where different content types are saved.""" +@register_settings("downloads", "Downloads", icon="folder", order=5) +def download_settings(): + """Configure download behavior and file locations.""" return [ TextField( key="INGEST_DIR", - label="Default Ingest Directory", - description="Default directory for all downloads. Used when no specific directory is set.", + label="Download Directory", + description="Directory where downloaded files are saved.", default="/cwa-book-ingest", required=True, ), + CheckboxField( + key="USE_BOOK_TITLE", + label="Use Book Info as Filename", + description="Save files using Author, Title and Year instead of ID. May cause issues with special characters.", + default=False, + ), + CheckboxField( + key="AUTO_OPEN_DOWNLOADS_SIDEBAR", + label="Auto-Open Downloads Sidebar", + description="Automatically open the downloads sidebar when a new download is queued.", + default=True, + env_supported=False, # UI-only setting + ), + CheckboxField( + key="DOWNLOAD_TO_BROWSER", + label="Download to Browser", + description="Automatically download completed files to your browser.", + default=False, + env_supported=False, # UI-only setting + ), + NumberField( + key="MAX_CONCURRENT_DOWNLOADS", + label="Max Concurrent Downloads", + description="Maximum number of simultaneous downloads.", + default=3, + min_value=1, + max_value=10, + requires_restart=True, + ), + NumberField( + key="STATUS_TIMEOUT", + label="Status Timeout (seconds)", + description="How long to keep completed/failed downloads in the queue display.", + default=3600, + min_value=60, + max_value=86400, + ), + CheckboxField( + key="USE_CONTENT_TYPE_DIRECTORIES", + label="Use Content-Type Subdirectories", + description="Save different content types (fiction, non-fiction, comics, etc.) to separate subdirectories.", + default=False, + ), HeadingField( key="content_type_directories_heading", title="Content-Type Directories", - description="Override the default directory for specific content types. Leave empty to use the default.", + description="Configure where each content type is saved. Leave empty to use the default directory with an auto-generated subdirectory name.", + show_when={"field": "USE_CONTENT_TYPE_DIRECTORIES", "value": True}, ), TextField( key="INGEST_DIR_BOOK_FICTION", label="Fiction Books", placeholder="/cwa-book-ingest/fiction", + show_when={"field": "USE_CONTENT_TYPE_DIRECTORIES", "value": True}, ), TextField( key="INGEST_DIR_BOOK_NON_FICTION", label="Non-Fiction Books", placeholder="/cwa-book-ingest/non-fiction", + show_when={"field": "USE_CONTENT_TYPE_DIRECTORIES", "value": True}, ), TextField( key="INGEST_DIR_BOOK_UNKNOWN", label="Unknown Books", placeholder="/cwa-book-ingest/unknown", + show_when={"field": "USE_CONTENT_TYPE_DIRECTORIES", "value": True}, ), TextField( key="INGEST_DIR_MAGAZINE", label="Magazines", placeholder="/cwa-book-ingest/magazines", + show_when={"field": "USE_CONTENT_TYPE_DIRECTORIES", "value": True}, ), TextField( key="INGEST_DIR_COMIC_BOOK", label="Comic Books", placeholder="/cwa-book-ingest/comics", + show_when={"field": "USE_CONTENT_TYPE_DIRECTORIES", "value": True}, ), TextField( key="INGEST_DIR_AUDIOBOOK", label="Audiobooks", placeholder="/cwa-book-ingest/audiobooks", + show_when={"field": "USE_CONTENT_TYPE_DIRECTORIES", "value": True}, ), TextField( key="INGEST_DIR_STANDARDS_DOCUMENT", label="Standards Documents", placeholder="/cwa-book-ingest/standards", + show_when={"field": "USE_CONTENT_TYPE_DIRECTORIES", "value": True}, ), TextField( key="INGEST_DIR_MUSICAL_SCORE", label="Musical Scores", placeholder="/cwa-book-ingest/scores", + show_when={"field": "USE_CONTENT_TYPE_DIRECTORIES", "value": True}, ), TextField( key="INGEST_DIR_OTHER", label="Other", placeholder="/cwa-book-ingest/other", + show_when={"field": "USE_CONTENT_TYPE_DIRECTORIES", "value": True}, ), ] +def _get_source_priority_options(): + """Build source priority options with dynamic disabled states.""" + from cwa_book_downloader.core.config import config + + has_donator_key = bool(config.get("AA_DONATOR_KEY", "")) + use_cf_bypass = config.get("USE_CF_BYPASS", True) + using_external_bypasser = config.get("USING_EXTERNAL_BYPASSER", False) + has_internal_bypasser = use_cf_bypass and not using_external_bypasser + + return [ + { + "id": "aa-fast", + "label": "Anna's Archive (Fast)", + "description": "Fast downloads for donators", + "isLocked": not has_donator_key, + "disabledReason": "Requires AA Donator Key" if not has_donator_key else None, + }, + { + "id": "welib", + "label": "Welib", + "description": "Alternative mirror with good availability", + "isLocked": not has_internal_bypasser, + "disabledReason": "Requires internal bypasser" if not has_internal_bypasser else None, + }, + { + "id": "aa-slow-nowait", + "label": "Anna's Archive (Slowest, No Waitlist)", + "description": "Partner servers without countdown", + }, + { + "id": "aa-slow-wait", + "label": "Anna's Archive (Slow, Waitlist)", + "description": "Partner servers with countdown timer", + }, + { + "id": "libgen", + "label": "Libgen", + "description": "Library Genesis mirrors", + }, + { + "id": "zlib", + "label": "Z-Library", + "description": "Z-Library mirrors (requires Cloudflare bypass)", + "isLocked": not has_internal_bypasser, + "disabledReason": "Requires internal bypasser" if not has_internal_bypasser else None, + }, + ] + + +def _get_default_source_priority(): + """Default source priority order, respecting legacy env vars. + + ALLOW_USE_WELIB (default true) controls whether welib is enabled. + PRIORITIZE_WELIB (default false) controls whether welib is moved to position 1. + """ + from cwa_book_downloader.config.env import _LEGACY_PRIORITIZE_WELIB, _LEGACY_ALLOW_USE_WELIB + + welib_entry = {"id": "welib", "enabled": _LEGACY_ALLOW_USE_WELIB} + + priority = [ + {"id": "aa-fast", "enabled": True}, + {"id": "aa-slow-nowait", "enabled": True}, + {"id": "aa-slow-wait", "enabled": True}, + {"id": "libgen", "enabled": True}, + ] + + if _LEGACY_PRIORITIZE_WELIB: + priority.insert(1, welib_entry) # After aa-fast + else: + priority.append(welib_entry) # Before zlib + + # Z-Library last - it's quite brittle + priority.append({"id": "zlib", "enabled": True}) + + return priority + + @register_settings("download_sources", "Download Sources", icon="download", order=21, group="direct_download") def download_source_settings(): """Settings for download source behavior.""" @@ -489,18 +633,17 @@ def download_source_settings(): label="Anna's Archive Donator Key", description="Optional donator key for faster downloads from Anna's Archive.", ), - CheckboxField( - key="ALLOW_USE_WELIB", - label="Allow Welib Downloads", - description="Enable Welib as a fallback download source.", - default=True, + HeadingField( + key="source_priority_heading", + title="Source Priority", + description="Configure which download sources to use and in what order.", ), - CheckboxField( - key="PRIORITIZE_WELIB", - label="Prioritize Welib", - description="Try Welib before other slow download sources.", - default=False, - show_when={"field": "ALLOW_USE_WELIB", "value": True}, + OrderableListField( + key="SOURCE_PRIORITY", + label="Download Source Order", + description="Drag to reorder. Sources are tried from top to bottom until a download succeeds.", + options=_get_source_priority_options, + default=_get_default_source_priority(), ), NumberField( key="MAX_RETRY", @@ -673,4 +816,40 @@ def advanced_settings(): style="danger", callback=_clear_covers_cache, ), + HeadingField( + key="metadata_cache_heading", + title="Metadata Cache", + description="Cache book metadata from providers (Hardcover, Open Library) to reduce API calls and speed up repeated searches.", + ), + CheckboxField( + key="METADATA_CACHE_ENABLED", + label="Enable Metadata Caching", + description="When disabled, all metadata searches hit the provider API directly.", + default=True, + ), + NumberField( + key="METADATA_CACHE_SEARCH_TTL", + label="Search Results Cache (seconds)", + description="How long to cache search results. Default: 300 (5 minutes). Max: 604800 (7 days).", + default=300, + min_value=60, + max_value=604800, + show_when={"field": "METADATA_CACHE_ENABLED", "value": True}, + ), + NumberField( + key="METADATA_CACHE_BOOK_TTL", + label="Book Details Cache (seconds)", + description="How long to cache individual book details. Default: 600 (10 minutes). Max: 604800 (7 days).", + default=600, + min_value=60, + max_value=604800, + show_when={"field": "METADATA_CACHE_ENABLED", "value": True}, + ), + ActionButton( + key="clear_metadata_cache", + label="Clear Metadata Cache", + description="Clear all cached search results and book details.", + style="danger", + callback=_clear_metadata_cache, + ), ] diff --git a/cwa_book_downloader/core/cache.py b/cwa_book_downloader/core/cache.py index cb1b439..a2643fb 100644 --- a/cwa_book_downloader/core/cache.py +++ b/cwa_book_downloader/core/cache.py @@ -162,21 +162,42 @@ def cache_key(*args, **kwargs) -> str: return ":".join(parts) -def cacheable(ttl: int, key_prefix: str = ""): +def cacheable( + ttl: Optional[int] = None, + ttl_key: Optional[str] = None, + ttl_default: int = 300, + key_prefix: str = "" +): """Decorator for caching function results. Args: - ttl: Time to live in seconds. + ttl: Static time to live in seconds (use this OR ttl_key, not both). + ttl_key: Config key to read TTL from (e.g., "METADATA_CACHE_SEARCH_TTL"). + ttl_default: Default TTL if ttl_key not found in config. key_prefix: Optional prefix for cache keys. - Usage: - @cacheable(ttl=300, key_prefix="hardcover:search") - def search(self, query: str, limit: int = 20): - ... + Examples: + @cacheable(ttl=300, key_prefix="hardcover:search") # Static TTL + @cacheable(ttl_key="METADATA_CACHE_SEARCH_TTL", key_prefix="hardcover:search") # Dynamic TTL """ def decorator(func: Callable[..., T]) -> Callable[..., T]: @wraps(func) def wrapper(*args, **kwargs) -> T: + # Check if metadata caching is enabled + from cwa_book_downloader.core.config import config + + if not config.get("METADATA_CACHE_ENABLED", True): + # Caching disabled, execute function directly + return func(*args, **kwargs) + + # Determine TTL: static or from config + if ttl is not None: + effective_ttl = ttl + elif ttl_key: + effective_ttl = config.get(ttl_key, ttl_default) + else: + effective_ttl = ttl_default + # Generate cache key from function name and arguments # Skip 'self' argument if present (first arg of method) cache_args = args[1:] if args and hasattr(args[0], func.__name__) else args @@ -199,7 +220,7 @@ def cacheable(ttl: int, key_prefix: str = ""): # Only cache non-None results if result is not None: - _metadata_cache.set(key, result, ttl) + _metadata_cache.set(key, result, effective_ttl) return result diff --git a/cwa_book_downloader/core/image_cache.py b/cwa_book_downloader/core/image_cache.py index bf3c601..f4bcc2a 100644 --- a/cwa_book_downloader/core/image_cache.py +++ b/cwa_book_downloader/core/image_cache.py @@ -36,6 +36,10 @@ MAX_IMAGE_SIZE = 5 * 1024 * 1024 # Negative cache TTL (for failed fetches) - 1 hour NEGATIVE_CACHE_TTL = 3600 +# Transient failure cache TTL (for timeouts/connection errors) - 60 seconds +# Short enough to retry soon, long enough to prevent spam during one page view +TRANSIENT_CACHE_TTL = 60 + def _detect_image_type(data: bytes) -> Optional[Tuple[str, str]]: """Detect image type from magic bytes. @@ -82,8 +86,9 @@ class ImageCacheService: # Ensure cache directory exists self.cache_dir.mkdir(parents=True, exist_ok=True) - # Load existing index + # Load existing index and sync with files on disk (once at startup) self._load_index() + self._sync_index_with_files() def _load_index(self) -> None: """Load cache index from disk.""" @@ -91,10 +96,70 @@ class ImageCacheService: if self.index_path.exists(): with open(self.index_path, 'r') as f: self._index = json.load(f) - except (json.JSONDecodeError, IOError) as e: - logger.warning(f"Failed to load cache index, starting fresh: {e}") + except (json.JSONDecodeError, IOError): self._index = {} + def _sync_index_with_files(self) -> None: + """Sync cache index with actual files on disk. + + - Adds entries for files that exist but aren't in index + - Removes entries for files that no longer exist (non-negative only) + - Preserves negative cache entries (they have no files) + """ + image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.webp'} + added_count = 0 + removed_count = 0 + + # Build set of files that exist on disk + existing_files: Dict[str, Path] = {} + for file_path in self.cache_dir.iterdir(): + if not file_path.is_file(): + continue + if file_path.suffix.lower() not in image_extensions: + continue + existing_files[file_path.stem] = file_path + + # Add files that aren't in the index + for cache_id, file_path in existing_files.items(): + if cache_id in self._index: + continue + + ext = file_path.suffix.lstrip('.') + stat = file_path.stat() + + # Detect content type + try: + with open(file_path, 'rb') as f: + header = f.read(16) + detected = _detect_image_type(header) + content_type = detected[0] if detected else f'image/{ext}' + except IOError: + content_type = f'image/{ext}' + + self._index[cache_id] = { + 'ext': ext, + 'content_type': content_type, + 'size': stat.st_size, + 'cached_at': stat.st_mtime, + 'accessed_at': stat.st_mtime, + } + added_count += 1 + + # Remove index entries for missing files (skip negative cache entries) + stale_entries = [] + for cache_id, entry in self._index.items(): + if entry.get('negative', False): + continue # Negative entries don't have files + if cache_id not in existing_files: + stale_entries.append(cache_id) + + for cache_id in stale_entries: + del self._index[cache_id] + removed_count += 1 + + if added_count > 0 or removed_count > 0: + self._save_index() + def _save_index(self) -> None: """Save cache index to disk.""" try: @@ -103,8 +168,8 @@ class ImageCacheService: with open(temp_path, 'w') as f: json.dump(self._index, f) temp_path.rename(self.index_path) - except IOError as e: - logger.error(f"Failed to save cache index: {e}") + except IOError: + pass def _get_image_path(self, cache_id: str, ext: str) -> Path: """Get the file path for a cached image.""" @@ -119,11 +184,20 @@ class ImageCacheService: return (time.time() - cached_at) > self.ttl_seconds def _is_negative_expired(self, entry: Dict[str, Any]) -> bool: - """Check if a negative cache entry is expired.""" + """Check if a negative cache entry is expired. + + Transient failures (timeouts) expire after TRANSIENT_CACHE_TTL (60s). + Permanent failures (404s) expire after NEGATIVE_CACHE_TTL (1 hour). + """ if not entry.get('negative', False): return False cached_at = entry.get('cached_at', 0) + + # Transient failures (timeouts, connection errors) use shorter TTL + if entry.get('transient', False): + return (time.time() - cached_at) > TRANSIENT_CACHE_TTL + return (time.time() - cached_at) > NEGATIVE_CACHE_TTL def _calculate_total_size(self) -> int: @@ -158,8 +232,8 @@ class ImageCacheService: try: if image_path.exists(): image_path.unlink() - except IOError as e: - logger.warning(f"Failed to delete cached image {cache_id}: {e}") + except IOError: + pass # Update tracking current_size -= entry.get('size', 0) @@ -167,7 +241,6 @@ class ImageCacheService: evicted_count += 1 if evicted_count > 0: - logger.info(f"Evicted {evicted_count} images from cache (LRU)") self._save_index() def get(self, cache_id: str) -> Optional[Tuple[bytes, str]]: @@ -240,8 +313,7 @@ class ImageCacheService: self._hits += 1 return data, content_type - except IOError as e: - logger.warning(f"Failed to read cached image {cache_id}: {e}") + except IOError: self._misses += 1 return None @@ -284,8 +356,7 @@ class ImageCacheService: try: with open(image_path, 'wb') as f: f.write(data) - except IOError as e: - logger.warning(f"Failed to write cached image {cache_id}: {e}") + except IOError: return False # Update index @@ -301,15 +372,17 @@ class ImageCacheService: self._save_index() return True - def put_negative(self, cache_id: str) -> None: + def put_negative(self, cache_id: str, transient: bool = False) -> None: """Store a negative cache entry (failed fetch). Args: cache_id: Cache key + transient: If True, uses shorter TTL (for timeouts/connection errors) """ with self._lock: self._index[cache_id] = { 'negative': True, + 'transient': transient, 'cached_at': time.time(), } self._save_index() @@ -335,8 +408,8 @@ class ImageCacheService: try: if image_path.exists(): image_path.unlink() - except IOError as e: - logger.warning(f"Failed to delete cached image {cache_id}: {e}") + except IOError: + pass del self._index[cache_id] self._save_index() @@ -370,7 +443,6 @@ class ImageCacheService: self._hits = 0 self._misses = 0 - logger.info(f"Cleared {count} entries from image cache") return count def stats(self) -> Dict[str, Any]: @@ -420,7 +492,6 @@ class ImageCacheService: # Validate content type content_type = response.headers.get('content-type', '') if not content_type.startswith('image/'): - logger.warning(f"Invalid content type for cover: {content_type}") self.put_negative(cache_id) return None @@ -429,14 +500,12 @@ class ImageCacheService: for chunk in response.iter_content(chunk_size=8192): data.write(chunk) if data.tell() > MAX_IMAGE_SIZE: - logger.warning(f"Cover image too large: {url}") self.put_negative(cache_id) return None image_data = data.getvalue() if not image_data: - logger.warning(f"Empty image response: {url}") self.put_negative(cache_id) return None @@ -451,17 +520,18 @@ class ImageCacheService: return None except requests.exceptions.Timeout: - logger.warning(f"Timeout fetching cover: {url}") - # Don't cache timeout - it's transient + self.put_negative(cache_id, transient=True) + return None + except requests.exceptions.ConnectionError: + self.put_negative(cache_id, transient=True) return None except requests.exceptions.HTTPError as e: if e.response is not None and e.response.status_code == 404: self.put_negative(cache_id) else: - logger.warning(f"HTTP error fetching cover: {e}") + self.put_negative(cache_id, transient=True) return None - except Exception as e: - logger.warning(f"Error fetching cover: {e}") + except Exception: return None diff --git a/cwa_book_downloader/core/models.py b/cwa_book_downloader/core/models.py index 6fbfa70..5e12638 100644 --- a/cwa_book_downloader/core/models.py +++ b/cwa_book_downloader/core/models.py @@ -1,12 +1,47 @@ """Data structures and models used across the application.""" from dataclasses import dataclass, field +from pathlib import Path from typing import Dict, List, Optional from enum import Enum import re import time +def build_filename( + title: str, + author: Optional[str] = None, + year: Optional[str] = None, + fmt: Optional[str] = None, +) -> str: + """Build sanitized filename: 'Author - Title (Year).format' + + Args: + title: Book title (required) + author: Book author + year: Publication year + fmt: File format/extension + + Returns: + Sanitized filename safe for filesystem use + """ + parts = [] + if author: + parts.append(author) + parts.append(" - ") + parts.append(title) + if year: + parts.append(f" ({year})") + + filename = "".join(parts) + filename = re.sub(r'[\\/:*?"<>|]', '_', filename.strip())[:245] + + if fmt: + filename = f"{filename}.{fmt}" + + return filename + + class QueueStatus(str, Enum): """Enum for possible book queue statuses.""" QUEUED = "queued" @@ -65,6 +100,12 @@ class DownloadTask: return self.priority < other.priority return self.added_time < other.added_time + def get_filename(self) -> str: + """Build sanitized filename from task metadata.""" + if self.download_path: + return Path(self.download_path).name + return build_filename(self.title, self.author, fmt=self.format) + @dataclass class BookInfo: @@ -109,22 +150,7 @@ class BookInfo: self.format = ext break - # Build filename - parts = [] - if self.author: - parts.append(self.author) - parts.append(" - ") - parts.append(self.title) - if self.year: - parts.append(f" ({self.year})") - - filename = "".join(parts) - filename = re.sub(r'[\\/:*?"<>|]', '_', filename.strip())[:245] - - if self.format: - filename = f"{filename}.{self.format}" - - return filename + return build_filename(self.title, self.author, self.year, self.format) @dataclass diff --git a/cwa_book_downloader/core/settings_registry.py b/cwa_book_downloader/core/settings_registry.py index 36e0da3..4a6534b 100644 --- a/cwa_book_downloader/core/settings_registry.py +++ b/cwa_book_downloader/core/settings_registry.py @@ -80,6 +80,28 @@ class MultiSelectField(FieldBase): default: List[str] = field(default_factory=list) +@dataclass +class OrderableListField(FieldBase): + """ + Drag-and-drop reorderable list with enable/disable toggles. + + A generic field for any ordered list of items where each item can be + enabled or disabled. Used for source priority, format preference, etc. + + Options define the available items: + [{"id": "item1", "label": "Item 1", "description": "...", + "disabledReason": "...", "isLocked": False}, ...] + + Value is stored as: + [{"id": "item1", "enabled": True}, {"id": "item2", "enabled": False}, ...] + """ + # Options can be a list or a callable that returns a list (for lazy evaluation) + # Each option: {id, label, description?, disabledReason?, isLocked?} + options: Any = field(default_factory=list) + # Default value: [{id, enabled}, ...] in priority order + default: List[Dict[str, Any]] = field(default_factory=list) + + @dataclass class ActionButton: """ @@ -115,13 +137,14 @@ class HeadingField: description: str = "" # Description text (supports markdown-style links) link_url: str = "" # Optional URL for a link link_text: str = "" # Text for the link (defaults to URL if not provided) + show_when: Optional[Dict[str, Any]] = None # Conditional visibility: {"field": "key", "value": "expected"} def get_field_type(self) -> str: return "HeadingField" # Type alias for all field types -SettingsField = Union[TextField, PasswordField, NumberField, CheckboxField, SelectField, MultiSelectField, ActionButton, HeadingField] +SettingsField = Union[TextField, PasswordField, NumberField, CheckboxField, SelectField, MultiSelectField, OrderableListField, ActionButton, HeadingField] @dataclass @@ -431,6 +454,13 @@ def _parse_env_value(value: str, field: SettingsField) -> Any: return field.default elif isinstance(field, MultiSelectField): return [v.strip() for v in value.split(',') if v.strip()] + elif isinstance(field, OrderableListField): + # Parse JSON array: [{"id": "...", "enabled": true}, ...] + try: + return json.loads(value) + except json.JSONDecodeError: + logger.warning(f"Invalid JSON for {field.key}, using default") + return field.default else: return value @@ -471,6 +501,8 @@ def serialize_field(field: SettingsField, tab_name: str, include_value: bool = T if field.link_url: result["linkUrl"] = field.link_url result["linkText"] = field.link_text or field.link_url + if field.show_when: + result["showWhen"] = field.show_when return result result = { @@ -509,6 +541,10 @@ def serialize_field(field: SettingsField, tab_name: str, include_value: bool = T # Support callable options for lazy evaluation (avoids circular imports) options = field.options() if callable(field.options) else field.options result["options"] = options + elif isinstance(field, OrderableListField): + # Support callable options for lazy evaluation (avoids circular imports) + options = field.options() if callable(field.options) else field.options + result["options"] = options elif isinstance(field, ActionButton): result["style"] = field.style result["description"] = field.description diff --git a/cwa_book_downloader/download/http.py b/cwa_book_downloader/download/http.py index fc1a837..982ee79 100644 --- a/cwa_book_downloader/download/http.py +++ b/cwa_book_downloader/download/http.py @@ -19,10 +19,11 @@ from cwa_book_downloader.core.logger import setup_logger if USE_CF_BYPASS: if USING_EXTERNAL_BYPASSER: from cwa_book_downloader.bypass.external_bypasser import get_bypassed_page - # External bypasser doesn't share cookies + # External bypasser doesn't share cookies/UA get_cf_cookies_for_domain = lambda domain: {} + get_cf_user_agent_for_domain = lambda domain: None else: - from cwa_book_downloader.bypass.internal_bypasser import get_bypassed_page, get_cf_cookies_for_domain + from cwa_book_downloader.bypass.internal_bypasser import get_bypassed_page, get_cf_cookies_for_domain, get_cf_user_agent_for_domain logger = setup_logger(__name__) @@ -132,12 +133,17 @@ def html_get_page( return "" logger.info(f"GET: {current_url}") - # Try with CF cookies if available (from previous bypass) + # Try with CF cookies/UA if available (from previous bypass) cookies = {} + headers = {} if USE_CF_BYPASS: parsed = urlparse(current_url) - cookies = get_cf_cookies_for_domain(parsed.hostname or "") - response = requests.get(current_url, proxies=_get_proxies(), timeout=REQUEST_TIMEOUT, cookies=cookies) + hostname = parsed.hostname or "" + cookies = get_cf_cookies_for_domain(hostname) + stored_ua = get_cf_user_agent_for_domain(hostname) + if stored_ua: + headers['User-Agent'] = stored_ua + response = requests.get(current_url, proxies=_get_proxies(), timeout=REQUEST_TIMEOUT, cookies=cookies, headers=headers) response.raise_for_status() time.sleep(1) return response.text @@ -204,6 +210,7 @@ def download_url( total_size = parse_size_string(size) or 0 attempt = 0 + zlib_cookie_refresh_attempted = False while attempt < MAX_DOWNLOAD_RETRIES: if cancel_flag and cancel_flag.is_set(): @@ -217,13 +224,21 @@ def download_url( status_callback("resolving", f"Connecting (Attempt {attempt + 1}/{MAX_DOWNLOAD_RETRIES})") logger.info(f"Downloading: {current_url} (attempt {attempt + 1}/{MAX_DOWNLOAD_RETRIES})") - # Try with CF cookies if available + # Try with CF cookies/UA if available cookies = {} if USE_CF_BYPASS: parsed = urlparse(current_url) - cookies = get_cf_cookies_for_domain(parsed.hostname or "") + hostname = parsed.hostname or "" + cookies = get_cf_cookies_for_domain(hostname) + # Use stored UA - Cloudflare ties cf_clearance to the UA that solved the challenge + stored_ua = get_cf_user_agent_for_domain(hostname) + if stored_ua: + headers['User-Agent'] = stored_ua + logger.debug(f"Using stored UA for {hostname}") + else: + logger.debug(f"No stored UA available for {hostname}") if cookies: - logger.debug(f"Using {len(cookies)} cookies for {parsed.hostname}: {list(cookies.keys())}") + logger.debug(f"Using {len(cookies)} cookies for {hostname}: {list(cookies.keys())}") response = requests.get(current_url, stream=True, proxies=_get_proxies(), timeout=REQUEST_TIMEOUT, cookies=cookies, headers=headers) response.raise_for_status() @@ -258,6 +273,20 @@ def download_url( status = _get_status_code(e) retryable = _is_retryable_error(e) + # Z-Library 403 - try refreshing cookies via bypasser once before giving up + if status == 403 and USE_CF_BYPASS and not zlib_cookie_refresh_attempted: + parsed = urlparse(current_url) + if parsed.hostname and 'z-lib' in parsed.hostname and referer: + zlib_cookie_refresh_attempted = True + logger.info(f"Z-Library 403 - refreshing cookies via referer: {referer}") + try: + get_bypassed_page(referer, selector, cancel_flag) + time.sleep(0.5) + # Retry with fresh cookies (don't increment attempt) + continue + except Exception as cookie_err: + logger.warning(f"Z-Library cookie refresh failed: {cookie_err}") + # Non-retryable errors if status in (403, 404): logger.warning(f"Download failed ({status}): {current_url}") @@ -316,12 +345,16 @@ def _try_resume( time.sleep(_backoff_delay(attempt + 1, base=0.5, cap=5.0)) try: - # Try with CF cookies if available + # Try with CF cookies/UA if available cookies = {} + resume_headers = {**(base_headers or DOWNLOAD_HEADERS), 'Range': f'bytes={start_byte}-'} if USE_CF_BYPASS: parsed = urlparse(url) - cookies = get_cf_cookies_for_domain(parsed.hostname or "") - resume_headers = {**(base_headers or DOWNLOAD_HEADERS), 'Range': f'bytes={start_byte}-'} + hostname = parsed.hostname or "" + cookies = get_cf_cookies_for_domain(hostname) + stored_ua = get_cf_user_agent_for_domain(hostname) + if stored_ua: + resume_headers['User-Agent'] = stored_ua response = requests.get( url, stream=True, proxies=_get_proxies(), timeout=REQUEST_TIMEOUT, headers=resume_headers, cookies=cookies diff --git a/cwa_book_downloader/main.py b/cwa_book_downloader/main.py index 80ad4e3..11f8982 100644 --- a/cwa_book_downloader/main.py +++ b/cwa_book_downloader/main.py @@ -518,6 +518,7 @@ def api_config() -> Union[Response, Tuple[Response, int]]: from cwa_book_downloader.metadata_providers import ( get_provider_sort_options, get_provider_search_fields, + get_provider_default_sort, ) config = { @@ -532,7 +533,12 @@ def api_config() -> Union[Response, Tuple[Response, int]]: "metadata_sort_options": get_provider_sort_options(), "metadata_search_fields": get_provider_search_fields(), "default_release_source": app_config.get("DEFAULT_RELEASE_SOURCE", "direct_download"), + "auto_open_downloads_sidebar": app_config.get("AUTO_OPEN_DOWNLOADS_SIDEBAR", True), + "download_to_browser": app_config.get("DOWNLOAD_TO_BROWSER", False), "settings_enabled": _is_settings_enabled(), + # Default sort orders + "default_sort": app_config.get("AA_DEFAULT_SORT", "relevance"), # For direct mode (Anna's Archive) + "metadata_default_sort": get_provider_default_sort(), # For universal mode } return jsonify(config) except Exception as e: diff --git a/cwa_book_downloader/metadata_providers/__init__.py b/cwa_book_downloader/metadata_providers/__init__.py index b72b7ec..38e0643 100644 --- a/cwa_book_downloader/metadata_providers/__init__.py +++ b/cwa_book_downloader/metadata_providers/__init__.py @@ -413,6 +413,31 @@ def get_provider_search_fields(provider_name: Optional[str] = None) -> List[Dict return [serialize_search_field(f) for f in fields] +def get_provider_default_sort(provider_name: Optional[str] = None) -> str: + """Get the default sort order for a metadata provider. + + Reads from the provider-specific config setting (e.g., HARDCOVER_DEFAULT_SORT). + + Args: + provider_name: Provider name. If None, uses configured provider. + + Returns: + Default sort value string, or "relevance" if not configured. + """ + from cwa_book_downloader.core.config import config as app_config + + if provider_name is None: + app_config.refresh() + provider_name = app_config.get("METADATA_PROVIDER", "") + + if not provider_name: + return "relevance" + + # Look up provider-specific default sort setting + setting_key = f"{provider_name.upper()}_DEFAULT_SORT" + return app_config.get(setting_key, "relevance") + + def sync_metadata_provider_selection() -> None: """Sync the METADATA_PROVIDER setting based on enabled providers. diff --git a/cwa_book_downloader/metadata_providers/hardcover.py b/cwa_book_downloader/metadata_providers/hardcover.py index b1e5195..5d46c86 100644 --- a/cwa_book_downloader/metadata_providers/hardcover.py +++ b/cwa_book_downloader/metadata_providers/hardcover.py @@ -9,13 +9,10 @@ from cwa_book_downloader.core.settings_registry import ( register_settings, CheckboxField, PasswordField, + SelectField, ActionButton, HeadingField, ) -from cwa_book_downloader.config.env import ( - METADATA_CACHE_SEARCH_TTL, - METADATA_CACHE_BOOK_TTL, -) from cwa_book_downloader.core.config import config as app_config from cwa_book_downloader.metadata_providers import ( BookMetadata, @@ -150,7 +147,7 @@ class HardcoverProvider(MetadataProvider): cache_key = f"{options.query}:{options.search_type.value}:{options.sort.value}:{options.limit}:{options.page}:{fields_key}" return self._search_cached(cache_key, options) - @cacheable(ttl=METADATA_CACHE_SEARCH_TTL, key_prefix="hardcover:search") + @cacheable(ttl_key="METADATA_CACHE_SEARCH_TTL", ttl_default=300, key_prefix="hardcover:search") def _search_cached(self, cache_key: str, options: MetadataSearchOptions) -> List[BookMetadata]: """Cached search implementation. @@ -275,7 +272,7 @@ class HardcoverProvider(MetadataProvider): logger.error(f"Hardcover search error: {e}") return [] - @cacheable(ttl=METADATA_CACHE_BOOK_TTL, key_prefix="hardcover:book") + @cacheable(ttl_key="METADATA_CACHE_BOOK_TTL", ttl_default=600, key_prefix="hardcover:book") def get_book(self, book_id: str) -> Optional[BookMetadata]: """Get book details by Hardcover ID. @@ -331,7 +328,7 @@ class HardcoverProvider(MetadataProvider): logger.error(f"Hardcover get_book error: {e}") return None - @cacheable(ttl=METADATA_CACHE_BOOK_TTL, key_prefix="hardcover:isbn") + @cacheable(ttl_key="METADATA_CACHE_BOOK_TTL", ttl_default=600, key_prefix="hardcover:isbn") def search_by_isbn(self, isbn: str) -> Optional[BookMetadata]: """Search for a book by ISBN. @@ -699,6 +696,16 @@ def _get_connected_username() -> Optional[str]: return config.get("_connected_username") +# Hardcover sort options for settings UI +_HARDCOVER_SORT_OPTIONS = [ + {"value": "relevance", "label": "Most relevant"}, + {"value": "popularity", "label": "Most popular"}, + {"value": "rating", "label": "Highest rated"}, + {"value": "newest", "label": "Newest"}, + {"value": "oldest", "label": "Oldest"}, +] + + @register_settings("hardcover", "Hardcover", icon="book", order=51, group="metadata_providers") def hardcover_settings(): """Hardcover metadata provider settings.""" @@ -734,4 +741,12 @@ def hardcover_settings(): style="primary", callback=_test_hardcover_connection, ), + SelectField( + key="HARDCOVER_DEFAULT_SORT", + label="Default Sort Order", + description="Default sort order for Hardcover search results.", + options=_HARDCOVER_SORT_OPTIONS, + default="relevance", + env_supported=False, # UI-only setting + ), ] diff --git a/cwa_book_downloader/metadata_providers/openlibrary.py b/cwa_book_downloader/metadata_providers/openlibrary.py index 0c4e5c5..bad0027 100644 --- a/cwa_book_downloader/metadata_providers/openlibrary.py +++ b/cwa_book_downloader/metadata_providers/openlibrary.py @@ -12,13 +12,10 @@ from cwa_book_downloader.core.logger import setup_logger from cwa_book_downloader.core.settings_registry import ( register_settings, CheckboxField, + SelectField, ActionButton, HeadingField, ) -from cwa_book_downloader.config.env import ( - METADATA_CACHE_SEARCH_TTL, - METADATA_CACHE_BOOK_TTL, -) from cwa_book_downloader.metadata_providers import ( BookMetadata, DisplayField, @@ -160,7 +157,7 @@ class OpenLibraryProvider(MetadataProvider): cache_key = f"{options.query}:{options.search_type.value}:{options.sort.value}:{options.language}:{options.limit}:{options.page}:{fields_key}" return self._search_cached(cache_key, options) - @cacheable(ttl=METADATA_CACHE_SEARCH_TTL, key_prefix="openlibrary:search") + @cacheable(ttl_key="METADATA_CACHE_SEARCH_TTL", ttl_default=300, key_prefix="openlibrary:search") def _search_cached(self, cache_key: str, options: MetadataSearchOptions) -> List[BookMetadata]: """Cached search implementation. @@ -241,7 +238,7 @@ class OpenLibraryProvider(MetadataProvider): logger.error(f"Open Library search error: {e}") return [] - @cacheable(ttl=METADATA_CACHE_BOOK_TTL, key_prefix="openlibrary:book") + @cacheable(ttl_key="METADATA_CACHE_BOOK_TTL", ttl_default=600, key_prefix="openlibrary:book") def get_book(self, book_id: str) -> Optional[BookMetadata]: """Get book details by Open Library work ID. @@ -282,7 +279,7 @@ class OpenLibraryProvider(MetadataProvider): logger.error(f"Open Library get_book error: {e}") return None - @cacheable(ttl=METADATA_CACHE_BOOK_TTL, key_prefix="openlibrary:isbn") + @cacheable(ttl_key="METADATA_CACHE_BOOK_TTL", ttl_default=600, key_prefix="openlibrary:isbn") def search_by_isbn(self, isbn: str) -> Optional[BookMetadata]: """Search for a book by ISBN. @@ -598,6 +595,14 @@ def _test_openlibrary_connection() -> Dict[str, Any]: return {"success": False, "message": f"Error: {str(e)}"} +# Open Library sort options for settings UI +_OPENLIBRARY_SORT_OPTIONS = [ + {"value": "relevance", "label": "Most relevant"}, + {"value": "newest", "label": "Newest"}, + {"value": "oldest", "label": "Oldest"}, +] + + @register_settings("openlibrary", "Open Library", icon="library", order=52, group="metadata_providers") def openlibrary_settings(): """Open Library metadata provider settings.""" @@ -622,4 +627,12 @@ def openlibrary_settings(): style="primary", callback=_test_openlibrary_connection, ), + SelectField( + key="OPENLIBRARY_DEFAULT_SORT", + label="Default Sort Order", + description="Default sort order for Open Library search results.", + options=_OPENLIBRARY_SORT_OPTIONS, + default="relevance", + env_supported=False, # UI-only setting + ), ] diff --git a/cwa_book_downloader/release_sources/direct_download.py b/cwa_book_downloader/release_sources/direct_download.py index e5a615a..97c8ee5 100644 --- a/cwa_book_downloader/release_sources/direct_download.py +++ b/cwa_book_downloader/release_sources/direct_download.py @@ -56,6 +56,52 @@ _DOWNLOAD_SOURCES = [ _SOURCE_FAILURE_THRESHOLD = 4 _MIN_VALID_FILE_SIZE = 10 * 1024 +# Sources that require Cloudflare bypass +_CF_BYPASS_REQUIRED = frozenset({"aa-slow-nowait", "aa-slow-wait", "zlib", "welib"}) + +# Sources whose URLs come from AA page (multiple mirrors) +_AA_PAGE_SOURCES = frozenset({"aa-slow-nowait", "aa-slow-wait"}) + +# URL templates for sources that generate URLs from MD5 hash +_MD5_URL_TEMPLATES = { + "zlib": "https://z-lib.fm/md5/{md5}", + "libgen": "https://libgen.gl/ads.php?md5={md5}", + "welib": "https://welib.org/md5/{md5}", +} + +def _get_source_priority() -> list[dict]: + """Get the current source priority configuration.""" + return config.get("SOURCE_PRIORITY") or [] + + +def _is_source_enabled(source_id: str) -> bool: + """Check if a source is enabled in the priority config.""" + for item in _get_source_priority(): + if item["id"] == source_id: + return item.get("enabled", True) + return False # Unknown sources are disabled + + +def _get_enabled_source_order() -> list[str]: + """Get ordered list of enabled source IDs.""" + return [ + item["id"] + for item in _get_source_priority() + if item.get("enabled", True) + ] + + +def _get_source_position(source_id: str) -> int: + """Get the position of a source in the priority list (lower = higher priority). + + Returns a high number if source not found or disabled. + """ + priority = _get_source_priority() + for i, item in enumerate(priority): + if item["id"] == source_id and item.get("enabled", True): + return i + return 999 # Not found or disabled + class SearchUnavailable(Exception): """Raised when Anna's Archive cannot be reached via any mirror/DNS.""" @@ -90,7 +136,7 @@ def search_books(query: str, filters: SearchFilters) -> List[BookInfo]: if value != "all": filters_query += f"&lang={quote(value)}" - if filters.sort: + if filters.sort and filters.sort != "relevance": filters_query += f"&sort={quote(filters.sort)}" if filters.content: @@ -225,8 +271,6 @@ def _parse_book_info_page(soup: BeautifulSoup, book_id: str) -> BookInfo: slow_urls_no_waitlist: list[str] = [] slow_urls_with_waitlist: list[str] = [] - external_urls_libgen: list[str] = [] - external_urls_z_lib: list[str] = [] def _append_unique(lst: list[str], href: str) -> None: if href and href not in lst: @@ -245,43 +289,30 @@ def _parse_book_info_page(soup: BeautifulSoup, book_id: str) -> BookInfo: _append_unique(slow_urls_no_waitlist, href) else: _append_unique(slow_urls_with_waitlist, href) - elif 'libgen.li' in href: - libgen_url = re.sub(r'libgen\.(li|lc|is|bz|st)', 'libgen.gl', href) - _append_unique(external_urls_libgen, libgen_url) - elif text.startswith("z-lib") and ".onion/" not in href: - _append_unique(external_urls_z_lib, href) except Exception: pass logger.debug( - "Source inventory for %s -> aa_no_wait=%d, aa_wait=%d, libgen=%d, zlib=%d", + "Source inventory for %s -> aa_no_wait=%d, aa_wait=%d", book_id, len(slow_urls_no_waitlist), len(slow_urls_with_waitlist), - len(external_urls_libgen), - len(external_urls_z_lib), ) + # Convert to absolute URLs and tag by source type + base_url = network.get_aa_base_url() urls = [] - # Z-Library disabled - download tokens are session-bound - urls += slow_urls_no_waitlist if config.USE_CF_BYPASS else [] - urls += external_urls_libgen - urls += slow_urls_with_waitlist if config.USE_CF_BYPASS else [] - - for i in range(len(urls)): - urls[i] = downloader.get_absolute_url(network.get_aa_base_url(), urls[i]) - - urls = [url for url in urls if url != ""] - - base_url = network.get_aa_base_url() for rel_url in slow_urls_no_waitlist: abs_url = downloader.get_absolute_url(base_url, rel_url) if abs_url: + urls.append(abs_url) _url_source_types[abs_url] = "aa-slow-nowait" + for rel_url in slow_urls_with_waitlist: abs_url = downloader.get_absolute_url(base_url, rel_url) if abs_url: + urls.append(abs_url) _url_source_types[abs_url] = "aa-slow-wait" original_divs = divs @@ -475,12 +506,136 @@ def _friendly_source_name(link: str) -> str: return _get_source_info(link)[1] +def _fetch_aa_page_urls(book_info: BookInfo, urls_by_source: dict[str, list[str]]) -> None: + """Fetch and parse AA page, populating urls_by_source dict. + + Groups existing book_info.download_urls by source type. If book_info + has no URLs, fetches the AA page fresh. + """ + # If book_info already has URLs, group them by source type + if book_info.download_urls: + for url in book_info.download_urls: + source_type = _url_source_types.get(url) + if source_type: + if source_type not in urls_by_source: + urls_by_source[source_type] = [] + urls_by_source[source_type].append(url) + return + + # Otherwise fetch the page fresh + try: + fresh_book_info = get_book_info(book_info.id) + for url in fresh_book_info.download_urls: + source_type = _url_source_types.get(url) + if source_type: + if source_type not in urls_by_source: + urls_by_source[source_type] = [] + urls_by_source[source_type].append(url) + except Exception as e: + logger.warning(f"Failed to fetch AA page: {e}") + + +def _get_urls_for_source( + source_id: str, + book_info: BookInfo, + selector: network.AAMirrorSelector, + cancel_flag: Optional[Event], + status_callback: Optional[Callable[[str, Optional[str]], None]], + urls_by_source: dict[str, list[str]], + aa_page_fetched: bool +) -> list[str]: + """Get URLs for a specific source, fetching lazily if needed.""" + # AA Fast - generate URL dynamically + if source_id == "aa-fast": + if not config.AA_DONATOR_KEY: + return [] + url = f"{network.get_aa_base_url()}/dyn/api/fast_download.json?md5={book_info.id}&key={config.AA_DONATOR_KEY}" + _url_source_types[url] = "aa-fast" + return [url] + + # MD5-based sources - generate URL from template + if source_id in _MD5_URL_TEMPLATES: + url = _MD5_URL_TEMPLATES[source_id].format(md5=book_info.id) + _url_source_types[url] = source_id + return [url] + + # Welib - fetch page and parse for slow_download links + if source_id == "welib": + if status_callback: + status_callback("resolving", "Fetching welib sources...") + return _get_download_urls_from_welib(book_info.id, selector=selector, cancel_flag=cancel_flag) + + # AA page sources - fetch AA page if not already done + if source_id in _AA_PAGE_SOURCES: + if not aa_page_fetched and not urls_by_source: + if status_callback: + status_callback("resolving", "Fetching download sources...") + _fetch_aa_page_urls(book_info, urls_by_source) + + return urls_by_source.get(source_id, []) + + return [] + + +def _try_download_url( + url: str, + source_id: str, + book_info: BookInfo, + book_path: Path, + progress_callback: Optional[Callable[[float], None]], + cancel_flag: Optional[Event], + status_callback: Optional[Callable[[str, Optional[str]], None]], + selector: network.AAMirrorSelector, + source_context: str +) -> Optional[str]: + """Attempt to download from a single URL. + + Returns: download URL on success, None on failure. + """ + try: + logger.info(f"Trying download source [{source_id}]: {url}") + + if status_callback: + status_callback("resolving", f"Trying {source_context}") + + download_url = _get_download_url(url, book_info.title, cancel_flag, status_callback, selector, source_context) + if not download_url: + raise Exception("No download URL resolved") + + logger.info(f"Resolved download URL [{source_id}]: {download_url}") + + data = downloader.download_url( + download_url, book_info.size or "", + progress_callback, cancel_flag, selector, + status_callback, referer=url + ) + + if not data: + raise Exception("No data received from download") + + file_size = data.tell() + if file_size < _MIN_VALID_FILE_SIZE: + logger.warning(f"Downloaded file too small ({file_size} bytes), likely an error page") + raise Exception(f"File too small ({file_size} bytes)") + + logger.debug(f"Download finished ({file_size} bytes). Writing to {book_path}") + data.seek(0) + with open(book_path, "wb") as f: + f.write(data.getbuffer()) + + return download_url + + except Exception as e: + logger.warning(f"Failed to download from {url} (source={source_id}): {e}") + return None + + def _get_download_urls_from_welib(book_id: str, selector: Optional[network.AAMirrorSelector] = None, cancel_flag: Optional[Event] = None) -> list[str]: """Get download URLs from welib.org (bypasser required).""" - if not config.ALLOW_USE_WELIB: + if not _is_source_enabled("welib"): return [] - url = f"https://welib.org/md5/{book_id}" - logger.info(f"Fetching welib.org download URLs for {book_id}") + url = _MD5_URL_TEMPLATES["welib"].format(md5=book_id) + logger.info(f"Fetching welib download URLs for {book_id}") try: html = downloader.html_get_page(url, use_bypasser=True, selector=selector or network.AAMirrorSelector(), cancel_flag=cancel_flag) except Exception as exc: @@ -506,167 +661,88 @@ def _download_book( cancel_flag: Optional[Event] = None, status_callback: Optional[Callable[[str, Optional[str]], None]] = None ) -> Optional[str]: - """Download a book from available sources. + """Download a book using sources in configured priority order. - Args: - book_info: Book information with download URLs - book_path: Path to save the downloaded file - progress_callback: Optional callback for download progress updates - cancel_flag: Optional cancellation flag - status_callback: Optional callback for status updates (status, message) - - Returns: - str: Download URL if successful, None otherwise + Returns: Download URL if successful, None otherwise. """ selector = network.AAMirrorSelector() - - if len(book_info.download_urls) == 0: - book_info = get_book_info(book_info.id) - download_links = list(book_info.download_urls) - - # If config.AA_DONATOR_KEY is set, use the fast download URL. Else try other sources. - # Use truthiness check to handle both None and empty string - if config.AA_DONATOR_KEY: - download_links.insert( - 0, - f"{network.get_aa_base_url()}/dyn/api/fast_download.json?md5={book_info.id}&key={config.AA_DONATOR_KEY}", - ) - - # Preserve order but drop duplicates to avoid retrying the same host - download_links = list(dict.fromkeys(download_links)) - - # Round-robin rotation for AA slow download URLs to distribute load across mirrors - # This prevents all concurrent downloads from hitting the same partner server first - # Rotate aa-slow-nowait and aa-slow-wait independently to preserve priority ordering - rotation_value = next(_aa_slow_rotation) - - def _rotate_category_in_place(links: list, source_type: str) -> int: - """Rotate URLs of a specific source type within the list, preserving their positions.""" - indices = [i for i, u in enumerate(links) if _url_source_types.get(u) == source_type] - if len(indices) <= 1: - return 0 - rotation = rotation_value % len(indices) - if rotation == 0: - return 0 - # Extract values, rotate, put back - values = [links[i] for i in indices] - rotated = values[rotation:] + values[:rotation] - for idx, val in zip(indices, rotated): - links[idx] = val - return rotation - - nowait_rotation = _rotate_category_in_place(download_links, "aa-slow-nowait") - wait_rotation = _rotate_category_in_place(download_links, "aa-slow-wait") - - if nowait_rotation or wait_rotation: - logger.info(f"AA source rotation: nowait={nowait_rotation}, wait={wait_rotation}") - - links_queue = download_links - - # Fetch welib URLs upfront when prioritized - welib_fallback_loaded = "welib" in DEBUG_SKIP_SOURCES # Skip welib entirely if in debug skip list - if config.USE_CF_BYPASS and config.PRIORITIZE_WELIB and config.ALLOW_USE_WELIB and not welib_fallback_loaded: - logger.info("Fetching welib.org download URLs (config.PRIORITIZE_WELIB enabled)") - if status_callback: - status_callback("resolving", "Fetching welib sources...") - welib_links = _get_download_urls_from_welib(book_info.id, selector=selector, cancel_flag=cancel_flag) - if welib_links: - links_queue = welib_links + [l for l in links_queue if l not in welib_links] - welib_fallback_loaded = True - - total_sources = len(links_queue) - - # Handle case where no download sources are available - if total_sources == 0: - logger.warning(f"No download sources available for: {book_info.title}") - if status_callback: - status_callback("error", "No download sources found") - return None - - # Track consecutive failures per source type to skip after threshold source_failures: dict[str, int] = {} - # Iterate with index so we can append welib links later - idx = 0 - while idx < len(links_queue): - link = links_queue[idx] - source_label = _label_source(link) - friendly_name = _friendly_source_name(link) + urls_by_source: dict[str, list[str]] = {} + aa_page_fetched = False + url_attempt_counter = 0 + + # Get enabled sources in priority order + priority = [s for s in _get_source_priority() if s.get("enabled", True)] + + for source_config in priority: + source_id = source_config["id"] + + if cancel_flag and cancel_flag.is_set(): + return None # Debug: skip sources for testing fallback chains - if source_label in DEBUG_SKIP_SOURCES: - logger.info("DEBUG_SKIP_SOURCES: skipping %s (%s)", source_label, link) - idx += 1 + if source_id in DEBUG_SKIP_SOURCES: + logger.info("DEBUG_SKIP_SOURCES: skipping %s", source_id) continue - # Skip source types that have failed too many times - if source_failures.get(source_label, 0) >= _SOURCE_FAILURE_THRESHOLD: - logger.info("Skipping %s - source type '%s' failed %d times", link, source_label, _SOURCE_FAILURE_THRESHOLD) - idx += 1 + # Skip if source requires CF bypass and it's not enabled + if source_id in _CF_BYPASS_REQUIRED and not config.USE_CF_BYPASS: + logger.debug(f"Skipping {source_id} - requires CF bypass") continue - try: - current_pos = idx + 1 - # Update total if we added more sources - total_sources = len(links_queue) - - logger.info("Trying download source [%s]: %s (%d/%d)", source_label, link, current_pos, total_sources) - - # Build source context for status messages (e.g., "Welib (1/12)") - source_context = f"{friendly_name} (Server #{current_pos})" - - # Update status with simple message showing which source we're trying - if status_callback: - status_callback("resolving", f"Trying {source_context}") - - download_url = _get_download_url(link, book_info.title, cancel_flag, status_callback, selector, source_context) - if download_url == "": - raise Exception("No download URL resolved") - - logger.info("Resolved download URL [%s]: %s", source_label, download_url) - - # Pass source page as referer (required by some sites) - data = downloader.download_url(download_url, book_info.size or "", progress_callback, cancel_flag, selector, status_callback, referer=link) - if not data: - raise Exception("No data received from download") - - # Validate file size - reject suspiciously small files - file_size = data.tell() - if file_size < _MIN_VALID_FILE_SIZE: - logger.warning(f"Downloaded file too small ({file_size} bytes), likely an error page") - raise Exception(f"File too small ({file_size} bytes)") - - logger.debug(f"Download finished ({file_size} bytes). Writing to {book_path}") - data.seek(0) # Reset buffer position before writing - with open(book_path, "wb") as f: - f.write(data.getbuffer()) - return download_url - - except Exception as e: - logger.warning(f"Failed to download from {link} (source={source_label}): {e}") - source_failures[source_label] = source_failures.get(source_label, 0) + 1 - idx += 1 - # If we exhausted primary links and haven't loaded welib yet, fetch them lazily - if ( - idx >= len(links_queue) - and not welib_fallback_loaded - and config.USE_CF_BYPASS - and config.ALLOW_USE_WELIB - ): - welib_selector = selector # reuse AA mirror selector for consistency - welib_links = _get_download_urls_from_welib(book_info.id, selector=welib_selector, cancel_flag=cancel_flag) - welib_fallback_loaded = True - if welib_links: - new_links = [wl for wl in welib_links if wl not in links_queue] - if new_links: - logger.info("Adding welib fallback links (%d)", len(new_links)) - links_queue.extend(new_links) - # continue loop to try newly added links + # Skip if source has failed too many times + if source_failures.get(source_id, 0) >= _SOURCE_FAILURE_THRESHOLD: + logger.debug(f"Skipping {source_id} - too many failures") continue - # All sources exhausted - report final error to UI + # Get URLs for this source (lazy-loads as needed) + urls_to_try = _get_urls_for_source( + source_id, book_info, selector, cancel_flag, status_callback, + urls_by_source, aa_page_fetched + ) + + # Track if we fetched AA page + if source_id in _AA_PAGE_SOURCES and not aa_page_fetched: + aa_page_fetched = bool(urls_by_source) + + if not urls_to_try: + continue + + # Apply round-robin rotation if multiple URLs + if len(urls_to_try) > 1: + rotation_value = next(_aa_slow_rotation) + rotation = rotation_value % len(urls_to_try) + urls_to_try = urls_to_try[rotation:] + urls_to_try[:rotation] + if rotation: + logger.debug(f"Rotated {source_id} URLs by {rotation}") + + # Try each URL for this source + for url in urls_to_try: + if cancel_flag and cancel_flag.is_set(): + return None + + url_attempt_counter += 1 + friendly_name = _friendly_source_name(url) + source_context = f"{friendly_name} (Server #{url_attempt_counter})" + + result = _try_download_url( + url, source_id, book_info, book_path, + progress_callback, cancel_flag, status_callback, selector, + source_context + ) + + if result: + return result + + source_failures[source_id] = source_failures.get(source_id, 0) + 1 + + # Check if we've hit the failure threshold + if source_failures[source_id] >= _SOURCE_FAILURE_THRESHOLD: + logger.info(f"Source {source_id} hit failure threshold, moving to next source") + break + if status_callback: - status_callback("error", f"All {len(links_queue)} sources failed") - + status_callback("error", "All sources failed") return None @@ -705,6 +781,13 @@ def _get_download_url( # Z-Library if link.startswith("https://z-lib."): dl = soup.find("a", href=True, class_="addDownloadedBook") + if not dl: + # Retry after delay if page not fully loaded + time.sleep(2) + html = downloader.html_get_page(link, selector=sel, cancel_flag=cancel_flag) + if html: + soup = BeautifulSoup(html, "html.parser") + dl = soup.find("a", href=True, class_="addDownloadedBook") url = dl["href"] if dl else "" # AA slow download / partner servers @@ -963,9 +1046,9 @@ class DirectDownloadHandler(DownloadHandler): """ Handler for direct HTTP downloads from Anna's Archive, Libgen, etc. - Receives a DownloadTask with task_id (AA MD5 hash) and fetches the - book page internally to get download URLs, then cascades through - fallback sources (AA Fast → AA Slow → Libgen → Welib → Z-Lib). + Receives a DownloadTask with task_id (AA MD5 hash) and cascades through + sources in priority order. The AA page is only fetched if AA slow sources + are enabled in the user's source priority configuration. """ def download( @@ -978,8 +1061,8 @@ class DirectDownloadHandler(DownloadHandler): """ Execute a direct HTTP download. - Uses task.task_id to fetch the book page from Anna's Archive, - extract download URLs, and cascade through fallback sources. + Uses task.task_id (AA MD5 hash) to cascade through sources in priority + order. The AA page is only fetched if AA slow sources are enabled. Args: task: Download task with task_id (AA MD5 hash) @@ -996,19 +1079,18 @@ class DirectDownloadHandler(DownloadHandler): logger.info(f"Download cancelled before starting: {task.task_id}") return None - # Fetch book info from Anna's Archive using task_id - status_callback("resolving", "Fetching book details...") - book_info = get_book_info(task.task_id) + # Create BookInfo from task data - NO AA page fetch here + # AA page is fetched lazily by _fetch_aa_page_urls only when + # we actually reach an AA slow source in the priority order + book_info = BookInfo( + id=task.task_id, + title=task.title, + author=task.author, + format=task.format, + size=task.size, + preview=task.preview, + ) - if not book_info: - status_callback("error", "Could not fetch book details") - return None - - if not book_info.download_urls: - status_callback("error", "No download sources found") - return None - - # Execute the download with the fetched book info return self._execute_download( book_info, cancel_flag, diff --git a/src/frontend/src/App.tsx b/src/frontend/src/App.tsx index 9dac19f..ec28f16 100644 --- a/src/frontend/src/App.tsx +++ b/src/frontend/src/App.tsx @@ -89,7 +89,6 @@ function App() { handleSearch, handleResetSearch, handleSortChange, - resetSortFilter, searchFieldValues, updateSearchFieldValue, } = useSearch({ @@ -157,6 +156,10 @@ function App() { if (!prevQueued[bookId]) { const book = currQueued[bookId]; showToast(`${book.title || 'Book'} added to queue`, 'info'); + // Auto-open downloads sidebar if enabled + if (config?.auto_open_downloads_sidebar !== false) { + setDownloadsSidebarOpen(true); + } } }); @@ -181,6 +184,16 @@ function App() { const book = currComplete[bookId]; showToast(`${book.title || 'Book'} completed`, 'success'); + // Auto-download to browser if enabled + if (config?.download_to_browser && book.download_path) { + const link = document.createElement('a'); + link.href = `/api/localdownload?id=${encodeURIComponent(bookId)}`; + link.download = ''; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + } + // Track completed release IDs in session state for universal mode Object.entries(bookToReleaseMap).forEach(([metadataBookId, releaseIds]) => { if (releaseIds.includes(bookId)) { @@ -199,7 +212,7 @@ function App() { showToast(`${book.title || 'Book'}: ${errorMsg}`, 'error'); } }); - }, [showToast, bookToReleaseMap, markBookCompleted]); + }, [showToast, bookToReleaseMap, markBookCompleted, config]); // Detect status changes when currentStatus updates useEffect(() => { @@ -218,30 +231,37 @@ function App() { if (mode === 'settings-saved' && prevSearchModeRef.current !== cfg.search_mode) { setBooks([]); setSelectedBook(null); - resetSortFilter(); clearTracking(); } prevSearchModeRef.current = cfg.search_mode; setConfig(cfg); + // Determine the default sort based on search mode + const defaultSort = cfg.search_mode === 'universal' + ? (cfg.metadata_default_sort || 'relevance') + : (cfg.default_sort || 'relevance'); + if (cfg?.supported_formats) { if (mode === 'initial') { setAdvancedFilters(prev => ({ ...prev, formats: cfg.supported_formats, + sort: defaultSort, })); } else if (mode === 'settings-saved') { + // On settings save, update formats and reset sort to new default setAdvancedFilters(prev => ({ ...prev, formats: prev.formats.filter(f => cfg.supported_formats.includes(f)), + sort: defaultSort, })); } } } catch (error) { console.error('Failed to load config:', error); } - }, [setBooks, setAdvancedFilters, resetSortFilter, clearTracking]); + }, [setBooks, setAdvancedFilters, clearTracking]); // Fetch config when authenticated useEffect(() => { diff --git a/src/frontend/src/components/resultsViews/ListView.tsx b/src/frontend/src/components/resultsViews/ListView.tsx index 40e2109..925f625 100644 --- a/src/frontend/src/components/resultsViews/ListView.tsx +++ b/src/frontend/src/components/resultsViews/ListView.tsx @@ -92,6 +92,10 @@ export const ListView = ({ books, onDetails, onDownload, onGetReleases, getButto : getButtonState(book.id); const isLoadingDetails = detailsLoadingId === book.id; + // Compute color styles for direct mode badges + const languageColor = getLanguageColor(book.language); + const formatColor = getFormatColor(book.format); + return (
{book.language || '-'} @@ -185,7 +189,7 @@ export const ListView = ({ books, onDetails, onDownload, onGetReleases, getButto {searchMode !== 'universal' && (
{book.format || '-'} diff --git a/src/frontend/src/components/settings/SettingsContent.tsx b/src/frontend/src/components/settings/SettingsContent.tsx index e04e158..ad44341 100644 --- a/src/frontend/src/components/settings/SettingsContent.tsx +++ b/src/frontend/src/components/settings/SettingsContent.tsx @@ -9,6 +9,8 @@ import { CheckboxFieldConfig, SelectFieldConfig, MultiSelectFieldConfig, + OrderableListFieldConfig, + OrderableListItem, ActionButtonConfig, HeadingFieldConfig, } from '../../types/settings'; @@ -20,6 +22,7 @@ import { CheckboxField, SelectField, MultiSelectField, + OrderableListField, ActionButton, HeadingField, } from './fields'; @@ -39,11 +42,6 @@ function isFieldVisible( field: SettingsField, values: Record ): boolean { - // HeadingField doesn't have showWhen - if (field.type === 'HeadingField') { - return true; - } - const showWhen = field.showWhen; if (!showWhen) return true; @@ -161,6 +159,15 @@ const renderField = ( disabled={isDisabled} /> ); + case 'OrderableListField': + return ( + + ); case 'ActionButton': return ; case 'HeadingField': diff --git a/src/frontend/src/components/settings/SettingsModal.tsx b/src/frontend/src/components/settings/SettingsModal.tsx index 9122bba..c42f1d5 100644 --- a/src/frontend/src/components/settings/SettingsModal.tsx +++ b/src/frontend/src/components/settings/SettingsModal.tsx @@ -1,4 +1,4 @@ -import { useEffect, useState, useCallback } from 'react'; +import { useEffect, useState, useCallback, useRef } from 'react'; import { useSettings } from '../../hooks/useSettings'; import { SettingsHeader } from './SettingsHeader'; import { SettingsSidebar } from './SettingsSidebar'; @@ -32,6 +32,9 @@ export const SettingsModal = ({ isOpen, onClose, onShowToast, onSettingsSaved }: const [showMobileDetail, setShowMobileDetail] = useState(false); const [isClosing, setIsClosing] = useState(false); + // Track previous isOpen state to detect modal open transition + const prevIsOpenRef = useRef(false); + // Check for mobile viewport useEffect(() => { const checkMobile = () => { @@ -87,12 +90,16 @@ export const SettingsModal = ({ isOpen, onClose, onShowToast, onSettingsSaved }: } }, [isOpen]); - // On desktop, select first tab when modal first opens (only if no tab selected) + // Reset to first tab when modal transitions from closed to open useEffect(() => { - if (isOpen && !isMobile && tabs.length > 0 && !selectedTab) { + const justOpened = isOpen && !prevIsOpenRef.current; + prevIsOpenRef.current = isOpen; + + // On desktop, select first tab when modal opens (reset on each open) + if (justOpened && !isMobile && tabs.length > 0) { setSelectedTab(tabs[0].name); } - }, [isOpen, isMobile, tabs, selectedTab, setSelectedTab]); + }, [isOpen, isMobile, tabs, setSelectedTab]); const handleSelectTab = useCallback( (tabName: string) => { diff --git a/src/frontend/src/components/settings/fields/OrderableListField.tsx b/src/frontend/src/components/settings/fields/OrderableListField.tsx new file mode 100644 index 0000000..9a7adf9 --- /dev/null +++ b/src/frontend/src/components/settings/fields/OrderableListField.tsx @@ -0,0 +1,326 @@ +import { useState, useRef } from 'react'; +import { + OrderableListFieldConfig, + OrderableListItem, + OrderableListOption, +} from '../../../types/settings'; + +interface OrderableListFieldProps { + field: OrderableListFieldConfig; + value: OrderableListItem[]; + onChange: (value: OrderableListItem[]) => void; + disabled?: boolean; +} + +// Represents where the drop indicator should appear +type DropPosition = { index: number; position: 'before' | 'after' } | null; + +/** + * Merge current value with options to get full item info. + * Items in value take precedence; any options not in value are appended. + */ +const mergeValueWithOptions = ( + value: OrderableListItem[], + options: OrderableListOption[] +): Array => { + const optionsMap = new Map(options.map((opt) => [opt.id, opt])); + const result: Array = []; + + // Add items from value (preserves order) + for (const item of value) { + const option = optionsMap.get(item.id); + if (option) { + result.push({ ...option, ...item }); + optionsMap.delete(item.id); + } + } + + // Add any remaining options not in value (shouldn't happen normally) + for (const option of optionsMap.values()) { + result.push({ ...option, id: option.id, enabled: false }); + } + + return result; +}; + +export const OrderableListField = ({ + field, + value, + onChange, + disabled, +}: OrderableListFieldProps) => { + const isDisabled = disabled ?? false; + const [draggedIndex, setDraggedIndex] = useState(null); + const [dropPosition, setDropPosition] = useState(null); + const dragNodeRef = useRef(null); + + const items = mergeValueWithOptions(value ?? [], field.options); + + const handleDragStart = (e: React.DragEvent, index: number) => { + if (isDisabled) return; + setDraggedIndex(index); + dragNodeRef.current = e.currentTarget as HTMLDivElement; + e.dataTransfer.effectAllowed = 'move'; + e.dataTransfer.setData('text/plain', String(index)); + // Add a slight delay before adding the dragging class for better visual feedback + requestAnimationFrame(() => { + if (dragNodeRef.current) { + dragNodeRef.current.classList.add('opacity-50'); + } + }); + }; + + const handleDragEnd = () => { + if (dragNodeRef.current) { + dragNodeRef.current.classList.remove('opacity-50'); + } + setDraggedIndex(null); + setDropPosition(null); + dragNodeRef.current = null; + }; + + const handleDragOver = (e: React.DragEvent, index: number) => { + e.preventDefault(); + if (draggedIndex === null || draggedIndex === index) { + setDropPosition(null); + return; + } + + // Determine if we're in the top or bottom half of the target + const rect = e.currentTarget.getBoundingClientRect(); + const midpoint = rect.top + rect.height / 2; + const position = e.clientY < midpoint ? 'before' : 'after'; + + setDropPosition({ index, position }); + }; + + const handleDragLeave = (e: React.DragEvent) => { + // Only clear if we're leaving the item entirely (not entering a child) + const relatedTarget = e.relatedTarget as Node | null; + if (!e.currentTarget.contains(relatedTarget)) { + setDropPosition(null); + } + }; + + const handleDrop = (e: React.DragEvent) => { + e.preventDefault(); + if (draggedIndex === null || dropPosition === null) { + handleDragEnd(); + return; + } + + // Calculate the actual target index based on drop position + let targetIndex = dropPosition.index; + if (dropPosition.position === 'after') { + targetIndex += 1; + } + // Adjust if dragging from before the target + if (draggedIndex < targetIndex) { + targetIndex -= 1; + } + + if (draggedIndex === targetIndex) { + handleDragEnd(); + return; + } + + // Reorder the items + const newItems = [...items]; + const [removed] = newItems.splice(draggedIndex, 1); + newItems.splice(targetIndex, 0, removed); + + // Convert back to value format + const newValue: OrderableListItem[] = newItems.map((item) => ({ + id: item.id, + enabled: item.enabled, + })); + + onChange(newValue); + handleDragEnd(); + }; + + const toggleItem = (index: number) => { + if (isDisabled) return; + const item = items[index]; + if (item.isLocked) return; + + const newValue: OrderableListItem[] = items.map((it, i) => ({ + id: it.id, + enabled: i === index ? !it.enabled : it.enabled, + })); + + onChange(newValue); + }; + + const moveItem = (fromIndex: number, direction: 'up' | 'down') => { + if (isDisabled) return; + const toIndex = direction === 'up' ? fromIndex - 1 : fromIndex + 1; + if (toIndex < 0 || toIndex >= items.length) return; + + const newItems = [...items]; + [newItems[fromIndex], newItems[toIndex]] = [newItems[toIndex], newItems[fromIndex]]; + + const newValue: OrderableListItem[] = newItems.map((item) => ({ + id: item.id, + enabled: item.enabled, + })); + + onChange(newValue); + }; + + // Calculate which gap index to show the indicator at (0 = before first item, N = after last item) + const getDropGapIndex = (): number | null => { + if (!dropPosition) return null; + if (dropPosition.position === 'before') { + return dropPosition.index; + } else { + return dropPosition.index + 1; + } + }; + + const dropGapIndex = getDropGapIndex(); + + return ( +
+ {items.map((item, index) => { + const isDragging = draggedIndex === index; + const isItemDisabled = isDisabled || item.isLocked; + // Show indicator before this item if the gap index matches + const showIndicatorBefore = dropGapIndex === index; + + return ( +
+ {/* Drop indicator - absolutely positioned so it doesn't affect layout */} + {showIndicatorBefore && ( +
+ )} + +
handleDragStart(e, index)} + onDragEnd={handleDragEnd} + onDragOver={(e) => handleDragOver(e, index)} + onDragLeave={handleDragLeave} + onDrop={handleDrop} + className={` + flex items-center gap-3 p-3 rounded-lg border + transition-all duration-150 + ${isDragging ? 'opacity-50' : ''} + border-[var(--border-muted)] + ${isDisabled ? 'opacity-60' : 'hover:bg-[var(--hover-surface)]'} + `} + > + {/* Reorder Controls */} +
+ + +
+ + {/* Label and Description */} +
+
{item.label}
+ {item.description && ( +
+ {item.description} +
+ )} + {item.isLocked && item.disabledReason && ( +
+ + + + {item.disabledReason} +
+ )} +
+ + {/* Toggle Switch */} + {(() => { + // Locked items always show as "off" regardless of enabled state + const showAsEnabled = item.enabled && !item.isLocked; + return ( + + ); + })()} +
+
+ ); + })} + {/* Drop indicator after last item - use relative container with absolute indicator */} + {dropGapIndex === items.length && ( +
+
+
+ )} +
+ ); +}; diff --git a/src/frontend/src/components/settings/fields/index.ts b/src/frontend/src/components/settings/fields/index.ts index 484ad69..0589c43 100644 --- a/src/frontend/src/components/settings/fields/index.ts +++ b/src/frontend/src/components/settings/fields/index.ts @@ -4,5 +4,6 @@ export { NumberField } from './NumberField'; export { CheckboxField } from './CheckboxField'; export { SelectField } from './SelectField'; export { MultiSelectField } from './MultiSelectField'; +export { OrderableListField } from './OrderableListField'; export { ActionButton } from './ActionButton'; export { HeadingField } from './HeadingField'; diff --git a/src/frontend/src/types/index.ts b/src/frontend/src/types/index.ts index 40d4703..3e3503b 100644 --- a/src/frontend/src/types/index.ts +++ b/src/frontend/src/types/index.ts @@ -150,7 +150,11 @@ export interface AppConfig { metadata_sort_options: SortOption[]; metadata_search_fields: MetadataSearchField[]; default_release_source?: string; // Default tab in ReleaseModal (e.g., 'direct_download') + auto_open_downloads_sidebar: boolean; // Auto-open sidebar when download is queued + download_to_browser: boolean; // Auto-download completed files to browser settings_enabled: boolean; // Whether config directory is mounted and writable + default_sort: string; // Default sort for direct mode (Anna's Archive) + metadata_default_sort: string; // Default sort for universal mode (from metadata provider) } // Authentication types diff --git a/src/frontend/src/types/settings.ts b/src/frontend/src/types/settings.ts index 69461cb..8057ab6 100644 --- a/src/frontend/src/types/settings.ts +++ b/src/frontend/src/types/settings.ts @@ -7,6 +7,7 @@ export type FieldType = | 'CheckboxField' | 'SelectField' | 'MultiSelectField' + | 'OrderableListField' | 'ActionButton' | 'HeadingField'; @@ -82,6 +83,26 @@ export interface MultiSelectFieldConfig extends BaseField { options: SelectOption[]; } +// OrderableListField types - generic drag-and-drop reorderable list +export interface OrderableListItem { + id: string; + enabled: boolean; +} + +export interface OrderableListOption { + id: string; + label: string; + description?: string; + disabledReason?: string; // Explanation when item cannot be enabled + isLocked?: boolean; // Item cannot be toggled (e.g., missing dependency) +} + +export interface OrderableListFieldConfig extends BaseField { + type: 'OrderableListField'; + value: OrderableListItem[]; + options: OrderableListOption[]; +} + export interface ActionButtonConfig extends BaseField { type: 'ActionButton'; style: 'default' | 'primary' | 'danger'; @@ -94,6 +115,7 @@ export interface HeadingFieldConfig { description?: string; linkUrl?: string; linkText?: string; + showWhen?: ShowWhenCondition; // Conditional visibility based on another field's value } // Union type for all fields @@ -104,6 +126,7 @@ export type SettingsField = | CheckboxFieldConfig | SelectFieldConfig | MultiSelectFieldConfig + | OrderableListFieldConfig | ActionButtonConfig | HeadingFieldConfig; diff --git a/tor.sh b/tor.sh index b8a551a..8d8b5d5 100644 --- a/tor.sh +++ b/tor.sh @@ -115,17 +115,28 @@ cat <<'HC' > /app/tor_healthcheck.sh # Function to dynamically wait for Tor bootstrap wait_for_tor() { echo "$(date): Waiting for Tor to finish bootstrapping..." - sleep 30 - # Reuse the timeout logic from the main script - timeout 300 bash -c ' - while ! grep -q "Bootstrapped 100%" <(tail -n 20 -F /var/log/tor/notices.log 2>/dev/null); do - sleep 1 - done - ' - echo "$(date): Tor seems ready (log message found)." -} -# Wait for Tor to bootstrap initially + > /var/log/tor/notices.log 2>/dev/null || true + + sleep 10 + + TIMEOUT=300 + ELAPSED=0 + while [ $ELAPSED -lt $TIMEOUT ]; do + if grep -q "Bootstrapped 100%" /var/log/tor/notices.log 2>/dev/null; then + echo "$(date): Tor bootstrap complete." + return 0 + fi + sleep 5 + ELAPSED=$((ELAPSED + 5)) + # Show progress + CURRENT=$(tail -n 1 /var/log/tor/notices.log 2>/dev/null | grep -oP 'Bootstrapped \d+%' || echo "waiting...") + echo "$(date): Bootstrap progress: $CURRENT ($ELAPSED/${TIMEOUT}s)" + done + + echo "$(date): WARNING - Tor bootstrap timed out after ${TIMEOUT}s" + return 1 +} FAIL_COUNT=0 while true; do @@ -143,7 +154,7 @@ while true; do echo "$(date): restart trigger - Restarting Tor..." supervisorctl restart tor FAIL_COUNT=0 - + # Wait for it to come back using the dynamic check wait_for_tor fi @@ -251,71 +262,43 @@ else echo "[*] Falling back to container's default timezone: $TZ" fi -# Start a background health check process to monitor Tor -echo "[*] Starting Tor health check monitor..." +# Start a background circuit rotation process +echo "[*] Starting Tor circuit rotation monitor..." ( - check_count=0 - first_check=true + rotation_count=0 + + # Wait for initial stability + sleep 120 while true; do - if [ "$first_check" = true ]; then - sleep 60 - first_check=false - else - sleep 300 - fi + rotation_count=$((rotation_count + 1)) + echo "[*] Circuit rotation #$rotation_count at $(date)" - check_count=$((check_count + 1)) - echo "[*] Tor health check #$check_count at $(date)" - - # Check Tor service - if ! service tor status > /dev/null 2>&1; then - echo "[!] $(date): Tor service not running, restarting..." - service tor restart - sleep 10 - continue - fi - - # Test DNS + # Test DNS resolution through Tor if ! timeout 10 nslookup google.com 127.0.0.1 > /dev/null 2>&1; then - echo "[!] $(date): DNS resolution failed, reloading Tor..." - service tor reload - sleep 5 - if timeout 10 nslookup google.com 127.0.0.1 > /dev/null 2>&1; then - echo "[✓] $(date): DNS resolution restored" - else - echo "[✗] $(date): DNS still failing after reload, restarting Tor..." - service tor restart - sleep 10 - continue - fi - fi - - # Test TCP connectivity - if ! timeout 15 curl -s --max-time 10 https://check.torproject.org/api/ip > /dev/null 2>&1; then - echo "[!] $(date): TCP connectivity test failed, rotating circuits..." + echo "[!] $(date): DNS resolution slow/failing, rotating circuits..." pkill -HUP tor || true - sleep 5 - if ! timeout 15 curl -s --max-time 10 https://check.torproject.org/api/ip > /dev/null 2>&1; then - echo "[✗] $(date): TCP still failing after rotation, restarting Tor..." - service tor restart - sleep 10 - continue - else - echo "[✓] $(date): TCP connectivity restored after circuit rotation" - fi - else - echo "[✓] $(date): Health check passed (DNS + TCP OK)" + sleep 10 fi - # Rotate circuits - echo "[*] $(date): Rotating Tor circuits..." + # Proactively rotate circuits every 5 minutes to keep them fresh + echo "[*] $(date): Proactive circuit rotation..." pkill -HUP tor || true + + # Verify Tor is still responsive after rotation + sleep 5 + if timeout 10 curl -s --max-time 8 https://check.torproject.org/api/ip > /dev/null 2>&1; then + echo "[✓] $(date): Circuit rotation successful, Tor responsive" + else + echo "[!] $(date): Tor unresponsive after rotation - supervisor healthcheck will handle recovery" + fi + + sleep 300 done ) >> $LOG_FILE 2>&1 & -TOR_MONITOR_PID=$! -echo "[✓] Tor health check monitor started in background (PID: $TOR_MONITOR_PID)" +ROTATION_PID=$! +echo "[✓] Tor circuit rotation monitor started in background (PID: $ROTATION_PID)" # Run the entrypoint script echo "[*] End of tor script"