mirror of
https://github.com/calibrain/shelfmark.git
synced 2026-04-20 05:51:21 -04:00
## Changelog

### 🌐 Network Resilience
- **Auto DNS rotation**: New `CUSTOM_DNS=auto` mode (now default) starts with system DNS and automatically rotates through Cloudflare, Google, Quad9, and OpenDNS when failures are detected. DNS results are cached to improve performance.
- **Mirror failover**: Anna's Archive requests automatically fail over between mirrors (.org, .se, .li) when one is unreachable
- **Round-robin source distribution**: Concurrent downloads are distributed across different AA partner servers to avoid rate limiting

### 📥 Download Reliability
- **Much more reliable downloads**: Improved parsing of Anna's Archive pages, smarter source prioritization, and better retry logic with exponential backoff
- **Download resume support**: Interrupted downloads can now resume from where they left off (if the server supports Range requests)
- **Cookie sharing**: Cloudflare bypass cookies are extracted and shared with subsequent requests, often avoiding the need for re-bypass entirely
- **Stall detection**: Downloads with no progress for 5 minutes are automatically cancelled and retried
- **Staggered concurrent downloads**: Small delays are inserted between starting concurrent downloads to avoid hitting rate limits
- **Source failure tracking**: After multiple failures from the same source type (e.g., Libgen), that source is temporarily skipped
- **Lazy welib loading**: Welib sources are fetched as a fallback only when primary sources fail (unless `PRIORITIZE_WELIB` is enabled)

### 🛡️ Cloudflare & Protection Bypass
- **DDoS-Guard support**: Internal bypasser now detects and handles DDoS-Guard challenges with dedicated bypass strategies
- **Cancellation support**: Bypass operations can now be cancelled mid-operation when the user cancels a download
- **Smart warmup**: Chrome driver is pre-warmed when the first client connects (controlled by the `BYPASS_WARMUP_ON_CONNECT` env var) and shuts down after periods of inactivity

### 🔌 External Bypasser (FlareSolverr)
- **Improved resilience**: Retry with exponential backoff, mirror/DNS rotation on failure, and proper timeout handling
- **Cancellation support**: External bypasser operations respect cancellation flags

### 🖥️ Web UI Improvements
- **Simplified download status**: Removed intermediate states (bypassing, verifying, ingesting) — now just shows Queued → Resolving → Downloading → Complete
- **Status messages**: Downloads show detailed status messages such as "Trying Anna's Archive (Server 3)" or "Server busy, trying next...", as well as live waitlist countdowns.
- **Improved download sidebar**:
  - Downloads sorted by add time (newest first)
  - X button moved to top-right corner for better UX
  - Wave animation on in-progress items
  - Error messages shown directly on failed items
  - X button on completed/errored items clears them from the list

### ⚙️ Configuration Changes
- **`CUSTOM_DNS=auto`** is now the default (previously empty/system DNS)
- **`DOWNLOAD_PROGRESS_UPDATE_INTERVAL`** default changed from 5s to 1s for smoother progress
- **`BYPASS_WARMUP_ON_CONNECT`** (default: true) — warm up Chrome when the first client connects

### 🐛 Bug Fixes
- **Download cancellation actually works**: Fixed an issue where cancelling downloads didn't properly stop in-progress operations
- **WELIB prioritization**: Fixed `PRIORITIZE_WELIB` not being respected
- **File exists handling**: Downloads to the same filename now get a `_1`, `_2` suffix instead of overwriting
- **Empty search results**: "No books found" now returns an empty list instead of throwing an exception
- **Search unavailable error**: Network/mirror failures during search now return a proper 503 error to the client
432 lines
17 KiB
Python
432 lines
17 KiB
Python
"""Data structures and models used across the application."""
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import Dict, List, Optional, Tuple
|
|
from enum import Enum
|
|
from datetime import datetime, timedelta
|
|
from threading import Lock, Event
|
|
from pathlib import Path
|
|
import queue
|
|
import re
|
|
import time
|
|
from env import INGEST_DIR, STATUS_TIMEOUT
|
|
|
|
class QueueStatus(str, Enum):
    """Lifecycle states a book can be in while tracked by ``BookQueue``.

    Members also subclass ``str``, so each one compares equal to its
    lowercase string value (e.g. ``QueueStatus.QUEUED == "queued"``).
    """

    # Set by BookQueue.add() when the book is placed on the priority queue.
    QUEUED = "queued"

    # In-progress states: cancel_download() signals the cancel flag for these.
    RESOLVING = "resolving"
    DOWNLOADING = "downloading"

    # Finished states: update_status() drops active-download tracking for
    # each of the statuses below.
    COMPLETE = "complete"
    AVAILABLE = "available"  # refresh() turns this into DONE once the file path is gone
    ERROR = "error"
    DONE = "done"
    CANCELLED = "cancelled"
|
|
|
|
@dataclass
class QueueItem:
    """Entry stored in the priority queue: a book id plus its ordering keys."""

    book_id: str
    priority: int
    added_time: float

    def __lt__(self, other):
        """Return True when this item should be dequeued before ``other``.

        A smaller ``priority`` number wins; items sharing a priority are
        ordered by ``added_time`` (earlier first).
        """
        if self.priority == other.priority:
            # Tie on priority: fall back to insertion time (FIFO).
            return self.added_time < other.added_time
        return self.priority < other.priority
|
|
|
|
@dataclass
class BookInfo:
    """Data class representing book information.

    Only ``id`` and ``title`` are required. Every other field defaults to
    ``None`` (``download_urls`` to an empty list, ``priority`` to ``0``)
    until it is filled in.
    """

    id: str
    title: str
    preview: Optional[str] = None
    author: Optional[str] = None
    publisher: Optional[str] = None
    year: Optional[str] = None
    language: Optional[str] = None
    content: Optional[str] = None
    format: Optional[str] = None
    size: Optional[str] = None
    info: Optional[Dict[str, List[str]]] = None
    description: Optional[str] = None
    download_urls: List[str] = field(default_factory=list)
    download_path: Optional[str] = None
    priority: int = 0
    progress: Optional[float] = None
    status_message: Optional[str] = None  # Detailed status message for UI display
    added_time: Optional[float] = None  # Timestamp when added to queue

    @staticmethod
    def _extension_from_url(url: Optional[str]) -> Optional[str]:
        """Return the lowercase file extension found in ``url``'s path.

        Only the path component is inspected, so query strings, fragments and
        the host name cannot be mistaken for (or hide) an extension.

        Args:
            url: URL or plain file name; may be ``None`` or empty.

        Returns:
            The extension without the dot, or ``None`` when the last path
            segment has no extension of 1-5 alphanumeric characters.
        """
        from urllib.parse import urlsplit

        if not url:
            return None
        try:
            path = urlsplit(url).path
        except ValueError:
            # Malformed URL (e.g. an unbalanced IPv6 literal): treat as unknown.
            return None

        last_segment = path.rsplit("/", 1)[-1]
        _, dot, ext = last_segment.rpartition(".")
        ext = ext.lower()
        if dot and ext and len(ext) <= 5 and ext.isalnum():
            return ext
        return None

    def get_filename(self, fallback_url: Optional[str] = None) -> str:
        """Build sanitized filename: 'Author - Title (Year).format'

        Resolves format from self.format, download_urls, or fallback_url.
        When the format has to be derived from a URL it is also stored on
        ``self.format``.

        Args:
            fallback_url: URL to extract format from if not already known

        Returns:
            Sanitized filename safe for filesystem use
        """
        # Resolve format if needed: first download URL wins, then the fallback.
        if not self.format:
            first_url = self.download_urls[0] if self.download_urls else None
            for url in (first_url, fallback_url):
                ext = self._extension_from_url(url)
                if ext:
                    self.format = ext
                    break

        # Build filename
        parts = []
        if self.author:
            parts.append(self.author)
            parts.append(" - ")
        parts.append(self.title)
        if self.year:
            parts.append(f" ({self.year})")

        filename = "".join(parts)
        # Replace characters that are invalid on common filesystems and cap the
        # stem at 245 characters so there is room left for the extension.
        filename = re.sub(r'[\\/:*?"<>|]', '_', filename.strip())[:245]

        if self.format:
            filename = f"{filename}.{self.format}"

        return filename
|
|
|
|
class BookQueue:
    """Thread-safe book queue manager with priority support and cancellation.

    A ``queue.PriorityQueue`` of ``QueueItem`` decides processing order, while
    dictionaries keyed by book id hold the status, metadata, last-update
    timestamp, cancellation event and active-download marker of each book.

    Every public method acquires ``self._lock``. The underscore-prefixed
    helpers expect the caller to already hold it (the lock is not re-entrant).
    """

    # Statuses in which a book is being worked on and can be interrupted.
    _ACTIVE_STATUSES = frozenset({QueueStatus.RESOLVING, QueueStatus.DOWNLOADING})

    # Statuses after which a book is no longer queued or being worked on.
    _FINISHED_STATUSES = frozenset({
        QueueStatus.COMPLETE,
        QueueStatus.AVAILABLE,
        QueueStatus.ERROR,
        QueueStatus.DONE,
        QueueStatus.CANCELLED,
    })

    # Statuses from which add() accepts the same book id again.
    _REQUEUEABLE_STATUSES = frozenset({
        QueueStatus.ERROR,
        QueueStatus.DONE,
        QueueStatus.CANCELLED,
    })

    def __init__(self) -> None:
        self._queue: queue.PriorityQueue[QueueItem] = queue.PriorityQueue()
        self._lock = Lock()
        self._status: dict[str, QueueStatus] = {}
        self._book_data: dict[str, BookInfo] = {}
        self._status_timestamps: dict[str, datetime] = {}  # Track when each status was last updated
        self._status_timeout = timedelta(seconds=STATUS_TIMEOUT)  # STATUS_TIMEOUT is interpreted as seconds
        self._cancel_flags: dict[str, Event] = {}  # Cancellation flags for active downloads
        self._active_downloads: dict[str, bool] = {}  # Track currently downloading books

    # ------------------------------------------------------------------
    # Internal helpers (caller must hold self._lock)
    # ------------------------------------------------------------------

    def _update_status(self, book_id: str, status: QueueStatus) -> None:
        """Internal method to update status and timestamp."""
        self._status[book_id] = status
        self._status_timestamps[book_id] = datetime.now()

    def _forget(self, book_id: str) -> None:
        """Drop every piece of tracking state kept for ``book_id``."""
        self._status.pop(book_id, None)
        self._status_timestamps.pop(book_id, None)
        self._book_data.pop(book_id, None)
        self._cancel_flags.pop(book_id, None)
        self._active_downloads.pop(book_id, None)

    def _drain_queue(self) -> List[QueueItem]:
        """Pop and return every pending item, in dequeue order."""
        items: List[QueueItem] = []
        while True:
            try:
                items.append(self._queue.get_nowait())
            except queue.Empty:
                return items

    def _refill_queue(self, items: List[QueueItem]) -> None:
        """Put ``items`` (back) onto the priority queue."""
        for item in items:
            self._queue.put(item)

    def _discard_queued(self, book_id: str) -> None:
        """Remove every pending queue entry that belongs to ``book_id``."""
        self._refill_queue(
            [item for item in self._drain_queue() if item.book_id != book_id]
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def add(self, book_id: str, book_data: BookInfo, priority: int = 0) -> None:
        """Add a book to the queue with specified priority.

        The call is ignored when the book is already tracked in any state
        other than error, done or cancelled. ``book_data.priority`` and
        ``book_data.added_time`` are overwritten with the queueing values.

        Args:
            book_id: Unique identifier for the book
            book_data: Book information
            priority: Priority level (lower number = higher priority)
        """
        with self._lock:
            previous = self._status.get(book_id)
            if previous is not None:
                # Don't add if already exists and not in error/done/cancelled state
                if previous not in self._REQUEUEABLE_STATUSES:
                    return
                # Re-adding: make sure no leftover entry for this id survives,
                # otherwise the book would be handed out twice.
                self._discard_queued(book_id)

            added_time = time.time()
            book_data.priority = priority
            book_data.added_time = added_time
            self._queue.put(QueueItem(book_id, priority, added_time))
            self._book_data[book_id] = book_data
            self._update_status(book_id, QueueStatus.QUEUED)

    def get_next(self) -> Optional[Tuple[str, Event]]:
        """Get next book ID from queue with cancellation flag.

        Entries whose book is no longer in the ``QUEUED`` state (cancelled,
        cleared from tracking, or already picked up) are skipped. For the
        returned book a fresh cancellation ``Event`` is registered and the
        book is marked as an active download; its status is left unchanged.

        Returns:
            Tuple of (book_id, cancel_flag) or None if queue is empty
        """
        # Pop under the lock so a concurrent drain-and-refill (queue listing,
        # priority changes) can never make the queue look empty to us.
        with self._lock:
            # Iterative on purpose: many skipped entries must not recurse.
            while True:
                try:
                    queue_item = self._queue.get_nowait()
                except queue.Empty:
                    return None

                book_id = queue_item.book_id
                if self._status.get(book_id) != QueueStatus.QUEUED:
                    continue  # Stale entry, try next

                # Create cancellation flag for this download
                cancel_flag = Event()
                self._cancel_flags[book_id] = cancel_flag
                self._active_downloads[book_id] = True
                return book_id, cancel_flag

    def update_status(self, book_id: str, status: QueueStatus) -> None:
        """Update status of a book in the queue.

        Reaching a finished status (complete, available, error, done,
        cancelled) also drops the book's active-download marker and
        cancellation flag.
        """
        with self._lock:
            self._update_status(book_id, status)

            # Clean up active download tracking when finished
            if status in self._FINISHED_STATUSES:
                self._active_downloads.pop(book_id, None)
                self._cancel_flags.pop(book_id, None)

    def update_download_path(self, book_id: str, download_path: str) -> None:
        """Update the download path of a book in the queue (no-op if unknown)."""
        with self._lock:
            book = self._book_data.get(book_id)
            if book is not None:
                book.download_path = download_path

    def update_progress(self, book_id: str, progress: float) -> None:
        """Update download progress for a book (no-op if unknown)."""
        with self._lock:
            book = self._book_data.get(book_id)
            if book is not None:
                book.progress = progress

    def update_status_message(self, book_id: str, message: str) -> None:
        """Update detailed status message for a book (no-op if unknown)."""
        with self._lock:
            book = self._book_data.get(book_id)
            if book is not None:
                book.status_message = message

    def get_status(self) -> Dict[QueueStatus, Dict[str, BookInfo]]:
        """Get current queue status.

        Runs ``refresh()`` first, then groups the tracked books by status.

        Returns:
            A dict with one entry per ``QueueStatus`` member, each mapping
            book id to its ``BookInfo``. Status entries that have no book
            data are left out.
        """
        self.refresh()
        with self._lock:
            result: Dict[QueueStatus, Dict[str, BookInfo]] = {status: {} for status in QueueStatus}
            for book_id, status in self._status.items():
                if book_id in self._book_data:
                    result[status][book_id] = self._book_data[book_id]
            return result

    def get_queue_order(self) -> List[Dict[str, object]]:
        """Get current queue order for display.

        Returns:
            One dict per pending queue entry that still has book data, with
            the keys ``id``, ``title``, ``author``, ``priority``,
            ``added_time`` and ``status``, sorted by priority and then by
            time added.
        """
        with self._lock:
            # PriorityQueue cannot be iterated in place: drain it, then put
            # everything back.
            pending = self._drain_queue()
            self._refill_queue(pending)

            queue_items = []
            for item in pending:
                book_info = self._book_data.get(item.book_id)
                if book_info is None:
                    continue
                queue_items.append({
                    'id': item.book_id,
                    'title': book_info.title,
                    'author': book_info.author,
                    'priority': item.priority,
                    'added_time': item.added_time,
                    'status': self._status.get(item.book_id, QueueStatus.QUEUED),
                })

            return sorted(queue_items, key=lambda x: (x['priority'], x['added_time']))

    def cancel_download(self, book_id: str) -> bool:
        """Cancel a download or clear a completed/errored item.

        * resolving / downloading: the cancellation flag (if any) is set and
          the book is marked cancelled.
        * queued: the pending queue entry is removed and the book is marked
          cancelled.
        * complete / done / available / error / cancelled: the book is
          removed from tracking altogether.

        Args:
            book_id: Book identifier to cancel or clear

        Returns:
            bool: True if cancellation/clearing was successful, False when
            the book id is not tracked
        """
        with self._lock:
            current_status = self._status.get(book_id)

            if current_status in self._ACTIVE_STATUSES:
                # Signal active download to stop
                cancel_flag = self._cancel_flags.get(book_id)
                if cancel_flag is not None:
                    cancel_flag.set()
                self._update_status(book_id, QueueStatus.CANCELLED)
                return True

            if current_status == QueueStatus.QUEUED:
                # Actually remove the pending entry so it cannot be handed
                # out later if the book is cleared or queued again.
                self._discard_queued(book_id)
                self._update_status(book_id, QueueStatus.CANCELLED)
                return True

            if current_status in self._FINISHED_STATUSES:
                # Clear completed/errored/cancelled items from tracking
                self._forget(book_id)
                return True

            return False

    def set_priority(self, book_id: str, new_priority: int) -> bool:
        """Change the priority of a queued book.

        Args:
            book_id: Book identifier
            new_priority: New priority level (lower = higher priority)

        Returns:
            bool: True if priority was successfully changed; False when the
            book is not in the queued state or has no pending queue entry
        """
        with self._lock:
            if self._status.get(book_id) != QueueStatus.QUEUED:
                return False

            # Rebuild the queue, swapping in an entry with the new priority
            found = False
            rebuilt = []
            for item in self._drain_queue():
                if item.book_id == book_id:
                    item = QueueItem(book_id, new_priority, item.added_time)
                    found = True
                rebuilt.append(item)
            self._refill_queue(rebuilt)

            # Keep the book data in sync with the queue entry
            if found and book_id in self._book_data:
                self._book_data[book_id].priority = new_priority

            return found

    def reorder_queue(self, book_priorities: Dict[str, int]) -> bool:
        """Bulk reorder queue by setting new priorities.

        Ids in ``book_priorities`` that have no pending queue entry are
        ignored.

        Args:
            book_priorities: Dict mapping book_id to new priority

        Returns:
            bool: True if reordering was successful (always True)
        """
        with self._lock:
            rebuilt = []
            for item in self._drain_queue():
                if item.book_id in book_priorities:
                    new_priority = book_priorities[item.book_id]
                    item = QueueItem(item.book_id, new_priority, item.added_time)
                    # Update book data priority
                    if item.book_id in self._book_data:
                        self._book_data[item.book_id].priority = new_priority
                rebuilt.append(item)

            # Put all items back with updated priorities
            self._refill_queue(rebuilt)
            return True

    def get_active_downloads(self) -> List[str]:
        """Get list of currently active download book IDs."""
        with self._lock:
            return list(self._active_downloads)

    def has_pending_work(self) -> bool:
        """Check if there are any active downloads or queued items.

        This is useful for determining if the bypasser should stay active
        even when the UI is closed.

        Returns:
            bool: True if there are active downloads or queued items
        """
        with self._lock:
            if self._active_downloads:
                return True
            # Only QUEUED counts here, so cancelled items are excluded
            return any(
                status == QueueStatus.QUEUED for status in self._status.values()
            )

    def clear_completed(self) -> int:
        """Remove all completed, errored, or cancelled books from tracking.

        Returns:
            int: Number of books removed
        """
        with self._lock:
            # Collect first: the dict must not change size while iterating
            finished = [
                book_id
                for book_id, status in self._status.items()
                if status in self._FINISHED_STATUSES
            ]
            for book_id in finished:
                self._forget(book_id)
            return len(finished)

    def refresh(self) -> None:
        """Remove any books that are done downloading or have stale status.

        For every tracked book:

        * a ``download_path`` that no longer exists on disk is reset to None;
        * an ``AVAILABLE`` book without a download path becomes ``DONE``;
        * a book in a finished status whose last update is older than the
          status timeout is removed from tracking.

        Status entries that have no book data are tolerated and expire like
        any other entry.
        """
        with self._lock:
            current_time = datetime.now()

            # Create a list of items to remove to avoid modifying dict during iteration
            to_remove = []

            for book_id, status in self._status.items():
                # A status can exist without book data (e.g. a status update
                # arriving after the item was cleared), so don't index blindly.
                book = self._book_data.get(book_id)
                path = book.download_path if book is not None else None
                if book is not None and path and not Path(path).exists():
                    book.download_path = None
                    path = None

                # Check for completed downloads
                if status == QueueStatus.AVAILABLE and not path:
                    self._update_status(book_id, QueueStatus.DONE)

                # Check for stale status entries
                last_update = self._status_timestamps.get(book_id)
                if (
                    last_update
                    and (current_time - last_update) > self._status_timeout
                    and status in self._FINISHED_STATUSES
                ):
                    to_remove.append(book_id)

            # Remove stale entries
            for book_id in to_remove:
                self._forget(book_id)

    def set_status_timeout(self, hours: int) -> None:
        """Set the status timeout duration in hours."""
        with self._lock:
            self._status_timeout = timedelta(hours=hours)
|
|
|
|
|
|
# Global instance of BookQueue
|
|
# Created once at module import time.
book_queue = BookQueue()
|
|
|
|
@dataclass
class SearchFilters:
    """Optional filters for a book search.

    Every field defaults to ``None``. All fields hold a list of strings
    except ``sort``, which holds a single string.
    """

    # List-valued filters
    isbn: Optional[List[str]] = None
    author: Optional[List[str]] = None
    title: Optional[List[str]] = None
    lang: Optional[List[str]] = None

    # Single-valued sort key
    sort: Optional[str] = None

    # List-valued filters
    content: Optional[List[str]] = None
    format: Optional[List[str]] = None