mirror of
https://github.com/calibrain/shelfmark.git
synced 2026-04-19 21:39:17 -04:00
@@ -10,7 +10,7 @@ from bs4 import BeautifulSoup, Tag, NavigableString, ResultSet
|
||||
import downloader
|
||||
from logger import setup_logger
|
||||
from config import SUPPORTED_FORMATS, BOOK_LANGUAGE, AA_BASE_URL
|
||||
from env import AA_DONATOR_KEY, USE_CF_BYPASS, PRIORITIZE_WELIB
|
||||
from env import AA_DONATOR_KEY, USE_CF_BYPASS, PRIORITIZE_WELIB, ALLOW_USE_WELIB
|
||||
from models import BookInfo, SearchFilters
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
@@ -169,21 +169,6 @@ def _parse_book_info_page(soup: BeautifulSoup, book_id: str) -> BookInfo:
|
||||
|
||||
data = soup.find_all("div", {"class": "main-inner"})[0].find_next("div")
|
||||
divs = list(data.children)
|
||||
_details = divs[13].text.strip().lower().split(" · ")
|
||||
format = ""
|
||||
size = ""
|
||||
for f in _details:
|
||||
if format == "" and f.strip().lower() in SUPPORTED_FORMATS:
|
||||
format = f.strip().lower()
|
||||
if size == "" and any(u in f.strip().lower() for u in ["mb", "kb", "gb"]):
|
||||
size = f.strip().lower()
|
||||
|
||||
if format == "" or size == "":
|
||||
for f in _details:
|
||||
if f == "" and not " " in f.strip().lower():
|
||||
format = f.strip().lower()
|
||||
if size == "" and "." in f.strip().lower():
|
||||
size = f.strip().lower()
|
||||
|
||||
every_url = soup.find_all("a")
|
||||
slow_urls_no_waitlist = set()
|
||||
@@ -237,20 +222,49 @@ def _parse_book_info_page(soup: BeautifulSoup, book_id: str) -> BookInfo:
|
||||
# Remove empty urls
|
||||
urls = [url for url in urls if url != ""]
|
||||
|
||||
# Filter out divs that are not text
|
||||
original_divs = divs
|
||||
divs = [div.text.strip() for div in divs if div.text.strip() != ""]
|
||||
|
||||
separator_index = 6
|
||||
for i, div in enumerate(divs):
|
||||
if "·" in div.strip():
|
||||
separator_index = i
|
||||
break
|
||||
|
||||
_details = divs[separator_index].lower().split(" · ")
|
||||
format = ""
|
||||
size = ""
|
||||
for f in _details:
|
||||
if format == "" and f.strip().lower() in SUPPORTED_FORMATS:
|
||||
format = f.strip().lower()
|
||||
if size == "" and any(u in f.strip().lower() for u in ["mb", "kb", "gb"]):
|
||||
size = f.strip().lower()
|
||||
|
||||
if format == "" or size == "":
|
||||
for f in _details:
|
||||
if f == "" and not " " in f.strip().lower():
|
||||
format = f.strip().lower()
|
||||
if size == "" and "." in f.strip().lower():
|
||||
size = f.strip().lower()
|
||||
|
||||
|
||||
book_title = divs[separator_index-3].strip("🔍")
|
||||
|
||||
# Extract basic information
|
||||
book_info = BookInfo(
|
||||
id=book_id,
|
||||
preview=preview,
|
||||
title=divs[7].text.strip(),
|
||||
publisher=divs[11].text.strip(),
|
||||
author=divs[9].text.strip(),
|
||||
title=book_title,
|
||||
publisher=divs[separator_index-1],
|
||||
author=divs[separator_index-2],
|
||||
format=format,
|
||||
size=size,
|
||||
download_urls=urls,
|
||||
)
|
||||
|
||||
# Extract additional metadata
|
||||
info = _extract_book_metadata(divs[-6])
|
||||
info = _extract_book_metadata(original_divs[-6])
|
||||
book_info.info = info
|
||||
|
||||
# Set language and year from metadata if available
|
||||
@@ -262,6 +276,8 @@ def _parse_book_info_page(soup: BeautifulSoup, book_id: str) -> BookInfo:
|
||||
return book_info
|
||||
|
||||
def _get_download_urls_from_welib(book_id: str) -> set[str]:
|
||||
if ALLOW_USE_WELIB == False:
|
||||
return set()
|
||||
"""Get download urls from welib.org."""
|
||||
url = f"https://welib.org/md5/{book_id}"
|
||||
logger.info(f"Getting download urls from welib.org for {book_id}. While this uses the bypasser, it will not start downloading them yet.")
|
||||
|
||||
1
env.py
1
env.py
@@ -28,6 +28,7 @@ FLASK_PORT = int(os.getenv("FLASK_PORT", "8084"))
|
||||
DEBUG = string_to_bool(os.getenv("DEBUG", "false"))
|
||||
APP_ENV = os.getenv("APP_ENV", "N/A").lower()
|
||||
PRIORITIZE_WELIB = string_to_bool(os.getenv("PRIORITIZE_WELIB", "false"))
|
||||
ALLOW_USE_WELIB = string_to_bool(os.getenv("ALLOW_USE_WELIB", "true"))
|
||||
|
||||
# Version information from Docker build
|
||||
BUILD_VERSION = os.getenv("BUILD_VERSION", "N/A")
|
||||
|
||||
@@ -83,6 +83,7 @@ Note that if using TOR, the TZ will be calculated automatically based on IP.
|
||||
| `AA_DONATOR_KEY` | Optional Donator key for Anna's Archive fast download API | `` |
|
||||
| `USE_BOOK_TITLE` | Use book title as filename instead of ID | `false` |
|
||||
| `PRIORITIZE_WELIB` | When downloading, download from WELIB first instead of AA | `false` |
|
||||
| `ALLOW_USE_WELIB` | Allow using WELIB as a download source when a book is found there | `true` |
|
||||
|
||||
If you change `BOOK_LANGUAGE`, you can add multiple comma-separated languages, such as `en,fr,ru`.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user