mirror of
https://github.com/calibrain/shelfmark.git
synced 2026-05-19 11:34:53 -04:00
External CloudFlare resolver (#245)
Adding support for an external CloudFlare bypasser service and introducing a new Docker image build with a dedicated target. Key Changes - Added `cloudflare_bypasser_external.py` for external bypasser integration. - Updated Docker Compose files to support the new service. - Introduced a new Docker target for building a separate image for the external bypasser. - Refactored relevant modules to utilize the external bypasser when configured. - Documentation and configuration updates to reflect new options and Docker targets. Impact - Users can now choose between internal and external CloudFlare bypassing. - New Docker image and target streamline deployment of the external bypasser. - Improved modularity and maintainability. - No breaking changes for existing workflows. Testing - Manual and E2E tests performed for both bypasser modes. - Docker Compose setups and new image build verified for development and production. Notes Please review the new configuration options and Docker targets. Update your environment and deployment scripts as needed. Feedback and suggestions are welcome!
This commit is contained in:
committed by
GitHub
parent
c8f21b8f8d
commit
207cff96d3
@@ -26,6 +26,9 @@ jobs:
|
||||
- suffix: "-tor"
|
||||
target: cwa-bd-tor
|
||||
image_name_suffix: "-tor"
|
||||
- suffix: "-extbp"
|
||||
target: cwa-bd-extbp
|
||||
image_name_suffix: "-extbp"
|
||||
steps:
|
||||
- name: Get current date
|
||||
id: date
|
||||
|
||||
53
Dockerfile
53
Dockerfile
@@ -38,18 +38,10 @@ RUN apt-get update && \
|
||||
curl \
|
||||
# For entrypoint
|
||||
dumb-init \
|
||||
# For dumb display
|
||||
xvfb \
|
||||
# For screen recording
|
||||
ffmpeg \
|
||||
# For debug
|
||||
zip iputils-ping \
|
||||
# For user switching
|
||||
sudo \
|
||||
# --- Chromium Browser ---
|
||||
chromium-driver \
|
||||
# For tkinter (pyautogui)
|
||||
python3-tk && \
|
||||
sudo && \
|
||||
# Cleanup APT cache *after* all installs in this layer
|
||||
apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false && \
|
||||
apt-get clean && \
|
||||
@@ -67,17 +59,12 @@ WORKDIR /app
|
||||
|
||||
# Install Python dependencies using pip
|
||||
# Upgrade pip first, then copy requirements and install
|
||||
# Copying requirements.txt separately leverages build cache
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt && \
|
||||
# Copying requirements-base.txt separately leverages build cache
|
||||
COPY requirements-base.txt .
|
||||
RUN pip install --no-cache-dir -r requirements-base.txt && \
|
||||
# Clean root's pip cache
|
||||
rm -rf /root/.cache
|
||||
|
||||
# Add this line to grant read/execute permissions to others
|
||||
RUN chmod -R o+rx /usr/bin/chromium && \
|
||||
chmod -R o+rx /usr/bin/chromedriver && \
|
||||
chmod -R o+w /usr/local/lib/python3.10/site-packages/seleniumbase/drivers/
|
||||
|
||||
# Copy application code *after* dependencies are installed
|
||||
COPY . .
|
||||
|
||||
@@ -101,10 +88,34 @@ ENTRYPOINT ["/usr/bin/dumb-init", "--"]
|
||||
|
||||
FROM base AS cwa-bd
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
# For dumb display
|
||||
xvfb \
|
||||
# For screen recording
|
||||
ffmpeg \
|
||||
# --- Chromium ---
|
||||
chromium \
|
||||
# --- ChromeDriver ---
|
||||
chromium-driver \
|
||||
# For tkinter (pyautogui)
|
||||
python3-tk
|
||||
|
||||
# install additional dependencies
|
||||
COPY requirements-cwa-bd.txt .
|
||||
RUN pip install --no-cache-dir -r requirements-cwa-bd.txt && \
|
||||
# Clean root's pip cache
|
||||
rm -rf /root/.cache
|
||||
|
||||
# Add this line to grant read/execute permissions to others
|
||||
RUN chmod -R o+rx /usr/bin/chromium && \
|
||||
chmod -R o+rx /usr/bin/chromedriver && \
|
||||
chmod -R o+w /usr/local/lib/python3.10/site-packages/seleniumbase/drivers/
|
||||
|
||||
# Default command to run the application entrypoint script
|
||||
CMD ["/app/entrypoint.sh"]
|
||||
|
||||
FROM base AS cwa-bd-tor
|
||||
FROM cwa-bd AS cwa-bd-tor
|
||||
|
||||
ENV USING_TOR=true
|
||||
|
||||
@@ -124,3 +135,9 @@ RUN apt-get update && \
|
||||
|
||||
# Override the default command to run Tor
|
||||
CMD ["/app/entrypoint.sh"]
|
||||
|
||||
FROM base AS cwa-bd-extbp
|
||||
|
||||
ENV USING_EXTERNAL_BYPASSER=true
|
||||
|
||||
CMD ["/app/entrypoint.sh"]
|
||||
|
||||
7
app.py
7
app.py
@@ -13,7 +13,7 @@ import typing
|
||||
|
||||
from logger import setup_logger
|
||||
from config import _SUPPORTED_BOOK_LANGUAGE, BOOK_LANGUAGE
|
||||
from env import FLASK_HOST, FLASK_PORT, APP_ENV, CWA_DB_PATH, DEBUG
|
||||
from env import FLASK_HOST, FLASK_PORT, APP_ENV, CWA_DB_PATH, DEBUG, USING_EXTERNAL_BYPASSER
|
||||
import backend
|
||||
|
||||
from models import SearchFilters
|
||||
@@ -117,7 +117,10 @@ from typing import Union, Tuple
|
||||
if DEBUG:
|
||||
import subprocess
|
||||
import time
|
||||
from cloudflare_bypasser import _reset_driver as STOP_GUI
|
||||
if USING_EXTERNAL_BYPASSER:
|
||||
STOP_GUI = lambda: None # No-op for external bypasser
|
||||
else:
|
||||
from cloudflare_bypasser import _reset_driver as STOP_GUI
|
||||
@app.route('/debug', methods=['GET'])
|
||||
@login_required
|
||||
def debug() -> Union[Response, Tuple[Response, int]]:
|
||||
|
||||
@@ -8,6 +8,8 @@ from env import LOG_DIR, DEBUG
|
||||
import signal
|
||||
from datetime import datetime
|
||||
import subprocess
|
||||
import requests
|
||||
from typing import Optional
|
||||
|
||||
# --- SeleniumBase Import ---
|
||||
from seleniumbase import Driver
|
||||
@@ -475,3 +477,20 @@ def wait_for_result(func, timeout : int = 10, condition : any = True):
|
||||
time.sleep(0.5)
|
||||
return None
|
||||
_init_cleanup_thread()
|
||||
|
||||
|
||||
def get_bypassed_page(url: str) -> Optional[str]:
    """Retrieve a page's HTML through the internal Cloudflare Bypasser.

    Args:
        url: Target URL
    Returns:
        str: The HTML content produced by the bypasser.
    Raises:
        requests.exceptions.RequestException: If the bypasser returned an
            empty or whitespace-only document.
    """
    page_html = get(url)
    logger.debug(f"Cloudflare Bypasser response length: {len(page_html)}")
    # Guard clause: a blank document is reported as a request failure
    # instead of being handed back to the caller.
    if page_html.strip() == "":
        raise requests.exceptions.RequestException("Failed to bypass Cloudflare")
    return page_html
|
||||
|
||||
34
cloudflare_bypasser_external.py
Normal file
34
cloudflare_bypasser_external.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from logger import setup_logger
|
||||
from typing import Optional
|
||||
import requests
|
||||
|
||||
try:
|
||||
from env import EXT_BYPASSER_PATH, EXT_BYPASSER_TIMEOUT, EXT_BYPASSER_URL
|
||||
except ImportError:
|
||||
raise RuntimeError("Failed to import environment variables. Are you using an `extbp` image?")
|
||||
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
|
||||
def get_bypassed_page(url: str) -> Optional[str]:
    """Fetch HTML content from a URL using an External Cloudflare Resolver.

    Sends a ``request.get`` command to the resolver endpoint built from
    ``EXT_BYPASSER_URL`` and ``EXT_BYPASSER_PATH`` and returns the HTML found
    under ``solution.response`` in the JSON reply.

    Args:
        url: Target URL
    Returns:
        str: HTML content if successful, None if the external bypasser is
        not configured (empty URL or path).
    Raises:
        requests.exceptions.RequestException: On connection errors, client
            timeouts, non-success HTTP status codes, a reply that is not a
            JSON object, or a reply that carries no usable HTML.
    """
    if not EXT_BYPASSER_URL or not EXT_BYPASSER_PATH:
        logger.error("Wrong External Bypass configuration. Please check your environment configuration.")
        return None

    # Join base URL and path with exactly one slash, whatever the user typed
    # ("http://host:8191/" + "/v1" or "http://host:8191" + "v1").
    ext_url = f"{EXT_BYPASSER_URL.rstrip('/')}/{EXT_BYPASSER_PATH.lstrip('/')}"
    headers = {"Content-Type": "application/json"}
    data = {
        "cmd": "request.get",
        "url": url,
        "maxTimeout": EXT_BYPASSER_TIMEOUT,
    }

    # maxTimeout is expressed in milliseconds and is enforced by the resolver.
    # The HTTP call itself gets a slightly longer deadline (in seconds) so the
    # resolver can report its own timeout, while an unresponsive resolver can
    # no longer block this call forever.
    client_timeout = EXT_BYPASSER_TIMEOUT / 1000 + 10
    response = requests.post(ext_url, headers=headers, json=data, timeout=client_timeout)
    response.raise_for_status()

    # Decode the body once and reuse it for both logging and extraction.
    try:
        payload = response.json()
    except ValueError as err:
        raise requests.exceptions.RequestException(
            "External Bypass returned a non-JSON response"
        ) from err
    if not isinstance(payload, dict):
        raise requests.exceptions.RequestException(
            "External Bypass returned an unexpected response"
        )

    logger.debug(f"External Bypass response for '{url}': {payload.get('status')} - {payload.get('message')}")

    solution = payload.get("solution")
    html = solution.get("response") if isinstance(solution, dict) else None
    # Same failure signal as the internal bypasser: a missing or blank page is
    # a request failure, not a bare KeyError or an empty result.
    if not isinstance(html, str) or not html.strip():
        raise requests.exceptions.RequestException("Failed to bypass Cloudflare")
    return html
|
||||
10
config.py
10
config.py
@@ -93,7 +93,9 @@ if CUSTOM_SCRIPT:
|
||||
CUSTOM_SCRIPT = ""
|
||||
|
||||
# Debugging settings
|
||||
VIRTUAL_SCREEN_SIZE = (1024, 768)
|
||||
RECORDING_DIR = env.LOG_DIR / "recording"
|
||||
if env.DEBUG:
|
||||
RECORDING_DIR.mkdir(parents=True, exist_ok=True)
|
||||
if not env.USING_EXTERNAL_BYPASSER:
|
||||
# Virtual display settings for debugging internal cloudflare bypasser
|
||||
VIRTUAL_SCREEN_SIZE = (1024, 768)
|
||||
RECORDING_DIR = env.LOG_DIR / "recording"
|
||||
if env.DEBUG:
|
||||
RECORDING_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
35
docker-compose.extbp.dev.yml
Normal file
35
docker-compose.extbp.dev.yml
Normal file
@@ -0,0 +1,35 @@
|
||||
services:
|
||||
calibre-web-automated-book-downloader-extbp-dev:
|
||||
container_name: cwa-bd-extbp-dev
|
||||
extends:
|
||||
file: ./docker-compose.extbp.yml
|
||||
service: calibre-web-automated-book-downloader-extbp
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
target: cwa-bd-extbp
|
||||
environment:
|
||||
DEBUG: true
|
||||
APP_ENV: dev
|
||||
USE_DOH: true
|
||||
CUSTOM_DNS: cloudflare
|
||||
USE_CF_BYPASS: true # Enable Cloudflare bypass (default: true)
|
||||
# External Cloudflare Bypass environment variables
|
||||
EXT_BYPASSER_URL: "http://flaresolverr:8191" # URL of the external Cloudflare resolver service (used FlareSolverr)
|
||||
EXT_BYPASSER_PATH: "/v1" # Path for external Cloudflare resolver API (default: /v1)
|
||||
EXT_BYPASSER_TIMEOUT: 60000 # Timeout for external Cloudflare resolver requests (default: 60000)
|
||||
volumes:
|
||||
#- /tmp/cwa-book-downloader:/tmp/cwa-book-downloader
|
||||
#- /tmp/cwa-book-downloader-log:/var/log/cwa-book-downloader
|
||||
- ./deploy/ingest:/cwa-book-ingest
|
||||
- ./deploy/log:/var/log/cwa-book-downloader
|
||||
- ./deploy/tmp:/tmp/cwa-book-downloader
|
||||
|
||||
flaresolverr: # External Cloudflare resolver service
|
||||
image: ghcr.io/flaresolverr/flaresolverr:v3.3.22
|
||||
container_name: flaresolverr
|
||||
environment:
|
||||
LOG_LEVEL: info
|
||||
LOG_HTML: false
|
||||
CAPTCHA_SOLVER: none
|
||||
TZ: Europe/Rome
|
||||
22
docker-compose.extbp.yml
Normal file
22
docker-compose.extbp.yml
Normal file
@@ -0,0 +1,22 @@
|
||||
services:
|
||||
calibre-web-automated-book-downloader-extbp:
|
||||
image: ghcr.io/calibrain/calibre-web-automated-book-downloader-extbp:latest
|
||||
environment:
|
||||
FLASK_PORT: 8084
|
||||
LOG_LEVEL: info
|
||||
BOOK_LANGUAGE: en
|
||||
USE_BOOK_TITLE: true
|
||||
TZ: America/New_York
|
||||
APP_ENV: prod
|
||||
UID: 1000
|
||||
GID: 100
|
||||
ports:
|
||||
- 8084:8084
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
# This is where the books will be downloaded to, usually it would be
|
||||
# the same as whatever you gave in "calibre-web-automated"
|
||||
- /tmp/data/calibre-web/ingest:/cwa-book-ingest
|
||||
# This is the location of CWA's app.db, which contains authentication
|
||||
# details
|
||||
#- /cwa/config/path/app.db:/auth/app.db:ro
|
||||
@@ -12,9 +12,12 @@ from typing import Callable
|
||||
from threading import Event
|
||||
from logger import setup_logger
|
||||
from config import PROXIES
|
||||
from env import MAX_RETRY, DEFAULT_SLEEP, USE_CF_BYPASS
|
||||
from env import MAX_RETRY, DEFAULT_SLEEP, USE_CF_BYPASS, USING_EXTERNAL_BYPASSER
|
||||
if USE_CF_BYPASS:
|
||||
import cloudflare_bypasser
|
||||
if USING_EXTERNAL_BYPASSER:
|
||||
from cloudflare_bypasser_external import get_bypassed_page
|
||||
else:
|
||||
from cloudflare_bypasser import get_bypassed_page
|
||||
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
@@ -35,12 +38,7 @@ def html_get_page(url: str, retry: int = MAX_RETRY, use_bypasser: bool = False)
|
||||
logger.debug(f"html_get_page: {url}, retry: {retry}, use_bypasser: {use_bypasser}")
|
||||
if use_bypasser and USE_CF_BYPASS:
|
||||
logger.info(f"GET Using Cloudflare Bypasser for: {url}")
|
||||
response_html = cloudflare_bypasser.get(url)
|
||||
logger.debug(f"Cloudflare Bypasser response length: {len(response_html)}")
|
||||
if response_html.strip() != "":
|
||||
return response_html
|
||||
else:
|
||||
raise requests.exceptions.RequestException("Failed to bypass Cloudflare")
|
||||
return get_bypassed_page(url)
|
||||
else:
|
||||
logger.info(f"GET: {url}")
|
||||
response = requests.get(url, proxies=PROXIES)
|
||||
|
||||
@@ -112,8 +112,8 @@ else
|
||||
command="python3 app.py"
|
||||
fi
|
||||
|
||||
# IF DEBUG
|
||||
if [ "$DEBUG" = "true" ]; then
|
||||
# If DEBUG and not using an external bypass
|
||||
if [ "$DEBUG" = "true" ] && [ "$USING_EXTERNAL_BYPASSER" != "true" ]; then
|
||||
set +e
|
||||
set -x
|
||||
echo "vvvvvvvvvvvv DEBUG MODE vvvvvvvvvvvv"
|
||||
|
||||
6
env.py
6
env.py
@@ -45,6 +45,12 @@ APP_ENV = os.getenv("APP_ENV", "prod").lower()
|
||||
# Logging settings
|
||||
LOG_FILE = LOG_DIR / "cwa-book-downloader.log"
|
||||
|
||||
# External Cloudflare bypasser settings. The EXT_BYPASSER_* names are only
# defined when USING_EXTERNAL_BYPASSER is enabled.
USING_EXTERNAL_BYPASSER = string_to_bool(os.getenv("USING_EXTERNAL_BYPASSER", "false"))
if USING_EXTERNAL_BYPASSER:
    # Default to "" so an unset variable does not raise AttributeError on
    # None.strip() at import time; consumers treat an empty URL as
    # "not configured".
    EXT_BYPASSER_URL = os.getenv("EXT_BYPASSER_URL", "").strip()
    EXT_BYPASSER_PATH = os.getenv("EXT_BYPASSER_PATH", "/v1").strip()
    # Milliseconds; forwarded to the resolver as its maxTimeout.
    EXT_BYPASSER_TIMEOUT = int(os.getenv("EXT_BYPASSER_TIMEOUT", "60000"))
|
||||
|
||||
USING_TOR = string_to_bool(os.getenv("USING_TOR", "false"))
|
||||
# If using Tor, we don't need to set custom DNS, use DOH, or proxy
|
||||
if USING_TOR:
|
||||
|
||||
46
readme.md
46
readme.md
@@ -175,7 +175,9 @@ volumes:
|
||||
|
||||
Mount should align with your Calibre-Web-Automated ingest folder.
|
||||
|
||||
## 🧅 Tor Variant
|
||||
## Variants:
|
||||
|
||||
### 🧅 Tor Variant
|
||||
|
||||
This application also offers a variant that routes all its traffic through the Tor network. This can be useful for enhanced privacy or bypassing network restrictions.
|
||||
|
||||
@@ -196,6 +198,48 @@ To use the Tor variant:
|
||||
* **Timezone:** When running in Tor mode, the container will attempt to determine the timezone based on the Tor exit node's IP address and set it automatically. This will override the `TZ` environment variable if it is set.
|
||||
* **Network Settings:** Custom DNS, DoH, and HTTP(S) proxy settings (`CUSTOM_DNS`, `USE_DOH`, `HTTP_PROXY`, `HTTPS_PROXY`) are ignored when using the Tor variant, as all traffic goes through Tor.
|
||||
|
||||
### External Cloudflare resolver variant
|
||||
|
||||
This variant allows the application to use an external service to bypass Cloudflare protection, instead of relying on the built-in bypasser. This is useful if you already have a dedicated Cloudflare resolver (such as [FlareSolverr](https://github.com/FlareSolverr/FlareSolverr) or compatible services like [ByParr](https://github.com/ThePhaseless/Byparr)) running elsewhere.
|
||||
|
||||
#### How it works:
|
||||
|
||||
- When enabled, all requests that require Cloudflare bypass are sent to your external resolver service.
|
||||
- The application communicates with the resolver using its API.
|
||||
- This approach can improve reliability and performance, especially if your external resolver is optimized or shared across multiple applications.
|
||||
|
||||
#### Configuration
|
||||
|
||||
| Variable | Description | Default Value |
|
||||
| ---------------------- | ----------------------------------------------------------- | ----------------------- |
|
||||
| `EXT_BYPASSER_URL` | Base URL of your external resolver, without the API path, e.g. `http://flaresolverr:8191` (required) | |
|
||||
| `EXT_BYPASSER_PATH` | API path for the resolver (usually `/v1`) | `/v1` |
|
||||
| `EXT_BYPASSER_TIMEOUT` | Timeout for page loading (in milliseconds) | `60000` |
|
||||
|
||||
#### Important
|
||||
|
||||
This variant is controlled by the same switch as the built-in Cloudflare bypasser: `USE_CF_BYPASS` must be enabled for requests to be routed through the external resolver.
|
||||
|
||||
#### To use the External Cloudflare resolver variant:
|
||||
|
||||
1. Get the extbp-specific docker-compose file:
|
||||
```bash
|
||||
curl -O https://raw.githubusercontent.com/calibrain/calibre-web-automated-book-downloader/refs/heads/main/docker-compose.extbp.yml
|
||||
```
|
||||
2. Start the service using this file:
|
||||
```bash
|
||||
docker compose -f docker-compose.extbp.yml up -d
|
||||
```
|
||||
|
||||
#### Compatibility:
|
||||
This feature is designed to work with any resolver that implements the `FlareSolverr` API schema, including `ByParr` and similar projects.
|
||||
|
||||
#### Benefits:
|
||||
|
||||
- Centralizes Cloudflare bypass logic for easier maintenance.
|
||||
- Can leverage more powerful or distributed resolver infrastructure.
|
||||
- Reduces load on the main application container.
|
||||
|
||||
## 🏗️ Architecture
|
||||
|
||||
The application consists of a single service:
|
||||
|
||||
@@ -2,10 +2,7 @@ flask
|
||||
requests[socks]
|
||||
beautifulsoup4
|
||||
tqdm
|
||||
pyvirtualdisplay
|
||||
dnspython
|
||||
pyautogui
|
||||
seleniumbase>=4.41.1
|
||||
gunicorn
|
||||
python-xlib
|
||||
psutil
|
||||
3
requirements-cwa-bd.txt
Normal file
3
requirements-cwa-bd.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
pyvirtualdisplay
|
||||
pyautogui
|
||||
seleniumbase>=4.41.1
|
||||
Reference in New Issue
Block a user