From 7d4854a0a4f5665db599f18c34035786465639f3 Mon Sep 17 00:00:00 2001 From: Inverle Date: Tue, 4 Nov 2025 12:48:31 +0100 Subject: [PATCH] Create separate `Retry-After` files for proxies (#8029) * Create separate `Retry-After` files for proxies Bad proxies are able to send a false `Retry-After` header and affect the availability of feeds (domain-wide) for other users. This PR starts including the address of the proxy if present in filenames for `Retry-After` to mitigate the issue. * Reduce code changes * Sync SimplePie fork https://github.com/FreshRSS/simplepie/pull/62 --------- Co-authored-by: Alexandre Alapetite --- app/Models/Feed.php | 2 +- app/Models/SimplePieResponse.php | 5 ++-- app/Utils/httpUtil.php | 12 ++++---- lib/composer.json | 2 +- lib/lib_rss.php | 41 +++++++++++++++------------- lib/simplepie/simplepie/src/File.php | 9 +++--- 6 files changed, 38 insertions(+), 33 deletions(-) diff --git a/app/Models/Feed.php b/app/Models/Feed.php index fd9177d4e..2a1ec3f63 100644 --- a/app/Models/Feed.php +++ b/app/Models/Feed.php @@ -552,7 +552,7 @@ class FreshRSS_Feed extends Minz_Model { Minz_Exception::ERROR ); } else { - if (($retryAfter = FreshRSS_http_Util::getRetryAfter($this->url)) > 0) { + if (($retryAfter = FreshRSS_http_Util::getRetryAfter($this->url, $this->proxyParam())) > 0) { throw new FreshRSS_Feed_Exception('For that domain, will first retry after ' . date('c', $retryAfter) . '. ' . $this->url(includeCredentials: false), code: 503); } diff --git a/app/Models/SimplePieResponse.php b/app/Models/SimplePieResponse.php index 6e15da24a..27dc41b74 100644 --- a/app/Models/SimplePieResponse.php +++ b/app/Models/SimplePieResponse.php @@ -4,7 +4,7 @@ declare(strict_types=1); final class FreshRSS_SimplePieResponse extends \SimplePie\File { #[\Override] - protected function on_http_response($response): void { + protected function on_http_response($response, array $curl_options = []): void { syslog(LOG_INFO, 'FreshRSS SimplePie GET ' . $this->get_status_code() . ' ' . \SimplePie\Misc::url_remove_credentials($this->get_final_requested_uri())); if (in_array($this->get_status_code(), [429, 503], true)) { @@ -15,7 +15,8 @@ final class FreshRSS_SimplePieResponse extends \SimplePie\File $headers = []; } - $retryAfter = FreshRSS_http_Util::setRetryAfter($this->get_final_requested_uri(), $headers['retry-after'] ?? ''); + $proxy = is_string($curl_options[CURLOPT_PROXY] ?? null) ? $curl_options[CURLOPT_PROXY] : ''; + $retryAfter = FreshRSS_http_Util::setRetryAfter($this->get_final_requested_uri(), $proxy, $headers['retry-after'] ?? ''); if ($retryAfter > 0) { $domain = parse_url($this->get_final_requested_uri(), PHP_URL_HOST); if (is_string($domain) && $domain !== '') { diff --git a/app/Utils/httpUtil.php b/app/Utils/httpUtil.php index 2b57b410f..6e176a0d3 100644 --- a/app/Utils/httpUtil.php +++ b/app/Utils/httpUtil.php @@ -5,7 +5,7 @@ final class FreshRSS_http_Util { private const RETRY_AFTER_PATH = DATA_PATH . '/Retry-After/'; - private static function getRetryAfterFile(string $url): string { + private static function getRetryAfterFile(string $url, string $proxy): string { $domain = parse_url($url, PHP_URL_HOST); if (!is_string($domain) || $domain === '') { return ''; @@ -14,7 +14,7 @@ final class FreshRSS_http_Util { if (is_int($port)) { $domain .= ':' . $port; } - return self::RETRY_AFTER_PATH . urlencode($domain) . '.txt'; + return self::RETRY_AFTER_PATH . urlencode($domain) . (empty($proxy) ? '' : ('_' . urlencode($proxy))) . '.txt'; } /** @@ -39,11 +39,11 @@ final class FreshRSS_http_Util { * Check whether the URL needs to wait for a Retry-After period. * @return int The timestamp of when the Retry-After expires, or 0 if not set. */ - public static function getRetryAfter(string $url): int { + public static function getRetryAfter(string $url, string $proxy): int { if (rand(0, 30) === 1) { // Remove old files once in a while self::cleanRetryAfters(); } - $txt = self::getRetryAfterFile($url); + $txt = self::getRetryAfterFile($url, $proxy); if ($txt === '') { return 0; } @@ -61,8 +61,8 @@ final class FreshRSS_http_Util { /** * Store the HTTP Retry-After header value of an HTTP `429 Too Many Requests` or `503 Service Unavailable` response. */ - public static function setRetryAfter(string $url, string $retryAfter): int { - $txt = self::getRetryAfterFile($url); + public static function setRetryAfter(string $url, string $proxy, string $retryAfter): int { + $txt = self::getRetryAfterFile($url, $proxy); if ($txt === '') { return 0; } diff --git a/lib/composer.json b/lib/composer.json index 963ee2717..bda192099 100644 --- a/lib/composer.json +++ b/lib/composer.json @@ -14,7 +14,7 @@ "marienfressinaud/lib_opml": "0.5.1", "phpgt/cssxpath": "v1.4.0", "phpmailer/phpmailer": "7.0.0", - "simplepie/simplepie": "dev-freshrss#187c2f28c6a7050e46e7bbfa5579552f78a6c1df" + "simplepie/simplepie": "dev-freshrss#e7b26b4f01d377dc8174d5d4aee961604534d065" }, "config": { "sort-packages": true, diff --git a/lib/lib_rss.php b/lib/lib_rss.php index 5e19ec628..e7503ffe4 100644 --- a/lib/lib_rss.php +++ b/lib/lib_rss.php @@ -735,7 +735,24 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a cleanCache(CLEANCACHE_HOURS); } - if (($retryAfter = FreshRSS_http_Util::getRetryAfter($url)) > 0) { + $options = []; + $accept = ''; + $proxy = is_string(FreshRSS_Context::systemConf()->curl_options[CURLOPT_PROXY] ?? null) ? FreshRSS_Context::systemConf()->curl_options[CURLOPT_PROXY] : ''; + if (is_array($attributes['curl_params'] ?? null)) { + $options = sanitizeCurlParams($attributes['curl_params']); + $proxy = is_string($options[CURLOPT_PROXY]) ? $options[CURLOPT_PROXY] : ''; + if (is_array($options[CURLOPT_HTTPHEADER] ?? null)) { + // Remove headers problematic for security + $options[CURLOPT_HTTPHEADER] = array_filter($options[CURLOPT_HTTPHEADER], + fn($header) => is_string($header) && !preg_match('/^(Remote-User|X-WebAuth-User)\\s*:/i', $header)); + // Add Accept header if it is not set + if (preg_grep('/^Accept\\s*:/i', $options[CURLOPT_HTTPHEADER]) === false) { + $options[CURLOPT_HTTPHEADER][] = 'Accept: ' . $accept; + } + } + } + + if (($retryAfter = FreshRSS_http_Util::getRetryAfter($url, $proxy)) > 0) { Minz_Log::warning('For that domain, will first retry after ' . date('c', $retryAfter) . '. ' . \SimplePie\Misc::url_remove_credentials($url)); return ['body' => '', 'effective_url' => $url, 'redirect_count' => 0, 'fail' => true]; } @@ -744,7 +761,6 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a syslog(LOG_INFO, 'FreshRSS GET ' . $type . ' ' . \SimplePie\Misc::url_remove_credentials($url)); } - $accept = ''; switch ($type) { case 'json': $accept = 'application/json,application/feed+json,application/javascript;q=0.9,text/javascript;q=0.8,*/*;q=0.7'; @@ -782,6 +798,9 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a //CURLOPT_VERBOSE => 1, // To debug sent HTTP headers ]); + curl_setopt_array($ch, $options); + curl_setopt_array($ch, FreshRSS_Context::systemConf()->curl_options); + $responseHeaders = ''; curl_setopt($ch, CURLOPT_HEADERFUNCTION, function (\CurlHandle $ch, string $header) use (&$responseHeaders) { if (trim($header) !== '') { // Skip e.g. separation with trailer headers @@ -790,22 +809,6 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a return strlen($header); }); - curl_setopt_array($ch, FreshRSS_Context::systemConf()->curl_options); - - if (is_array($attributes['curl_params'] ?? null)) { - $options = sanitizeCurlParams($attributes['curl_params']); - if (is_array($options[CURLOPT_HTTPHEADER] ?? null)) { - // Remove headers problematic for security - $options[CURLOPT_HTTPHEADER] = array_filter($options[CURLOPT_HTTPHEADER], - fn($header) => is_string($header) && !preg_match('/^(Remote-User|X-WebAuth-User)\\s*:/i', $header)); - // Add Accept header if it is not set - if (preg_grep('/^Accept\\s*:/i', $options[CURLOPT_HTTPHEADER]) === false) { - $options[CURLOPT_HTTPHEADER][] = 'Accept: ' . $accept; - } - } - curl_setopt_array($ch, $options); - } - if (isset($attributes['ssl_verify'])) { curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, empty($attributes['ssl_verify']) ? 0 : 2); curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (bool)$attributes['ssl_verify']); @@ -838,7 +841,7 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a $body = ''; Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url); if (in_array($c_status, [429, 503], true)) { - $retryAfter = FreshRSS_http_Util::setRetryAfter($url, $headers['retry-after'] ?? ''); + $retryAfter = FreshRSS_http_Util::setRetryAfter($url, $proxy, $headers['retry-after'] ?? ''); if ($c_status === 429) { $errorMessage = 'HTTP 429 Too Many Requests! [' . \SimplePie\Misc::url_remove_credentials($url) . ']'; } elseif ($c_status === 503) { diff --git a/lib/simplepie/simplepie/src/File.php b/lib/simplepie/simplepie/src/File.php index 874438c76..0981ebf96 100644 --- a/lib/simplepie/simplepie/src/File.php +++ b/lib/simplepie/simplepie/src/File.php @@ -145,7 +145,7 @@ class File implements Response $responseHeaders .= "\r\n"; if (curl_errno($fp) === CURLE_WRITE_ERROR || curl_errno($fp) === CURLE_BAD_CONTENT_ENCODING) { $this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp); // FreshRSS - $this->on_http_response($responseBody === false ? false : $responseHeaders . $responseBody); + $this->on_http_response($responseBody === false ? false : $responseHeaders . $responseBody, $curl_options); $this->error = null; // FreshRSS curl_setopt($fp, CURLOPT_ENCODING, 'none'); $responseHeaders = ''; @@ -156,7 +156,7 @@ class File implements Response if (curl_errno($fp) !== CURLE_OK) { $this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp); $this->success = false; - $this->on_http_response($responseBody === false ? false : $responseHeaders . $responseBody); + $this->on_http_response($responseBody === false ? false : $responseHeaders . $responseBody, $curl_options); } else { // For PHPStan: `curl_exec` returns `false` only on error so the `is_string` check will always pass. \assert(is_string($responseBody)); @@ -164,7 +164,7 @@ class File implements Response // TODO: Replace with `CURLOPT_SUPPRESS_CONNECT_HEADERS` once PHP 7.2 support is dropped. $responseHeaders = \SimplePie\HTTP\Parser::prepareHeaders($responseHeaders); } - $this->on_http_response($responseHeaders . $responseBody); + $this->on_http_response($responseHeaders . $responseBody, $curl_options); if (\PHP_VERSION_ID < 80000) { curl_close($fp); } @@ -332,8 +332,9 @@ class File implements Response * Triggered just after an HTTP response is received. * @param string|false $response The raw HTTP response headers and body, or false in case of failure (as returned by curl_exec()). * FreshRSS. + * @param array $curl_options */ - protected function on_http_response($response): void + protected function on_http_response($response, array $curl_options = []): void { }