mirror of
https://github.com/FreshRSS/FreshRSS.git
synced 2025-12-23 21:47:44 -05:00
Create separate Retry-After files for proxies (#8029)
* Create separate `Retry-After` files for proxies Bad proxies are able to send a false `Retry-After` header and affect the availability of feeds (domain-wide) for other users. This PR starts including the address of the proxy if present in filenames for `Retry-After` to mitigate the issue. * Reduce code changes * Sync SimplePie fork https://github.com/FreshRSS/simplepie/pull/62 --------- Co-authored-by: Alexandre Alapetite <alexandre@alapetite.fr>
This commit is contained in:
@@ -552,7 +552,7 @@ class FreshRSS_Feed extends Minz_Model {
|
||||
Minz_Exception::ERROR
|
||||
);
|
||||
} else {
|
||||
if (($retryAfter = FreshRSS_http_Util::getRetryAfter($this->url)) > 0) {
|
||||
if (($retryAfter = FreshRSS_http_Util::getRetryAfter($this->url, $this->proxyParam())) > 0) {
|
||||
throw new FreshRSS_Feed_Exception('For that domain, will first retry after ' . date('c', $retryAfter) .
|
||||
'. ' . $this->url(includeCredentials: false), code: 503);
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ declare(strict_types=1);
|
||||
final class FreshRSS_SimplePieResponse extends \SimplePie\File
|
||||
{
|
||||
#[\Override]
|
||||
protected function on_http_response($response): void {
|
||||
protected function on_http_response($response, array $curl_options = []): void {
|
||||
syslog(LOG_INFO, 'FreshRSS SimplePie GET ' . $this->get_status_code() . ' ' . \SimplePie\Misc::url_remove_credentials($this->get_final_requested_uri()));
|
||||
|
||||
if (in_array($this->get_status_code(), [429, 503], true)) {
|
||||
@@ -15,7 +15,8 @@ final class FreshRSS_SimplePieResponse extends \SimplePie\File
|
||||
$headers = [];
|
||||
}
|
||||
|
||||
$retryAfter = FreshRSS_http_Util::setRetryAfter($this->get_final_requested_uri(), $headers['retry-after'] ?? '');
|
||||
$proxy = is_string($curl_options[CURLOPT_PROXY] ?? null) ? $curl_options[CURLOPT_PROXY] : '';
|
||||
$retryAfter = FreshRSS_http_Util::setRetryAfter($this->get_final_requested_uri(), $proxy, $headers['retry-after'] ?? '');
|
||||
if ($retryAfter > 0) {
|
||||
$domain = parse_url($this->get_final_requested_uri(), PHP_URL_HOST);
|
||||
if (is_string($domain) && $domain !== '') {
|
||||
|
||||
@@ -5,7 +5,7 @@ final class FreshRSS_http_Util {
|
||||
|
||||
private const RETRY_AFTER_PATH = DATA_PATH . '/Retry-After/';
|
||||
|
||||
private static function getRetryAfterFile(string $url): string {
|
||||
private static function getRetryAfterFile(string $url, string $proxy): string {
|
||||
$domain = parse_url($url, PHP_URL_HOST);
|
||||
if (!is_string($domain) || $domain === '') {
|
||||
return '';
|
||||
@@ -14,7 +14,7 @@ final class FreshRSS_http_Util {
|
||||
if (is_int($port)) {
|
||||
$domain .= ':' . $port;
|
||||
}
|
||||
return self::RETRY_AFTER_PATH . urlencode($domain) . '.txt';
|
||||
return self::RETRY_AFTER_PATH . urlencode($domain) . (empty($proxy) ? '' : ('_' . urlencode($proxy))) . '.txt';
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -39,11 +39,11 @@ final class FreshRSS_http_Util {
|
||||
* Check whether the URL needs to wait for a Retry-After period.
|
||||
* @return int The timestamp of when the Retry-After expires, or 0 if not set.
|
||||
*/
|
||||
public static function getRetryAfter(string $url): int {
|
||||
public static function getRetryAfter(string $url, string $proxy): int {
|
||||
if (rand(0, 30) === 1) { // Remove old files once in a while
|
||||
self::cleanRetryAfters();
|
||||
}
|
||||
$txt = self::getRetryAfterFile($url);
|
||||
$txt = self::getRetryAfterFile($url, $proxy);
|
||||
if ($txt === '') {
|
||||
return 0;
|
||||
}
|
||||
@@ -61,8 +61,8 @@ final class FreshRSS_http_Util {
|
||||
/**
|
||||
* Store the HTTP Retry-After header value of an HTTP `429 Too Many Requests` or `503 Service Unavailable` response.
|
||||
*/
|
||||
public static function setRetryAfter(string $url, string $retryAfter): int {
|
||||
$txt = self::getRetryAfterFile($url);
|
||||
public static function setRetryAfter(string $url, string $proxy, string $retryAfter): int {
|
||||
$txt = self::getRetryAfterFile($url, $proxy);
|
||||
if ($txt === '') {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
"marienfressinaud/lib_opml": "0.5.1",
|
||||
"phpgt/cssxpath": "v1.4.0",
|
||||
"phpmailer/phpmailer": "7.0.0",
|
||||
"simplepie/simplepie": "dev-freshrss#187c2f28c6a7050e46e7bbfa5579552f78a6c1df"
|
||||
"simplepie/simplepie": "dev-freshrss#e7b26b4f01d377dc8174d5d4aee961604534d065"
|
||||
},
|
||||
"config": {
|
||||
"sort-packages": true,
|
||||
|
||||
@@ -735,7 +735,24 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
|
||||
cleanCache(CLEANCACHE_HOURS);
|
||||
}
|
||||
|
||||
if (($retryAfter = FreshRSS_http_Util::getRetryAfter($url)) > 0) {
|
||||
$options = [];
|
||||
$accept = '';
|
||||
$proxy = is_string(FreshRSS_Context::systemConf()->curl_options[CURLOPT_PROXY] ?? null) ? FreshRSS_Context::systemConf()->curl_options[CURLOPT_PROXY] : '';
|
||||
if (is_array($attributes['curl_params'] ?? null)) {
|
||||
$options = sanitizeCurlParams($attributes['curl_params']);
|
||||
$proxy = is_string($options[CURLOPT_PROXY]) ? $options[CURLOPT_PROXY] : '';
|
||||
if (is_array($options[CURLOPT_HTTPHEADER] ?? null)) {
|
||||
// Remove headers problematic for security
|
||||
$options[CURLOPT_HTTPHEADER] = array_filter($options[CURLOPT_HTTPHEADER],
|
||||
fn($header) => is_string($header) && !preg_match('/^(Remote-User|X-WebAuth-User)\\s*:/i', $header));
|
||||
// Add Accept header if it is not set
|
||||
if (preg_grep('/^Accept\\s*:/i', $options[CURLOPT_HTTPHEADER]) === false) {
|
||||
$options[CURLOPT_HTTPHEADER][] = 'Accept: ' . $accept;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (($retryAfter = FreshRSS_http_Util::getRetryAfter($url, $proxy)) > 0) {
|
||||
Minz_Log::warning('For that domain, will first retry after ' . date('c', $retryAfter) . '. ' . \SimplePie\Misc::url_remove_credentials($url));
|
||||
return ['body' => '', 'effective_url' => $url, 'redirect_count' => 0, 'fail' => true];
|
||||
}
|
||||
@@ -744,7 +761,6 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
|
||||
syslog(LOG_INFO, 'FreshRSS GET ' . $type . ' ' . \SimplePie\Misc::url_remove_credentials($url));
|
||||
}
|
||||
|
||||
$accept = '';
|
||||
switch ($type) {
|
||||
case 'json':
|
||||
$accept = 'application/json,application/feed+json,application/javascript;q=0.9,text/javascript;q=0.8,*/*;q=0.7';
|
||||
@@ -782,6 +798,9 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
|
||||
//CURLOPT_VERBOSE => 1, // To debug sent HTTP headers
|
||||
]);
|
||||
|
||||
curl_setopt_array($ch, $options);
|
||||
curl_setopt_array($ch, FreshRSS_Context::systemConf()->curl_options);
|
||||
|
||||
$responseHeaders = '';
|
||||
curl_setopt($ch, CURLOPT_HEADERFUNCTION, function (\CurlHandle $ch, string $header) use (&$responseHeaders) {
|
||||
if (trim($header) !== '') { // Skip e.g. separation with trailer headers
|
||||
@@ -790,22 +809,6 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
|
||||
return strlen($header);
|
||||
});
|
||||
|
||||
curl_setopt_array($ch, FreshRSS_Context::systemConf()->curl_options);
|
||||
|
||||
if (is_array($attributes['curl_params'] ?? null)) {
|
||||
$options = sanitizeCurlParams($attributes['curl_params']);
|
||||
if (is_array($options[CURLOPT_HTTPHEADER] ?? null)) {
|
||||
// Remove headers problematic for security
|
||||
$options[CURLOPT_HTTPHEADER] = array_filter($options[CURLOPT_HTTPHEADER],
|
||||
fn($header) => is_string($header) && !preg_match('/^(Remote-User|X-WebAuth-User)\\s*:/i', $header));
|
||||
// Add Accept header if it is not set
|
||||
if (preg_grep('/^Accept\\s*:/i', $options[CURLOPT_HTTPHEADER]) === false) {
|
||||
$options[CURLOPT_HTTPHEADER][] = 'Accept: ' . $accept;
|
||||
}
|
||||
}
|
||||
curl_setopt_array($ch, $options);
|
||||
}
|
||||
|
||||
if (isset($attributes['ssl_verify'])) {
|
||||
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, empty($attributes['ssl_verify']) ? 0 : 2);
|
||||
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (bool)$attributes['ssl_verify']);
|
||||
@@ -838,7 +841,7 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
|
||||
$body = '';
|
||||
Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' . $url);
|
||||
if (in_array($c_status, [429, 503], true)) {
|
||||
$retryAfter = FreshRSS_http_Util::setRetryAfter($url, $headers['retry-after'] ?? '');
|
||||
$retryAfter = FreshRSS_http_Util::setRetryAfter($url, $proxy, $headers['retry-after'] ?? '');
|
||||
if ($c_status === 429) {
|
||||
$errorMessage = 'HTTP 429 Too Many Requests! [' . \SimplePie\Misc::url_remove_credentials($url) . ']';
|
||||
} elseif ($c_status === 503) {
|
||||
|
||||
@@ -145,7 +145,7 @@ class File implements Response
|
||||
$responseHeaders .= "\r\n";
|
||||
if (curl_errno($fp) === CURLE_WRITE_ERROR || curl_errno($fp) === CURLE_BAD_CONTENT_ENCODING) {
|
||||
$this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp); // FreshRSS
|
||||
$this->on_http_response($responseBody === false ? false : $responseHeaders . $responseBody);
|
||||
$this->on_http_response($responseBody === false ? false : $responseHeaders . $responseBody, $curl_options);
|
||||
$this->error = null; // FreshRSS
|
||||
curl_setopt($fp, CURLOPT_ENCODING, 'none');
|
||||
$responseHeaders = '';
|
||||
@@ -156,7 +156,7 @@ class File implements Response
|
||||
if (curl_errno($fp) !== CURLE_OK) {
|
||||
$this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp);
|
||||
$this->success = false;
|
||||
$this->on_http_response($responseBody === false ? false : $responseHeaders . $responseBody);
|
||||
$this->on_http_response($responseBody === false ? false : $responseHeaders . $responseBody, $curl_options);
|
||||
} else {
|
||||
// For PHPStan: `curl_exec` returns `false` only on error so the `is_string` check will always pass.
|
||||
\assert(is_string($responseBody));
|
||||
@@ -164,7 +164,7 @@ class File implements Response
|
||||
// TODO: Replace with `CURLOPT_SUPPRESS_CONNECT_HEADERS` once PHP 7.2 support is dropped.
|
||||
$responseHeaders = \SimplePie\HTTP\Parser::prepareHeaders($responseHeaders);
|
||||
}
|
||||
$this->on_http_response($responseHeaders . $responseBody);
|
||||
$this->on_http_response($responseHeaders . $responseBody, $curl_options);
|
||||
if (\PHP_VERSION_ID < 80000) {
|
||||
curl_close($fp);
|
||||
}
|
||||
@@ -332,8 +332,9 @@ class File implements Response
|
||||
* Triggered just after an HTTP response is received.
|
||||
* @param string|false $response The raw HTTP response headers and body, or false in case of failure (as returned by curl_exec()).
|
||||
* FreshRSS.
|
||||
* @param array<int, mixed> $curl_options
|
||||
*/
|
||||
protected function on_http_response($response): void
|
||||
protected function on_http_response($response, array $curl_options = []): void
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user