mirror of
https://github.com/FreshRSS/FreshRSS.git
synced 2026-06-29 01:35:35 -04:00
* Add SSRF mitigations using `filter_var` and `CURLOPT_RESOLVE`
The idea is to prevent FreshRSS from sending any HTTP requests to internal services, except for the ones that are explicitly allowed in the config.
Based on 6e82b46a48/lib/filelib.php (L3818) and https://github.com/symfony/symfony/blob/8.1/src/Symfony/Component/HttpClient/NoPrivateNetworkHttpClient.php
https://github.com/FreshRSS/simplepie/pull/76
https://github.com/FreshRSS/simplepie/pull/78
* Add allowlist setting in Web UI
* make readme
* Update app/i18n/fr/admin.php
Co-authored-by: Alexandre Alapetite <alexandre@alapetite.fr>
* make readme again
* make readme
* Further work
Still WIP and needs testing etc.
* Readd previous if check for domain combination allowlist
* Turn POST to GET after redirect
* Improve
* Update config.default.php
Co-authored-by: Alexandre Alapetite <alexandre@alapetite.fr>
* make readme
* Skip SSRF check if `CURLOPT_PROXY` is set
* make readme
* Fix `!empty()` mistake
* Respect max redirects feed option when fetching with `httpGet()`
* Respect max redirects during SimplePie fetching + fix bypass
bypass fix: `CURLOPT_FOLLOWLOCATION` was moved below so that emulated redirects are enforced.
* Avoid FreshRSS and Minz code in SimplePie
https://github.com/FreshRSS/FreshRSS/pull/8400#discussion_r2935375980
* Corrected hook code
* phpdoc wrong return type
* Add CIDR support in allowlist
* Implement simple DNS caching
* Suppress `dns_get_record()` warnings
* A bit of proof-reading
* Minor typo
* Fix proxy logic
* Fix HTTP POST redirect logic
* Proofread checkCIDR
Add fixes for several situations
* Remove credentials from URL in logs
* Ensure `CURLOPT_FOLLOWLOCATION` is `false` by setting it at the end
* Fix codesniffer long line
* Fix potential bypass due to wrong return value
If `dns_get_record()` returned no records, no `CURLOPT_RESOLVE` override was passed,
so cURL would resolve the domain by itself, which could allow a bypass.
* Put the URL at the end in logs
* Add documentation and environment variable support
* make readme
* Fix wrong behavior in case of IP
* Fix duplicate selector in CSS
* Minor type check change
* i18n fr, en
* Minor type check change
* Fix whitespace i18n fr
* make fix-all
* Fix `$ips_ok` not being returned after domain records were cached
* make readme
* PHPStan fix
* make readme
* Minor syntax in SimplePie
* Only return `null` if no allowed IPs were found
* Add wildcard *, help message
* Consistent docs with help message
* i18n: pl
* SimplePie compatibility PHP 7.2
* make fix-all
* Sync SimplePie
* https://github.com/FreshRSS/simplepie/pull/76
* 💥 Breaking change in the Changelog
* Document `INTERNAL_HOST_ALLOWLIST` in Docker docs
* Remove `Cookie` and `Authorization` headers in `httpGet()` during cross-origin redirect
* Minor whitespace
And use the same comment convention as below
* Remove authentication headers and change POST to GET on redirect in SimplePie
* Remove .local in Docker example
* Fill in default ports when comparing URL origins
* Remove .local from other places than the Docker example
* Rewrite WebSub subscribe to use `httpGet()`
* make fix-all
* Also unset `CURLOPT_USERPWD` during redirects
* phpcs fix
* Always unset `CURLOPT_FOLLOWLOCATION`
* Bump SimplePie
https://github.com/FreshRSS/simplepie/pull/78
* Update logic for CURLOPT_FOLLOWLOCATION
* Fix PHPStan
* Changelog fix security section
* Update most common RSS Bridge case
https://hub.docker.com/r/rssbridge/rss-bridge
* Replace misleading 127.0.0.1:8080 example for Docker
This does not make sense for a Docker container
---------
Co-authored-by: Alexandre Alapetite <alexandre@alapetite.fr>
782 lines
28 KiB
PHP
782 lines
28 KiB
PHP
<?php
|
||
declare(strict_types=1);
|
||
|
||
final class FreshRSS_http_Util {
|
||
|
||
/** Directory of marker files whose modification time is the moment a Retry-After period expires. */
private const RETRY_AFTER_PATH = DATA_PATH . '/Retry-After/';

/**
 * CIDR ranges treated as internal: an IP inside any of them is refused
 * unless it was explicitly allow-listed. The order of entries is not significant.
 */
private const PRIVATE_SUBNETS = [
	// IPv4
	'0.0.0.0/8',	// RFC5735
	'10.0.0.0/8',	// RFC1918
	'127.0.0.0/8',	// RFC1700 (Loopback)
	'169.254.0.0/16',	// RFC3927
	'172.16.0.0/12',	// RFC1918
	'192.168.0.0/16',	// RFC1918
	'240.0.0.0/4',	// RFC1112
	// IPv6
	'::/128',	// Unspecified address
	'::1/128',	// Loopback
	'::ffff:0:0/96',	// IPv4 translations
	'fc00::/7',	// Unique Local Address
	'fe80::/10',	// Link Local Address
];

/**
 * In-memory cache of the IP addresses returned by DNS, keyed by host name.
 * @var array<string, string[]> $resolve_ok
 */
private static array $resolve_ok = [];
|
||
|
||
/**
 * Build the path of the marker file tracking the Retry-After period of a URL.
 *
 * The file name starts with the URL-encoded `host[:port]`. When `Minz_Request::serverIsPublic()`
 * does not report the host as public, a SHA-256 hash of the full URL is appended, so the marker
 * applies to that URL only. A non-empty proxy is appended as well.
 *
 * @return string the marker file path, or an empty string if the URL has no host
 */
private static function getRetryAfterFile(string $url, string $proxy): string {
	$host = parse_url($url, PHP_URL_HOST);
	if (!is_string($host) || $host === '') {
		return '';
	}
	// Evaluated on the bare host, before any port is appended
	$isPublicHost = Minz_Request::serverIsPublic($host);
	$port = parse_url($url, PHP_URL_PORT);
	$authority = is_int($port) ? $host . ':' . $port : $host;

	$fileName = urlencode($authority);
	if (!$isPublicHost) {
		$fileName .= '_' . hash('sha256', $url);
	}
	if (!empty($proxy)) {
		$fileName .= '_' . urlencode($proxy);
	}
	return self::RETRY_AFTER_PATH . $fileName . '.txt';
}
|
||
|
||
/**
 * Delete the Retry-After marker files whose expiry time (stored as modification time) is in the past.
 * Failures to read or delete a file are silently ignored.
 */
private static function cleanRetryAfters(): void {
	$markers = is_dir(self::RETRY_AFTER_PATH) ? glob(self::RETRY_AFTER_PATH . '*.txt', GLOB_NOSORT) : false;
	if ($markers === false) {
		return;
	}
	foreach ($markers as $marker) {
		$expiry = @filemtime($marker);
		if ($expiry < time()) {
			@unlink($marker);
		}
	}
}
|
||
|
||
/**
 * Tell whether requests to the URL are currently held back by a Retry-After period.
 * An expired marker file is deleted on the way, and roughly one call in 31 also purges all expired markers.
 * @return int The timestamp of when the Retry-After expires, or 0 if not set.
 */
public static function getRetryAfter(string $url, string $proxy): int {
	if (rand(0, 30) === 1) {	// Remove old files once in a while
		self::cleanRetryAfters();
	}
	$markerFile = self::getRetryAfterFile($url, $proxy);
	if ($markerFile === '') {
		return 0;
	}
	$expiry = @filemtime($markerFile);
	if ($expiry === false || $expiry <= 0) {
		return 0;
	}
	if ($expiry >= time()) {
		return $expiry;
	}
	// The period is over: the marker is no longer needed
	@unlink($markerFile);
	return 0;
}
|
||
|
||
/**
 * Store the HTTP Retry-After header value of an HTTP `429 Too Many Requests` or `503 Service Unavailable` response.
 *
 * The expiry time is saved as the modification time of the marker file of the URL.
 *
 * @param string $retryAfter raw header value: either a number of seconds or a date.
 * A value that cannot be parsed falls back to `limits['retry_after_default']` (at least 600 seconds).
 * The result is capped to `limits['retry_after_max']` (at least 3600 seconds) from now.
 * @return int the stored expiry timestamp, or 0 if the URL has no host or the marker file could not be written
 */
public static function setRetryAfter(string $url, string $proxy, string $retryAfter): int {
	$txt = self::getRetryAfterFile($url, $proxy);
	if ($txt === '') {
		return 0;
	}

	$limits = FreshRSS_Context::systemConf()->limits;
	if (ctype_digit($retryAfter)) {
		// Delay expressed in seconds
		$retryAfter = time() + (int)$retryAfter;
	} else {
		// Date form, or missing / unparsable value
		$retryAfter = \SimplePie\Misc::parse_date($retryAfter) ?:
			(time() + max(600, $limits['retry_after_default'] ?? 0));
	}
	$retryAfter = min($retryAfter, time() + max(3600, $limits['retry_after_max'] ?? 0));

	@mkdir(self::RETRY_AFTER_PATH);
	if (!touch($txt, $retryAfter)) {
		// Never write URL credentials to the logs
		Minz_Log::error('Failed to set Retry-After for ' . \SimplePie\Misc::url_remove_credentials($url));
		return 0;
	}
	return $retryAfter;
}
|
||
|
||
/**
 * Keep only the user-defined cURL options that are considered safe, dropping every other key.
 * The value of `CURLOPT_COOKIEFILE`, when present, is replaced by an empty string.
 *
 * @param array<mixed> $curl_params
 * @return array<mixed>
 */
public static function sanitizeCurlParams(array $curl_params): array {
	$allowed = array_flip([
		CURLOPT_COOKIE,
		CURLOPT_COOKIEFILE,
		CURLOPT_FOLLOWLOCATION,	// We filter this value later, only allowing `false`
		CURLOPT_HTTPHEADER,
		CURLOPT_MAXREDIRS,
		CURLOPT_POST,
		CURLOPT_POSTFIELDS,
		CURLOPT_PROXY,
		CURLOPT_PROXYTYPE,
		CURLOPT_USERAGENT,
	]);
	$sanitized = array_intersect_key($curl_params, $allowed);
	if (array_key_exists(CURLOPT_COOKIEFILE, $sanitized)) {
		// Allow only an empty value just to enable the libcurl cookie engine
		$sanitized[CURLOPT_COOKIEFILE] = '';
	}
	return $sanitized;
}
|
||
|
||
/**
 * Replace the first occurrence of the host of a URL by the result of `idn_to_ascii()`.
 * The URL is returned untouched when `idn_to_ascii()` is unavailable, when there is no host,
 * or when the conversion fails.
 */
private static function idn_to_puny(string $url): string {
	if (!function_exists('idn_to_ascii')) {
		return $url;
	}
	$host = parse_url($url, PHP_URL_HOST);
	if (!is_string($host) || $host === '') {
		return $url;
	}
	$asciiHost = idn_to_ascii($host);
	$offset = strpos($url, $host);
	if ($asciiHost == false || $offset === false) {
		return $url;
	}
	return substr_replace($url, $asciiHost, $offset, strlen($host));
}
|
||
|
||
/**
 * Normalise and validate a URL.
 *
 * @param bool $fixScheme when true, `https://` is prepended to a URL not starting with `http://` or `https://`
 * @return string|false the trimmed URL with an ASCII host, an empty string for blank input,
 * or false if the URL does not pass `FILTER_VALIDATE_URL`
 */
public static function checkUrl(string $url, bool $fixScheme = true): string|false {
	$candidate = trim($url);
	if ($candidate === '') {
		return '';
	}
	if ($fixScheme && preg_match('#^https?://#i', $candidate) !== 1) {
		$candidate = 'https://' . ltrim($candidate, '/');
	}

	$candidate = self::idn_to_puny($candidate);	// https://bugs.php.net/bug.php?id=53474
	// Validate a copy without underscores. PHP discussion #64948 Underscore
	$relaxed = str_replace('_', 'z', $candidate);

	return is_string(filter_var($relaxed, FILTER_VALIDATE_URL)) ? $candidate : false;
}
|
||
|
||
/**
 * Remove the first charset meta declaration of an HTML document, such as:
 * `<meta charset="..." />`
 * `<meta http-equiv="Content-Type" content="text/html; charset=...">`
 * An empty string is returned if the regular expression fails.
 */
private static function stripHtmlMetaCharset(string $html): string {
	$stripped = preg_replace('/<meta\s[^>]*charset\s*=\s*[^>]+>/i', '', $html, 1);
	return is_string($stripped) ? $stripped : '';
}
|
||
|
||
/**
 * Make the charset received by HTTP win over the one declared inside the HTML,
 * by converting the content to UTF-8 when needed and prefixing it with a UTF-8 byte order mark.
 * The content is returned as is when no charset can be determined, when the charset is
 * windows-1252 or US-ASCII, or when it already starts with a byte order mark or an XML encoding declaration.
 * @param string $html the raw downloaded HTML content
 * @param string $contentType an HTTP Content-Type such as 'text/html; charset=utf-8'
 * @return string an HTML string with XML encoding information for DOMDocument::loadHTML()
 */
private static function enforceHttpEncoding(string $html, string $contentType = ''): string {
	if (preg_match('/\bcharset=([0-9a-z_-]{2,12})$/i', $contentType, $found) === 1) {
		$declaredCharset = $found[1];
	} elseif (preg_match('/<meta\s[^>]*charset\s*=[\s\'"]*UTF-?8\b/i', substr($html, 0, 2048))) {
		// No charset defined by HTTP, but detect UTF-8 even if declared too deep in HTML for DOMDocument
		$declaredCharset = 'UTF-8';
	} else {
		// No charset information at all, do nothing
		return $html;
	}

	$charset = \SimplePie\Misc::encoding($declaredCharset);
	if ($charset === 'windows-1252' || $charset === 'US-ASCII') {
		// Default charset for HTTP, do nothing
		return $html;
	}

	$byteOrderMarks = [
		"\xEF\xBB\xBF",	// UTF-8
		"\xFF\xFE",	// UTF-16 Little Endian
		"\xFE\xFF",	// UTF-16 Big Endian
		"\xFF\xFE\x00\x00",	// UTF-32 Little Endian
		"\x00\x00\xFE\xFF",	// UTF-32 Big Endian
	];
	foreach ($byteOrderMarks as $byteOrderMark) {
		if (str_starts_with($html, $byteOrderMark)) {
			// Existing byte order mark, do nothing
			return $html;
		}
	}

	if (preg_match('/^<[?]xml[^>]+encoding\b/', substr($html, 0, 64))) {
		// Existing XML declaration, do nothing
		return $html;
	}

	if ($charset !== 'UTF-8') {
		// Try to change encoding to UTF-8 using mbstring or iconv or intl
		$converted = \SimplePie\Misc::change_encoding($html, $charset, 'UTF-8');
		if (!is_string($converted)) {
			// Give up
			return $html;
		}
		// The meta charset of the document is now wrong, so remove it
		$html = self::stripHtmlMetaCharset($converted);
	}

	// Save encoding information as Unicode BOM
	return "\xEF\xBB\xBF" . $html;
}
|
||
|
||
/**
 * Insert a `<base href>` element in the HTML content when it does not contain one yet.
 * The element goes first in `<head>`, or first in the root element if there is no `<head>`.
 * @param string $html the raw downloaded HTML content
 * @param string $href the HTML base URL
 * @return string an HTML string, or an empty string if the content has no root element after parsing
 */
private static function enforceHtmlBase(string $html, string $href): string {
	$dom = new DOMDocument();
	$dom->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING);
	$root = $dom->documentElement;
	if ($root === null) {
		return '';
	}

	$xpath = new DOMXPath($dom);
	$existingBases = $xpath->evaluate('//base');
	$hasBase = $existingBases instanceof DOMNodeList && $existingBases->length > 0;
	if (!$hasBase) {
		$baseElement = $dom->createElement('base');
		if ($baseElement === false) {
			return $html;
		}
		$baseElement->setAttribute('href', $href);
		$headList = $xpath->evaluate('//head');
		$firstHead = $headList instanceof DOMNodeList && $headList->length > 0 ? $headList->item(0) : null;
		$parent = $firstHead instanceof DOMElement ? $firstHead : $root;
		$parent->insertBefore($baseElement, $parent->firstChild);
	}

	// Save the start of HTML because libxml2 saveHTML() risks scrambling it
	$prologue = '';
	$originalHtmlPos = stripos($html, '<html');
	if ($originalHtmlPos !== false && $originalHtmlPos <= 512) {
		$prologue = substr($html, 0, $originalHtmlPos);
	}

	$serialized = $dom->saveHTML() ?: $html;
	if ($prologue === '' || str_starts_with($serialized, $prologue)) {
		return $serialized;
	}

	// libxml2 saveHTML() risks removing Unicode BOM and XML declaration,
	// which affects future detection of charset encoding, so manually restore it
	$serializedHtmlPos = stripos($serialized, '<html');
	if ($serializedHtmlPos === false || $serializedHtmlPos > 512) {
		return $serialized;
	}
	return $prologue . substr($serialized, $serializedHtmlPos);
}
|
||
|
||
/**
 * Tell whether two URLs share the same origin, i.e. the same scheme, host, and port.
 *
 * The comparison is case-insensitive. A missing port is replaced by the default one of the scheme
 * (80 for `http`, 443 for `https`, 0 otherwise), so that `http://example.net` and
 * `http://example.net:80` are considered the same origin.
 * A missing scheme or host is treated as an empty string and does not raise any warning.
 *
 * @return bool true if both URLs can be parsed and have the same origin, false otherwise
 */
public static function compareURLOrigins(string $url1, string $url2): bool {
	$origins = [];
	foreach ([$url1, $url2] as $url) {
		$parts = parse_url(strtolower($url));
		if ($parts === false) {
			return false;
		}
		$scheme = $parts['scheme'] ?? '';
		$origins[] = [
			$scheme,
			$parts['host'] ?? '',
			$parts['port'] ?? match ($scheme) {
				'http' => 80,
				'https' => 443,
				default => 0,
			},
		];
	}
	return $origins[0] === $origins[1];
}
|
||
|
||
/**
 * Check the host of a URL against the SSRF rules and return a value for CURLOPT_RESOLVE.
 *
 * The host is resolved (DNS A and AAAA records, cached per host in memory) unless it is already an IP.
 * An IP is accepted when it matches the allowlist, as a CIDR range, as `ip:port`, `[ipv6]:port`,
 * or `host:port`, or when it is neither private nor reserved.
 * The allowlist comes from the `INTERNAL_HOST_ALLOWLIST` environment variable (whitespace-separated)
 * or else from the system configuration. Its entries are compared case-insensitively,
 * and the entry `*` disables all checks.
 *
 * @return array<string>|null|false
 * - an array with one `host:port:ip[,ip...]` entry pinning the accepted IPs of a domain;
 * - an empty array when no override is needed (checks disabled, or the URL contains an accepted IP);
 * - null if no allowed IPs were found;
 * - false if the URL is invalid or the domain failed to resolve.
 */
public static function getCurlResolveInfo(string $url): array|null|false {
	$url = strtolower($url);
	$parsed = parse_url($url);
	if ($parsed === false) {
		return false;
	}
	$host = $parsed['host'] ?? null;
	$scheme = $parsed['scheme'] ?? null;
	if ($host === null || $scheme === null) {
		return false;
	}
	if (str_starts_with($host, '[') && str_ends_with($host, ']')) {
		// IPv6 literal: remove the brackets used in URLs to get the bare address
		if (strlen($host) === 2) {
			return false;
		}
		$host = substr($host, 1, strlen($host) - 2);
	}

	$internal_host_allowlist = getenv('INTERNAL_HOST_ALLOWLIST');
	if (is_string($internal_host_allowlist) && $internal_host_allowlist !== '') {
		$internal_host_allowlist = preg_split('/\s+/', $internal_host_allowlist, -1, PREG_SPLIT_NO_EMPTY);
	}
	if (!is_array($internal_host_allowlist) || empty($internal_host_allowlist)) {
		$internal_host_allowlist = FreshRSS_Context::systemConf()->internal_host_allowlist;
	}
	// The URL was lower-cased above, so compare against lower-cased entries. Ignore non-string entries
	$internal_host_allowlist = array_map(
		static fn(string $entry): string => strtolower($entry),
		array_filter($internal_host_allowlist, static fn(mixed $entry): bool => is_string($entry))
	);

	if (in_array('*', $internal_host_allowlist, true)) {
		return [];	// Disables SSRF checks entirely (unsafe)
	}

	$port = $parsed['port'] ?? match ($scheme) {
		'http' => 80,
		'https' => 443,
		default => 0,
	};
	$resolve_str = "$host:$port:";
	$ips_ok = [];
	$ips = [];
	$records = [];
	if (filter_var($host, FILTER_VALIDATE_IP) !== false) {
		$ips[] = $host;
	} elseif (isset(self::$resolve_ok[$host])) {
		$ips = self::$resolve_ok[$host];
	} else {
		$records = @dns_get_record($host, DNS_A + DNS_AAAA);
		if ($records === false) {
			return false;
		}
		foreach ($records as $record) {
			$ip = $record['ip'] ?? $record['ipv6'] ?? null;
			if (is_string($ip)) {
				$ips[] = $ip;
			}
		}
		self::$resolve_ok[$host] = $ips;
	}

	$cidr_allowlist = array_filter($internal_host_allowlist, static fn(string $entry): bool => str_contains($entry, '/'));
	foreach ($ips as $ip) {
		$allowlist_str = "$ip:$port";
		$add_ip = $ip;
		if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6) !== false) {
			// IPv6 addresses are written between brackets, both in the allowlist and for cURL
			$allowlist_str = "[$ip]:$port";
			$add_ip = "[$ip]";
		}
		foreach ($cidr_allowlist as $cidr) {
			if (self::checkCIDR($ip, $cidr)) {
				$ips_ok[] = $add_ip;
				continue 2;
			}
		}
		if (in_array($allowlist_str, $internal_host_allowlist, true) ||
			in_array("$host:$port", $internal_host_allowlist, true)) {
			$ips_ok[] = $add_ip;
			continue;
		}

		if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE) === false) {
			continue;
		}
		// Extra check because the above one might not be enough: https://github.com/php/php-src/issues/16944
		// Workaround is available by using `FILTER_FLAG_GLOBAL_RANGE` instead, but that was only added in PHP 8.2, and we need to support PHP 8.1+
		foreach (self::PRIVATE_SUBNETS as $cidr) {
			if (self::checkCIDR($ip, $cidr)) {
				continue 2;
			}
		}

		$ips_ok[] = $add_ip;
	}

	if (count($ips_ok) > 0) {
		if (count($records) > 0 || isset(self::$resolve_ok[$host])) {
			// Only the accepted IPs are given to cURL, so that it never resolves the domain by itself
			$resolve_str .= implode(',', $ips_ok);
			return [$resolve_str];
		}
		if (filter_var($host, FILTER_VALIDATE_IP) !== false) {
			// No resolve overrides since the URL only contained an IP, not a domain
			return [];
		}
	}

	if (count($ips) === 0) {
		return false;
	}

	return null;
}
|
||
|
||
|
||
/**
 * Fetch a URL with cURL, optionally going through a local file cache.
 *
 * HTTP redirections are followed manually (`CURLOPT_FOLLOWLOCATION` is always disabled), so that every hop
 * is validated by getCurlResolveInfo() when no proxy is used. On a cross-origin redirection,
 * cookies, credentials, and `Cookie` / `Authorization` headers are dropped. A POST becomes a GET after a 301, 302, or 303.
 * Exceeding the maximum number of redirections is reported as a failure.
 * cURL options are applied in this order, the last one winning: defaults, user-defined `$attributes['curl_params']`
 * (sanitized), system configuration, `$curl_options`.
 *
 * @param non-empty-string $url
 * @param string|null $cachePath path to cache file, or `null` to disable caching
 * @param string $type {html,ico,json,opml,xml}
 * @param array<string,mixed> $attributes May contain user-defined cURL options in `$attributes['curl_params']`,
 * as well as `timeout` and `ssl_verify`
 * @param array<int,mixed> $curl_options Internal overrides of cURL options
 * @return array{body:string,effective_url:string,redirect_count:int,fail:bool,status:int,error:string}
 * `status` is the HTTP response code (e.g. 200, 404), or a custom negative value:
 * * `-200` served from local cache;
 * * `-429` blocked by active `Retry-After` period;
 * * `-500` `curl_init()` failure, or URL refused by the SSRF checks, or host failing to resolve.
 */
public static function httpGet(string $url, ?string $cachePath = null, string $type = 'html', array $attributes = [], array $curl_options = []): array {
	$limits = FreshRSS_Context::systemConf()->limits;
	$feed_timeout = empty($attributes['timeout']) || !is_numeric($attributes['timeout']) ? 0 : intval($attributes['timeout']);

	if ($cachePath !== null) {
		$cacheMtime = @filemtime($cachePath);
		if ($cacheMtime !== false && $cacheMtime > time() - intval($limits['cache_duration'])) {
			$body = @file_get_contents($cachePath);
			if ($body != false) {
				syslog(LOG_DEBUG, 'FreshRSS uses cache for ' . \SimplePie\Misc::url_remove_credentials($url));
				return ['body' => $body, 'effective_url' => $url, 'redirect_count' => 0, 'fail' => false, 'status' => -200, 'error' => ''];
			}
		}
	}

	if (rand(0, 30) === 1) {	// Remove old cache once in a while
		cleanCache(CLEANCACHE_HOURS);
	}

	// Must be known before processing the user-defined headers, which may need a default Accept header
	$accept = match ($type) {
		'json' => 'application/json,application/feed+json,application/javascript;q=0.9,text/javascript;q=0.8,*/*;q=0.7',
		'opml' => 'text/x-opml,text/xml;q=0.9,application/xml;q=0.9,*/*;q=0.8',
		'xml' => 'application/xml,application/xhtml+xml,text/xml;q=0.9,*/*;q=0.8',
		'ico' => 'image/x-icon,image/vnd.microsoft.icon,image/ico,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.1',
		default => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',	// Including 'html'
	};

	$proxy = is_string(FreshRSS_Context::systemConf()->curl_options[CURLOPT_PROXY] ?? null) ? FreshRSS_Context::systemConf()->curl_options[CURLOPT_PROXY] : '';
	$options = [];	// User-defined cURL options
	if (is_array($attributes['curl_params'] ?? null)) {
		$options = self::sanitizeCurlParams($attributes['curl_params']);
		$proxy = is_string($options[CURLOPT_PROXY] ?? null) ? $options[CURLOPT_PROXY] : $proxy;
		if (is_array($options[CURLOPT_HTTPHEADER] ?? null)) {
			// Remove headers problematic for security
			$options[CURLOPT_HTTPHEADER] = array_filter($options[CURLOPT_HTTPHEADER],
				fn($header) => is_string($header) && !preg_match('/^(Remote-User|X-WebAuth-User)\\s*:/i', $header));
			// Add Accept header if it is not set, because these headers replace the default ones.
			// preg_grep() returns an empty array when nothing matches, and false only on error
			if (empty(preg_grep('/^Accept\\s*:/i', $options[CURLOPT_HTTPHEADER]))) {
				$options[CURLOPT_HTTPHEADER][] = 'Accept: ' . $accept;
			}
		}
	}
	$proxy = is_string($curl_options[CURLOPT_PROXY] ?? null) ? $curl_options[CURLOPT_PROXY] : $proxy;

	if (($retryAfter = self::getRetryAfter($url, $proxy)) > 0) {
		Minz_Log::warning('For that domain, will first retry after ' . date('c', $retryAfter) . '. ' . \SimplePie\Misc::url_remove_credentials($url));
		return ['body' => '', 'effective_url' => $url, 'redirect_count' => 0, 'fail' => true, 'status' => -429, 'error' => ''];
	}

	if (FreshRSS_Context::systemConf()->simplepie_syslog_enabled) {
		syslog(LOG_INFO, 'FreshRSS GET ' . $type . ' ' . \SimplePie\Misc::url_remove_credentials($url));
	}

	$original_url = $url;
	$fail = false;
	$redirs = 0;
	$max_redirs = $curl_options[CURLOPT_MAXREDIRS] ?? $options[CURLOPT_MAXREDIRS] ?? FreshRSS_Context::systemConf()->curl_options[CURLOPT_MAXREDIRS] ?? null;
	if (!is_int($max_redirs)) {
		$max_redirs = 4;
	}
	while (true) {	// One iteration per HTTP request, i.e. per redirection hop
		$url = is_string($url) ? $url : '';
		$resolve = [];
		if ($proxy === '') {
			// Without proxy, the host of every hop must pass the SSRF checks
			$resolve = self::getCurlResolveInfo($url);
			if ($resolve === null) {
				Minz_Log::warning('Fetching this URL is not allowed, because the host’s IP is not in the allowlist [' .
					\SimplePie\Misc::url_remove_credentials($url) . ']');
				return ['body' => '', 'effective_url' => '', 'redirect_count' => 0, 'fail' => true, 'status' => -500, 'error' => ''];
			} elseif ($resolve === false) {
				return ['body' => '', 'effective_url' => '', 'redirect_count' => 0, 'fail' => true, 'status' => -500, 'error' => ''];
			}
			if (!empty($resolve)) {
				$curl_options[CURLOPT_RESOLVE] = $resolve;	// Prevent DNS rebinding
			}
		}
		// TODO: Implement HTTP 1.1 conditional GET If-Modified-Since
		$ch = curl_init();
		if ($ch === false || $url === '') {
			return ['body' => '', 'effective_url' => '', 'redirect_count' => 0, 'fail' => true, 'status' => -500, 'error' => ''];
		}
		curl_setopt_array($ch, [
			CURLOPT_URL => $url,
			CURLOPT_HTTPHEADER => ['Accept: ' . $accept],
			CURLOPT_USERAGENT => FRESHRSS_USERAGENT,
			CURLOPT_CONNECTTIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'],
			CURLOPT_TIMEOUT => $feed_timeout > 0 ? $feed_timeout : $limits['timeout'],
			CURLOPT_RETURNTRANSFER => true,
			CURLOPT_ACCEPT_ENCODING => '',	//Enable all encodings
			//CURLOPT_VERBOSE => 1,	// To debug sent HTTP headers
		]);

		curl_setopt_array($ch, $options);
		curl_setopt_array($ch, FreshRSS_Context::systemConf()->curl_options);

		$responseHeaders = '';
		curl_setopt($ch, CURLOPT_HEADERFUNCTION, function (\CurlHandle $ch, string $header) use (&$responseHeaders) {
			if (trim($header) !== '') {	// Skip e.g. separation with trailer headers
				$responseHeaders .= $header;
			}
			return strlen($header);
		});

		if (isset($attributes['ssl_verify'])) {
			curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, empty($attributes['ssl_verify']) ? 0 : 2);
			curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (bool)$attributes['ssl_verify']);
			if (empty($attributes['ssl_verify'])) {
				curl_setopt($ch, CURLOPT_SSL_CIPHER_LIST, 'DEFAULT@SECLEVEL=1');
			}
		}

		// Only allow HTTP and HTTPS
		if (defined('CURLOPT_PROTOCOLS_STR') && is_int(CURLOPT_PROTOCOLS_STR)) {
			$curl_options[CURLOPT_PROTOCOLS_STR] = 'http,https';
			if (defined('CURLOPT_REDIR_PROTOCOLS_STR') && is_int(CURLOPT_REDIR_PROTOCOLS_STR)) {
				$curl_options[CURLOPT_REDIR_PROTOCOLS_STR] = 'http,https';
			}
		} elseif (defined('CURLPROTO_HTTP') && defined('CURLPROTO_HTTPS')) {
			// Legacy PHP 8.2-
			if (defined('CURLOPT_PROTOCOLS')) {
				$curl_options[CURLOPT_PROTOCOLS] = CURLPROTO_HTTP | CURLPROTO_HTTPS;
			}
			if (defined('CURLOPT_REDIR_PROTOCOLS')) {
				$curl_options[CURLOPT_REDIR_PROTOCOLS] = CURLPROTO_HTTP | CURLPROTO_HTTPS;
			}
		}

		curl_setopt_array($ch, $curl_options);
		curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);	// We handle HTTP redirections manually for security

		$body = curl_exec($ch);
		$c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
		$c_content_type = '' . curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
		$c_effective_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
		$c_error = curl_error($ch);

		$headers = [];
		if ($body !== false) {
			$responseHeaders .= "\r\n";
			$responseHeaders = \SimplePie\HTTP\Parser::prepareHeaders($responseHeaders);
			$parser = new \SimplePie\HTTP\Parser($responseHeaders);
			if ($parser->parse()) {
				$headers = $parser->headers;
			}
		}

		if (in_array($c_status, [301, 302, 303, 307, 308], true)) {
			// Handle the redirect by making another request
			$location = \SimplePie\Misc::absolutize_url($headers['location'] ?? $url, $url);
			if ($location === false) {
				$location = $url;
			}
			if (!self::compareURLOrigins($url, $location)) {
				// Do not leak authentication information to another origin
				unset($curl_options[CURLOPT_COOKIE]);
				unset($curl_options[CURLOPT_USERPWD]);
				unset($options[CURLOPT_COOKIE]);
				unset($options[CURLOPT_USERPWD]);
				if (is_array($options[CURLOPT_HTTPHEADER] ?? null)) {
					$options[CURLOPT_HTTPHEADER] = array_filter($options[CURLOPT_HTTPHEADER], fn(mixed $header): bool =>
						is_string($header) && !preg_match('/^(Cookie|Authorization)\\s*:/i', $header));
				}
				if (is_array($curl_options[CURLOPT_HTTPHEADER] ?? null)) {
					$curl_options[CURLOPT_HTTPHEADER] = array_filter($curl_options[CURLOPT_HTTPHEADER], fn(mixed $header): bool =>
						is_string($header) && !preg_match('/^(Cookie|Authorization)\\s*:/i', $header));
				}
			}
			if ($max_redirs >= 0) {
				$redirs++;
			}
			if ($redirs > $max_redirs) {
				Minz_Log::warning('Error fetching content: Too many redirects were hit [' . \SimplePie\Misc::url_remove_credentials($original_url) . ']');
				// Report a failure, and never return nor cache the body of the redirection response
				$fail = true;
				$body = '';
				if ($c_error === '') {
					$c_error = 'Too many redirects';
				}
				break;
			}
			if ((isset($options[CURLOPT_POST]) || isset($curl_options[CURLOPT_POST])) &&
				in_array($c_status, [301, 302, 303], true)) {	// Not for 307 and 308, which must not change the HTTP method
				unset($curl_options[CURLOPT_POST]);
				unset($curl_options[CURLOPT_POSTFIELDS]);
				unset($options[CURLOPT_POST]);
				unset($options[CURLOPT_POSTFIELDS]);
				if (is_array($options[CURLOPT_HTTPHEADER] ?? null)) {
					$options[CURLOPT_HTTPHEADER] = array_filter($options[CURLOPT_HTTPHEADER], fn(mixed $header): bool =>
						is_string($header) && !str_starts_with(strtolower(trim($header)), 'content-type:'));
				}
				if (is_array($curl_options[CURLOPT_HTTPHEADER] ?? null)) {
					$curl_options[CURLOPT_HTTPHEADER] = array_filter($curl_options[CURLOPT_HTTPHEADER], fn(mixed $header): bool =>
						is_string($header) && !str_starts_with(strtolower(trim($header)), 'content-type:'));
				}
			}
			$url = $location;
			continue;
		}

		$fail = $c_status != 200 || $c_error != '' || $body === false;
		if ($fail) {
			$body = '';
			Minz_Log::warning('Error fetching content: HTTP code ' . $c_status . ': ' . $c_error . ' ' .
				\SimplePie\Misc::url_remove_credentials($url));
			if (in_array($c_status, [429, 503], true)) {
				$retryAfter = self::setRetryAfter($url, $proxy, $headers['retry-after'] ?? '');
				$errorMessage = ($c_status === 429 ? 'HTTP 429 Too Many Requests! [' : 'HTTP 503 Service Unavailable! [') .
					\SimplePie\Misc::url_remove_credentials($url) . ']';
				if ($retryAfter > 0) {
					$errorMessage .= ' We may retry after ' . date('c', $retryAfter);
				}
				Minz_Log::warning($errorMessage);
			}
		} elseif (!is_string($body) || strlen($body) === 0) {	// TODO: Implement HTTP 410 Gone
			$body = '';
		} else {
			if (in_array($type, ['html', 'json', 'opml', 'xml'], true)) {
				$body = trim($body, " \n\r\t\v");	// Do not trim \x00 to avoid breaking a BOM
			}
			if (in_array($type, ['html', 'xml', 'opml'], true)) {
				$body = self::enforceHttpEncoding($body, $c_content_type);
			}
			if (in_array($type, ['html'], true)) {
				if (stripos($c_content_type, 'text/plain') !== false) {
					// Plain text to be displayed as preformatted text. Prefixed with UTF-8 BOM
					$body = "\xEF\xBB\xBF" . '<pre class="text-plain">' . htmlspecialchars($body, ENT_NOQUOTES, 'UTF-8') . '</pre>';
				} else {
					$body = self::enforceHtmlBase($body, $c_effective_url);
				}
			}
		}
		break;
	}

	if ($cachePath !== null && file_put_contents($cachePath, $body) === false) {
		Minz_Log::warning("Error saving cache $cachePath for " . \SimplePie\Misc::url_remove_credentials($url));
	}

	return ['body' => is_string($body) ? $body : '', 'effective_url' => $c_effective_url, 'redirect_count' => $redirs,
		'fail' => $fail, 'status' => $c_status, 'error' => $c_error];
}
|
||
|
||
/**
 * Converts an IP (v4 or v6) to a binary representation using inet_pton
 *
 * @param string $ip the IP to convert
 * @return string a string of `0` and `1` characters (32 for IPv4, 128 for IPv6),
 * or an empty string if the IP cannot be parsed
 */
private static function ipToBits(string $ip): string {
	// inet_pton() raises a warning and returns false for an unparsable address
	$packed = @inet_pton($ip);
	if ($packed === false || $packed === '') {
		// Explicit early exit: str_split('') returns [''] before PHP 8.2, which would produce '00000000'
		return '';
	}
	$bits = '';
	foreach (str_split($packed) as $byte) {
		$bits .= str_pad(decbin(ord($byte)), 8, '0', STR_PAD_LEFT);
	}
	return $bits;
}
|
||
|
||
/**
 * Check if an ip belongs to the provided range (in CIDR format)
 *
 * The answer is always false when the IP or the subnet cannot be parsed, when they are not of
 * the same family (IPv4 / IPv6), or when the prefix length is missing, not a number, or too large.
 *
 * @param string $ip the IP that we want to verify (ex: 192.168.16.1)
 * @param string $range the range to check against (ex: 192.168.16.0/24)
 * @return bool true if the IP is in the range, otherwise false
 */
private static function checkCIDR(string $ip, string $range): bool {
	$ipBits = self::ipToBits($ip);
	if ($ipBits === '') {
		return false;
	}

	[$subnet, $prefix] = array_pad(explode('/', $range), 2, '');
	if ($subnet === '') {
		return false;
	}
	$subnetBits = self::ipToBits($subnet);
	if ($subnetBits === '' || strlen($subnetBits) !== strlen($ipBits)) {
		return false;	// Invalid subnet, or do not mix IPv4 and IPv6
	}

	if (!ctype_digit($prefix)) {
		return false;
	}
	$prefixLength = (int)$prefix;
	$maxPrefixLength = str_contains($ip, ':') ? 128 : 32;
	if ($prefixLength > $maxPrefixLength) {
		return false;	// Reject invalid mask bits lengths
	}

	// Both strings have the same length, so compare their leading bits. A prefix of 0 matches everything
	return strncmp($ipBits, $subnetBits, $prefixLength) === 0;
}
|
||
|
||
/**
 * Check if the client (e.g. last proxy) is allowed to send unsafe headers.
 * The authorized ranges come from the `TRUSTED_PROXY` environment variable (whitespace-separated),
 * or from the `trusted_sources` configuration option when the variable is not set, empty, or equal to 0.
 * The connection IP is obtained from the `CONN_REMOTE_ADDR`
 * (if available, to be robust even when using Apache mod_remoteip) or `REMOTE_ADDR` environment variables.
 * @return bool true if the sender’s IP is in one of the ranges defined in the configuration, else false
 */
public static function checkTrustedIP(): bool {
	if (!FreshRSS_Context::hasSystemConf()) {
		return false;
	}
	$clientIp = Minz_Request::connectionRemoteAddress();
	if ($clientIp === '') {
		return false;
	}

	$ranges = getenv('TRUSTED_PROXY');
	// Loose comparison on purpose: any value numerically equal to 0 is not used as a list
	if (is_string($ranges) && $ranges != 0) {
		$ranges = preg_split('/\s+/', $ranges, -1, PREG_SPLIT_NO_EMPTY);
	}
	if (!is_array($ranges) || empty($ranges)) {
		$ranges = FreshRSS_Context::systemConf()->trusted_sources;
	}

	foreach ($ranges as $range) {
		if (self::checkCIDR($clientIp, $range)) {
			return true;
		}
	}
	return false;
}
|
||
|
||
/**
 * Get the user name provided by the Web server or by HTTP headers.
 *
 * `REMOTE_USER` then `REDIRECT_REMOTE_USER` are used first. `HTTP_REMOTE_USER` then `HTTP_X_WEBAUTH_USER`
 * are only used when `$onlyTrusted` is false or when checkTrustedIP() accepts the client.
 * When those sources provide more than one distinct non-empty value, a warning is logged and nothing is returned.
 *
 * @param bool $onlyTrusted whether the HTTP headers require a trusted client IP
 * @return string the user name, or an empty string if there is none
 */
public static function httpAuthUser(bool $onlyTrusted = true): string {
	$candidates = array_intersect_key($_SERVER, [
		'REMOTE_USER' => '',
		'REDIRECT_REMOTE_USER' => '',
		'HTTP_REMOTE_USER' => '',
		'HTTP_X_WEBAUTH_USER' => '',
	]);
	$distinctUsers = array_unique(array_filter($candidates, fn($value) => is_string($value) && $value !== ''));
	if (count($distinctUsers) > 1) {
		Minz_Log::warning('Multiple HTTP authentication headers!');
		return '';
	}

	// Set by the Web server itself
	foreach (['REMOTE_USER', 'REDIRECT_REMOTE_USER'] as $key) {
		$user = $_SERVER[$key] ?? null;
		if (!empty($user) && is_string($user)) {
			return $user;
		}
	}

	if ($onlyTrusted && !self::checkTrustedIP()) {
		return '';
	}
	// HTTP headers, which any client can send
	foreach (['HTTP_REMOTE_USER', 'HTTP_X_WEBAUTH_USER'] as $key) {
		$user = $_SERVER[$key] ?? null;
		if (!empty($user) && is_string($user)) {
			return $user;
		}
	}
	return '';
}
|
||
}
|