mirror of
https://github.com/FreshRSS/FreshRSS.git
synced 2026-03-27 10:43:41 -04:00
Rework fetch favicons (#7767)
* Use main function `httpGet()` instead of local one
* Use HTTP cache, also between users
* Do not default to feed URL when there is no website URL

TODO for later: consider supporting Atom's `<icon>` and RSS 2.0's `<image>`; see https://github.com/FreshRSS/FreshRSS/issues/7774
This commit is contained in:
committed by
GitHub
parent
188cc0d063
commit
e915ebe46e
@@ -722,11 +722,13 @@ class FreshRSS_feed_Controller extends FreshRSS_ActionController {
|
||||
$feed->_name($name);
|
||||
$feedProperties['name'] = $feed->name(false);
|
||||
}
|
||||
if (trim($feed->website()) === '') {
|
||||
if ($feed->website() === '' || $feed->website() === $feed->url()) {
|
||||
$website = html_only_entity_decode($simplePie->get_link());
|
||||
$feed->_website($website == '' ? $feed->url() : $website);
|
||||
$feedProperties['website'] = $feed->website();
|
||||
$feed->faviconPrepare();
|
||||
if ($website !== $feed->website()) {
|
||||
$feed->_website($website);
|
||||
$feedProperties['website'] = $feed->website();
|
||||
$feed->faviconPrepare();
|
||||
}
|
||||
}
|
||||
if (trim($feed->description()) === '') {
|
||||
$description = html_only_entity_decode($simplePie->get_description());
|
||||
|
||||
@@ -369,7 +369,6 @@ class FreshRSS_subscription_Controller extends FreshRSS_ActionController {
|
||||
}
|
||||
|
||||
if ($favicon_uploaded && !$resetFavicon) {
|
||||
require_once(LIB_PATH . '/favicons.php');
|
||||
$max_size = FreshRSS_Context::systemConf()->limits['max_favicon_upload_size'];
|
||||
if ($favicon_size > $max_size) {
|
||||
Minz_Request::bad(_t('feedback.sub.feed.favicon.too_large', format_bytes($max_size)), $url_redirect);
|
||||
|
||||
@@ -176,6 +176,8 @@ class FreshRSS_Feed extends Minz_Model {
|
||||
}
|
||||
|
||||
$attributesOnly = $contents === null && $tmpPath === '';
|
||||
|
||||
require_once(LIB_PATH . '/favicons.php');
|
||||
if (!$attributesOnly && !isImgMime(is_string($contents) ? $contents : '')) {
|
||||
throw new FreshRSS_UnsupportedImageFormat_Exception();
|
||||
}
|
||||
@@ -195,7 +197,6 @@ class FreshRSS_Feed extends Minz_Model {
|
||||
$this->_attribute('customFaviconExt', $extName);
|
||||
$this->_attribute('customFaviconDisallowDel', $disallowDelete);
|
||||
|
||||
require_once(LIB_PATH . '/favicons.php');
|
||||
$newPath = FAVICONS_DIR . $this->hashFavicon(skipCache: true) . '.ico';
|
||||
if ($attributesOnly && !file_exists($newPath)) {
|
||||
$updateFeed = false;
|
||||
@@ -404,7 +405,12 @@ class FreshRSS_Feed extends Minz_Model {
|
||||
if ($this->customFavicon()) {
|
||||
return;
|
||||
}
|
||||
$url = $this->website(fallback: true);
|
||||
$url = $this->website(fallback: false);
|
||||
if ($url === '' || $url === $this->url) {
|
||||
// Get root URL from the feed URL
|
||||
$url = preg_replace('%^(https?://[^/]+).*$%i', '$1/', $this->url) ?? $this->url;
|
||||
}
|
||||
|
||||
$txt = FAVICONS_DIR . $this->hashFavicon() . '.txt';
|
||||
if (@file_get_contents($txt) !== $url) {
|
||||
file_put_contents($txt, $url);
|
||||
|
||||
1
data/cache/.gitignore
vendored
1
data/cache/.gitignore
vendored
@@ -1,4 +1,5 @@
|
||||
*.html
|
||||
*.ico
|
||||
*.json
|
||||
*.spc
|
||||
*.xml
|
||||
|
||||
@@ -22,81 +22,14 @@ function isImgMime(string $content): bool {
|
||||
return $isImage;
|
||||
}
|
||||
|
||||
/** @param array<int,int|bool|string> $curlOptions */
|
||||
function downloadHttp(string &$url, array $curlOptions = []): string {
|
||||
if (($retryAfter = FreshRSS_http_Util::getRetryAfter($url)) > 0) {
|
||||
Minz_Log::warning('For that domain, will first retry favicon after ' . date('c', $retryAfter) . '. ' . \SimplePie\Misc::url_remove_credentials($url));
|
||||
return '';
|
||||
}
|
||||
|
||||
syslog(LOG_INFO, 'FreshRSS Favicon GET ' . $url);
|
||||
$url2 = checkUrl($url);
|
||||
if ($url2 == false) {
|
||||
return '';
|
||||
}
|
||||
$url = $url2;
|
||||
|
||||
$ch = curl_init($url);
|
||||
if ($ch === false) {
|
||||
return '';
|
||||
}
|
||||
curl_setopt_array($ch, [
|
||||
CURLOPT_HEADER => true,
|
||||
CURLOPT_RETURNTRANSFER => true,
|
||||
CURLOPT_TIMEOUT => 15,
|
||||
CURLOPT_USERAGENT => FRESHRSS_USERAGENT,
|
||||
CURLOPT_MAXREDIRS => 10,
|
||||
CURLOPT_FOLLOWLOCATION => true,
|
||||
CURLOPT_ENCODING => '', //Enable all encodings
|
||||
//CURLOPT_VERBOSE => 1, // To debug sent HTTP headers
|
||||
]);
|
||||
|
||||
FreshRSS_Context::initSystem();
|
||||
if (FreshRSS_Context::hasSystemConf()) {
|
||||
curl_setopt_array($ch, FreshRSS_Context::systemConf()->curl_options);
|
||||
}
|
||||
|
||||
curl_setopt_array($ch, $curlOptions);
|
||||
|
||||
$response = curl_exec($ch);
|
||||
$c_status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
|
||||
$c_effective_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
|
||||
curl_close($ch);
|
||||
|
||||
$parser = new \SimplePie\HTTP\Parser(is_string($response) ? $response : '');
|
||||
if ($parser->parse()) {
|
||||
$headers = $parser->headers;
|
||||
$body = $parser->body;
|
||||
} else {
|
||||
$headers = [];
|
||||
$body = false;
|
||||
}
|
||||
|
||||
if (in_array($c_status, [429, 503], true)) {
|
||||
$retryAfter = FreshRSS_http_Util::setRetryAfter($url, $headers['retry-after'] ?? '');
|
||||
if ($c_status === 429) {
|
||||
$errorMessage = 'HTTP 429 Too Many Requests! Searching favicon [' . \SimplePie\Misc::url_remove_credentials($url) . ']';
|
||||
} elseif ($c_status === 503) {
|
||||
$errorMessage = 'HTTP 503 Service Unavailable! Searching favicon [' . \SimplePie\Misc::url_remove_credentials($url) . ']';
|
||||
}
|
||||
if ($retryAfter > 0) {
|
||||
$errorMessage .= ' We may retry after ' . date('c', $retryAfter);
|
||||
}
|
||||
}
|
||||
|
||||
$url2 = checkUrl($c_effective_url);
|
||||
if ($url2 != false) {
|
||||
$url = $url2; //Possible redirect
|
||||
}
|
||||
|
||||
return $c_status === 200 && is_string($body) ? $body : '';
|
||||
function faviconCachePath(string $url): string {
|
||||
return CACHE_PATH . '/' . sha1($url) . '.ico';
|
||||
}
|
||||
|
||||
function searchFavicon(string &$url): string {
|
||||
function searchFavicon(string $url): string {
|
||||
$dom = new DOMDocument();
|
||||
$html = downloadHttp($url);
|
||||
|
||||
if ($html == '' || !@$dom->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
|
||||
['body' => $html, 'effective_url' => $effective_url, 'fail' => $fail] = httpGet($url, cachePath: CACHE_PATH . '/' . sha1($url) . '.html', type: 'html');
|
||||
if ($fail || $html === '' || !@$dom->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
|
||||
return '';
|
||||
}
|
||||
|
||||
@@ -110,14 +43,14 @@ function searchFavicon(string &$url): string {
|
||||
// Use the base element for relative paths, if there is one
|
||||
$baseElements = $xpath->query('//base[@href]');
|
||||
$baseElement = ($baseElements !== false && $baseElements->length > 0) ? $baseElements->item(0) : null;
|
||||
$baseUrl = ($baseElement instanceof DOMElement) ? $baseElement->getAttribute('href') : $url;
|
||||
$baseUrl = ($baseElement instanceof DOMElement) ? $baseElement->getAttribute('href') : $effective_url;
|
||||
|
||||
foreach ($links as $link) {
|
||||
if (!$link instanceof DOMElement) {
|
||||
continue;
|
||||
}
|
||||
$href = trim($link->getAttribute('href'));
|
||||
$urlParts = parse_url($url);
|
||||
$urlParts = parse_url($effective_url);
|
||||
|
||||
// Handle protocol-relative URLs by adding the current URL's scheme
|
||||
if (substr($href, 0, 2) === '//') {
|
||||
@@ -133,7 +66,9 @@ function searchFavicon(string &$url): string {
|
||||
if ($iri == false) {
|
||||
return '';
|
||||
}
|
||||
$favicon = downloadHttp($iri, [CURLOPT_REFERER => $url]);
|
||||
$favicon = httpGet($iri, faviconCachePath($iri), 'ico', curl_options: [
|
||||
CURLOPT_REFERER => $effective_url,
|
||||
])['body'];
|
||||
if (isImgMime($favicon)) {
|
||||
return $favicon;
|
||||
}
|
||||
@@ -152,7 +87,9 @@ function download_favicon(string $url, string $dest): bool {
|
||||
}
|
||||
if ($favicon == '') {
|
||||
$link = $rootUrl . 'favicon.ico';
|
||||
$favicon = downloadHttp($link, [CURLOPT_REFERER => $url]);
|
||||
$favicon = httpGet($link, faviconCachePath($link), 'ico', curl_options: [
|
||||
CURLOPT_REFERER => $url,
|
||||
])['body'];
|
||||
if (!isImgMime($favicon)) {
|
||||
$favicon = '';
|
||||
}
|
||||
|
||||
@@ -432,13 +432,9 @@ function sanitizeHTML(string $data, string $base = '', ?int $maxLength = null):
|
||||
|
||||
function cleanCache(int $hours = 720): void {
|
||||
// N.B.: GLOB_BRACE is not available on all platforms
|
||||
$files = array_merge(
|
||||
glob(CACHE_PATH . '/*.html', GLOB_NOSORT) ?: [],
|
||||
glob(CACHE_PATH . '/*.json', GLOB_NOSORT) ?: [],
|
||||
glob(CACHE_PATH . '/*.spc', GLOB_NOSORT) ?: [],
|
||||
glob(CACHE_PATH . '/*.xml', GLOB_NOSORT) ?: []);
|
||||
$files = glob(CACHE_PATH . '/*.*', GLOB_NOSORT) ?: [];
|
||||
foreach ($files as $file) {
|
||||
if (substr($file, -10) === 'index.html') {
|
||||
if (str_ends_with($file, 'index.html')) {
|
||||
continue;
|
||||
}
|
||||
$cacheMtime = @filemtime($file);
|
||||
@@ -543,7 +539,7 @@ function enforceHtmlBase(string $html, string $href): string {
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $type {html,json,opml,xml}
|
||||
* @param string $type {html,ico,json,opml,xml}
|
||||
* @param array<string,mixed> $attributes
|
||||
* @param array<int,mixed> $curl_options
|
||||
* @return array{body:string,effective_url:string,redirect_count:int,fail:bool}
|
||||
@@ -574,7 +570,7 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
|
||||
syslog(LOG_INFO, 'FreshRSS GET ' . $type . ' ' . \SimplePie\Misc::url_remove_credentials($url));
|
||||
}
|
||||
|
||||
$accept = '*/*;q=0.8';
|
||||
$accept = '';
|
||||
switch ($type) {
|
||||
case 'json':
|
||||
$accept = 'application/json,application/feed+json,application/javascript;q=0.9,text/javascript;q=0.8,*/*;q=0.7';
|
||||
@@ -585,6 +581,9 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
|
||||
case 'xml':
|
||||
$accept = 'application/xml,application/xhtml+xml,text/xml;q=0.9,*/*;q=0.8';
|
||||
break;
|
||||
case 'ico':
|
||||
$accept = 'image/x-icon,image/vnd.microsoft.icon,image/ico,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.1';
|
||||
break;
|
||||
case 'html':
|
||||
default:
|
||||
$accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
|
||||
@@ -673,9 +672,13 @@ function httpGet(string $url, string $cachePath, string $type = 'html', array $a
|
||||
} elseif (!is_string($body) || strlen($body) === 0) {
|
||||
$body = '';
|
||||
} else {
|
||||
$body = trim($body, " \n\r\t\v"); // Do not trim \x00 to avoid breaking a BOM
|
||||
if ($type !== 'json') {
|
||||
if (in_array($type, ['html', 'json', 'opml', 'xml'], true)) {
|
||||
$body = trim($body, " \n\r\t\v"); // Do not trim \x00 to avoid breaking a BOM
|
||||
}
|
||||
if (in_array($type, ['html', 'xml', 'opml'], true)) {
|
||||
$body = enforceHttpEncoding($body, $c_content_type);
|
||||
}
|
||||
if (in_array($type, ['html'], true)) {
|
||||
$body = enforceHtmlBase($body, $c_effective_url);
|
||||
}
|
||||
}
|
||||
|
||||
6
p/f.php
6
p/f.php
@@ -39,6 +39,12 @@ if (($ico_mtime == false || $ico_mtime < $txt_mtime || ($ico_mtime < time() - (m
|
||||
show_default_favicon(1800);
|
||||
exit();
|
||||
}
|
||||
|
||||
FreshRSS_Context::initSystem();
|
||||
if (!FreshRSS_Context::hasSystemConf()) {
|
||||
header('HTTP/1.1 500 Internal Server Error');
|
||||
die('Invalid system init!');
|
||||
}
|
||||
if (!download_favicon($url, $ico)) {
|
||||
// Download failed
|
||||
if ($ico_mtime == false) {
|
||||
|
||||
Reference in New Issue
Block a user