Bug récupération flux tronqués

Corrige https://github.com/marienfressinaud/FreshRSS/issues/381
This commit is contained in:
Alexandre Alapetite
2014-01-22 21:34:59 +01:00
parent 922129c4db
commit bc6aba67bb
2 changed files with 51 additions and 47 deletions

View File

@@ -187,54 +187,12 @@ class FreshRSS_Feed extends Minz_Model {
Minz_Exception::ERROR
);
} else {
$feed = new SimplePie ();
$feed->set_useragent(Minz_Translate::t ('freshrss') . '/' . FRESHRSS_VERSION . ' (' . PHP_OS . '; ' . FRESHRSS_WEBSITE . ') ' . SIMPLEPIE_NAME . '/' . SIMPLEPIE_VERSION);
$url = htmlspecialchars_decode ($this->url, ENT_QUOTES);
if ($this->httpAuth != '') {
$url = preg_replace ('#((.+)://)(.+)#', '${1}' . $this->httpAuth . '@${3}', $url);
}
$feed = customSimplePie();
$feed->set_feed_url ($url);
$feed->set_cache_location (CACHE_PATH);
$feed->set_cache_duration(1500);
$feed->strip_htmltags (array (
'base', 'blink', 'body', 'doctype', 'embed',
'font', 'form', 'frame', 'frameset', 'html',
'input', 'marquee', 'meta', 'noscript',
'object', 'param', 'plaintext', 'script', 'style',
));
$feed->strip_attributes(array_merge($feed->strip_attributes, array(
'autoplay', 'onload', 'onunload', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup',
'onmouseover', 'onmousemove', 'onmouseout', 'onfocus', 'onblur',
'onkeypress', 'onkeydown', 'onkeyup', 'onselect', 'onchange', 'seamless')));
$feed->add_attributes(array(
'img' => array('lazyload' => ''), //http://www.w3.org/TR/resource-priorities/
'audio' => array('preload' => 'none'),
'iframe' => array('postpone' => '', 'sandbox' => 'allow-scripts allow-same-origin'),
'video' => array('postpone' => '', 'preload' => 'none'),
));
$feed->set_url_replacements(array(
'a' => 'href',
'area' => 'href',
'audio' => 'src',
'blockquote' => 'cite',
'del' => 'cite',
'form' => 'action',
'iframe' => 'src',
'img' => array(
'longdesc',
'src'
),
'input' => 'src',
'ins' => 'cite',
'q' => 'cite',
'source' => 'src',
'track' => 'src',
'video' => array(
'poster',
'src',
),
));
$feed->init ();
if ($feed->error ()) {

View File

@@ -106,13 +106,59 @@ function html_only_entity_decode($text) {
return strtr($text, $htmlEntitiesOnly);
}
function sanitizeHTML($data) {
/**
 * Create a SimplePie object carrying the configuration shared by every caller.
 *
 * The returned instance has its user agent, cache location and cache duration
 * set, a list of HTML tags and attributes to strip, attributes to add on
 * img/audio/iframe/video elements, and the element => attribute map handed to
 * set_url_replacements(). No feed URL is set and init() is not called here;
 * both are left to the caller.
 *
 * @return SimplePie the configured, not yet initialised instance
 */
function customSimplePie() {
	// Instantiate first: the SIMPLEPIE_* constants used below are referenced only after this point.
	$pie = new SimplePie();

	$userAgent = Minz_Translate::t('freshrss') . '/' . FRESHRSS_VERSION . ' (' . PHP_OS . '; ' . FRESHRSS_WEBSITE . ') ' . SIMPLEPIE_NAME . '/' . SIMPLEPIE_VERSION;
	$pie->set_useragent($userAgent);

	$pie->set_cache_location(CACHE_PATH);
	$pie->set_cache_duration(1500);	// presumably seconds; confirm against the SimplePie documentation

	// Tags removed from feed content.
	$forbiddenTags = array(
		'base', 'blink', 'body', 'doctype', 'embed',
		'font', 'form', 'frame', 'frameset', 'html',
		'input', 'marquee', 'meta', 'noscript',
		'object', 'param', 'plaintext', 'script', 'style',
	);
	$pie->strip_htmltags($forbiddenTags);

	// Attributes removed in addition to the ones SimplePie already lists in its strip_attributes property.
	$extraForbiddenAttributes = array(
		'autoplay', 'onload', 'onunload', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup',
		'onmouseover', 'onmousemove', 'onmouseout', 'onfocus', 'onblur',
		'onkeypress', 'onkeydown', 'onkeyup', 'onselect', 'onchange', 'seamless',
	);
	$pie->strip_attributes(array_merge($pie->strip_attributes, $extraForbiddenAttributes));

	// Attributes added to media elements.
	$addedAttributes = array(
		'img' => array('lazyload' => ''),	//http://www.w3.org/TR/resource-priorities/
		'audio' => array('preload' => 'none'),
		'iframe' => array('postpone' => '', 'sandbox' => 'allow-scripts allow-same-origin'),
		'video' => array('postpone' => '', 'preload' => 'none'),
	);
	$pie->add_attributes($addedAttributes);

	// Element => attribute(s) map passed to set_url_replacements().
	$urlAttributes = array(
		'a' => 'href',
		'area' => 'href',
		'audio' => 'src',
		'blockquote' => 'cite',
		'del' => 'cite',
		'form' => 'action',
		'iframe' => 'src',
		'img' => array(
			'longdesc',
			'src',
		),
		'input' => 'src',
		'ins' => 'cite',
		'q' => 'cite',
		'source' => 'src',
		'track' => 'src',
		'video' => array(
			'poster',
			'src',
		),
	);
	$pie->set_url_replacements($urlAttributes);

	return $pie;
}
/**
 * Sanitize an HTML fragment using a shared SimplePie instance.
 *
 * The instance is obtained from customSimplePie() and initialised on the first
 * call only; it is then kept in a static local and reused by later calls.
 *
 * @param string $data HTML fragment to sanitize
 * @param string $base value forwarded as the third argument of SimplePie's
 *                     sanitizer (the caller in this file passes the page URL);
 *                     defaults to an empty string
 * @return string the sanitizer output after html_only_entity_decode()
 */
function sanitizeHTML($data, $base = '') {
	static $simplePie = null;
	if ($simplePie === null) {
		// Build the instance once, with the shared configuration; a bare
		// `new SimplePie()` here would be overwritten immediately and wasted.
		$simplePie = customSimplePie();
		$simplePie->init();
	}
	// Single return that forwards $base: a preceding return without it would
	// leave the parameter unused and this statement unreachable.
	return html_only_entity_decode($simplePie->sanitize->sanitize($data, SIMPLEPIE_CONSTRUCT_HTML, $base));
}
/* permet de récupérer le contenu d'un article pour un flux qui n'est pas complet */
@@ -125,7 +171,7 @@ function get_content_by_parsing ($url, $path) {
if ($html) {
$doc = phpQuery::newDocument ($html);
$content = $doc->find ($path);
return sanitizeHTML($content->__toString());
return sanitizeHTML($content->__toString(), $url);
} else {
throw new Exception ();
}