mirror of
https://github.com/FreshRSS/FreshRSS.git
synced 2026-03-06 15:38:33 -05:00
Fix: handle very big feed (#3416)
* Fix: handle big XML files that cause out-of-memory errors by working in chunks in the cleanMd5 function (because of preg_replace) and in parse (because of xml_parse)
* Review
* Fixes in error handling (case of the last call to xml_parse, case of an error during fopen, break in case of an XML error...)
* Take advantage of the chunking when computing the cache hash
* Larger chunks of 1 MB

Co-authored-by: e <bokes74743@tjuln.com>
Co-authored-by: Alexandre Alapetite <alexandre@alapetite.fr>
This commit is contained in:
@@ -1322,12 +1322,24 @@ class SimplePie
|
||||
|
||||
/**
 * Compute an MD5 hash of a feed body, ignoring fragments that are removed
 * before hashing: date-like elements (lastBuildDate, pubDate, updated,
 * feedDate, dc:date, slash:comments), media:starRating / media:statistics
 * elements, and XML comments.
 *
 * The input is copied to a temporary stream and processed in 1 MiB chunks
 * so that preg_replace() never has to work on the whole document at once.
 * Each chunk is filtered independently, so a fragment that straddles a
 * chunk boundary is not removed.
 *
 * @param string $rss Raw feed content
 * @return string MD5 hex digest of the filtered content, or '' when the
 *                input is empty or the temporary stream cannot be used
 */
function cleanMd5($rss)
{
	// Fragments excluded from the hash
	$volatile_patterns = [
		'#<(lastBuildDate|pubDate|updated|feedDate|dc:date|slash:comments)>[^<]+</\\1>#',
		'#<(media:starRating|media:statistics) [^/<>]+/>#',
		'#<!--.+?-->#s',
	];

	$stream = fopen('php://temp', 'r+');
	if ($stream === false)
	{
		return '';
	}

	// fwrite() returns 0 for an empty input: that case yields '' like a failure.
	// The stream is closed on this path too, so it is not leaked.
	if (!fwrite($stream, $rss) || !rewind($stream))
	{
		fclose($stream);
		return '';
	}

	$ctx = hash_init('md5');

	// Compare explicitly against false and '': a chunk equal to "0" is falsy
	// and must not end the loop early.
	while (($stream_data = fread($stream, 1048576)) !== false && $stream_data !== '')
	{
		$cleaned = preg_replace($volatile_patterns, '', $stream_data);

		// preg_replace() returns null on a PCRE error: hash the raw chunk
		// rather than losing its content from the digest.
		hash_update($ctx, $cleaned === null ? $stream_data : $cleaned);
	}

	fclose($stream);

	return hash_final($ctx);
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -181,12 +181,29 @@ class SimplePie_Parser
|
||||
xml_set_element_handler($xml, 'tag_open', 'tag_close');
|
||||
|
||||
// Parse!
|
||||
if (!xml_parse($xml, $data, true))
|
||||
if (($stream = fopen('php://temp', 'r+')) &&
|
||||
fwrite($stream, $data) &&
|
||||
rewind($stream))
|
||||
{
|
||||
//Parse by chunks not to use too much memory
|
||||
do
|
||||
{
|
||||
$stream_data = fread($stream, 1048576);
|
||||
if (!xml_parse($xml, $stream_data === false ? '' : $stream_data, feof($stream)))
|
||||
{
|
||||
$this->error_code = xml_get_error_code($xml);
|
||||
$this->error_string = xml_error_string($this->error_code);
|
||||
$return = false;
|
||||
break;
|
||||
}
|
||||
} while (!feof($stream));
|
||||
fclose($stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
$this->error_code = xml_get_error_code($xml);
|
||||
$this->error_string = xml_error_string($this->error_code);
|
||||
$return = false;
|
||||
}
|
||||
|
||||
$this->current_line = xml_get_current_line_number($xml);
|
||||
$this->current_column = xml_get_current_column_number($xml);
|
||||
$this->current_byte = xml_get_current_byte_index($xml);
|
||||
|
||||
Reference in New Issue
Block a user