Fix: handle very big feeds (#3416)

* fix: handle big XML files that cause out-of-memory exceptions by working in chunks, both in the cleanMd5 function (because of preg_replace) and in parse (because of xml_parse)

* Review

* Fixes in error handling (case of the last call to xml_parse, case of
error during fopen, break in case of XML error...)
* Takes advantage of the chunking for computing the cache hash
* Larger chunks of 1MB

Co-authored-by: e <bokes74743@tjuln.com>
Co-authored-by: Alexandre Alapetite <alexandre@alapetite.fr>
This commit is contained in:
Kiblyn11
2021-02-17 21:50:25 +01:00
committed by GitHub
parent 787e8fa463
commit 0e6ad01dbf
2 changed files with 38 additions and 9 deletions

View File

@@ -1322,12 +1322,24 @@ class SimplePie
/**
 * Computes an MD5 hash of a feed body after stripping volatile markup
 * (date elements, slash:comments, media statistics/ratings, XML comments).
 *
 * The input is processed in chunks of 1 MiB so that preg_replace() never
 * has to work on (and duplicate) the whole document at once.
 * Note: because each chunk is cleaned independently, a match that straddles
 * a chunk boundary is not removed; inputs smaller than one chunk are cleaned
 * exactly as if processed in one go.
 *
 * @param string $rss Raw feed content
 * @return string 32-character hexadecimal MD5 digest, or an empty string
 *                when the temporary stream could not be prepared
 */
function cleanMd5($rss)
{
	$patterns = [
		'#<(lastBuildDate|pubDate|updated|feedDate|dc:date|slash:comments)>[^<]+</\\1>#',
		'#<(media:starRating|media:statistics) [^/<>]+/>#',
		'#<!--.+?-->#s',
	];

	$stream = fopen('php://temp', 'r+');
	if ($stream === false)
	{
		return '';
	}

	// fwrite() legitimately returns 0 for an empty input, so only a strict
	// false is a failure. Always release the stream before bailing out.
	if (fwrite($stream, $rss) === false || !rewind($stream))
	{
		fclose($stream);
		return '';
	}

	$ctx = hash_init('md5');
	while (!feof($stream))
	{
		$chunk = fread($stream, 1048576);
		// Compare strictly: a chunk consisting of "0" is valid data and
		// must not terminate the loop.
		if ($chunk === false || $chunk === '')
		{
			break;
		}
		$cleaned = preg_replace($patterns, '', $chunk);
		// preg_replace() returns null on a PCRE error; hash the raw chunk
		// in that case rather than passing null to hash_update().
		hash_update($ctx, $cleaned === null ? $chunk : $cleaned);
	}
	fclose($stream);

	return hash_final($ctx);
}
/**

View File

@@ -181,12 +181,29 @@ class SimplePie_Parser
xml_set_element_handler($xml, 'tag_open', 'tag_close');
// Parse!
if (!xml_parse($xml, $data, true))
if (($stream = fopen('php://temp', 'r+')) &&
fwrite($stream, $data) &&
rewind($stream))
{
//Parse by chunks not to use too much memory
do
{
$stream_data = fread($stream, 1048576);
if (!xml_parse($xml, $stream_data === false ? '' : $stream_data, feof($stream)))
{
$this->error_code = xml_get_error_code($xml);
$this->error_string = xml_error_string($this->error_code);
$return = false;
break;
}
} while (!feof($stream));
fclose($stream);
}
else
{
$this->error_code = xml_get_error_code($xml);
$this->error_string = xml_error_string($this->error_code);
$return = false;
}
$this->current_line = xml_get_current_line_number($xml);
$this->current_column = xml_get_current_column_number($xml);
$this->current_byte = xml_get_current_byte_index($xml);