Fix: handle very big feed (#3416)

* fix: handle big XML files that cause out-of-memory errors by working in chunks in the cleanMd5 function (because of preg_replace) and in parse (because of xml_parse)

* Review

* Fixes in error handling (case of the last call to xml_parse, case of
error during fopen, break in case of XML error...)
* Takes advantage of the chunking for computing the cache hash
* Larger chunks of 1MB

Co-authored-by: e <bokes74743@tjuln.com>
Co-authored-by: Alexandre Alapetite <alexandre@alapetite.fr>
This commit is contained in:
Kiblyn11
2021-02-17 21:50:25 +01:00
committed by GitHub
parent 787e8fa463
commit 0e6ad01dbf
2 changed files with 38 additions and 9 deletions

View File

@@ -1322,12 +1322,24 @@ class SimplePie
/**
 * Compute an MD5 digest of a feed after stripping markup that changes
 * without the feed content itself changing.
 *
 * The following are removed before hashing:
 *  - lastBuildDate, pubDate, updated, feedDate, dc:date and slash:comments
 *    elements (opening tag without attributes, text-only content);
 *  - self-closing media:starRating and media:statistics elements;
 *  - XML comments.
 *
 * The input is cleaned and hashed in 1 MiB slices so that preg_replace()
 * never has to build a cleaned copy of a very large feed in one go.
 *
 * NOTE: a volatile element or comment that straddles a slice boundary is
 * not stripped, because each slice is cleaned independently.
 *
 * @param string $rss Raw feed content.
 * @return string Hexadecimal MD5 digest of the cleaned content.
 */
function cleanMd5($rss)
{
	$rss = (string) $rss;
	$patterns = [
		'#<(lastBuildDate|pubDate|updated|feedDate|dc:date|slash:comments)>[^<]+</\\1>#',
		'#<(media:starRating|media:statistics) [^/<>]+/>#',
		'#<!--.+?-->#s',
	];
	$chunkSize = 1048576;
	$length = strlen($rss);

	$ctx = hash_init('md5');
	// Slice the string directly: no temporary stream is needed, so there is
	// no I/O failure path, and an empty feed still yields a valid digest.
	for ($offset = 0; $offset < $length; $offset += $chunkSize)
	{
		$chunk = substr($rss, $offset, $chunkSize);
		$cleaned = preg_replace($patterns, '', $chunk);
		// preg_replace() returns null on a PCRE error (e.g. backtrack limit);
		// hash the raw slice then, so distinct feeds keep distinct digests.
		hash_update($ctx, $cleaned === null ? $chunk : $cleaned);
	}
	return hash_final($ctx);
}
/**