Fix parsing of literal "or" in regex (#8338)

Fixes https://github.com/FreshRSS/FreshRSS/issues/7879
This commit is contained in:
Alexandre Alapetite
2025-12-23 11:58:51 +01:00
committed by GitHub
parent cf3ca70765
commit 6d57a9de47
3 changed files with 44 additions and 9 deletions

View File

@@ -29,7 +29,7 @@ class FreshRSS_BooleanSearch implements \Stringable {
$this->raw_input = $input;
if ($level === 0) {
$input = self::escapeLiteralParentheses($input);
$input = self::escapeLiterals($input);
$input = $this->parseUserQueryNames($input, $allowUserQueries);
$input = $this->parseUserQueryIds($input, $allowUserQueries);
$input = trim($input);
@@ -79,7 +79,7 @@ class FreshRSS_BooleanSearch implements \Stringable {
if (!empty($queries[$name])) {
$fromS[] = $matches[0][$i];
if ($allowUserQueries) {
$toS[] = '(' . self::escapeLiteralParentheses($queries[$name]) . ')';
$toS[] = '(' . self::escapeLiterals($queries[$name]) . ')';
} else {
$toS[] = '';
}
@@ -130,7 +130,7 @@ class FreshRSS_BooleanSearch implements \Stringable {
$fromS[] = $matches[0][$i];
if ($allowUserQueries) {
$escapedQueries = array_map(fn(string $query): string => self::escapeLiteralParentheses($query), $matchedQueries);
$escapedQueries = array_map(fn(string $query): string => self::escapeLiterals($query), $matchedQueries);
$toS[] = '(' . implode(') OR (', $escapedQueries) . ')';
} else {
$toS[] = '';
@@ -144,17 +144,29 @@ class FreshRSS_BooleanSearch implements \Stringable {
}
/**
* Temporarily escape parentheses used in regex expressions or inside quoted strings.
* Temporarily escape parentheses and 'OR' used in regex expressions or inside "quoted strings".
*/
public static function escapeLiteralParentheses(string $input): string {
public static function escapeLiterals(string $input): string {
return preg_replace_callback('%(?<=[\\s(:#!-]|^)(?<![\\\\])(?P<delim>[\'"/]).+?(?<!\\\\)(?P=delim)[im]*%',
fn(array $matches): string => str_replace(['(', ')'], ['\\u0028', '\\u0029'], $matches[0]),
function (array $matches): string {
$match = $matches[0];
$match = str_replace(['(', ')'], ['\\u0028', '\\u0029'], $match);
$match = preg_replace_callback('/\bOR\b/i', fn(array $ms): string =>
str_replace(['O', 'o', 'R', 'r'], ['\\u004f', '\\u006f', '\\u0052', '\\u0072'], $ms[0]),
$match
) ?? '';
return $match;
},
$input
) ?? '';
}
public static function unescapeLiteralParentheses(string $input): string {
return str_replace(['\\u0028', '\\u0029'], ['(', ')'], $input);
public static function unescapeLiterals(string $input): string {
return str_replace(
['\\u0028', '\\u0029', '\\u004f', '\\u006f', '\\u0052', '\\u0072'],
['(', ')', 'O', 'o', 'R', 'r'],
$input
);
}
/**

View File

@@ -114,7 +114,7 @@ class FreshRSS_Search implements \Stringable {
public function __construct(string $input) {
$input = self::cleanSearch($input);
$input = self::unescape($input);
$input = FreshRSS_BooleanSearch::unescapeLiteralParentheses($input);
$input = FreshRSS_BooleanSearch::unescapeLiterals($input);
$this->raw_input = $input;
$input = $this->parseNotEntryIds($input);

View File

@@ -790,6 +790,21 @@ final class SearchTest extends \PHPUnit\Framework\TestCase {
'((e.title LIKE ? OR e.content LIKE ?) )',
['%https://example.net/test/%', '%https://example.net/test/%']
],
[ // Regex with literal 'or'
'intitle:/^A or B/i',
'(e.title ~* ? )',
['^A or B']
],
[ // Regex with literal 'OR'
'intitle:/^A B OR C D/i OR intitle:/^A B OR C D/i',
'(e.title ~* ? ) OR (e.title ~* ? )',
['^A B OR C D', '^A B OR C D']
],
[ // Quote with literal 'OR'
'intitle:"A B OR C D" OR intitle:"E or F"',
'(e.title LIKE ? ) OR (e.title LIKE ? )',
['%A B OR C D%', '%E or F%']
],
];
}
@@ -997,6 +1012,14 @@ final class SearchTest extends \PHPUnit\Framework\TestCase {
'-intitle:a -inurl:b',
'-intitle:a -inurl:b',
],
[
'intitle:/^A or B/i',
'intitle:/^A or B/i',
],
[
'intitle:/^A B OR C D/i',
'intitle:/^A B OR C D/i',
],
];
}