Files
FreshRSS/app/Models/BooleanSearch.php
Alexandre Alapetite b1d24fbdb7 PHPStan 2.0 (#7131)
* PHPStan 2.0
fix https://github.com/FreshRSS/FreshRSS/issues/6989
https://github.com/phpstan/phpstan/releases/tag/2.0.0
https://github.com/phpstan/phpstan/blob/2.0.x/UPGRADING.md

* More

* More

* Done

* fix i18n CLI

* Restore a PHPStan Next test
For work towards PHPStan Level 10

* 4 more on Level 10

* fix getTagsForEntry

* API at Level 10

* More Level 10

* Finish Minz at Level 10

* Finish CLI at Level 10

* Finish Controllers at Level 10

* More Level 10

* More

* Pass bleedingEdge

* Clean PHPStan options and add TODOs

* Level 10 for main config

* More

* Consistency array vs. list

* Sanitize themes get_infos

* Simplify TagDAO->getTagsForEntries()

* Finish reportAnyTypeWideningInVarTag

* Prepare checkBenevolentUnionTypes and checkImplicitMixed

* Fixes

* Refix

* Another fix

* Casing of __METHOD__ constant
2024-12-27 12:12:49 +01:00

429 lines
12 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
declare(strict_types=1);
/**
 * Contains Boolean search from the search form.
 *
 * A Boolean search is a tree: each node holds either nested FreshRSS_BooleanSearch
 * (one per parenthesised group, combined with the operator carried by each child)
 * or FreshRSS_Search leaves (one per segment separated by an explicit `OR`).
 */
class FreshRSS_BooleanSearch implements \Stringable {

	/** Trimmed input of this node; at level 0 it is stored after `&quot;` normalisation and before saved searches are expanded */
	private string $raw_input = '';

	/** @var list<FreshRSS_BooleanSearch|FreshRSS_Search> */
	private array $searches = [];

	/**
	 * @param string $input Search expression to parse
	 * @param int $level Nesting depth; 0 for the top-level expression, which is the only level where
	 *  `&quot;` normalisation, literal-parentheses escaping and saved-search expansion are applied
	 * @param 'AND'|'OR'|'AND NOT'|'OR NOT' $operator How this node is combined with the previous one
	 * @param bool $allowUserQueries When false, references to saved searches are removed instead of expanded
	 */
	public function __construct(
		string $input,
		int $level = 0,
		private readonly string $operator = 'AND',
		bool $allowUserQueries = true
	) {
		$input = trim($input);
		if ($input === '') {
			return;
		}

		if ($level === 0) {
			// Turn HTML-encoded quotes back into real quotes, first after a colon, then at the start of a term.
			// If a regex fails, the search is left empty.
			$input = preg_replace('/:&quot;(.*?)&quot;/', ':"\1"', $input);
			if (!is_string($input)) {
				return;
			}
			$input = preg_replace('/(?<=[\s(!-]|^)&quot;(.*?)&quot;/', '"\1"', $input);
			if (!is_string($input)) {
				return;
			}
		}
		$this->raw_input = $input;

		if ($level === 0) {
			$input = self::escapeLiteralParentheses($input);
			$input = $this->parseUserQueryNames($input, $allowUserQueries);
			$input = $this->parseUserQueryIds($input, $allowUserQueries);
			$input = trim($input);
		}

		$input = self::consistentOrParentheses($input);

		// Either parse everything as a series of BooleanSearchs combined by implicit AND
		// or parse everything as a series of Searchs combined by explicit OR
		if (!$this->parseParentheses($input, $level)) {
			$this->parseOrSegments($input);
		}
	}

	/**
	 * Parse the user queries (saved searches) by name and expand them in the input string.
	 *
	 * Recognises `search:name`, `search:"some name"` and `search:'some name'`.
	 * Every reference is replaced exactly where it was matched, in a single pass, so that a name
	 * being a prefix of another one (`search:ab` vs. `search:a`) cannot corrupt the other reference,
	 * and so that text inserted by an expansion is never expanded again.
	 * References to unknown names are left untouched.
	 *
	 * @param bool $allowUserQueries When false, known references are removed instead of expanded
	 */
	private function parseUserQueryNames(string $input, bool $allowUserQueries = true): string {
		// Saved searches indexed by name; loaded lazily, only once a reference is actually found
		$queries = null;

		$expanded = preg_replace_callback(
			'/\bsearch:(?:(?P<delim>[\'"])(?P<quoted>.*?)(?P=delim)|(?P<bare>[^\s"]*))/',
			static function (array $matches) use (&$queries, $allowUserQueries): string {
				if ($queries === null) {
					$queries = [];
					foreach (FreshRSS_Context::userConf()->queries as $raw_query) {
						if (($raw_query['name'] ?? '') !== '' && ($raw_query['search'] ?? '') !== '') {
							$queries[$raw_query['name']] = trim($raw_query['search']);
						}
					}
				}
				// A non-empty delimiter means that the quoted alternative matched
				$isQuoted = ($matches['delim'] ?? '') !== '';
				$name = trim($isQuoted ? $matches['quoted'] : ($matches['bare'] ?? ''));
				$search = $queries[$name] ?? '';
				if ($search === '') {
					// Unknown (or empty) saved search: keep the original text
					return $matches[0];
				}
				return $allowUserQueries ? '(' . self::escapeLiteralParentheses($search) . ')' : '';
			},
			$input
		);

		// preg_replace_callback() returns null on PCRE failure: keep the input as it was
		return $expanded ?? $input;
	}

	/**
	 * Parse the user queries (saved searches) by ID and expand them in the input string.
	 *
	 * Recognises `S:n`, where n is the 1-based position of the saved search in the user configuration.
	 * Every reference is replaced exactly where it was matched, in a single pass, so that `S:1`
	 * cannot corrupt `S:10`. References to unknown or empty saved searches are left untouched.
	 *
	 * @param bool $allowUserQueries When false, known references are removed instead of expanded
	 */
	private function parseUserQueryIds(string $input, bool $allowUserQueries = true): string {
		// Saved searches by position; loaded lazily, only once a reference is actually found
		$queries = null;

		$expanded = preg_replace_callback(
			'/\bS:(?P<search>\d+)/',
			static function (array $matches) use (&$queries, $allowUserQueries): string {
				if ($queries === null) {
					$queries = [];
					foreach (FreshRSS_Context::userConf()->queries as $raw_query) {
						$queries[] = trim($raw_query['search'] ?? '');
					}
				}
				// Index starting from 1
				$id = (int)$matches['search'] - 1;
				$search = $queries[$id] ?? '';
				if ($search === '') {
					// Unknown (or empty) saved search: keep the original text
					return $matches[0];
				}
				return $allowUserQueries ? '(' . self::escapeLiteralParentheses($search) . ')' : '';
			},
			$input
		);

		// preg_replace_callback() returns null on PCRE failure: keep the input as it was
		return $expanded ?? $input;
	}

	/**
	 * Temporarily escape parentheses used in regex expressions or inside quoted strings.
	 *
	 * Inside a span delimited by `'`, `"` or `/` (optionally followed by the `i` / `m` flags),
	 * `(` becomes the literal text `\u0028` and `)` becomes `\u0029`, so that the grouping
	 * logic does not see them. If the regex engine fails, the input is returned unchanged
	 * rather than being replaced by an empty search.
	 */
	public static function escapeLiteralParentheses(string $input): string {
		return preg_replace_callback('%(?<=[\\s(:#!-]|^)(?<![\\\\])(?P<delim>[\'"/]).+?(?<!\\\\)(?P=delim)[im]*%',
			fn(array $matches): string => str_replace(['(', ')'], ['\\u0028', '\\u0029'], $matches[0]),
			$input
		) ?? $input;
	}

	/**
	 * Reverse of escapeLiteralParentheses(): turn `\u0028` and `\u0029` back into `(` and `)`.
	 */
	public static function unescapeLiteralParentheses(string $input): string {
		return str_replace(['\\u0028', '\\u0029'], ['(', ')'], $input);
	}

	/**
	 * Wrap every segment separated by `OR` in parentheses.
	 * An input without any `OR` is returned trimmed and otherwise unchanged.
	 * Example: 'ab cd OR ef OR "gh ij"' becomes '(ab cd) OR (ef) OR ("gh ij")'
	 */
	public static function addOrParentheses(string $input): string {
		$input = trim($input);
		if ($input === '') {
			return '';
		}
		$splits = preg_split('/\b(OR)\b/i', $input, -1, PREG_SPLIT_DELIM_CAPTURE) ?: [];
		$ns = count($splits);
		if ($ns <= 1) {
			return $input;
		}
		$result = '';
		$segment = '';
		for ($i = 0; $i < $ns; $i++) {
			$segment .= $splits[$i];
			if (trim($segment) === '') {
				$segment = '';
			} elseif (strcasecmp($segment, 'OR') === 0) {
				$result .= $segment . ' ';
				$segment = '';
			} else {
				// An odd number of quotes means that the OR was inside a quoted string: keep accumulating
				$quotes = substr_count($segment, '"') + substr_count($segment, '&quot;');
				if ($quotes % 2 === 0) {
					$segment = trim($segment);
					if (in_array($segment, ['!', '-'], true)) {
						// A lone negation sign stays attached to what follows
						$result .= $segment;
					} else {
						$result .= '(' . $segment . ') ';
					}
					$segment = '';
				}
			}
		}
		$segment = trim($segment);
		if (in_array($segment, ['!', '-'], true)) {
			$result .= $segment;
		} elseif ($segment !== '') {
			$result .= '(' . $segment . ')';
		}
		return trim($result);
	}

	/**
	 * If the query contains a mix of `OR` expressions with and without parentheses,
	 * then add parentheses to make the query consistent.
	 * Text outside of parentheses goes through addOrParentheses(); the content of each
	 * parenthesised group is processed recursively, and a group that contains no nested
	 * parentheses is kept as it is.
	 * Example: '(ab (cd OR ef)) OR gh OR ij OR (kl)' becomes '(ab (cd OR ef)) OR (gh) OR (ij) OR (kl)'
	 */
	public static function consistentOrParentheses(string $input): string {
		if (!preg_match('/(?<!\\\\)\\(/', $input)) {
			// No unescaped parentheses in the input
			return trim($input);
		}
		$parenthesesCount = 0;
		$result = '';
		$segment = '';
		$length = strlen($input);

		for ($i = 0; $i < $length; $i++) {
			$c = $input[$i];
			$backslashed = $i >= 1 ? $input[$i - 1] === '\\' : false;
			if (!$backslashed) {
				if ($c === '(') {
					if ($parenthesesCount === 0) {
						// Opening of a top-level group: flush the text accumulated before it
						if ($segment !== '') {
							$result = rtrim($result) . ' ' . self::addOrParentheses($segment);
							$negation = preg_match('/[!-]$/', $result);
							if (!$negation) {
								$result .= ' ';
							}
							$segment = '';
						}
						$c = '';
					}
					$parenthesesCount++;
				} elseif ($c === ')') {
					$parenthesesCount--;
					if ($parenthesesCount === 0) {
						// Closing of a top-level group: normalise its content recursively
						$segment = self::consistentOrParentheses($segment);
						if ($segment !== '') {
							$result .= '(' . $segment . ')';
							$segment = '';
						}
						$c = '';
					}
				}
			}
			$segment .= $c;
		}

		if (trim($segment) !== '') {
			$result = rtrim($result);
			$negation = preg_match('/[!-]$/', $segment);
			if (!$negation) {
				$result .= ' ';
			}
			$result .= self::addOrParentheses($segment);
		}
		return trim($result);
	}

	/**
	 * Parse the given text as a nested Boolean search and append it to this node, unless it turns out to be empty.
	 * @param 'AND'|'OR'|'AND NOT'|'OR NOT' $operator How the nested search is combined with the previous one
	 */
	private function appendBooleanSearch(string $input, int $level, string $operator): void {
		$search = new FreshRSS_BooleanSearch($input, $level + 1, $operator);
		if (count($search->searches()) > 0) {
			$this->searches[] = $search;
		}
	}

	/**
	 * Split the input on its top-level parenthesised groups.
	 * The text before, inside, and after each group becomes a nested Boolean search;
	 * a trailing `OR`, `!` or `-` before a group selects the operator of that group.
	 * @return bool True if some parenthesis logic took over, false otherwise
	 */
	private function parseParentheses(string $input, int $level): bool {
		$input = trim($input);
		$length = strlen($input);
		$i = 0;
		$before = '';
		$hasParenthesis = false;
		$nextOperator = 'AND';

		while ($i < $length) {
			$c = $input[$i];
			$backslashed = $i >= 1 ? $input[$i - 1] === '\\' : false;

			if ($c === '(' && !$backslashed) {
				$hasParenthesis = true;

				$before = trim($before);
				if (preg_match('/[!-]$/', $before)) {
					// Trim trailing negation
					$before = rtrim($before, ' !-');

					$isOr = preg_match('/\bOR$/i', $before);
					if ($isOr) {
						// Trim trailing OR
						$before = substr($before, 0, -2);
					}

					// The text prior to the negation is a BooleanSearch
					$this->appendBooleanSearch($before, $level, $nextOperator);
					$before = '';

					// The next BooleanSearch will have to be combined with AND NOT or OR NOT instead of default AND
					$nextOperator = $isOr ? 'OR NOT' : 'AND NOT';
				} elseif (preg_match('/\bOR$/i', $before)) {
					// Trim trailing OR
					$before = substr($before, 0, -2);

					// The text prior to the OR is a BooleanSearch
					$this->appendBooleanSearch($before, $level, $nextOperator);
					$before = '';

					// The next BooleanSearch will have to be combined with OR instead of default AND
					$nextOperator = 'OR';
				} elseif ($before !== '') {
					// The text prior to the opening parenthesis is a BooleanSearch
					$this->appendBooleanSearch($before, $level, $nextOperator);
					$before = '';
				}

				// Search the matching closing parenthesis
				$parentheses = 1;
				$sub = '';
				$i++;
				while ($i < $length) {
					$c = $input[$i];
					$backslashed = $input[$i - 1] === '\\';
					if ($c === '(' && !$backslashed) {
						// One nested level deeper
						$parentheses++;
						$sub .= $c;
					} elseif ($c === ')' && !$backslashed) {
						$parentheses--;
						if ($parentheses === 0) {
							// Found the matching closing parenthesis
							$this->appendBooleanSearch($sub, $level, $nextOperator);
							$nextOperator = 'AND';
							$sub = '';
							break;
						} else {
							$sub .= $c;
						}
					} else {
						$sub .= $c;
					}
					$i++;
				}
				// TODO: Consider throwing an error or warning in case of non-matching parenthesis
				// (the content of a group that is never closed is currently discarded)
			} else {
				$before .= $c;
			}
			$i++;
		}

		if (!$hasParenthesis) {
			// There was no parenthesis logic to apply
			return false;
		}

		$before = trim($before);
		if (preg_match('/^OR\b/i', $before)) {
			// The next BooleanSearch will have to be combined with OR instead of default AND
			$nextOperator = 'OR';
			// Trim leading OR
			$before = substr($before, 2);
		}

		// The remaining text after the last parenthesis is a BooleanSearch
		$this->appendBooleanSearch($before, $level, $nextOperator);
		return true;
	}

	/**
	 * Split the input on `OR` (outside of double quotes) and append one FreshRSS_Search per segment.
	 */
	private function parseOrSegments(string $input): void {
		$input = trim($input);
		if ($input === '') {
			return;
		}
		$splits = preg_split('/\b(OR)\b/i', $input, -1, PREG_SPLIT_DELIM_CAPTURE) ?: [];
		$segment = '';
		$ns = count($splits);
		for ($i = 0; $i < $ns; $i++) {
			$segment = $segment . $splits[$i];
			if (trim($segment) === '' || strcasecmp($segment, 'OR') === 0) {
				$segment = '';
			} else {
				// An odd number of quotes means that the OR was inside a quoted string: keep accumulating
				$quotes = substr_count($segment, '"') + substr_count($segment, '&quot;');
				if ($quotes % 2 === 0) {
					$segment = trim($segment);
					$this->searches[] = new FreshRSS_Search($segment);
					$segment = '';
				}
			}
		}
		$segment = trim($segment);
		if ($segment !== '') {
			$this->searches[] = new FreshRSS_Search($segment);
		}
	}

	/**
	 * Either a list of FreshRSS_BooleanSearch combined by implicit AND
	 * or a series of FreshRSS_Search combined by explicit OR
	 * @return list<FreshRSS_BooleanSearch|FreshRSS_Search>
	 */
	public function searches(): array {
		return $this->searches;
	}

	/** @return 'AND'|'OR'|'AND NOT'|'OR NOT' depending on how this BooleanSearch should be combined */
	public function operator(): string {
		return $this->operator;
	}

	/** Append an already built search to this node */
	public function add(FreshRSS_BooleanSearch|FreshRSS_Search $search): void {
		$this->searches[] = $search;
	}

	#[\Override]
	public function __toString(): string {
		return $this->getRawInput();
	}

	/** @return string Plain text search query. Must be XML-encoded or URL-encoded depending on the situation */
	public function getRawInput(): string {
		return $this->raw_input;
	}
}