Easier full-text search possibility (#4505)

* Easier full-text search possibility
Contributes to https://github.com/FreshRSS/FreshRSS/issues/1331
Avoid concats in searches to make text indexes easier to build

* Fix tests

* Documentation
This commit is contained in:
Alexandre Alapetite
2022-08-18 12:06:31 +02:00
committed by GitHub
parent 2acf3a4dd8
commit 4f111c5b30
6 changed files with 54 additions and 22 deletions

View File

@@ -10,10 +10,6 @@ class FreshRSS_EntryDAO extends Minz_ModelPdo implements FreshRSS_Searchable {
return true;
}
protected static function sqlConcat($s1, $s2) {
return 'CONCAT(' . $s1 . ',' . $s2 . ')'; //MySQL
}
public static function sqlHexDecode(string $x): string {
return 'unhex(' . $x . ')';
}
@@ -950,47 +946,49 @@ SQL;
}
if ($filter->getInurl()) {
foreach ($filter->getInurl() as $url) {
$sub_search .= 'AND ' . static::sqlConcat($alias . 'link', $alias . 'guid') . ' LIKE ? ';
$sub_search .= 'AND ' . $alias . 'link LIKE ? ';
$values[] = "%{$url}%";
}
}
if ($filter->getNotAuthor()) {
foreach ($filter->getNotAuthor() as $author) {
$sub_search .= 'AND (NOT ' . $alias . 'author LIKE ?) ';
$sub_search .= 'AND ' . $alias . 'author NOT LIKE ? ';
$values[] = "%{$author}%";
}
}
if ($filter->getNotIntitle()) {
foreach ($filter->getNotIntitle() as $title) {
$sub_search .= 'AND (NOT ' . $alias . 'title LIKE ?) ';
$sub_search .= 'AND ' . $alias . 'title NOT LIKE ? ';
$values[] = "%{$title}%";
}
}
if ($filter->getNotTags()) {
foreach ($filter->getNotTags() as $tag) {
$sub_search .= 'AND (NOT ' . $alias . 'tags LIKE ?) ';
$sub_search .= 'AND ' . $alias . 'tags NOT LIKE ? ';
$values[] = "%{$tag}%";
}
}
if ($filter->getNotInurl()) {
foreach ($filter->getNotInurl() as $url) {
$sub_search .= 'AND (NOT ' . static::sqlConcat($alias . 'link', $alias . 'guid') . ' LIKE ?) ';
$sub_search .= 'AND ' . $alias . 'link NOT LIKE ? ';
$values[] = "%{$url}%";
}
}
if ($filter->getSearch()) {
foreach ($filter->getSearch() as $search_value) {
$sub_search .= 'AND ' . static::sqlConcat($alias . 'title',
static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ? ';
$sub_search .= 'AND (' . $alias . 'title LIKE ? OR ' .
(static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ?) ';
$values[] = "%{$search_value}%";
$values[] = "%{$search_value}%";
}
}
if ($filter->getNotSearch()) {
foreach ($filter->getNotSearch() as $search_value) {
$sub_search .= 'AND (NOT ' . static::sqlConcat($alias . 'title',
static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ?) ';
$sub_search .= 'AND ' . $alias . 'title NOT LIKE ? AND ' .
(static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' NOT LIKE ? ';
$values[] = "%{$search_value}%";
$values[] = "%{$search_value}%";
}
}

View File

@@ -10,10 +10,6 @@ class FreshRSS_EntryDAOSQLite extends FreshRSS_EntryDAO {
return false;
}
protected static function sqlConcat($s1, $s2) {
return $s1 . '||' . $s2;
}
public static function sqlHexDecode(string $x): string {
return $x;
}

View File

@@ -18,6 +18,7 @@ Learn how to install, update, and backup FreshRSS, as well as how to use the com
* [Setting Up Automatic Feed Updating](08_FeedUpdates.md)
* [Access Control](09_AccessControl.md)
* [Apache/Nginx configuration files](10_ServerConfig.md)
* [Database configuration](DatabaseConfig.md)
* [Using the command line interface (CLI)](https://github.com/FreshRSS/FreshRSS/tree/edge/cli)
* [Configuring the email address validation](05_Configuring_email_validation.md)
* [Frequently asked questions](04_Frequently_Asked_Questions.md)

View File

@@ -0,0 +1,33 @@
# Database configuration
FreshRSS supports the databases SQLite (built-in), PostgreSQL, MySQL / MariaDB.
While the default installation should be fine for most cases, additional tuning can be made.
## Full-text search optimisation in PostgreSQL
Without changing anything in FreshRSS code (which is using [`ILIKE`](https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-LIKE)), it is possible to make text searches much faster by adding some indexes in PostgreSQL 9.1+ (at the cost of more disc space and slower insertions):
```sql
CREATE EXTENSION pg_trgm;
CREATE INDEX gin_trgm_index_title ON freshrss_entry USING gin(title gin_trgm_ops);
CREATE INDEX gin_trgm_index_content ON freshrss_entry USING gin(content gin_trgm_ops);
```
Where `freshrss_entry` needs to be adapted to the name of the *entry* of a given use, e.g., `freshrss_alice_entry`.
Such an index on the `entry.title` column makes searches such as `intitle:Hello` much faster.
General searches such as `Something` search both on `entry.title` and `entry.content` and therefore require the two indexes shown above.
Likewise, if you wanted to speed up searches on the authors (`author:Alice`), you would add another index:
```sql
CREATE INDEX gin_trgm_index_author ON freshrss_entry USING gin(author gin_trgm_ops);
```
Etc. for other text fields. The list of fields can be seen in [`CREATE TABLE _entry` section](https://github.com/FreshRSS/FreshRSS/blob/edge/app/SQL/install.sql.pgsql.php).
### References
* [GIN: Generalized Inverted Index](https://www.postgresql.org/docs/current/gin-intro.html)
* [`pg_trgm` module for fast text search](https://www.postgresql.org/docs/current/pgtrgm.html#id-1.11.7.42.8)

View File

@@ -1,3 +1,7 @@
# Database Schema
> **TODO**
## See also
* [Database configuration](../../admins/DatabaseConfig.md)

View File

@@ -318,17 +318,17 @@ class SearchTest extends PHPUnit\Framework\TestCase {
],
[
'#tag Hello OR (author:Alice inurl:example) OR (f:3 intitle:World) OR L:12',
' ((e.tags LIKE ? AND e.title||e.content LIKE ? )) OR ((e.author LIKE ? AND e.link||e.guid LIKE ? )) OR' .
' ((e.tags LIKE ? AND (e.title LIKE ? OR e.content LIKE ?) )) OR ((e.author LIKE ? AND e.link LIKE ? )) OR' .
' ((e.id_feed IN (?) AND e.title LIKE ? )) OR ((e.id IN (SELECT et.id_entry FROM `_entrytag` et WHERE et.id_tag IN (?)) )) ',
['%tag%','%Hello%','%Alice%','%example%','3','%World%', '12']
['%tag%','%Hello%', '%Hello%', '%Alice%', '%example%', '3', '%World%', '12']
],
[
'#tag Hello (author:Alice inurl:example) (f:3 intitle:World) label:Bleu',
' ((e.tags LIKE ? AND e.title||e.content LIKE ? )) AND' .
' ((e.author LIKE ? AND e.link||e.guid LIKE ? )) AND' .
' ((e.tags LIKE ? AND (e.title LIKE ? OR e.content LIKE ?) )) AND' .
' ((e.author LIKE ? AND e.link LIKE ? )) AND' .
' ((e.id_feed IN (?) AND e.title LIKE ? )) AND' .
' ((e.id IN (SELECT et.id_entry FROM `_entrytag` et, `_tag` t WHERE et.id_tag = t.id AND t.name IN (?)) )) ',
['%tag%','%Hello%','%Alice%','%example%','3','%World%', 'Bleu']
['%tag%', '%Hello%', '%Hello%', '%Alice%', '%example%', '3', '%World%', 'Bleu']
],
[
'!((author:Alice intitle:hello) OR (author:Bob intitle:world))',