Add stem vector search in bio

This commit is contained in:
MartinBraquet
2025-10-11 21:15:03 +02:00
parent 020b9ddb8d
commit daf5350f41
4 changed files with 42 additions and 26 deletions

View File

@@ -109,7 +109,7 @@ export const loadProfiles = async (props: profileQueryType) => {
where(`data->>'userDeleted' != 'true' or data->>'userDeleted' is null`),
...keywords.map(word => where(
`lower(users.name) ilike '%' || lower($(word)) || '%' or lower(bio::text) ilike '%' || lower($(word)) || '%'`,
`lower(users.name) ilike '%' || lower($(word)) || '%' or lower(bio::text) ilike '%' || lower($(word)) || '%' or bio_tsv @@ phraseto_tsquery('english', $(word))`,
{word}
)),

View File

@@ -14,10 +14,14 @@ export function convertRow(row: ProfileAndUserRow): Profile
/**
 * Converts a joined profile/user database row into the `Profile` shape handed back to callers.
 *
 * The row's top-level `name` and `username` are copied onto the nested `user` object, and the
 * search-only columns `bio_text` and `bio_tsv` are left out of the result.
 *
 * @param row - The joined row, or `undefined` when there is no row.
 * @returns The converted profile, or `null` when `row` is `undefined`.
 */
export function convertRow(row: ProfileAndUserRow | undefined): Profile | null {
if (!row) return null
// Drop the internal/search-only fields by destructuring them away; this avoids widening the
// object to `any` and mutating it with `delete`.
const { bio_text: _bioText, bio_tsv: _bioTsv, ...rest } = row
return {
...rest,
user: { ...rest.user, name: rest.name, username: rest.username } as User,
} as Profile
}
// SQL select list: every `profiles` column, plus `name`, `username`, and `users.data` aliased as `user`.
const LOVER_COLS = 'profiles.*, name, username, users.data as user'

View File

@@ -115,25 +115,31 @@ CREATE INDEX profiles_bio_trgm_idx
--- bio_text
-- ALTER TABLE profiles ADD COLUMN bio_text tsvector;
--
-- CREATE OR REPLACE FUNCTION profiles_bio_tsvector_update()
-- RETURNS trigger AS $$
-- BEGIN
-- new.bio_text := to_tsvector(
-- 'english',
-- (
-- SELECT string_agg(trim(both '"' from x::text), ' ')
-- FROM jsonb_path_query(new.bio, '$.**.text'::jsonpath) AS x
-- )
-- );
-- RETURN new;
-- END;
-- $$ LANGUAGE plpgsql;
--
-- CREATE TRIGGER profiles_bio_tsvector_trigger
-- BEFORE INSERT OR UPDATE OF bio ON profiles
-- FOR EACH ROW EXECUTE FUNCTION profiles_bio_tsvector_update();
--
-- create index on profiles using gin(bio_text);
-- Stemmed full-text search over profile bios.
--   bio_text: every value stored under a "text" key at any depth of the bio JSON, joined by spaces.
--   bio_tsv:  English tsvector generated from bio_text; GIN-indexed so `bio_tsv @@ <tsquery>` can use the index.
-- IF NOT EXISTS / DROP ... IF EXISTS keep these statements safe to run more than once.
ALTER TABLE profiles ADD COLUMN IF NOT EXISTS bio_text TEXT;

-- Backfill rows that were written before the trigger below existed.
-- `value #>> '{}'` returns the JSON string's real content (unquoted, escapes decoded). Casting the
-- jsonb value to text and trimming '"' would keep sequences such as \n or \" in the indexed text
-- and would also strip genuine leading/trailing double quotes.
UPDATE profiles
SET bio_text = (
SELECT string_agg(DISTINCT value #>> '{}', ' ')
FROM jsonb_path_query(bio, '$.**.text') AS t(value)
);

-- coalesce keeps the generated vector non-null for profiles without any bio text.
ALTER TABLE profiles ADD COLUMN IF NOT EXISTS bio_tsv tsvector
GENERATED ALWAYS AS (to_tsvector('english', coalesce(bio_text, ''))) STORED;

CREATE INDEX IF NOT EXISTS profiles_bio_tsv_idx ON profiles USING GIN (bio_tsv);

-- Trigger function: recompute bio_text from NEW.bio using the same extraction as the backfill
-- above, so existing and newly written rows are indexed identically.
CREATE OR REPLACE FUNCTION update_bio_text()
RETURNS trigger AS $$
BEGIN
NEW.bio_text := (
SELECT string_agg(DISTINCT value #>> '{}', ' ')
FROM jsonb_path_query(NEW.bio, '$.**.text') AS t(value)
);
RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Runs before the row is stored, so the generated bio_tsv column is computed from the fresh bio_text.
DROP TRIGGER IF EXISTS trg_update_bio_text ON profiles;
CREATE TRIGGER trg_update_bio_text
BEFORE INSERT OR UPDATE OF bio ON profiles
FOR EACH ROW EXECUTE FUNCTION update_bio_text();

View File

@@ -406,6 +406,8 @@ export type Database = {
age: number | null
bio: Json | null
bio_length: number | null
bio_text: string | null
bio_tsv: unknown | null
born_in_location: string | null
city: string
city_latitude: number | null
@@ -452,6 +454,8 @@ export type Database = {
age?: number | null
bio?: Json | null
bio_length?: number | null
bio_text?: string | null
bio_tsv?: unknown | null
born_in_location?: string | null
city: string
city_latitude?: number | null
@@ -467,7 +471,7 @@ export type Database = {
geodb_city_id?: string | null
has_kids?: number | null
height_in_inches?: number | null
id?: never
id?: number
is_smoker?: boolean | null
is_vegetarian_or_vegan?: boolean | null
last_modification_time?: string
@@ -498,6 +502,8 @@ export type Database = {
age?: number | null
bio?: Json | null
bio_length?: number | null
bio_text?: string | null
bio_tsv?: unknown | null
born_in_location?: string | null
city?: string
city_latitude?: number | null
@@ -513,7 +519,7 @@ export type Database = {
geodb_city_id?: string | null
has_kids?: number | null
height_in_inches?: number | null
id?: never
id?: number
is_smoker?: boolean | null
is_vegetarian_or_vegan?: boolean | null
last_modification_time?: string