diff --git a/android/app/build.gradle b/android/app/build.gradle index 3555f107..a2cb03ed 100644 --- a/android/app/build.gradle +++ b/android/app/build.gradle @@ -11,8 +11,8 @@ android { applicationId "com.compassconnections.app" minSdkVersion rootProject.ext.minSdkVersion targetSdkVersion rootProject.ext.targetSdkVersion - versionCode 71 - versionName "1.13.0" + versionCode 72 + versionName "1.14.0" testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" aaptOptions { // Files and dirs to omit from the packaged assets dir, modified to accommodate modern web apps. diff --git a/backend/api/package.json b/backend/api/package.json index 7b326ca7..b2ca8845 100644 --- a/backend/api/package.json +++ b/backend/api/package.json @@ -1,6 +1,6 @@ { "name": "@compass/api", - "version": "1.28.0", + "version": "1.29.0", "private": true, "description": "Backend API endpoints", "main": "src/serve.ts", @@ -28,6 +28,7 @@ "dependencies": { "@google-cloud/monitoring": "4.0.0", "@google-cloud/secret-manager": "4.2.1", + "@mozilla/readability": "0.6.0", "@sentry/node": "10.41.0", "@tiptap/core": "2.10.4", "cors": "2.8.5", @@ -35,8 +36,10 @@ "express": "5.0.0", "firebase-admin": "13.5.0", "gcp-metadata": "6.1.0", + "jsdom": "29.0.1", "jsonwebtoken": "9.0.0", "lodash": "4.17.23", + "marked": "17.0.5", "openapi-types": "12.1.3", "pg-promise": "12.6.1", "posthog-node": "4.11.0", @@ -50,6 +53,7 @@ }, "devDependencies": { "@types/cors": "2.8.17", + "@types/jsdom": "28.0.1", "@types/jsonwebtoken": "^9.0.0", "@types/lodash": "^4.17.0", "@types/swagger-ui-express": "4.1.8", diff --git a/backend/api/src/app.ts b/backend/api/src/app.ts index cbcb3d5a..897994f9 100644 --- a/backend/api/src/app.ts +++ b/backend/api/src/app.ts @@ -10,7 +10,7 @@ import {getLastSeenChannelTime, setChannelLastSeenTime} from 'api/get-channel-se import {getHiddenProfiles} from 'api/get-hidden-profiles' import {getLastMessages} from 'api/get-last-messages' import {getMessagesCountEndpoint} from 'api/get-messages-count' -import {getOptions} from 'api/get-options' +import {getOptionsEndpoint} from 'api/get-options' import {getPinnedCompatibilityQuestions} from 'api/get-pinned-compatibility-questions' import {getChannelMessagesEndpoint} from 'api/get-private-messages' import {getUser} from 'api/get-user' @@ -78,11 +78,12 @@ import {type APIHandler, typedEndpoint} from './helpers/endpoint' import {hideComment} from './hide-comment' import {leavePrivateUserMessageChannel} from './leave-private-user-message-channel' import {likeProfile} from './like-profile' +import {llmExtractProfileEndpoint} from './llm-extract-profile' import {markAllNotifsRead} from './mark-all-notifications-read' import {removePinnedPhoto} from './remove-pinned-photo' import {report} from './report' import {rsvpEvent} from './rsvp-event' -import {searchLocation} from './search-location' +import {searchLocationEndpoint} from './search-location' import {searchNearCity} from './search-near-city' import {searchUsers} from './search-users' import {setCompatibilityAnswer} from './set-compatibility-answer' @@ -602,7 +603,7 @@ const handlers: {[k in APIPath]: APIHandler} = { 'get-likes-and-ships': getLikesAndShips, 'get-messages-count': getMessagesCountEndpoint, 'get-notifications': getNotifications, - 'get-options': getOptions, + 'get-options': getOptionsEndpoint, 'get-profile-answers': getProfileAnswers, 'get-profiles': getProfiles, 'get-supabase-token': getSupabaseToken, @@ -622,7 +623,7 @@ const handlers: {[k in APIPath]: APIHandler} = { 'remove-pinned-photo': removePinnedPhoto, 'save-subscription': saveSubscription, 'save-subscription-mobile': saveSubscriptionMobile, - 'search-location': searchLocation, + 'search-location': searchLocationEndpoint, 'search-near-city': searchNearCity, 'search-users': searchUsers, 'set-channel-seen-time': setChannelLastSeenTime, @@ -644,6 +645,7 @@ const handlers: {[k in APIPath]: APIHandler} = { 'user/by-id/:id/unblock': unblockUser, vote: vote, 'validate-username': validateUsernameEndpoint, + 'llm-extract-profile': llmExtractProfileEndpoint, // 'user/:username': getUser, // 'user/:username/lite': getDisplayUser, // 'user/by-id/:id/lite': getDisplayUser, diff --git a/backend/api/src/get-options.ts b/backend/api/src/get-options.ts index ec3aa727..ac2b3bc8 100644 --- a/backend/api/src/get-options.ts +++ b/backend/api/src/get-options.ts @@ -1,24 +1,44 @@ import {APIErrors, APIHandler} from 'api/helpers/endpoint' -import {OPTION_TABLES} from 'common/profiles/constants' +import {OptionTableKey} from 'common/profiles/constants' +import {validateTable} from 'common/profiles/options' import {tryCatch} from 'common/util/try-catch' import {createSupabaseDirectClient} from 'shared/supabase/init' import {log} from 'shared/utils' -export const getOptions: APIHandler<'get-options'> = async ({table}, _auth) => { - if (!OPTION_TABLES.includes(table)) throw APIErrors.badRequest('Invalid table') +export async function getOptions(table: OptionTableKey, locale?: string): Promise { + validateTable(table) const pg = createSupabaseDirectClient() - const result = await tryCatch( - pg.manyOrNone<{name: string}>(`SELECT interests.name - FROM interests`), - ) + let query: string + const params: any[] = [] + + if (locale) { + // Get translated options for the specified locale + const translationTable = `${table}_translations` + query = ` + SELECT COALESCE(t.name, o.name) as name + FROM ${table} o + LEFT JOIN ${translationTable} t ON o.id = t.option_id AND t.locale = $1 + ORDER BY o.id + ` + params.push(locale) + } else { + // Get default options (fallback to English) + query = `SELECT name FROM ${table} ORDER BY id` + } + + const result = await tryCatch(pg.manyOrNone<{name: string}>(query, params)) if (result.error) { log('Error getting profile options', result.error) throw APIErrors.internalServerError('Error getting profile options') } - const names = result.data.map((row) => row.name) + return result.data.map((row) => row.name) +} + +export const getOptionsEndpoint: APIHandler<'get-options'> = async ({table, locale}, _auth) => { + const names = await getOptions(table, locale) return {names} } diff --git a/backend/api/src/llm-extract-profile.ts b/backend/api/src/llm-extract-profile.ts new file mode 100644 index 00000000..d5917c74 --- /dev/null +++ b/backend/api/src/llm-extract-profile.ts @@ -0,0 +1,374 @@ +import {JSONContent} from '@tiptap/core' +import {getOptions} from 'api/get-options' +import {APIErrors, APIHandler} from 'api/helpers/endpoint' +import {searchLocation} from 'api/search-location' +import { + DIET_CHOICES, + EDUCATION_CHOICES, + GENDERS, + LANGUAGE_CHOICES, + MBTI_CHOICES, + POLITICAL_CHOICES, + RACE_CHOICES, + RELATIONSHIP_CHOICES, + RELATIONSHIP_STATUS_CHOICES, + RELIGION_CHOICES, + ROMANTIC_CHOICES, +} from 'common/choices' +import {debug} from 'common/logger' +import {ProfileWithoutUser} from 'common/profiles/profile' +import {parseJsonContentToText} from 'common/util/parse' +import {createHash} from 'crypto' +import {promises as fs} from 'fs' +import {tmpdir} from 'os' +import {join} from 'path' +import {log} from 'shared/monitoring/log' +import {convertToJSONContent, extractGoogleDocId} from 'shared/parse' + +const MAX_CONTEXT_LENGTH = 7 * 10 * 30 * 50 +const USE_CACHE = true +const CACHE_DIR = join(tmpdir(), 'compass-llm-cache') +const CACHE_TTL_MS = 24 * 60 * 60 * 1000 // 24 hours + +function getCacheKey(content: string): string { + if (!USE_CACHE) return '' + const hash = createHash('sha256') + hash.update(content) + return hash.digest('hex') +} + +async function getCachedResult(cacheKey: string): Promise | null> { + if (!USE_CACHE) return null + try { + const cacheFile = join(CACHE_DIR, `${cacheKey}.json`) + const stats = await fs.stat(cacheFile) + + if (Date.now() - stats.mtime.getTime() > CACHE_TTL_MS) { + await fs.unlink(cacheFile) + return null + } + + const cachedData = await fs.readFile(cacheFile, 'utf-8') + return JSON.parse(cachedData) + } catch { + return null + } +} + +async function setCachedResult( + cacheKey: string, + result: Partial, +): Promise { + if (!USE_CACHE) return + try { + await fs.mkdir(CACHE_DIR, {recursive: true}) + const cacheFile = join(CACHE_DIR, `${cacheKey}.json`) + await fs.writeFile(cacheFile, JSON.stringify(result), 'utf-8') + debug('Cached LLM result', {cacheKey: cacheKey.substring(0, 8)}) + } catch (error) { + log('Failed to write cache', {cacheKey, error}) + // Don't throw - caching failure shouldn't break the main flow + } +} + +async function callGemini(text: string) { + const apiKey = process.env.GEMINI_API_KEY + + if (!apiKey) { + log('GEMINI_API_KEY not configured') + throw APIErrors.internalServerError('Profile extraction service is not configured') + } + + const response = await fetch( + `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key=${apiKey}`, + { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + contents: [ + { + parts: [ + { + text: text.slice(0, MAX_CONTEXT_LENGTH), + }, + ], + }, + ], + generationConfig: { + temperature: 0, + topP: 0.95, + topK: 40, + responseMimeType: 'application/json', + }, + }), + }, + ) + + if (!response.ok) { + const errorText = await response.text() + log('Gemini API error', {status: response.status, error: errorText}) + throw APIErrors.internalServerError('Failed to extract profile data') + } + + const data = await response.json() + const outputText = data.candidates?.[0]?.content?.parts?.[0]?.text + return outputText +} + +async function _callClaude(text: string) { + // We don't use it as there is no free tier + const apiKey = process.env.ANTHROPIC_API_KEY + + if (!apiKey) { + log('ANTHROPIC_API_KEY not configured') + throw APIErrors.internalServerError('Profile extraction service is not configured') + } + + const response = await fetch('https://api.anthropic.com/v1/messages', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-api-key': apiKey, + 'anthropic-version': '2023-06-01', + }, + body: JSON.stringify({ + model: 'claude-sonnet-4-5', + max_tokens: 1024, + temperature: 0, + messages: [ + { + role: 'user', + content: text.slice(0, MAX_CONTEXT_LENGTH), + }, + ], + }), + }) + + if (!response.ok) { + const errorText = await response.text() + log('Anthropic API error', {status: response.status, error: errorText}) + throw APIErrors.internalServerError('Failed to extract profile data') + } + + const data = await response.json() + const outputText = data.content?.[0]?.text + return outputText +} + +async function callLLM(content: string, locale?: string): Promise> { + const [INTERESTS, CAUSE_AREAS, WORK_AREAS] = await Promise.all([ + getOptions('interests', locale), + getOptions('causes', locale), + getOptions('work', locale), + ]) + + const PROFILE_FIELDS: Partial> = { + // Basic info + age: 'Number. Age in years.', + gender: `One of: ${Object.values(GENDERS).join(', ')}. Infer if you have enough evidence`, + height_in_inches: 'Number. Height converted to inches.', + city: 'String. Current city of residence (English spelling).', + country: 'String. Current country of residence (English spelling).', + city_latitude: 'Number. Latitude of current city.', + city_longitude: 'Number. Longitude of current city.', + + // Background + raised_in_city: 'String. City where they grew up (English spelling).', + raised_in_country: 'String. Country where they grew up (English spelling).', + raised_in_lat: 'Number. Latitude of city where they grew up.', + raised_in_lon: 'Number. Longitude of city where they grew up.', + university: 'String. University or college attended.', + education_level: `One of: ${Object.values(EDUCATION_CHOICES).join(', ')}`, + company: 'String. Current employer or company name.', + occupation_title: 'String. Current job title.', + + // Lifestyle + is_smoker: 'Boolean. Whether they smoke.', + drinks_per_month: 'Number. Estimated alcoholic drinks per month.', + has_kids: 'Number. 0 if no kids, otherwise number of kids.', + wants_kids_strength: + 'Number 0–4. How strongly they want kids (0 = definitely not, 4 = definitely yes).', + diet: `Array. Any of: ${Object.values(DIET_CHOICES).join(', ')}`, + ethnicity: `Array. Any of: ${Object.values(RACE_CHOICES).join(', ')}`, + + // Identity — big5 only if person explicitly states a score, never infer from personality description + mbti: `One of: ${Object.values(MBTI_CHOICES).join(', ')}`, + big5_openness: 'Number 0–100. Only if explicitly self-reported, never infer.', + big5_conscientiousness: 'Number 0–100. Only if explicitly self-reported, never infer.', + big5_extraversion: 'Number 0–100. Only if explicitly self-reported, never infer.', + big5_agreeableness: 'Number 0–100. Only if explicitly self-reported, never infer.', + big5_neuroticism: 'Number 0–100. Only if explicitly self-reported, never infer.', + + // Beliefs + religion: `Array. Any of: ${Object.values(RELIGION_CHOICES).join(', ')}`, + religious_beliefs: + 'String. Free-form elaboration on religious views, only if explicitly stated.', + political_beliefs: `Array. Any of: ${Object.values(POLITICAL_CHOICES).join(', ')}`, + political_details: + 'String. Free-form elaboration on political views, only if explicitly stated.', + + // Preferences + pref_age_min: 'Number. Minimum preferred age of match.', + pref_age_max: 'Number. Maximum preferred age of match.', + pref_gender: `Array. Any of: ${Object.values(GENDERS).join(', ')}`, + pref_relation_styles: `Array. Any of: ${Object.values(RELATIONSHIP_CHOICES).join(', ')}`, + pref_romantic_styles: `Array. Any of: ${Object.values(ROMANTIC_CHOICES).join(', ')}`, + relationship_status: `Array. Any of: ${Object.values(RELATIONSHIP_STATUS_CHOICES).join(', ')}`, + + // Languages + languages: `Array. Any of: ${Object.values(LANGUAGE_CHOICES).join(', ')}. If none, infer from text.`, + + // Free-form + headline: + 'String. Summary of who they are, in their own voice (first person). Maximum 200 characters total. Cannot be null.', + keywords: 'Array of 3–6 short tags summarising the person.', + links: 'Object. Any personal URLs found (site, github, linkedin, twitter, etc.).', + + // Taxonomies — match existing labels first, only add new if truly no close match exists + interests: `Array. Prefer existing labels, only add new if no close match. Any of: ${INTERESTS.join(', ')}`, + causes: `Array. Prefer existing labels, only add new if no close match. Any of: ${CAUSE_AREAS.join(', ')}`, + work: `Array. Use only existing labels, do not add new if no close match. Any of: ${WORK_AREAS.join(', ')}`, + } + + const EXTRACTION_PROMPT = `You are a profile information extraction expert analyzing text from a personal webpage, LinkedIn, bio, or similar source. + +TASK: Extract structured profile data and return it as a single valid JSON object. + +RULES: +- Only extract information that is EXPLICITLY stated — do not infer, guess, or hallucinate +- Return null for missing scalar fields, [] for missing array fields +- For taxonomy fields (interests, causes, work): match existing labels first; only add a new label if truly no existing one is close +- For big5 scores: only populate if the person explicitly states a test result — never infer from personality description +- Return valid JSON only — no markdown, no explanation, no extra text + +SCHEMA (each value describes the expected type and accepted values): +${JSON.stringify(PROFILE_FIELDS, null, 2)} + +TEXT TO ANALYZE: +` + const text = EXTRACTION_PROMPT + content + if (text.length > MAX_CONTEXT_LENGTH) { + log('Content exceeds maximum length', {length: text.length}) + throw APIErrors.badRequest('Content exceeds maximum length') + } + debug({text}) + + const cacheKey = getCacheKey(text) + const cached = await getCachedResult(cacheKey) + if (cached) { + debug('Using cached LLM result', {cacheKey: cacheKey.substring(0, 8)}) + return cached + } + + const outputText = await callGemini(text) + // const outputText = JSON.stringify({}) + + if (!outputText) { + throw APIErrors.internalServerError('Failed to parse LLM response') + } + + let parsed: Partial + try { + parsed = JSON.parse(outputText) + } catch (parseError) { + log('Failed to parse LLM response as JSON', {outputText, parseError}) + throw APIErrors.internalServerError('Failed to parse extracted data') + } + + if (parsed.city) { + if (!parsed.city_latitude || !parsed.city_longitude) { + const result = await searchLocation({term: parsed.city, limit: 1}) + const locations = result.data?.data + parsed.city_latitude = locations?.[0]?.latitude + parsed.city_longitude = locations?.[0]?.longitude + parsed.country ??= locations?.[0]?.country + } + } + if (parsed.raised_in_city) { + if (!parsed.raised_in_lat || !parsed.raised_in_lon) { + const result = await searchLocation({term: parsed.raised_in_city, limit: 1}) + const locations = result.data?.data + parsed.raised_in_lat = locations?.[0]?.latitude + parsed.raised_in_lon = locations?.[0]?.longitude + parsed.raised_in_country ??= locations?.[0]?.country + } + } + + await setCachedResult(cacheKey, parsed) + + return parsed +} + +export async function fetchOnlineProfile(url: string | undefined): Promise { + if (!url) throw APIErrors.badRequest('Content or URL is required') + + try { + // 1. Google Docs shortcut + const googleDocId = extractGoogleDocId(url) + if (googleDocId) { + url = `https://docs.google.com/document/d/${googleDocId}/export?format=html` + } + + // 2. Fetch with proper headers + const response = await fetch(url, { + headers: { + 'User-Agent': 'Mozilla/5.0 (compatible; bot/1.0)', + Accept: 'text/html,text/plain,*/*', + }, + }) + + if (!response.ok) { + throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`) + } + + const contentType = response.headers.get('content-type') ?? '' + const content = await response.text() + + log('Fetched content from URL', {url, contentType, contentLength: content.length}) + debug({content}) + + // 3. Route by content type + return convertToJSONContent(content, contentType, url) + } catch (error) { + log('Error fetching URL', {url, error}) + throw APIErrors.badRequest('Failed to fetch content from URL') + } +} + +export const llmExtractProfileEndpoint: APIHandler<'llm-extract-profile'> = async ( + parsedBody, + auth, +) => { + const {url, locale} = parsedBody + let content = parsedBody.content + + log('Extracting profile from content', { + contentLength: content?.length, + url, + locale, + userId: auth.uid, + }) + + if (content && url) { + throw APIErrors.badRequest('Content and URL cannot be provided together') + } + + let bio + if (!content) { + bio = await fetchOnlineProfile(url) + debug(JSON.stringify(bio, null, 2)) + content = parseJsonContentToText(bio) + } + + const extracted = await callLLM(content, locale) + + if (bio) { + extracted.bio = bio + } + + log('Profile extracted successfully', {extracted}) + + return extracted +} diff --git a/backend/api/src/search-location.ts b/backend/api/src/search-location.ts index 0ea37fe9..2dbe2347 100644 --- a/backend/api/src/search-location.ts +++ b/backend/api/src/search-location.ts @@ -1,8 +1,13 @@ +import {ValidatedAPIParams} from 'common/api/schema' import {geodbFetch} from 'common/geodb' import {APIHandler} from './helpers/endpoint' -export const searchLocation: APIHandler<'search-location'> = async (body) => { +export const searchLocationEndpoint: APIHandler<'search-location'> = async (body) => { + return await searchLocation(body) +} + +export async function searchLocation(body: ValidatedAPIParams<'search-location'>) { const {term, limit} = body const endpoint = `/cities?namePrefix=${term}&limit=${limit ?? 10}&offset=0&sort=-population` // const endpoint = `/countries?namePrefix=${term}&limit=${limit ?? 10}&offset=0` diff --git a/backend/api/src/update-options.ts b/backend/api/src/update-options.ts index f4d3176b..3ff03c24 100644 --- a/backend/api/src/update-options.ts +++ b/backend/api/src/update-options.ts @@ -1,18 +1,15 @@ import {APIErrors, APIHandler} from 'api/helpers/endpoint' -import {OPTION_TABLES} from 'common/profiles/constants' +import {OptionTableKey} from 'common/profiles/constants' +import {validateTable} from 'common/profiles/options' import {tryCatch} from 'common/util/try-catch' import {createSupabaseDirectClient, SupabaseDirectClient} from 'shared/supabase/init' import {log} from 'shared/utils' -function validateTable(table: 'interests' | 'causes' | 'work') { - if (!OPTION_TABLES.includes(table)) throw APIErrors.badRequest('Invalid table') -} - export async function setProfileOptions( tx: SupabaseDirectClient, profileId: number, userId: string, - table: 'interests' | 'causes' | 'work', + table: OptionTableKey, values: string[] | undefined | null, ) { validateTable(table) diff --git a/backend/api/tests/unit/get-options.unit.test.ts b/backend/api/tests/unit/get-options.unit.test.ts index 25ec0266..2aeb2576 100644 --- a/backend/api/tests/unit/get-options.unit.test.ts +++ b/backend/api/tests/unit/get-options.unit.test.ts @@ -1,7 +1,7 @@ jest.mock('shared/supabase/init') jest.mock('common/util/try-catch') -import {getOptions} from 'api/get-options' +import {getOptionsEndpoint} from 'api/get-options' import {AuthedUser} from 'api/helpers/endpoint' import {sqlMatch} from 'common/test-utils' import {tryCatch} from 'common/util/try-catch' @@ -31,7 +31,7 @@ describe('getOptions', () => { ;(mockPg.manyOrNone as jest.Mock).mockResolvedValue(null) ;(tryCatch as jest.Mock).mockResolvedValue({data: mockData, error: null}) - const result: any = await getOptions({table: mockTable}, mockAuth, mockReq) + const result: any = await getOptionsEndpoint({table: mockTable}, mockAuth, mockReq) expect(result.names).toContain(mockData[0].name) expect(mockPg.manyOrNone).toBeCalledTimes(1) @@ -48,7 +48,9 @@ describe('getOptions', () => { jest.spyOn(Array.prototype, 'includes').mockReturnValue(false) - expect(getOptions({table: mockTable}, mockAuth, mockReq)).rejects.toThrow('Invalid table') + expect(getOptionsEndpoint({table: mockTable}, mockAuth, mockReq)).rejects.toThrow( + 'Invalid table', + ) }) it('should throw if unable to get profile options', async () => { @@ -60,7 +62,7 @@ describe('getOptions', () => { ;(mockPg.manyOrNone as jest.Mock).mockResolvedValue(null) ;(tryCatch as jest.Mock).mockResolvedValue({data: null, error: Error}) - expect(getOptions({table: mockTable}, mockAuth, mockReq)).rejects.toThrow( + expect(getOptionsEndpoint({table: mockTable}, mockAuth, mockReq)).rejects.toThrow( 'Error getting profile options', ) }) diff --git a/backend/api/tests/unit/search-location.unit.test.ts b/backend/api/tests/unit/search-location.unit.test.ts index 4405529d..5663fe68 100644 --- a/backend/api/tests/unit/search-location.unit.test.ts +++ b/backend/api/tests/unit/search-location.unit.test.ts @@ -1,7 +1,7 @@ jest.mock('common/geodb') import {AuthedUser} from 'api/helpers/endpoint' -import {searchLocation} from 'api/search-location' +import {searchLocationEndpoint} from 'api/search-location' import * as geodbModules from 'common/geodb' describe('searchLocation', () => { @@ -24,7 +24,7 @@ describe('searchLocation', () => { ;(geodbModules.geodbFetch as jest.Mock).mockResolvedValue(mockReturn) - const result = await searchLocation(mockBody, mockAuth, mockReq) + const result = await searchLocationEndpoint(mockBody, mockAuth, mockReq) expect(result).toBe(mockReturn) expect(geodbModules.geodbFetch).toBeCalledTimes(1) diff --git a/backend/shared/src/parse.ts b/backend/shared/src/parse.ts new file mode 100644 index 00000000..1b470a4c --- /dev/null +++ b/backend/shared/src/parse.ts @@ -0,0 +1,458 @@ +import {Readability} from '@mozilla/readability' +import {JSONContent} from '@tiptap/core' +import {debug} from 'common/logger' +import {JSDOM} from 'jsdom' +import {marked} from 'marked' + +export function htmlToJSONContent(html: string, url: string): JSONContent { + const originalDom = new JSDOM(html, {url}) + const classStyles = extractClassStyles(originalDom.window.document) + + const isGoogleDoc = !!extractGoogleDocId(url) + if (!isGoogleDoc) { + const reader = new Readability(originalDom.window.document) + const article = reader.parse() + if (article?.content) { + const cleanDom = new JSDOM(article.content) + return parseHtmlBodyToJSONContent(cleanDom.window.document, classStyles) + } + } + + return parseHtmlBodyToJSONContent(originalDom.window.document, classStyles) +} + +function plainTextToJSONContent(text: string): JSONContent { + const paragraphs = text + .split(/\n{2,}/) // split on blank lines + .map((p) => p.trim()) + .filter(Boolean) + .map((p) => ({ + type: 'paragraph' as const, + content: [{type: 'text' as const, text: p}], + })) + + return {type: 'doc', content: paragraphs} +} + +function extractClassStyles(document: Document): Map> { + const classStyles = new Map>() + + for (const styleEl of document.querySelectorAll('style')) { + const css = styleEl.textContent ?? '' + + // Match .className { prop: value; prop: value } + const ruleRegex = /\.([a-zA-Z0-9_-]+)\s*\{([^}]+)}/g + let match + while ((match = ruleRegex.exec(css)) !== null) { + const className = match[1] + const declarations = match[2] + const styles = parseStyleString(declarations) + classStyles.set(className, styles) + } + } + + return classStyles +} + +export function parseHtmlBodyToJSONContent( + document: Document, + classStyles?: Map>, +): JSONContent { + const body = document.body + classStyles ??= extractClassStyles(document) + const content = parseBlockElements(body.children, classStyles) + return {type: 'doc', content} +} + +function parseBlockElements( + children: HTMLCollection | Element[], + classStyles: Map>, +): JSONContent[] { + const content: JSONContent[] = [] + + for (const el of Array.from(children)) { + const tag = el.tagName.toLowerCase() + const node = parseBlockElement(el, tag, classStyles) + if (!node) continue + + if ((node as any).type === '__fragment') { + // Recursively flatten — fragments can contain fragments + content.push(...flattenFragment(node as any)) + } else { + content.push(node) + } + } + + return content +} + +function flattenFragment(node: any): JSONContent[] { + return node.content.flatMap((child: any) => + child.type === '__fragment' ? flattenFragment(child) : [child], + ) +} + +function parseBlockElement( + el: Element, + tag: string, + classStyles: Map>, +): JSONContent | null { + // console.debug('parseBlockElement', {tag, el}) + // Headings h1–h6 + if (/^h[1-6]$/.test(tag)) { + return { + type: 'heading', + attrs: {level: parseInt(tag[1])}, + content: parseInlineElements(el, classStyles), + } + } + + // Paragraph + if (tag === 'p') { + const inline = parseInlineElements(el, classStyles) + return inline.length > 0 ? {type: 'paragraph', content: inline} : null + } + + // Lists + if (tag === 'ol') { + return { + type: 'orderedList', + attrs: {start: 1}, // ← required by TipTap's OrderedList extension + content: parseListItems(el, classStyles), + } + } + if (tag === 'ul') { + return { + type: 'bulletList', + attrs: {}, + content: parseListItems(el, classStyles), + } + } + // Blockquote + if (tag === 'blockquote') { + return { + type: 'blockquote', + content: parseBlockElements(el.children, classStyles), + } + } + + // Code block
...
+ if (tag === 'pre') { + const codeEl = el.querySelector('code') + const language = codeEl?.className.match(/language-(\w+)/)?.[1] ?? null + return { + type: 'codeBlock', + attrs: {language}, + content: [{type: 'text', text: (codeEl ?? el).textContent ?? ''}], + } + } + + // Inline code outside of pre (treat as paragraph) + if (tag === 'code') { + return { + type: 'paragraph', + content: [{type: 'text', text: el.textContent ?? '', marks: [{type: 'code'}]}], + } + } + + // Horizontal rule + if (tag === 'hr') { + return {type: 'horizontalRule'} + } + + // Image + if (tag === 'img') { + const src = el.getAttribute('src') + if (!src || !src.startsWith('http')) return null + return { + type: 'image', + attrs: { + src, + alt: el.getAttribute('alt') ?? null, + title: el.getAttribute('title') ?? null, + }, + } + } + + // Figure (image + optional caption) + if (tag === 'figure') { + const img = el.querySelector('img') + const caption = el.querySelector('figcaption')?.textContent ?? null + const src = img?.getAttribute('src') + if (!src || !src.startsWith('http')) return null + return { + type: 'image', + attrs: { + src: img?.getAttribute('src'), + alt: img?.getAttribute('alt') ?? caption, + title: caption, + }, + } + } + + // Table + if (tag === 'table') { + return parseTable(el, classStyles) + } + + // Container elements — recurse into children + if (['div', 'section', 'article', 'main', 'header', 'footer', 'aside'].includes(tag)) { + const inner = parseBlockElements(el.children, classStyles) + if (inner.length === 0) return null + if (inner.length === 1) return inner[0] + + // Always use fragment — never paragraph — for multiple block children + return {type: '__fragment', content: inner} as any + } + + return null +} + +function parseStyleString(style: string): Record { + return Object.fromEntries( + style + .split(';') + .map((s) => s.trim()) + .filter(Boolean) + .map((declaration) => { + const [prop, ...rest] = declaration.split(':') + const value = rest.join(':').trim() + // Convert kebab-case to camelCase (e.g. font-weight → fontWeight) + const camelProp = prop.trim().replace(/-([a-z])/g, (_, c) => c.toUpperCase()) + return [camelProp, value] + }), + ) +} + +function parseListItems( + listEl: Element, + classStyles: Map>, +): JSONContent[] { + return Array.from(listEl.querySelectorAll(':scope > li')).map((li) => { + const nestedList = li.querySelector('ul, ol') + const blockContent: JSONContent[] = [ + {type: 'paragraph', content: parseInlineElements(li, classStyles, true)}, + ] + + if (nestedList) { + const nestedTag = nestedList.tagName.toLowerCase() + blockContent.push({ + type: nestedTag === 'ul' ? 'bulletList' : 'orderedList', + content: parseListItems(nestedList, classStyles), + }) + } + + return {type: 'listItem', content: blockContent} + }) +} + +function parseTable( + tableEl: Element, + classStyles: Map>, +): JSONContent { + const rows = Array.from(tableEl.querySelectorAll('tr')) + + return { + type: 'table', + content: rows.map((row, rowIndex) => ({ + type: 'tableRow', + content: Array.from(row.querySelectorAll('td, th')).map((cell) => ({ + type: rowIndex === 0 || cell.tagName.toLowerCase() === 'th' ? 'tableHeader' : 'tableCell', + attrs: { + colspan: parseInt(cell.getAttribute('colspan') ?? '1'), + rowspan: parseInt(cell.getAttribute('rowspan') ?? '1'), + }, + content: [{type: 'paragraph', content: parseInlineElements(cell, classStyles)}], + })), + })), + } +} + +function parseInlineElements( + el: Element, + classStyles: Map>, + skipNested = false, +): JSONContent[] { + const nodes: JSONContent[] = [] + + for (const child of el.childNodes) { + // Plain text node + if (child.nodeType === 3) { + let text = child.textContent ?? '' + + // Remove HTML tags from text + text = text.replace('', '\n') + + if (text.trim()) nodes.push({type: 'text', text}) + continue + } + + if (child.nodeType !== 1) continue + const childEl = child as Element + const tag = childEl.tagName.toLowerCase() + + // Skip nested lists when extracting list item text + if (skipNested && ['ul', 'ol'].includes(tag)) continue + + // Line break + if (tag === 'br') { + nodes.push({type: 'hardBreak'}) + continue + } + + // Inline image + if (tag === 'img') { + const src = childEl.getAttribute('src') + if (src && src.startsWith('http')) nodes.push({type: 'image', attrs: {src}}) + continue + } + + // Marks + const marks = getMarks(childEl, tag, classStyles) + + const isInlineContainer = [ + 'span', + 'a', + 'strong', + 'em', + 'b', + 'i', + 'u', + 's', + 'mark', + 'code', + 'label', + ].includes(tag) + const hasChildElements = childEl.children.length > 0 + + if (isInlineContainer && hasChildElements) { + // Recurse into children and apply this element's marks on top + const innerNodes = parseInlineElements(childEl, classStyles, skipNested) + for (const inner of innerNodes) { + if (inner.type === 'text' && marks.length > 0) { + // Merge marks — avoid duplicates + const existingTypes = new Set((inner.marks ?? []).map((m: any) => m.type)) + const newMarks = marks.filter((m) => !existingTypes.has(m.type as string)) + nodes.push({ + ...inner, + marks: [...(inner.marks ?? []), ...newMarks], + } as JSONContent) + } else { + nodes.push(inner) + } + } + continue + } + + const text = childEl.textContent ?? '' + if (!text) continue + + nodes.push({ + type: 'text', + text, + ...(marks.length > 0 && {marks: marks as Array<{type: string; attrs?: Record}>}), + }) + } + + return nodes +} + +function getMarks( + el: Element, + tag: string, + classStyles: Map>, +): JSONContent[] { + const marks: JSONContent[] = [] + + if (['b', 'strong'].includes(tag)) marks.push({type: 'bold'}) + if (['i', 'em'].includes(tag)) marks.push({type: 'italic'}) + if (tag === 'u') marks.push({type: 'underline'}) + if (['s', 'strike', 'del'].includes(tag)) marks.push({type: 'strike'}) + if (tag === 'code') marks.push({type: 'code'}) + if (tag === 'mark') marks.push({type: 'highlight'}) + + if (tag === 'a') { + const href = cleanHref(el.getAttribute('href') ?? '') + marks.push({ + type: 'link', + attrs: {href, target: '_blank'}, + }) + } + + const style: Record = {} + const classes = Array.from(el.classList) + for (const cls of classes) { + const resolved = classStyles.get(cls) + if (resolved) Object.assign(style, resolved) + } + const inlineStyle = parseStyleString(el.getAttribute('style') ?? '') + Object.assign(style, inlineStyle) + + if (!marks.find((m) => m.type === 'bold') && /^(bold|[7-9]\d{2})$/.test(style.fontWeight ?? '')) { + marks.push({type: 'bold'}) + } + + if (!marks.find((m) => m.type === 'italic') && style.fontStyle === 'italic') { + marks.push({type: 'italic'}) + } + + if (style.textDecoration?.includes('underline') && !marks.find((m) => m.type === 'underline')) { + marks.push({type: 'underline'}) + } + + if (style.textDecoration?.includes('line-through') && !marks.find((m) => m.type === 'strike')) { + marks.push({type: 'strike'}) + } + + return marks +} + +function cleanHref(href: string): string { + try { + const url = new URL(href) + if (url.hostname === 'www.google.com' && url.pathname === '/url') { + return url.searchParams.get('q') ?? href + } + } catch (error) { + debug('Invalid URL:', href, error) + } + return href +} + +export function extractGoogleDocId(url: string) { + const patterns = [ + /\/document\/d\/([a-zA-Z0-9-_]+)/, // standard /d/{id}/ format + /id=([a-zA-Z0-9-_]+)/, // ?id= query param format + /^([a-zA-Z0-9-_]+)$/, // raw ID passed directly + ] + + for (const pattern of patterns) { + const match = url.match(pattern) + if (match) return match[1] + } + + return null +} + +function markdownToJSONContent(markdown: string): JSONContent { + const html = marked(markdown) as string + const dom = new JSDOM(html) + return parseHtmlBodyToJSONContent(dom.window.document) +} + +export function convertToJSONContent( + content: string, + contentType: string, + url: string, +): JSONContent { + if (contentType.includes('text/html')) { + return htmlToJSONContent(content, url) // use Readability for articles + } + + if (contentType.includes('text/markdown') || url.endsWith('.md')) { + return markdownToJSONContent(content) + } + + // plain text fallback + return plainTextToJSONContent(content) +} diff --git a/common/package.json b/common/package.json index f217a3c1..24d86ed2 100644 --- a/common/package.json +++ b/common/package.json @@ -17,6 +17,11 @@ "@tiptap/extension-image": "2.10.4", "@tiptap/extension-link": "2.10.4", "@tiptap/extension-mention": "2.10.4", + "@tiptap/extension-table": "2.10.4", + "@tiptap/extension-table-cell": "2.10.4", + "@tiptap/extension-table-header": "2.10.4", + "@tiptap/extension-table-row": "2.10.4", + "@tiptap/extension-underline": "2.10.4", "@tiptap/pm": "2.10.4", "@tiptap/starter-kit": "2.10.4", "@tiptap/suggestion": "2.10.4", diff --git a/common/src/api/schema.ts b/common/src/api/schema.ts index 094d9f31..6243e9ff 100644 --- a/common/src/api/schema.ts +++ b/common/src/api/schema.ts @@ -11,7 +11,7 @@ import {ChatMessage} from 'common/chat-message' import {Notification} from 'common/notifications' import {CompatibilityScore} from 'common/profiles/compatibility-score' import {MAX_COMPATIBILITY_QUESTION_LENGTH, OPTION_TABLES} from 'common/profiles/constants' -import {Profile, ProfileRow} from 'common/profiles/profile' +import {Profile, ProfileRow, ProfileWithoutUser} from 'common/profiles/profile' import {Stats} from 'common/stats' // mqp: very unscientific, just balancing our willingness to accept load import {PrivateMessageChannel} from 'common/supabase/private-messages' import {Row} from 'common/supabase/utils' @@ -1266,6 +1266,21 @@ export const API = (_apiTypeCheck = { summary: 'Validate if a username is available', tag: 'Users', }, + 'llm-extract-profile': { + method: 'POST', + authed: true, + rateLimited: true, + props: z + .object({ + content: z.string().min(1).optional(), + url: z.string().url().optional(), + locale: z.string().optional(), + }) + .strict(), + returns: {} as Partial, + summary: 'Extract profile information from text using LLM', + tag: 'Profiles', + }, } as const) export type APIPath = keyof typeof API diff --git a/common/src/geodb.ts b/common/src/geodb.ts index 89b29ab6..a1e73974 100644 --- a/common/src/geodb.ts +++ b/common/src/geodb.ts @@ -1,9 +1,12 @@ import {debug} from 'common/logger' import {ProfileRow} from 'common/profiles/profile' +import {sleep} from 'common/util/time' export const geodbHost = 'wft-geo-db.p.rapidapi.com' -export const geodbFetch = async (endpoint: string) => { +export const geodbFetch = async ( + endpoint: string, +): Promise<{status: 'success' | 'failure'; data: any}> => { const apiKey = process.env.GEODB_API_KEY if (!apiKey) { return {status: 'failure', data: 'Missing GEODB API key'} @@ -21,6 +24,11 @@ export const geodbFetch = async (endpoint: string) => { }) if (!res.ok) { + if (res.status === 429) { + debug('geodbFetch', endpoint, 'Rate limited') + await sleep(1100) + return geodbFetch(endpoint) + } throw new Error(`HTTP error! Status: ${res.status} ${await res.text()}`) } diff --git a/common/src/parsing.ts b/common/src/parsing.ts index aaf5904c..9f77d24a 100644 --- a/common/src/parsing.ts +++ b/common/src/parsing.ts @@ -11,3 +11,35 @@ export function trimStrings>(body: T) } return body } + +export const isUrl = (text: string): boolean => { + if (!text || typeof text !== 'string') return false + + // Remove leading/trailing whitespace + const trimmedText = text.trim() + + // If it already starts with a protocol, test as-is + if (/^[a-zA-Z][a-zA-Z0-9+.-]*:\/\//.test(trimmedText)) { + try { + new URL(trimmedText) + return true + } catch { + return false + } + } + + // Try adding https:// prefix for common domain patterns + if ( + /^[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/.test(trimmedText) || + /^www\.[a-zA-Z0-9.-]+/.test(trimmedText) + ) { + try { + new URL(`https://${trimmedText}`) + return true + } catch { + return false + } + } + + return false +} diff --git a/common/src/profiles/options.ts b/common/src/profiles/options.ts new file mode 100644 index 00000000..2ec6e040 --- /dev/null +++ b/common/src/profiles/options.ts @@ -0,0 +1,6 @@ +import {APIErrors} from 'common/api/utils' +import {OPTION_TABLES} from 'common/profiles/constants' + +export function validateTable(table: 'interests' | 'causes' | 'work') { + if (!OPTION_TABLES.includes(table)) throw APIErrors.badRequest('Invalid table') +} diff --git a/common/src/secrets.ts b/common/src/secrets.ts index b58322ac..e714905a 100644 --- a/common/src/secrets.ts +++ b/common/src/secrets.ts @@ -28,6 +28,7 @@ export const secrets = ( 'VAPID_PRIVATE_KEY', 'DB_ENC_MASTER_KEY_BASE64', 'GOOGLE_CLIENT_SECRET', + 'GEMINI_API_KEY', // Some typescript voodoo to keep the string literal types while being not readonly. ] as const ).concat() diff --git a/common/src/util/parse.ts b/common/src/util/parse.ts index be760014..d33c854f 100644 --- a/common/src/util/parse.ts +++ b/common/src/util/parse.ts @@ -2,6 +2,11 @@ import {getSchema, getText, getTextSerializersFromSchema, JSONContent} from '@ti import {Image} from '@tiptap/extension-image' import {Link} from '@tiptap/extension-link' import {Mention} from '@tiptap/extension-mention' +import Table from '@tiptap/extension-table' +import TableCell from '@tiptap/extension-table-cell' +import TableHeader from '@tiptap/extension-table-header' +import TableRow from '@tiptap/extension-table-row' +import Underline from '@tiptap/extension-underline' import {Node as ProseMirrorNode} from '@tiptap/pm/model' import {StarterKit} from '@tiptap/starter-kit' import {find} from 'linkifyjs' @@ -49,6 +54,11 @@ export const extensions = [ Iframe.extend({ renderText: ({node}) => ('[embed]' + node.attrs.src ? `(${node.attrs.src})` : ''), }), + Table.configure({resizable: false}), + TableRow, + TableCell, + TableHeader, + Underline, ] const extensionSchema = getSchema(extensions) diff --git a/common/src/util/string.ts b/common/src/util/string.ts index e2f1f12d..2075a015 100644 --- a/common/src/util/string.ts +++ b/common/src/util/string.ts @@ -5,3 +5,8 @@ export function removeEmojis(str: string) { '', ) } + +export function urlize(s: string) { + if (s.startsWith('http://') || s.startsWith('https://')) return s + return `https://${s}` +} diff --git a/package.json b/package.json index 528e34d5..78a8d5a6 100644 --- a/package.json +++ b/package.json @@ -52,11 +52,32 @@ "@tiptap/extension-blockquote": "2.10.4", "@tiptap/extension-bold": "2.10.4", "@tiptap/extension-bubble-menu": "2.10.4", + "@tiptap/extension-bullet-list": "2.10.4", + "@tiptap/extension-code": "2.10.4", + "@tiptap/extension-code-block": "2.10.4", + "@tiptap/extension-document": "2.10.4", + "@tiptap/extension-dropcursor": "2.10.4", "@tiptap/extension-floating-menu": "2.10.4", + "@tiptap/extension-gapcursor": "2.10.4", + "@tiptap/extension-hard-break": "2.10.4", + "@tiptap/extension-heading": "2.10.4", + "@tiptap/extension-history": "2.10.4", "@tiptap/extension-horizontal-rule": "2.10.4", "@tiptap/extension-image": "2.10.4", + "@tiptap/extension-italic": "2.10.4", "@tiptap/extension-link": "2.10.4", + "@tiptap/extension-list-item": "2.10.4", "@tiptap/extension-mention": "2.10.4", + "@tiptap/extension-ordered-list": "2.10.4", + "@tiptap/extension-paragraph": "2.10.4", + "@tiptap/extension-strike": "2.10.4", + "@tiptap/extension-table": "2.10.4", + "@tiptap/extension-table-cell": "2.10.4", + "@tiptap/extension-table-header": "2.10.4", + "@tiptap/extension-table-row": "2.10.4", + "@tiptap/extension-text": "2.10.4", + "@tiptap/extension-text-style": "2.10.4", + "@tiptap/extension-underline": "2.10.4", "@tiptap/html": "2.10.4", "@tiptap/pm": "2.10.4", "@tiptap/starter-kit": "2.10.4", diff --git a/scripts/e2e_services.sh b/scripts/e2e_services.sh index 0e0bcd19..679070d0 100755 --- a/scripts/e2e_services.sh +++ b/scripts/e2e_services.sh @@ -10,6 +10,7 @@ cd "$(dirname "$0")"/.. export NEXT_PUBLIC_ISOLATED_ENV=true +export $(cat .env.local | grep -v '^#' | xargs) export $(cat .env.test | grep -v '^#' | xargs) # Ensure Supabase local stack is running; if not, reset/start it diff --git a/web/components/bio/editable-bio.tsx b/web/components/bio/editable-bio.tsx index 88959230..133b18b1 100644 --- a/web/components/bio/editable-bio.tsx +++ b/web/components/bio/editable-bio.tsx @@ -203,3 +203,33 @@ export function BaseBio({defaultValue, onBlur, onEditor, onClickTips}: BaseBioPr ) } + +interface BaseTextEditorProps { + placeholder?: any + defaultValue?: any + onBlur?: (editor: any) => void + onEditor?: (editor: any) => void + onChange?: () => void +} + +export function BaseTextEditor({ + placeholder, + defaultValue, + onBlur, + onEditor, + onChange, +}: BaseTextEditorProps) { + const t = useT() + const editor = useTextEditor({ + max: MAX_DESCRIPTION_LENGTH, + defaultValue: defaultValue, + placeholder: placeholder ?? t('common.text_editor.placeholder', 'Write here...'), + onChange: onChange, + }) + + useEffect(() => { + onEditor?.(editor) + }, [editor, onEditor]) + + return onBlur?.(editor)} /> +} diff --git a/web/components/editor/floating-format-menu.tsx b/web/components/editor/floating-format-menu.tsx index d15e57ec..c543daef 100644 --- a/web/components/editor/floating-format-menu.tsx +++ b/web/components/editor/floating-format-menu.tsx @@ -29,7 +29,15 @@ export function FloatingFormatMenu(props: { const unsetLink = () => editor.chain().focus().unsetLink().run() return ( - + { + // CellSelection has $anchorCell, regular selections don't + if ('$anchorCell' in state.selection) return false + return !state.selection.empty + }} + className="text-ink-0 bg-ink-700 flex gap-2 rounded-sm p-1" + > {url === null ? ( <> {advanced && ( diff --git a/web/components/llm-extract-section.tsx b/web/components/llm-extract-section.tsx new file mode 100644 index 00000000..52777cd6 --- /dev/null +++ b/web/components/llm-extract-section.tsx @@ -0,0 +1,65 @@ +import {isUrl} from 'common/parsing' +import {useT} from 'web/lib/locale' + +import {BaseTextEditor} from './bio/editable-bio' +import {Button} from './buttons/button' +import {Col} from './layout/col' + +interface LLMExtractSectionProps { + parsingEditor: any + setParsingEditor: (editor: any) => void + isExtracting: boolean + isSubmitting: boolean + onExtract: () => void +} + +export function LLMExtractSection({ + parsingEditor, + setParsingEditor, + isExtracting, + isSubmitting, + onExtract, +}: LLMExtractSectionProps) { + const t = useT() + const parsingText = parsingEditor?.getText?.() + + return ( + +
+ {t( + 'profile.llm.extract.description', + 'Auto-fill your profile by dropping a link (LinkedIn, Notion, Google Docs, personal website, etc.) or pasting your content directly.', + )} +
+
+ {t( + 'profile.llm.extract.guidance', + 'Heads up: we use Google AI to extract your info. Google may use this content to improve their models. Prefer to keep things private? Just fill the form manually — no AI involved.', + )} +
+ { + if (e) setParsingEditor(e) + }} + onChange={() => { + // Trigger re-render to update button state and text on every key stroke + setParsingEditor({...parsingEditor}) + }} + placeholder={t( + 'profile.llm.extract.placeholder', + 'Insert a URL or paste your profile content here.', + )} + /> + + + ) +} diff --git a/web/components/optional-profile-form.tsx b/web/components/optional-profile-form.tsx index 1aae9f3f..fa69f37d 100644 --- a/web/components/optional-profile-form.tsx +++ b/web/components/optional-profile-form.tsx @@ -1,4 +1,3 @@ -import {PlusIcon, XMarkIcon} from '@heroicons/react/24/solid' import {Editor} from '@tiptap/react' import clsx from 'clsx' import { @@ -15,36 +14,38 @@ import { ROMANTIC_CHOICES, } from 'common/choices' import {debug} from 'common/logger' +import {isUrl} from 'common/parsing' import {MultipleChoiceOptions} from 'common/profiles/multiple-choice' import {Profile, ProfileWithoutUser} from 'common/profiles/profile' -import {PLATFORM_LABELS, type Site, SITE_ORDER, Socials} from 'common/socials' import {BaseUser} from 'common/user' -import {range} from 'lodash' -import {Fragment, useRef, useState} from 'react' +import {urlize} from 'common/util/string' +import {invert, range} from 'lodash' +import {useRef, useState} from 'react' import Textarea from 'react-expanding-textarea' import toast from 'react-hot-toast' import {AddOptionEntry} from 'web/components/add-option-entry' import {SignupBio} from 'web/components/bio/editable-bio' -import {Button, IconButton} from 'web/components/buttons/button' +import {Button} from 'web/components/buttons/button' import {Col} from 'web/components/layout/col' import {Row} from 'web/components/layout/row' import {CustomLink} from 'web/components/links' +import {LLMExtractSection} from 'web/components/llm-extract-section' import {MultiCheckbox} from 'web/components/multi-checkbox' import {City, CityRow, profileToCity, useCitySearch} from 'web/components/search-location' +import {SocialLinksSection} from 'web/components/social-links-section' import {Carousel} from 'web/components/widgets/carousel' import {ChoicesToggleGroup} from 'web/components/widgets/choices-toggle-group' import {Input} from 'web/components/widgets/input' -import {PlatformSelect} from 'web/components/widgets/platform-select' import {RadioToggleGroup} from 'web/components/widgets/radio-toggle-group' import {Select} from 'web/components/widgets/select' import {Slider} from 'web/components/widgets/slider' import {Title} from 'web/components/widgets/title' import {useChoicesContext} from 'web/hooks/use-choices' +import {api} from 'web/lib/api' import {useT} from 'web/lib/locale' import {track} from 'web/lib/service/analytics' import {colClassName, labelClassName} from 'web/pages/signup' -import {SocialIcon} from './user/social' import {AddPhotosWidget} from './widgets/add-photos' export const OptionalProfileUserForm = (props: { @@ -59,20 +60,8 @@ export const OptionalProfileUserForm = (props: { const [isSubmitting, setIsSubmitting] = useState(false) const [uploadingImages, setUploadingImages] = useState(false) - const [lookingRelationship, setLookingRelationship] = useState( - (profile.pref_relation_styles || []).includes('relationship'), - ) const [ageError, setAgeError] = useState(null) const t = useT() - const [heightFeet, setHeightFeet] = useState( - profile.height_in_inches ? Math.floor((profile['height_in_inches'] ?? 0) / 12) : undefined, - ) - const [heightInches, setHeightInches] = useState( - profile.height_in_inches ? Math.floor((profile['height_in_inches'] ?? 0) % 12) : undefined, - ) - - const [newLinkPlatform, setNewLinkPlatform] = useState('') - const [newLinkValue, setNewLinkValue] = useState('') const choices = useChoicesContext() const [interestChoices, setInterestChoices] = useState(choices.interests) @@ -81,13 +70,92 @@ export const OptionalProfileUserForm = (props: { const [keywordsString, setKeywordsString] = useState(profile.keywords?.join(', ') || '') + const lookingRelationship = (profile.pref_relation_styles || []).includes('relationship') + + const heightFeet = + typeof profile.height_in_inches === 'number' + ? Math.floor(profile.height_in_inches / 12) + : undefined + + const heightInches = + typeof profile.height_in_inches === 'number' ? profile.height_in_inches % 12 : undefined + + const [isExtracting, setIsExtracting] = useState(false) + const [parsingEditor, setParsingEditor] = useState(null) + + const handleLLMExtract = async () => { + const llmContent = parsingEditor?.getText?.() ?? '' + if (!llmContent) { + toast.error(t('profile.llm.extract.error_empty', 'Please enter content to extract from')) + return + } + setIsExtracting(true) + try { + const isInputUrl = isUrl(llmContent) + const payload = isInputUrl ? {url: urlize(llmContent).trim()} : {content: llmContent.trim()} + + const extracted = await api('llm-extract-profile', payload) + for (const data of Object.entries(extracted)) { + const key = data[0] + let value = data[1] + let choices, setChoices: any + if (key === 'interests') { + choices = interestChoices + setChoices = setInterestChoices + } else if (key === 'causes') { + choices = causeChoices + setChoices = setCauseChoices + } else if (key === 'work') { + choices = workChoices + setChoices = setWorkChoices + } + if (choices && setChoices) { + const newFields: string[] = [] + const converter = invert(choices) + value = (value as string[]).map((interest: string) => { + if (!converter[interest]) newFields.push(interest) + return converter[interest] ?? interest + }) + if (newFields.length) { + setChoices((prev: any) => ({ + ...prev, + ...Object.fromEntries(newFields.map((e) => [e, e])), + })) + } + debug({value, converter}) + } else if (key === 'keywords') setKeywordsString((value as string[]).join(', ')) + setProfile( + key as keyof ProfileWithoutUser, + value as ProfileWithoutUser[keyof ProfileWithoutUser], + ) + } + if (!isInputUrl) setProfile('bio', parsingEditor?.getJSON?.()) + debug({text: parsingEditor?.getText?.(), json: parsingEditor?.getJSON?.(), extracted}) + + parsingEditor?.commands?.clearContent?.() + + toast.success( + t('profile.llm.extract.success', 'Profile data extracted! Please review below.'), + ) + } catch (error) { + console.error(error) + toast.error(t('profile.llm.extract.error', 'Failed to extract profile data')) + } finally { + setIsExtracting(false) + } + } + const errorToast = () => { toast.error(t('profile.optional.error.invalid_fields', 'Some fields are incorrect...')) } const handleSubmit = async () => { + if (parsingEditor?.getText?.()?.trim()) { + await handleLLMExtract() + } + // Validate age before submitting - if (profile['age'] !== null && profile['age'] !== undefined) { + if (typeof profile['age'] === 'number') { if (profile['age'] < 18) { setAgeError(t('profile.optional.age.error_min', 'You must be at least 18 years old')) setIsSubmitting(false) @@ -115,18 +183,6 @@ export const OptionalProfileUserForm = (props: { setIsSubmitting(false) } - const updateUserLink = (platform: string, value: string | null) => { - setProfile('links', {...((profile.links as Socials) ?? {}), [platform]: value}) - } - - const addNewLink = () => { - if (newLinkPlatform && newLinkValue) { - updateUserLink(newLinkPlatform.toLowerCase().trim(), newLinkValue.trim()) - setNewLinkPlatform('') - setNewLinkValue('') - } - } - function setProfileCity(inputCity: City | undefined) { if (!inputCity) { setProfile('geodb_city_id', null) @@ -189,19 +245,20 @@ export const OptionalProfileUserForm = (props: { return ( <> - {/**/} - {/* */} - {/* {buttonLabel ?? t('common.next', 'Next')} / {t('common.skip', 'Skip')}*/} - {/* */} - {/**/} - {t('profile.optional.subtitle', 'Optional information')} + + + +
+ ) => { - if (e.target.value === '') { - setHeightFeet(undefined) - } else { - setHeightFeet(Number(e.target.value)) - const heightInInches = Number(e.target.value) * 12 + (heightInches ?? 0) - setProfile('height_in_inches', heightInInches) - } + const heightInInches = Number(e.target.value || 0) * 12 + (heightInches ?? 0) + setProfile('height_in_inches', heightInInches) }} className={'w-16'} value={typeof heightFeet === 'number' ? Math.floor(heightFeet) : ''} @@ -305,13 +357,8 @@ export const OptionalProfileUserForm = (props: { type="number" data-testid="height-inches" onChange={(e: React.ChangeEvent) => { - if (e.target.value === '') { - setHeightInches(undefined) - } else { - setHeightInches(Number(e.target.value)) - const heightInInches = Number(e.target.value) + 12 * (heightFeet ?? 0) - setProfile('height_in_inches', heightInInches) - } + const heightInInches = Number(e.target.value || 0) + 12 * (heightFeet ?? 0) + setProfile('height_in_inches', heightInInches) }} className={'w-16'} value={typeof heightInches === 'number' ? Math.floor(heightInches) : ''} @@ -328,14 +375,10 @@ export const OptionalProfileUserForm = (props: { data-testid="height-centimeters" onChange={(e: React.ChangeEvent) => { if (e.target.value === '') { - setHeightFeet(undefined) - setHeightInches(undefined) setProfile('height_in_inches', null) } else { // Convert cm to inches const totalInches = Number(e.target.value) / 2.54 - setHeightFeet(Math.floor(totalInches / 12)) - setHeightInches(totalInches % 12) setProfile('height_in_inches', totalInches) } }} @@ -449,7 +492,7 @@ export const OptionalProfileUserForm = (props: { .filter(Boolean) setProfile('keywords', keywords) }} - className={'w-full sm:w-96'} + className={'w-full sm:w-[600px]'} value={keywordsString} placeholder={t( 'profile.optional.keywords_placeholder', @@ -534,7 +577,6 @@ export const OptionalProfileUserForm = (props: { translationPrefix={'profile.relationship'} onChange={(selected) => { setProfile('pref_relation_styles', selected) - setLookingRelationship((selected || []).includes('relationship')) }} /> @@ -915,70 +957,7 @@ export const OptionalProfileUserForm = (props: { - - {/**/} - -
- {Object.entries((profile.links ?? {}) as Socials) - .filter(([_, value]) => value != null) - .map(([platform, value]) => ( - -
- - {PLATFORM_LABELS[platform as Site] ?? platform} -
- ) => - updateUserLink(platform, e.target.value) - } - className="col-span-2 sm:col-span-1" - /> - updateUserLink(platform, null)}> - -
{t('common.remove', 'Remove')}
-
-
- ))} - - {/* Spacer */} -
- - - ) => setNewLinkValue(e.target.value)} - // disable password managers - autoComplete="off" - data-1p-ignore - data-lpignore="true" - data-bwignore="true" - data-protonpass-ignore="true" - className="w-full" - /> - -
- +