diff --git a/backend/api/package.json b/backend/api/package.json
index 8af54cc8..bc5df228 100644
--- a/backend/api/package.json
+++ b/backend/api/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@compass/api",
-  "version": "1.30.3",
+  "version": "1.31.0",
   "private": true,
   "description": "Backend API endpoints",
   "main": "src/serve.ts",
diff --git a/backend/api/src/llm-extract-profile.ts b/backend/api/src/llm-extract-profile.ts
index 4dc7f588..bac6a71c 100644
--- a/backend/api/src/llm-extract-profile.ts
+++ b/backend/api/src/llm-extract-profile.ts
@@ -43,6 +43,161 @@ function getCacheKey(content: string): string {
   return hash.digest('hex')
 }
 
+/**
+ * Normalises a raw LLM-extracted profile into the value shapes the rest of the
+ * pipeline expects, discarding values that cannot be coerced or are not allowed.
+ *
+ * Fields that are null or undefined are left untouched so the caller can strip
+ * them; coercing them would fabricate values such as "null" or 0.
+ *
+ * NOTE(review): rejections from `searchLocation` propagate. The call site wraps
+ * this function in its JSON-parse try/catch, so they are reported as parse failures.
+ *
+ * @param llmProfile Parsed LLM output; shallow-copied, never mutated.
+ * @param validChoices Allowed values per field; fields without an entry are not filtered.
+ * @returns A copy of `llmProfile` with list, string, number and boolean fields coerced,
+ *   missing coordinates looked up, and `links` restricted to sites in `SITE_ORDER`.
+ */
+async function validateProfileFields(
+  llmProfile: Partial<ProfileWithoutUser>,
+  validChoices: Partial<Record<string, string[]>>,
+): Promise<Partial<ProfileWithoutUser>> {
+  const result: Partial<Record<keyof ProfileWithoutUser, any>> = {...llmProfile}
+  // null and nested objects have no useful string form ("null", "[object Object]").
+  const isScalar = (v: unknown): boolean => v != null && typeof v !== 'object'
+
+  const listFields: (keyof ProfileWithoutUser)[] = [
+    'diet',
+    'ethnicity',
+    'interests',
+    'causes',
+    'work',
+    'languages',
+    'religion',
+    'political_beliefs',
+    'pref_gender',
+    'pref_relation_styles',
+    'pref_romantic_styles',
+    'relationship_status',
+    'keywords',
+  ]
+  for (const key of listFields) {
+    const raw = result[key]
+    if (raw == null) continue
+    const values: string[] = (Array.isArray(raw) ? raw : [raw]).filter(isScalar).map(String)
+    const allowed = validChoices[key]
+    if (!allowed) {
+      result[key] = values
+      continue
+    }
+    // Keep only recognised choices; an emptied list means nothing usable was extracted.
+    const kept = values.filter((v) => allowed.includes(v))
+    result[key] = kept.length > 0 ? kept : undefined
+  }
+
+  const stringFields: (keyof ProfileWithoutUser)[] = [
+    'gender',
+    'education_level',
+    'mbti',
+    'psychedelics',
+    'cannabis',
+    'psychedelics_intention',
+    'cannabis_intention',
+    'psychedelics_pref',
+    'cannabis_pref',
+    'headline',
+    'city',
+    'country',
+    'raised_in_city',
+    'raised_in_country',
+    'university',
+    'company',
+    'occupation_title',
+    'religious_beliefs',
+    'political_details',
+  ]
+  for (const key of stringFields) {
+    const raw = result[key]
+    if (raw == null) continue
+    // A single value is expected; if the LLM returned a list, use its first entry.
+    const first = Array.isArray(raw) ? raw[0] : raw
+    const text = isScalar(first) ? String(first) : undefined
+    const allowed = validChoices[key]
+    const rejected = text === undefined || (allowed !== undefined && !allowed.includes(text))
+    result[key] = rejected ? undefined : text
+  }
+
+  const numberFields: (keyof ProfileWithoutUser)[] = [
+    'age',
+    'height_in_inches',
+    'drinks_per_month',
+    'has_kids',
+    'wants_kids_strength',
+    'big5_openness',
+    'big5_conscientiousness',
+    'big5_extraversion',
+    'big5_agreeableness',
+    'big5_neuroticism',
+    'pref_age_min',
+    'pref_age_max',
+    'city_latitude',
+    'city_longitude',
+    'raised_in_lat',
+    'raised_in_lon',
+  ]
+  for (const key of numberFields) {
+    const raw = result[key]
+    if (raw == null) continue
+    const first = Array.isArray(raw) ? raw[0] : raw
+    // Number('') and Number([]) are 0, which would invent e.g. an age of 0.
+    const blank = !isScalar(first) || (typeof first === 'string' && first.trim() === '')
+    const num = blank ? NaN : Number(first)
+    result[key] = Number.isFinite(num) ? num : undefined
+  }
+
+  const booleanFields: (keyof ProfileWithoutUser)[] = ['is_smoker']
+  const falseWords = ['false', 'no', '0', '']
+  for (const key of booleanFields) {
+    const raw = result[key]
+    if (raw == null) continue
+    // Boolean('false') is true, so negative strings are matched explicitly.
+    result[key] =
+      typeof raw === 'string' ? !falseWords.includes(raw.trim().toLowerCase()) : Boolean(raw)
+  }
+
+  if (result.city) {
+    if (!result.city_latitude || !result.city_longitude) {
+      const response = await searchLocation({term: result.city, limit: 1})
+      const locations = response.data?.data
+      result.city_latitude = locations?.[0]?.latitude
+      result.city_longitude = locations?.[0]?.longitude
+      result.country ??= locations?.[0]?.country
+    }
+  }
+
+  if (result.raised_in_city) {
+    if (!result.raised_in_lat || !result.raised_in_lon) {
+      const response = await searchLocation({term: result.raised_in_city, limit: 1})
+      const locations = response.data?.data
+      result.raised_in_lat = locations?.[0]?.latitude
+      result.raised_in_lon = locations?.[0]?.longitude
+      result.raised_in_country ??= locations?.[0]?.country
+    }
+  }
+
+  if (result.links) {
+    // Keep only recognised sites that have a truthy link, preserving key order.
+    const kept: Record<string, unknown> = {}
+    for (const site of Object.keys(result.links)) {
+      const link = result.links[site]
+      if (link && SITE_ORDER.includes(site as any)) kept[site] = link
+    }
+    result.links = kept
+  }
+
+  return result
+}
+
 async function getCachedResult(cacheKey: string): Promise<Partial<ProfileWithoutUser> | null> {
   if (!USE_CACHE) return null
try { @@ -180,10 +318,34 @@ async function callLLM(content: string, locale?: string): Promise> = { + interests: INTERESTS, + causes: CAUSE_AREAS, + work: WORK_AREAS, + diet: Object.values(DIET_CHOICES), + ethnicity: Object.values(RACE_CHOICES), + languages: Object.values(LANGUAGE_CHOICES), + religion: Object.values(RELIGION_CHOICES), + political_beliefs: Object.values(POLITICAL_CHOICES), + pref_gender: Object.values(GENDERS), + pref_relation_styles: Object.values(RELATIONSHIP_CHOICES), + pref_romantic_styles: Object.values(ROMANTIC_CHOICES), + relationship_status: Object.values(RELATIONSHIP_STATUS_CHOICES), + cannabis: Object.values(CANNABIS_CHOICES), + education_level: Object.values(EDUCATION_CHOICES), + gender: Object.values(GENDERS), + mbti: Object.values(MBTI_CHOICES), + psychedelics: Object.values(PSYCHEDELICS_CHOICES), + psychedelics_intention: Object.values(SUBSTANCE_INTENTION_CHOICES), + cannabis_intention: Object.values(SUBSTANCE_INTENTION_CHOICES), + psychedelics_pref: Object.values(SUBSTANCE_PREFERENCE_CHOICES), + cannabis_pref: Object.values(SUBSTANCE_PREFERENCE_CHOICES), + } + const PROFILE_FIELDS: Partial> = { // Basic info age: 'Number. Age in years.', - gender: `One of: ${Object.values(GENDERS).join(', ')}. Infer if you have enough evidence`, + gender: `String. One of: ${validChoices.pref_gender?.join(', ')}. If multiple mentioned, use the most likely one. Infer if you have enough evidence`, height_in_inches: 'Number. Height converted to inches.', city: 'String. Current city of residence (English spelling).', country: 'String. Current country of residence (English spelling).', @@ -196,7 +358,7 @@ async function callLLM(content: string, locale?: string): Promise try { parsed = typeof outputText === 'string' ? 
JSON.parse(outputText) : outputText + parsed = await validateProfileFields(parsed, validChoices) parsed = removeNullOrUndefinedProps(parsed) } catch (parseError) { log('Failed to parse LLM response as JSON', {outputText, parseError}) throw APIErrors.internalServerError('Failed to parse extracted data') } - if (parsed.city) { - if (!parsed.city_latitude || !parsed.city_longitude) { - const result = await searchLocation({term: parsed.city, limit: 1}) - const locations = result.data?.data - parsed.city_latitude = locations?.[0]?.latitude - parsed.city_longitude = locations?.[0]?.longitude - parsed.country ??= locations?.[0]?.country - } - } - if (parsed.raised_in_city) { - if (!parsed.raised_in_lat || !parsed.raised_in_lon) { - const result = await searchLocation({term: parsed.raised_in_city, limit: 1}) - const locations = result.data?.data - parsed.raised_in_lat = locations?.[0]?.latitude - parsed.raised_in_lon = locations?.[0]?.longitude - parsed.raised_in_country ??= locations?.[0]?.country - } - } - if (parsed.links) { - const sites = Object.keys(parsed.links).filter((key) => SITE_ORDER.includes(key as any)) - parsed.links = sites.reduce( - (acc, key) => { - const link = (parsed.links as Record)[key] - if (link) acc[key] = link - return acc - }, - {} as Record, - ) - } - await setCachedResult(cacheKey, parsed) return parsed