diff --git a/apps/browser-extension/src/utils/formDetector/FieldPatterns.ts b/apps/browser-extension/src/utils/formDetector/FieldPatterns.ts index 111a1332a..dc4046538 100644 --- a/apps/browser-extension/src/utils/formDetector/FieldPatterns.ts +++ b/apps/browser-extension/src/utils/formDetector/FieldPatterns.ts @@ -1,20 +1,31 @@ +/** + * A single field pattern entry: terms that match a field type, plus optional + * terms that veto a match. Exclusions are applied with whole-word semantics + * and let us narrow broad include patterns (e.g. "token" hits "test-tokenfield" + * widgets that aren't actually 2FA inputs). + */ +export type FieldPatternEntry = { + include: string[]; + exclude?: string[]; +} + /** * Type for field patterns. These patterns are used to detect individual fields in the form. */ export type FieldPatterns = { - username: string[]; - firstName: string[]; - lastName: string[]; - fullName: string[]; - email: string[]; - emailConfirm: string[]; - password: string[]; - birthdate: string[]; - gender: string[]; - birthDateDay: string[]; - birthDateMonth: string[]; - birthDateYear: string[]; - totp: string[]; + username: FieldPatternEntry; + firstName: FieldPatternEntry; + lastName: FieldPatternEntry; + fullName: FieldPatternEntry; + email: FieldPatternEntry; + emailConfirm: FieldPatternEntry; + password: FieldPatternEntry; + birthdate: FieldPatternEntry; + gender: FieldPatternEntry; + birthDateDay: FieldPatternEntry; + birthDateMonth: FieldPatternEntry; + birthDateYear: FieldPatternEntry; + totp: FieldPatternEntry; } /** @@ -53,21 +64,29 @@ export type FieldExclusionPatterns = string[]; /** * English field patterns to detect English form fields. + * + * Each entry has an `include` list (terms that match the field type) and an + * optional `exclude` list (terms that veto a match for this field type only). + * Excludes are matched with whole-word semantics — useful for narrowing broad + * include terms (e.g. TOTP's "token" matches "test-tokenfield" widgets). */ export const EnglishFieldPatterns: FieldPatterns = { - username: ['username', 'login', 'identifier', 'user'], - fullName: ['fullname', 'full-name', 'full name'], - firstName: ['firstname', 'first-name', 'first_name', 'fname', 'name', 'given-name'], - lastName: ['lastname', 'last-name', 'last_name', 'lname', 'surname', 'family-name'], - email: ['email', 'mail', 'emailaddress'], - emailConfirm: ['confirm', 'verification', 'repeat', 'retype', 'verify', 'email2'], - password: ['password', 'pwd', 'pass'], - birthdate: ['birthdate', 'birth-date', 'dob', 'date-of-birth'], - gender: ['gender', 'sex'], - birthDateDay: ['-day', 'birthdate_d', 'birthdayday', '_day', 'day'], - birthDateMonth: ['-month', 'birthdate_m', 'birthdaymonth', '_month', 'month'], - birthDateYear: ['-year', 'birthdate_y', 'birthdayyear', '_year', 'year'], - totp: ['totp', 'otp', 'one-time', 'onetime', 'six-digit', 'digit-code', 'token', 'authenticator', 'authentication', '2fa', 'twofa', 'two-factor', 'mfa', 'security-code', 'auth-code', 'passcode', 'pin-code', 'pincode', 'google_code', 'verification-code', 'verificationcode', 'tfa', 'tfacode', 'second-factor', 'one time password', 'code'] + username: { include: ['username', 'login', 'identifier', 'user'] }, + fullName: { include: ['fullname', 'full-name', 'full name'] }, + firstName: { include: ['firstname', 'first-name', 'first_name', 'fname', 'name', 'given-name'] }, + lastName: { include: ['lastname', 'last-name', 'last_name', 'lname', 'surname', 'family-name'] }, + email: { include: ['email', 'mail', 'emailaddress'] }, + emailConfirm: { include: ['confirm', 'verification', 'repeat', 'retype', 'verify', 'email2'] }, + password: { include: ['password', 'pwd', 'pass'] }, + birthdate: { include: ['birthdate', 'birth-date', 'dob', 'date-of-birth'] }, + gender: { include: ['gender', 'sex'] }, + birthDateDay: { include: ['-day', 'birthdate_d', 'birthdayday', '_day', 'day'] }, + birthDateMonth: { include: ['-month', 'birthdate_m', 'birthdaymonth', '_month', 'month'] }, + birthDateYear: { include: ['-year', 'birthdate_y', 'birthdayyear', '_year', 'year'] }, + totp: { + include: ['totp', 'otp', 'one-time', 'onetime', 'six-digit', 'digit-code', 'token', 'authenticator', 'authentication', '2fa', 'twofa', 'two-factor', 'mfa', 'security-code', 'auth-code', 'passcode', 'pin-code', 'pincode', 'google_code', 'verification-code', 'verificationcode', 'tfa', 'tfacode', 'second-factor', 'one time password', 'code'], + exclude: ['test'] + } }; /** @@ -163,19 +182,19 @@ export const EnglishStopWords = new Set([ * Dutch field patterns used to detect Dutch form fields. */ export const DutchFieldPatterns: FieldPatterns = { - username: ['gebruikersnaam', 'gebruiker', 'login', 'identifier'], - fullName: ['volledige naam'], - firstName: ['voornaam', 'naam'], - lastName: ['achternaam'], - email: ['e-mailadres', 'e-mail'], - emailConfirm: ['bevestig', 'herhaal', 'verificatie'], - password: ['wachtwoord', 'pwd'], - birthdate: ['geboortedatum', 'geboorte-datum'], - gender: ['geslacht', 'aanhef'], - birthDateDay: ['dag'], - birthDateMonth: ['maand'], - birthDateYear: ['jaar'], - totp: ['verificatiecode', 'eenmalig', 'authenticatie', 'tweefactor', 'beveiligingscode'] + username: { include: ['gebruikersnaam', 'gebruiker', 'login', 'identifier'] }, + fullName: { include: ['volledige naam'] }, + firstName: { include: ['voornaam', 'naam'] }, + lastName: { include: ['achternaam'] }, + email: { include: ['e-mailadres', 'e-mail'] }, + emailConfirm: { include: ['bevestig', 'herhaal', 'verificatie'] }, + password: { include: ['wachtwoord', 'pwd'] }, + birthdate: { include: ['geboortedatum', 'geboorte-datum'] }, + gender: { include: ['geslacht', 'aanhef'] }, + birthDateDay: { include: ['dag'] }, + birthDateMonth: { include: ['maand'] }, + birthDateYear: { include: ['jaar'] }, + totp: { include: ['verificatiecode', 'eenmalig', 'authenticatie', 'tweefactor', 'beveiligingscode'] } }; /** @@ -271,6 +290,16 @@ export const DutchStopWords = new Set([ */ import { TranslationEmailPatterns, TranslationUsernamePatterns, TranslationPasswordPatterns } from './TranslationPatterns'; +/** + * Merge per-field-type entries (include + optional exclude) from one or more + * languages, deduping each list while preserving original order. + */ +function mergeEntries(...entries: FieldPatternEntry[]): FieldPatternEntry { + const include = [...new Set(entries.flatMap(e => e.include))]; + const exclude = [...new Set(entries.flatMap(e => e.exclude ?? []))]; + return exclude.length > 0 ? { include, exclude } : { include }; +} + /** * Combined field patterns which includes all supported languages. * This includes: @@ -278,10 +307,10 @@ import { TranslationEmailPatterns, TranslationUsernamePatterns, TranslationPassw * - Translation-based patterns from all supported languages */ export const CombinedFieldPatterns: FieldPatterns = { - username: [...new Set([...EnglishFieldPatterns.username, ...DutchFieldPatterns.username, ...TranslationUsernamePatterns])], - fullName: [...new Set([...EnglishFieldPatterns.fullName, ...DutchFieldPatterns.fullName])], - firstName: [...new Set([...EnglishFieldPatterns.firstName, ...DutchFieldPatterns.firstName])], - lastName: [...new Set([...EnglishFieldPatterns.lastName, ...DutchFieldPatterns.lastName])], + username: mergeEntries(EnglishFieldPatterns.username, DutchFieldPatterns.username, { include: TranslationUsernamePatterns }), + fullName: mergeEntries(EnglishFieldPatterns.fullName, DutchFieldPatterns.fullName), + firstName: mergeEntries(EnglishFieldPatterns.firstName, DutchFieldPatterns.firstName), + lastName: mergeEntries(EnglishFieldPatterns.lastName, DutchFieldPatterns.lastName), /** * NOTE: Dutch email patterns should be prioritized over English email patterns due to how * the nl-registration-form5.html honeypot field is named. The order of the patterns @@ -290,15 +319,15 @@ export const CombinedFieldPatterns: FieldPatterns = { * * Translation patterns are added last to catch all language variations (e.g., "E-post" in Swedish) */ - email: [...new Set([...DutchFieldPatterns.email, ...EnglishFieldPatterns.email, ...TranslationEmailPatterns])], - emailConfirm: [...new Set([...EnglishFieldPatterns.emailConfirm, ...DutchFieldPatterns.emailConfirm])], - password: [...new Set([...EnglishFieldPatterns.password, ...DutchFieldPatterns.password, ...TranslationPasswordPatterns])], - birthdate: [...new Set([...EnglishFieldPatterns.birthdate, ...DutchFieldPatterns.birthdate])], - gender: [...new Set([...EnglishFieldPatterns.gender, ...DutchFieldPatterns.gender])], - birthDateDay: [...new Set([...EnglishFieldPatterns.birthDateDay, ...DutchFieldPatterns.birthDateDay])], - birthDateMonth: [...new Set([...EnglishFieldPatterns.birthDateMonth, ...DutchFieldPatterns.birthDateMonth])], - birthDateYear: [...new Set([...EnglishFieldPatterns.birthDateYear, ...DutchFieldPatterns.birthDateYear])], - totp: [...new Set([...EnglishFieldPatterns.totp, ...DutchFieldPatterns.totp])] + email: mergeEntries(DutchFieldPatterns.email, EnglishFieldPatterns.email, { include: TranslationEmailPatterns }), + emailConfirm: mergeEntries(EnglishFieldPatterns.emailConfirm, DutchFieldPatterns.emailConfirm), + password: mergeEntries(EnglishFieldPatterns.password, DutchFieldPatterns.password, { include: TranslationPasswordPatterns }), + birthdate: mergeEntries(EnglishFieldPatterns.birthdate, DutchFieldPatterns.birthdate), + gender: mergeEntries(EnglishFieldPatterns.gender, DutchFieldPatterns.gender), + birthDateDay: mergeEntries(EnglishFieldPatterns.birthDateDay, DutchFieldPatterns.birthDateDay), + birthDateMonth: mergeEntries(EnglishFieldPatterns.birthDateMonth, DutchFieldPatterns.birthDateMonth), + birthDateYear: mergeEntries(EnglishFieldPatterns.birthDateYear, DutchFieldPatterns.birthDateYear), + totp: mergeEntries(EnglishFieldPatterns.totp, DutchFieldPatterns.totp) }; /** diff --git a/apps/browser-extension/src/utils/formDetector/FormDetector.ts b/apps/browser-extension/src/utils/formDetector/FormDetector.ts index f8e81daf7..bcb64c697 100644 --- a/apps/browser-extension/src/utils/formDetector/FormDetector.ts +++ b/apps/browser-extension/src/utils/formDetector/FormDetector.ts @@ -1,4 +1,4 @@ -import { CombinedEmailVerificationPatterns, CombinedFieldExclusionPatterns, CombinedFieldPatterns, CombinedGenderOptionPatterns, CombinedStopWords } from "./FieldPatterns"; +import { CombinedEmailVerificationPatterns, CombinedFieldExclusionPatterns, CombinedFieldPatterns, CombinedGenderOptionPatterns, CombinedStopWords, FieldPatternEntry } from "./FieldPatterns"; import { DetectedFieldType, FormFields } from "./types/FormFields"; /** @@ -284,6 +284,43 @@ export class FormDetector { return false; } + /** + * Check if an input matches a field-type-specific exclusion pattern (e.g. TOTP "test"). + * Whole-word matching is used so a pattern like "test" doesn't reject "latest". + */ + private matchesEntryExclude(input: HTMLInputElement, entry: FieldPatternEntry): boolean { + if (!entry.exclude || entry.exclude.length === 0) { + return false; + } + + const attributesToCheck = [ + input.id, + input.getAttribute('name'), + input.getAttribute('placeholder'), + input.getAttribute('class'), + input.getAttribute('aria-label') + ] + .map(a => a?.toLowerCase() ?? '') + .filter(a => a.length > 0); + + if (input.id || input.getAttribute('name')) { + const label = this.document.querySelector(`label[for="${input.id || input.getAttribute('name')}"]`); + if (label) { + attributesToCheck.push(label.textContent?.toLowerCase() ?? ''); + } + } + + for (const attr of attributesToCheck) { + for (const pattern of entry.exclude) { + if (this.matchesWordBoundary(attr, pattern)) { + return true; + } + } + } + + return false; + } + /** * Check if an input field is likely a fake/honeypot field used to prevent autofill. * These fields are intentionally hidden from users but present in the DOM. @@ -578,11 +615,12 @@ export class FormDetector { */ private findAllInputFields( form: HTMLFormElement | null, - patterns: string[], + entry: FieldPatternEntry, types: string[], excludeElements: HTMLInputElement[] = [], checkVisibility: boolean = true ): HTMLInputElement[] { + const patterns = entry.include; // Query for standard input elements, select elements, and elements with type attributes const standardCandidates = form ? Array.from(form.querySelectorAll('input, select, [type]')) @@ -625,6 +663,14 @@ export class FormDetector { continue; } + /* + * Skip fields that match this field-type's own exclude list (e.g. "test" + * for TOTP guards "test-tokenfield" widgets from matching "token"). + */ + if (this.matchesEntryExclude(input as HTMLInputElement, entry)) { + continue; + } + /* * Skip fake/honeypot fields (e.g., fields with "fake" in name/id, tabindex="-1", etc.) */ @@ -689,17 +735,17 @@ export class FormDetector { // Direct autocomplete matches take highest priority (score -2, higher than type=email at -1) if (autocomplete) { // Match autocomplete="username" for username patterns - if (patterns === CombinedFieldPatterns.username && autocomplete === 'username') { + if (entry === CombinedFieldPatterns.username && autocomplete === 'username') { matches.push({ input: input as HTMLInputElement, score: -2 }); continue; } // Match autocomplete="email" for email patterns - if (patterns === CombinedFieldPatterns.email && autocomplete === 'email') { + if (entry === CombinedFieldPatterns.email && autocomplete === 'email') { matches.push({ input: input as HTMLInputElement, score: -2 }); continue; } // Match autocomplete="current-password" or "new-password" for password patterns - if (patterns === CombinedFieldPatterns.password && + if (entry === CombinedFieldPatterns.password && (autocomplete === 'current-password' || autocomplete === 'new-password')) { matches.push({ input: input as HTMLInputElement, score: -2 }); continue; @@ -713,7 +759,7 @@ export class FormDetector { const ariaDescribedById = input.getAttribute('aria-describedby')?.toLowerCase() ?? ''; if (ariaDescribedById) { // Match aria-describedby containing "username" for username patterns - if (patterns === CombinedFieldPatterns.username && + if (entry === CombinedFieldPatterns.username && ariaDescribedById.includes('username')) { matches.push({ input: input as HTMLInputElement, score: -2 }); continue; @@ -855,12 +901,12 @@ export class FormDetector { */ private findInputField( form: HTMLFormElement | null, - patterns: string[], + entry: FieldPatternEntry, types: string[], excludeElements: HTMLInputElement[] = [], checkVisibility: boolean = true ): HTMLInputElement | null { - const all = this.findAllInputFields(form, patterns, types, excludeElements, checkVisibility); + const all = this.findAllInputFields(form, entry, types, excludeElements, checkVisibility); // Filter out parent-child duplicates and fields overlapping with excludeElements const filtered = this.filterOutNestedDuplicates(all, excludeElements); @@ -922,10 +968,10 @@ export class FormDetector { /* * Check if label contains BOTH username and email patterns (dual-purpose field) */ - const labelHasUsername = CombinedFieldPatterns.username.some(pattern => + const labelHasUsername = CombinedFieldPatterns.username.include.some(pattern => labelText.includes(pattern) ); - const labelHasEmail = CombinedFieldPatterns.email.some(pattern => + const labelHasEmail = CombinedFieldPatterns.email.include.some(pattern => labelText.includes(pattern) ); @@ -935,10 +981,10 @@ export class FormDetector { * 2. AND the field's name/id contains username pattern but NOT email pattern */ if (labelHasUsername && labelHasEmail) { - const hasUsernameInNameOrId = CombinedFieldPatterns.username.some(pattern => + const hasUsernameInNameOrId = CombinedFieldPatterns.username.include.some(pattern => fieldAttributes.includes(pattern) ); - const hasEmailInNameOrId = CombinedFieldPatterns.email.some(pattern => + const hasEmailInNameOrId = CombinedFieldPatterns.email.include.some(pattern => fieldAttributes.includes(pattern) ); @@ -1356,6 +1402,14 @@ export class FormDetector { continue; } + /* + * Apply the TOTP entry's exclude list to the heuristic fallback too + * (e.g. don't classify a "test-tokenfield" widget as TOTP). + */ + if (this.matchesEntryExclude(input, CombinedFieldPatterns.totp)) { + continue; + } + // Check for autocomplete="one-time-code" const autocomplete = input.getAttribute('autocomplete')?.toLowerCase() ?? ''; if (autocomplete === 'one-time-code') { diff --git a/apps/browser-extension/src/utils/formDetector/__tests__/FormDetector.exclusion.test.ts b/apps/browser-extension/src/utils/formDetector/__tests__/FormDetector.exclusion.test.ts index 388423cd0..e7b748be6 100644 --- a/apps/browser-extension/src/utils/formDetector/__tests__/FormDetector.exclusion.test.ts +++ b/apps/browser-extension/src/utils/formDetector/__tests__/FormDetector.exclusion.test.ts @@ -48,6 +48,37 @@ describe('FormDetector - Field Exclusion Patterns', () => { }); }); + describe('Real-world scenario: Test/widget tokenfield containing "token"', () => { + const htmlFile = 'exclusion-test-tokenfield.html'; + + it('should not detect a "test-tokenfield" widget input as TOTP', () => { + const dom = createTestDom(htmlFile); + const document = dom.window.document; + + const widgetInput = document.getElementById('active-input'); + const formDetector = new FormDetector(document, widgetInput as HTMLElement); + + /* + * The TOTP include pattern matches "token" via substring, which would otherwise + * hit the "test-tokenfield-input" class. The per-field exclude ('test') vetoes + * it, so this widget should not be classified as a 2FA field. + */ + expect(formDetector.containsLoginForm()).toBe(false); + expect(formDetector.getDetectedFieldType()).toBeNull(); + }); + + it('should not detect the placeholder editor input as TOTP either', () => { + const dom = createTestDom(htmlFile); + const document = dom.window.document; + + const placeholderInput = document.getElementById('placeholder-input'); + const formDetector = new FormDetector(document, placeholderInput as HTMLElement); + + expect(formDetector.containsLoginForm()).toBe(false); + expect(formDetector.getDetectedFieldType()).toBeNull(); + }); + }); + describe('Exclusion patterns should not affect legitimate login fields', () => { const htmlFile = 'exclusion-legitimate-login.html'; diff --git a/apps/browser-extension/src/utils/formDetector/__tests__/test-forms/exclusion-test-tokenfield.html b/apps/browser-extension/src/utils/formDetector/__tests__/test-forms/exclusion-test-tokenfield.html new file mode 100644 index 000000000..db68da742 --- /dev/null +++ b/apps/browser-extension/src/utils/formDetector/__tests__/test-forms/exclusion-test-tokenfield.html @@ -0,0 +1,35 @@ + + + + Test Tokenfield Widget + + + +
+
+
+
+
+
+ +
+ +
s​
+
+
+
+
+ +
+ +
+
+
+
+ +