Update browser extension detect to support include and exclude patterns (#1998)

This commit is contained in:
Leendert de Borst
2026-05-08 15:41:51 +02:00
committed by Leendert de Borst
parent ef3e026d86
commit 463df889ba
4 changed files with 213 additions and 64 deletions

View File

@@ -1,20 +1,31 @@
/**
* A single field pattern entry: terms that match a field type, plus optional
* terms that veto a match. Exclusions are applied with whole-word semantics
* and let us narrow broad include patterns (e.g. "token" hits "test-tokenfield"
* widgets that aren't actually 2FA inputs).
*
* Exclusions are scoped to the entry they belong to: an `exclude` term only
* vetoes a match for this field type, not for any other field type.
*/
export type FieldPatternEntry = {
/** Terms that identify a field as this field type. */
include: string[];
/** Optional terms that veto a match for this field type when found as a whole word. */
exclude?: string[];
}
/**
* Type for field patterns. These patterns are used to detect individual fields in the form.
*/
export type FieldPatterns = {
username: string[];
firstName: string[];
lastName: string[];
fullName: string[];
email: string[];
emailConfirm: string[];
password: string[];
birthdate: string[];
gender: string[];
birthDateDay: string[];
birthDateMonth: string[];
birthDateYear: string[];
totp: string[];
username: FieldPatternEntry;
firstName: FieldPatternEntry;
lastName: FieldPatternEntry;
fullName: FieldPatternEntry;
email: FieldPatternEntry;
emailConfirm: FieldPatternEntry;
password: FieldPatternEntry;
birthdate: FieldPatternEntry;
gender: FieldPatternEntry;
birthDateDay: FieldPatternEntry;
birthDateMonth: FieldPatternEntry;
birthDateYear: FieldPatternEntry;
totp: FieldPatternEntry;
}
/**
@@ -53,21 +64,29 @@ export type FieldExclusionPatterns = string[];
/**
* English field patterns to detect English form fields.
*
* Each entry has an `include` list (terms that match the field type) and an
* optional `exclude` list (terms that veto a match for this field type only).
* Excludes are matched with whole-word semantics — useful for narrowing broad
* include terms (e.g. TOTP's "token" matches "test-tokenfield" widgets).
*/
export const EnglishFieldPatterns: FieldPatterns = {
username: ['username', 'login', 'identifier', 'user'],
fullName: ['fullname', 'full-name', 'full name'],
firstName: ['firstname', 'first-name', 'first_name', 'fname', 'name', 'given-name'],
lastName: ['lastname', 'last-name', 'last_name', 'lname', 'surname', 'family-name'],
email: ['email', 'mail', 'emailaddress'],
emailConfirm: ['confirm', 'verification', 'repeat', 'retype', 'verify', 'email2'],
password: ['password', 'pwd', 'pass'],
birthdate: ['birthdate', 'birth-date', 'dob', 'date-of-birth'],
gender: ['gender', 'sex'],
birthDateDay: ['-day', 'birthdate_d', 'birthdayday', '_day', 'day'],
birthDateMonth: ['-month', 'birthdate_m', 'birthdaymonth', '_month', 'month'],
birthDateYear: ['-year', 'birthdate_y', 'birthdayyear', '_year', 'year'],
totp: ['totp', 'otp', 'one-time', 'onetime', 'six-digit', 'digit-code', 'token', 'authenticator', 'authentication', '2fa', 'twofa', 'two-factor', 'mfa', 'security-code', 'auth-code', 'passcode', 'pin-code', 'pincode', 'google_code', 'verification-code', 'verificationcode', 'tfa', 'tfacode', 'second-factor', 'one time password', 'code']
username: { include: ['username', 'login', 'identifier', 'user'] },
fullName: { include: ['fullname', 'full-name', 'full name'] },
firstName: { include: ['firstname', 'first-name', 'first_name', 'fname', 'name', 'given-name'] },
lastName: { include: ['lastname', 'last-name', 'last_name', 'lname', 'surname', 'family-name'] },
email: { include: ['email', 'mail', 'emailaddress'] },
emailConfirm: { include: ['confirm', 'verification', 'repeat', 'retype', 'verify', 'email2'] },
password: { include: ['password', 'pwd', 'pass'] },
birthdate: { include: ['birthdate', 'birth-date', 'dob', 'date-of-birth'] },
gender: { include: ['gender', 'sex'] },
birthDateDay: { include: ['-day', 'birthdate_d', 'birthdayday', '_day', 'day'] },
birthDateMonth: { include: ['-month', 'birthdate_m', 'birthdaymonth', '_month', 'month'] },
birthDateYear: { include: ['-year', 'birthdate_y', 'birthdayyear', '_year', 'year'] },
totp: {
include: ['totp', 'otp', 'one-time', 'onetime', 'six-digit', 'digit-code', 'token', 'authenticator', 'authentication', '2fa', 'twofa', 'two-factor', 'mfa', 'security-code', 'auth-code', 'passcode', 'pin-code', 'pincode', 'google_code', 'verification-code', 'verificationcode', 'tfa', 'tfacode', 'second-factor', 'one time password', 'code'],
exclude: ['test']
}
};
/**
@@ -163,19 +182,19 @@ export const EnglishStopWords = new Set([
* Dutch field patterns used to detect Dutch form fields.
*/
export const DutchFieldPatterns: FieldPatterns = {
username: ['gebruikersnaam', 'gebruiker', 'login', 'identifier'],
fullName: ['volledige naam'],
firstName: ['voornaam', 'naam'],
lastName: ['achternaam'],
email: ['e-mailadres', 'e-mail'],
emailConfirm: ['bevestig', 'herhaal', 'verificatie'],
password: ['wachtwoord', 'pwd'],
birthdate: ['geboortedatum', 'geboorte-datum'],
gender: ['geslacht', 'aanhef'],
birthDateDay: ['dag'],
birthDateMonth: ['maand'],
birthDateYear: ['jaar'],
totp: ['verificatiecode', 'eenmalig', 'authenticatie', 'tweefactor', 'beveiligingscode']
username: { include: ['gebruikersnaam', 'gebruiker', 'login', 'identifier'] },
fullName: { include: ['volledige naam'] },
firstName: { include: ['voornaam', 'naam'] },
lastName: { include: ['achternaam'] },
email: { include: ['e-mailadres', 'e-mail'] },
emailConfirm: { include: ['bevestig', 'herhaal', 'verificatie'] },
password: { include: ['wachtwoord', 'pwd'] },
birthdate: { include: ['geboortedatum', 'geboorte-datum'] },
gender: { include: ['geslacht', 'aanhef'] },
birthDateDay: { include: ['dag'] },
birthDateMonth: { include: ['maand'] },
birthDateYear: { include: ['jaar'] },
totp: { include: ['verificatiecode', 'eenmalig', 'authenticatie', 'tweefactor', 'beveiligingscode'] }
};
/**
@@ -271,6 +290,16 @@ export const DutchStopWords = new Set([
*/
import { TranslationEmailPatterns, TranslationUsernamePatterns, TranslationPasswordPatterns } from './TranslationPatterns';
/**
 * Combine the per-field-type entries of several languages into a single entry.
 *
 * Include terms and exclude terms are collected separately; duplicates are
 * dropped and the first occurrence of each term keeps its position. The
 * `exclude` property is only present on the result when at least one exclude
 * term was supplied.
 *
 * @param entries - Entries to merge, in priority order.
 * @returns A new entry holding the de-duplicated include (and optional exclude) terms.
 */
function mergeEntries(...entries: FieldPatternEntry[]): FieldPatternEntry {
  const includeTerms = new Set<string>();
  const excludeTerms = new Set<string>();

  for (const entry of entries) {
    for (const term of entry.include) {
      includeTerms.add(term);
    }
    for (const term of entry.exclude ?? []) {
      excludeTerms.add(term);
    }
  }

  const merged: FieldPatternEntry = { include: Array.from(includeTerms) };
  if (excludeTerms.size > 0) {
    merged.exclude = Array.from(excludeTerms);
  }
  return merged;
}
/**
* Combined field patterns which includes all supported languages.
* This includes:
@@ -278,10 +307,10 @@ import { TranslationEmailPatterns, TranslationUsernamePatterns, TranslationPassw
* - Translation-based patterns from all supported languages
*/
export const CombinedFieldPatterns: FieldPatterns = {
username: [...new Set([...EnglishFieldPatterns.username, ...DutchFieldPatterns.username, ...TranslationUsernamePatterns])],
fullName: [...new Set([...EnglishFieldPatterns.fullName, ...DutchFieldPatterns.fullName])],
firstName: [...new Set([...EnglishFieldPatterns.firstName, ...DutchFieldPatterns.firstName])],
lastName: [...new Set([...EnglishFieldPatterns.lastName, ...DutchFieldPatterns.lastName])],
username: mergeEntries(EnglishFieldPatterns.username, DutchFieldPatterns.username, { include: TranslationUsernamePatterns }),
fullName: mergeEntries(EnglishFieldPatterns.fullName, DutchFieldPatterns.fullName),
firstName: mergeEntries(EnglishFieldPatterns.firstName, DutchFieldPatterns.firstName),
lastName: mergeEntries(EnglishFieldPatterns.lastName, DutchFieldPatterns.lastName),
/**
* NOTE: Dutch email patterns should be prioritized over English email patterns due to how
* the nl-registration-form5.html honeypot field is named. The order of the patterns
@@ -290,15 +319,15 @@ export const CombinedFieldPatterns: FieldPatterns = {
*
* Translation patterns are added last to catch all language variations (e.g., "E-post" in Swedish)
*/
email: [...new Set([...DutchFieldPatterns.email, ...EnglishFieldPatterns.email, ...TranslationEmailPatterns])],
emailConfirm: [...new Set([...EnglishFieldPatterns.emailConfirm, ...DutchFieldPatterns.emailConfirm])],
password: [...new Set([...EnglishFieldPatterns.password, ...DutchFieldPatterns.password, ...TranslationPasswordPatterns])],
birthdate: [...new Set([...EnglishFieldPatterns.birthdate, ...DutchFieldPatterns.birthdate])],
gender: [...new Set([...EnglishFieldPatterns.gender, ...DutchFieldPatterns.gender])],
birthDateDay: [...new Set([...EnglishFieldPatterns.birthDateDay, ...DutchFieldPatterns.birthDateDay])],
birthDateMonth: [...new Set([...EnglishFieldPatterns.birthDateMonth, ...DutchFieldPatterns.birthDateMonth])],
birthDateYear: [...new Set([...EnglishFieldPatterns.birthDateYear, ...DutchFieldPatterns.birthDateYear])],
totp: [...new Set([...EnglishFieldPatterns.totp, ...DutchFieldPatterns.totp])]
email: mergeEntries(DutchFieldPatterns.email, EnglishFieldPatterns.email, { include: TranslationEmailPatterns }),
emailConfirm: mergeEntries(EnglishFieldPatterns.emailConfirm, DutchFieldPatterns.emailConfirm),
password: mergeEntries(EnglishFieldPatterns.password, DutchFieldPatterns.password, { include: TranslationPasswordPatterns }),
birthdate: mergeEntries(EnglishFieldPatterns.birthdate, DutchFieldPatterns.birthdate),
gender: mergeEntries(EnglishFieldPatterns.gender, DutchFieldPatterns.gender),
birthDateDay: mergeEntries(EnglishFieldPatterns.birthDateDay, DutchFieldPatterns.birthDateDay),
birthDateMonth: mergeEntries(EnglishFieldPatterns.birthDateMonth, DutchFieldPatterns.birthDateMonth),
birthDateYear: mergeEntries(EnglishFieldPatterns.birthDateYear, DutchFieldPatterns.birthDateYear),
totp: mergeEntries(EnglishFieldPatterns.totp, DutchFieldPatterns.totp)
};
/**

View File

@@ -1,4 +1,4 @@
import { CombinedEmailVerificationPatterns, CombinedFieldExclusionPatterns, CombinedFieldPatterns, CombinedGenderOptionPatterns, CombinedStopWords } from "./FieldPatterns";
import { CombinedEmailVerificationPatterns, CombinedFieldExclusionPatterns, CombinedFieldPatterns, CombinedGenderOptionPatterns, CombinedStopWords, FieldPatternEntry } from "./FieldPatterns";
import { DetectedFieldType, FormFields } from "./types/FormFields";
/**
@@ -284,6 +284,43 @@ export class FormDetector {
return false;
}
/**
 * Check if an input matches a field-type-specific exclusion pattern (e.g. TOTP "test").
 * Whole-word matching (via matchesWordBoundary) is used so a pattern like "test"
 * doesn't reject "latest".
 *
 * The lowercased id, name, placeholder, class and aria-label of the input are checked,
 * plus the text of a label whose `for` attribute equals the input's id (or its name
 * when the input has no id).
 *
 * @param input - The candidate input element.
 * @param entry - The field pattern entry whose `exclude` list is applied.
 * @returns True when any checked text matches one of the entry's exclude patterns.
 */
private matchesEntryExclude(input: HTMLInputElement, entry: FieldPatternEntry): boolean {
  const excludePatterns = entry.exclude;
  if (!excludePatterns || excludePatterns.length === 0) {
    return false;
  }

  const attributesToCheck = [
    input.id,
    input.getAttribute('name'),
    input.getAttribute('placeholder'),
    input.getAttribute('class'),
    input.getAttribute('aria-label')
  ]
    .map(a => a?.toLowerCase() ?? '')
    .filter(a => a.length > 0);

  const labelTarget = input.id || input.getAttribute('name');
  if (labelTarget) {
    /*
     * The id/name comes from the host page and may contain quotes, backslashes or
     * line breaks. Escape it as a CSS string so querySelector cannot throw a
     * SyntaxError on such values.
     */
    const escapedTarget = labelTarget
      .replace(/["\\]/g, '\\$&')
      .replace(/\r\n|[\n\r\f]/g, '\\a ');
    const label = this.document.querySelector(`label[for="${escapedTarget}"]`);
    const labelText = label?.textContent?.toLowerCase() ?? '';
    if (labelText.length > 0) {
      attributesToCheck.push(labelText);
    }
  }

  return attributesToCheck.some(attr =>
    excludePatterns.some(pattern => this.matchesWordBoundary(attr, pattern))
  );
}
/**
* Check if an input field is likely a fake/honeypot field used to prevent autofill.
* These fields are intentionally hidden from users but present in the DOM.
@@ -578,11 +615,12 @@ export class FormDetector {
*/
private findAllInputFields(
form: HTMLFormElement | null,
patterns: string[],
entry: FieldPatternEntry,
types: string[],
excludeElements: HTMLInputElement[] = [],
checkVisibility: boolean = true
): HTMLInputElement[] {
const patterns = entry.include;
// Query for standard input elements, select elements, and elements with type attributes
const standardCandidates = form
? Array.from(form.querySelectorAll<HTMLElement>('input, select, [type]'))
@@ -625,6 +663,14 @@ export class FormDetector {
continue;
}
/*
* Skip fields that match this field-type's own exclude list (e.g. "test"
* for TOTP guards "test-tokenfield" widgets from matching "token").
*/
if (this.matchesEntryExclude(input as HTMLInputElement, entry)) {
continue;
}
/*
* Skip fake/honeypot fields (e.g., fields with "fake" in name/id, tabindex="-1", etc.)
*/
@@ -689,17 +735,17 @@ export class FormDetector {
// Direct autocomplete matches take highest priority (score -2, higher than type=email at -1)
if (autocomplete) {
// Match autocomplete="username" for username patterns
if (patterns === CombinedFieldPatterns.username && autocomplete === 'username') {
if (entry === CombinedFieldPatterns.username && autocomplete === 'username') {
matches.push({ input: input as HTMLInputElement, score: -2 });
continue;
}
// Match autocomplete="email" for email patterns
if (patterns === CombinedFieldPatterns.email && autocomplete === 'email') {
if (entry === CombinedFieldPatterns.email && autocomplete === 'email') {
matches.push({ input: input as HTMLInputElement, score: -2 });
continue;
}
// Match autocomplete="current-password" or "new-password" for password patterns
if (patterns === CombinedFieldPatterns.password &&
if (entry === CombinedFieldPatterns.password &&
(autocomplete === 'current-password' || autocomplete === 'new-password')) {
matches.push({ input: input as HTMLInputElement, score: -2 });
continue;
@@ -713,7 +759,7 @@ export class FormDetector {
const ariaDescribedById = input.getAttribute('aria-describedby')?.toLowerCase() ?? '';
if (ariaDescribedById) {
// Match aria-describedby containing "username" for username patterns
if (patterns === CombinedFieldPatterns.username &&
if (entry === CombinedFieldPatterns.username &&
ariaDescribedById.includes('username')) {
matches.push({ input: input as HTMLInputElement, score: -2 });
continue;
@@ -855,12 +901,12 @@ export class FormDetector {
*/
private findInputField(
form: HTMLFormElement | null,
patterns: string[],
entry: FieldPatternEntry,
types: string[],
excludeElements: HTMLInputElement[] = [],
checkVisibility: boolean = true
): HTMLInputElement | null {
const all = this.findAllInputFields(form, patterns, types, excludeElements, checkVisibility);
const all = this.findAllInputFields(form, entry, types, excludeElements, checkVisibility);
// Filter out parent-child duplicates and fields overlapping with excludeElements
const filtered = this.filterOutNestedDuplicates(all, excludeElements);
@@ -922,10 +968,10 @@ export class FormDetector {
/*
* Check if label contains BOTH username and email patterns (dual-purpose field)
*/
const labelHasUsername = CombinedFieldPatterns.username.some(pattern =>
const labelHasUsername = CombinedFieldPatterns.username.include.some(pattern =>
labelText.includes(pattern)
);
const labelHasEmail = CombinedFieldPatterns.email.some(pattern =>
const labelHasEmail = CombinedFieldPatterns.email.include.some(pattern =>
labelText.includes(pattern)
);
@@ -935,10 +981,10 @@ export class FormDetector {
* 2. AND the field's name/id contains username pattern but NOT email pattern
*/
if (labelHasUsername && labelHasEmail) {
const hasUsernameInNameOrId = CombinedFieldPatterns.username.some(pattern =>
const hasUsernameInNameOrId = CombinedFieldPatterns.username.include.some(pattern =>
fieldAttributes.includes(pattern)
);
const hasEmailInNameOrId = CombinedFieldPatterns.email.some(pattern =>
const hasEmailInNameOrId = CombinedFieldPatterns.email.include.some(pattern =>
fieldAttributes.includes(pattern)
);
@@ -1356,6 +1402,14 @@ export class FormDetector {
continue;
}
/*
* Apply the TOTP entry's exclude list to the heuristic fallback too
* (e.g. don't classify a "test-tokenfield" widget as TOTP).
*/
if (this.matchesEntryExclude(input, CombinedFieldPatterns.totp)) {
continue;
}
// Check for autocomplete="one-time-code"
const autocomplete = input.getAttribute('autocomplete')?.toLowerCase() ?? '';
if (autocomplete === 'one-time-code') {

View File

@@ -48,6 +48,37 @@ describe('FormDetector - Field Exclusion Patterns', () => {
});
});
describe('Real-world scenario: Test/widget tokenfield containing "token"', () => {
  const htmlFile = 'exclusion-test-tokenfield.html';

  /**
   * Load the fixture and build a detector focused on the element with the given id.
   */
  const createDetectorFor = (elementId: string) => {
    const { document } = createTestDom(htmlFile).window;
    const target = document.getElementById(elementId);
    return new FormDetector(document, target as HTMLElement);
  };

  it('should not detect a "test-tokenfield" widget input as TOTP', () => {
    /*
     * "token" is a TOTP include term and is matched by substring, so on its own it
     * would match the "test-tokenfield-input" class. The TOTP entry's exclude term
     * ('test') must veto that match, leaving the widget unclassified.
     */
    const formDetector = createDetectorFor('active-input');

    expect(formDetector.containsLoginForm()).toBe(false);
    expect(formDetector.getDetectedFieldType()).toBeNull();
  });

  it('should not detect the placeholder editor input as TOTP either', () => {
    const formDetector = createDetectorFor('placeholder-input');

    expect(formDetector.containsLoginForm()).toBe(false);
    expect(formDetector.getDetectedFieldType()).toBeNull();
  });
});
describe('Exclusion patterns should not affect legitimate login fields', () => {
const htmlFile = 'exclusion-legitimate-login.html';

View File

@@ -0,0 +1,35 @@
<!DOCTYPE html>
<html>
<head>
<title>Test Tokenfield Widget</title>
</head>
<body>
<!--
Real-world widget where the host page uses class names containing the
substring "token" (e.g. "test-tokenfield"). The TOTP detector matches
"token" via substring inclusion, so without a per-field exclusion this
field gets misclassified as a 2FA code input.
-->
<div class="cell_editor" style="visibility: visible;">
<div class="default_editor">
<div class="test-widget-text-editor _grain1346_">
<div class="_grain1351_" style="max-width: 560px;">
<div tabindex="-1" class="_grain573_ _grain1347_" style="justify-content: left;">
<div class="_grain575_">
<input id="placeholder-input" type="text" autocomplete="new-password" class="test-tokenfield-input _grain576_">
</div>
<input type="text" tabindex="-1" class="_grain579_">
<div class="_grain1352_">s&ZeroWidthSpace;</div>
</div>
</div>
<div tabindex="-1" class="test-tokenfield _grain573_ _grain1347_" style="justify-content: left; width: 99px; height: 22px;">
<div class="_grain575_">
<input id="active-input" type="text" autocomplete="new-password" class="test-tokenfield-input _grain576_" style="width: 24px;">
</div>
<input type="text" tabindex="-1" class="_grain579_">
</div>
</div>
</div>
</div>
</body>
</html>