Improve browser extension service name extractor (#798)

This commit is contained in:
Leendert de Borst
2025-04-15 12:56:55 +02:00
parent c5244b31ec
commit 5d3ad60dee
4 changed files with 184 additions and 143 deletions

View File

@@ -6,11 +6,11 @@ import { PasswordGenerator } from '../../utils/generators/Password/PasswordGener
import { storage } from "wxt/storage";
import { sendMessage } from "webext-bridge/content-script";
import { CredentialsResponse } from '@/utils/types/messaging/CredentialsResponse';
import { CombinedStopWords } from '../../utils/formDetector/FieldPatterns';
import { PasswordSettingsResponse } from '@/utils/types/messaging/PasswordSettingsResponse';
import SqliteClient from '../../utils/SqliteClient';
import { BaseIdentityGenerator } from '@/utils/generators/Identity/implementations/base/BaseIdentityGenerator';
import { StringResponse } from '@/utils/types/messaging/StringResponse';
import { FormDetector } from '@/utils/formDetector/FormDetector';
// TODO: store generic setting constants somewhere else.
export const DISABLED_SITES_KEY = 'local:aliasvault_disabled_sites';
@@ -211,7 +211,7 @@ export function createAutofillPopup(input: HTMLInputElement, credentials: Creden
e.stopPropagation();
e.stopImmediatePropagation();
const suggestedName = getSuggestedServiceName(document, window.location);
const suggestedName = FormDetector.getSuggestedServiceName(document, window.location);
const result = await createAliasCreationPopup(suggestedName, rootContainer);
if (!result) {
@@ -1299,57 +1299,6 @@ export async function dismissVaultLockedPopup(): Promise<void> {
}
}
/**
 * Suggest a service name for the current page.
 *
 * Candidates are tried in this order:
 *  1. the title segment that follows the last divider (`|`, `-`, `—`, `/`, `\`),
 *  2. the complete page title,
 *  3. the last two labels of the hostname (with a leading `www.` removed).
 *
 * For title candidates, single-character tokens and stop words are dropped and
 * the surviving tokens are returned in their original capitalization.
 */
function getSuggestedServiceName(document: Document, location: Location): string {
  const separators = /[\s|\-—/\\]+/;
  const pageTitle = document.title;

  /**
   * Reduce a piece of text to its meaningful tokens (original casing, joined by
   * single spaces). Returns null when the text holds no meaningful token.
   */
  const extractName = (text: string): string | null => {
    // Lower-cased tokens worth keeping: longer than one character and not a stop word.
    const keep = new Set(
      text
        .toLowerCase()
        .split(separators)
        .filter(token => token.length > 1 && !CombinedStopWords.has(token.toLowerCase()))
    );

    if (keep.size === 0) {
      return null;
    }

    // Walk the text again so the original casing of every kept token survives.
    return text
      .split(separators)
      .filter(token => keep.has(token.toLowerCase()))
      .join(' ');
  };

  // Candidate 1: whatever follows the last divider in the title.
  const trailingSegment = /[|\-—/\\][^|\-—/\\]*$/.exec(pageTitle);
  if (trailingSegment !== null) {
    const fromTrailingSegment = extractName(trailingSegment[0].trim());
    if (fromTrailingSegment !== null) {
      return fromTrailingSegment;
    }
  }

  // Candidate 2: the whole title.
  const fromTitle = extractName(pageTitle);
  if (fromTitle !== null) {
    return fromTitle;
  }

  // Candidate 3: the domain name.
  const hostWithoutWww = location.hostname.replace(/^www\./, '');
  return hostWithoutWww.split('.').slice(-2).join('.');
}
/**
* Get a valid service URL from the current page.
*/

View File

@@ -86,7 +86,7 @@ export const EnglishStopWords = new Set([
// Marketing/Promotional
'free', 'create', 'new', 'your', 'special', 'offer',
'deal', 'discount', 'promotion',
'deal', 'discount', 'promotion', 'newsletter',
// Common website sections
'help', 'support', 'contact', 'about', 'faq', 'terms',
@@ -102,7 +102,10 @@ export const EnglishStopWords = new Set([
'gateway', 'api', 'interface', 'console',
// Time-related
'today', 'now', 'current', 'latest', 'newest', 'recent'
'today', 'now', 'current', 'latest', 'newest', 'recent',
// General
'the', 'and', 'or', 'but', 'to', 'up'
]);
/**
@@ -174,7 +177,10 @@ export const DutchStopWords = new Set([
'interface', 'console',
// Time-related
'vandaag', 'nu', 'huidig', 'recent', 'nieuwste'
'vandaag', 'nu', 'huidig', 'recent', 'nieuwste',
// General
'je', 'in', 'op', 'de'
]);
/**

View File

@@ -1,5 +1,5 @@
import { FormFields } from "./types/FormFields";
import { CombinedFieldPatterns, CombinedGenderOptionPatterns } from "./FieldPatterns";
import { CombinedFieldPatterns, CombinedGenderOptionPatterns, CombinedStopWords } from "./FieldPatterns";
/**
* Form detector.
@@ -18,85 +18,6 @@ export class FormDetector {
this.visibilityCache = new Map();
}
/**
 * Check if an element and all its parents are visible.
 * This checks for display:none, visibility:hidden, and opacity:0 on the element
 * itself and on every ancestor reachable through `parentElement`.
 *
 * Results are memoised in `visibilityCache`. Only the elements that were actually
 * walked (the queried element up to the ancestor that decided the outcome) are
 * cached, because the verdict is only known to hold for those: an ancestor of a
 * hidden element is not necessarily hidden itself.
 *
 * @param element - The element to check; `null` is treated as not visible.
 * @returns `false` when the element or one of its ancestors is hidden, `true`
 *          otherwise (including when computed styles cannot be obtained).
 */
private isElementVisible(element: HTMLElement | null): boolean {
  if (!element) {
    return false;
  }

  // Check cache first
  const cachedResult = this.visibilityCache.get(element);
  if (cachedResult !== undefined) {
    return cachedResult;
  }

  // Elements visited so far, starting at `element` and moving towards the root.
  const walked: HTMLElement[] = [];

  /**
   * Store the verdict for every walked element and hand it back, so callers can
   * write `return cacheWalked(...)`.
   */
  const cacheWalked = (visible: boolean): boolean => {
    for (const node of walked) {
      this.visibilityCache.set(node, visible);
    }
    return visible;
  };

  let current: HTMLElement | null = element;
  while (current) {
    walked.push(current);

    try {
      const style = this.document.defaultView?.getComputedStyle(current);
      if (!style) {
        // No computed style available: give the element the benefit of the doubt.
        return cacheWalked(true);
      }

      const isHidden =
        style.display === 'none' ||
        style.visibility === 'hidden' ||
        parseFloat(style.opacity) === 0;

      if (isHidden) {
        // `current` hides itself and therefore everything walked below it.
        return cacheWalked(false);
      }
    } catch {
      // If we can't get the computed style, treat the element as visible.
      return cacheWalked(true);
    }

    current = current.parentElement;
  }

  // Reached the top without finding a hidden ancestor: the whole path is visible.
  return cacheWalked(true);
}
/**
* Detect login forms on the page based on the clicked element.
*/
@@ -132,6 +53,171 @@ export class FormDetector {
return this.detectFormFields(formWrapper);
}
/**
 * Get a suggested service name from the page title and URL.
 * Attempts to extract meaningful parts while maintaining original capitalization.
 *
 * Strategy:
 *  1. If the title contains a divider (`|`, `-`, `—`, `/`, `\`, `:`), split it at the
 *     first one, strip stop words from the start and end of each side and pick the
 *     side with the fewest meaningful words. A side without any meaningful word is
 *     never picked over a side that has some.
 *  2. Otherwise (or when step 1 yields an unsuitable name) apply the same stop word
 *     stripping to the full title.
 *  3. Fall back to the last two labels of the hostname (leading `www.` removed).
 *
 * @param document - Document whose `title` is inspected.
 * @param location - Location whose `hostname` is used as the last resort.
 * @returns The suggested service name.
 */
public static getSuggestedServiceName(document: Document, location: Location): string {
  const title = document.title;
  const maxWords = 4;
  const maxLength = 50;

  /**
   * We apply a limit to the length and word count of the title to prevent
   * the service name from being too long or containing too many words which
   * is not likely to be a good service name.
   */
  const isValidServiceName = (text: string): boolean => {
    const lengthOk = text.length >= 3 && text.length <= maxLength;
    const wordCountOk = text.split(/[\s|\-—/\\]+/).length <= maxWords;
    return lengthOk && wordCountOk;
  };

  /**
   * Split text into whitespace separated words (original casing) and strip stop
   * words from the start and the end until no more matches are found. Stop words
   * in the middle are kept. Returns an empty array for empty or stop-word-only text.
   */
  const getMeaningfulTitleParts = (text: string): string[] => {
    // Drop empty tokens so that an empty side is not counted as one word.
    const words = text.split(/\s+/).filter(word => word.length > 0);

    // Strip stopwords from start until no more matches
    let startIndex = 0;
    while (startIndex < words.length && CombinedStopWords.has(words[startIndex].toLowerCase())) {
      startIndex++;
    }

    // Strip stopwords from end until no more matches
    let endIndex = words.length - 1;
    while (endIndex > startIndex && CombinedStopWords.has(words[endIndex].toLowerCase())) {
      endIndex--;
    }

    return words.slice(startIndex, endIndex + 1);
  };

  /**
   * Join the kept words into the final name. Pipes are treated like whitespace so
   * that leftover `|` dividers do not end up in the name.
   */
  const toServiceName = (words: string[]): string => {
    return words
      .join(' ')
      .split(/[\s|]+/)
      .filter(part => part.length > 0)
      .join(' ');
  };

  // First try to extract meaningful parts based on the first divider
  const dividerMatch = /[|\-—/\\:]/.exec(title);
  if (dividerMatch) {
    const beforeWords = getMeaningfulTitleParts(title.substring(0, dividerMatch.index));
    const afterWords = getMeaningfulTitleParts(title.substring(dividerMatch.index + 1));

    // Choose the part with fewer meaningful words, but never an empty part while
    // the other part still has something to offer (e.g. "Login - GitHub").
    let chosenWords: string[];
    if (beforeWords.length === 0) {
      chosenWords = afterWords;
    } else if (afterWords.length === 0) {
      chosenWords = beforeWords;
    } else {
      chosenWords = beforeWords.length <= afterWords.length ? beforeWords : afterWords;
    }

    const serviceName = toServiceName(chosenWords);
    if (isValidServiceName(serviceName)) {
      return serviceName;
    }
  }

  // If no suitable name was found around the divider, try the full title
  const fullTitleName = toServiceName(getMeaningfulTitleParts(title));
  if (isValidServiceName(fullTitleName)) {
    return fullTitleName;
  }

  // Fall back to domain name if no meaningful parts found
  const domainParts = location.hostname.replace(/^www\./, '').split('.');
  return domainParts.slice(-2).join('.');
}
/**
 * Check if an element and all its parents are visible.
 * This checks for display:none, visibility:hidden, and opacity:0 on the element
 * itself and on every ancestor reachable through `parentElement`.
 *
 * Results are memoised in `visibilityCache`. Only the elements that were actually
 * walked (the queried element up to the ancestor that decided the outcome) are
 * cached, because the verdict is only known to hold for those: an ancestor of a
 * hidden element is not necessarily hidden itself.
 *
 * @param element - The element to check; `null` is treated as not visible.
 * @returns `false` when the element or one of its ancestors is hidden, `true`
 *          otherwise (including when computed styles cannot be obtained).
 */
private isElementVisible(element: HTMLElement | null): boolean {
  if (!element) {
    return false;
  }

  // Check cache first
  const cachedResult = this.visibilityCache.get(element);
  if (cachedResult !== undefined) {
    return cachedResult;
  }

  // Elements visited so far, starting at `element` and moving towards the root.
  const walked: HTMLElement[] = [];

  /**
   * Store the verdict for every walked element and hand it back, so callers can
   * write `return cacheWalked(...)`.
   */
  const cacheWalked = (visible: boolean): boolean => {
    for (const node of walked) {
      this.visibilityCache.set(node, visible);
    }
    return visible;
  };

  let current: HTMLElement | null = element;
  while (current) {
    walked.push(current);

    try {
      const style = this.document.defaultView?.getComputedStyle(current);
      if (!style) {
        // No computed style available: give the element the benefit of the doubt.
        return cacheWalked(true);
      }

      const isHidden =
        style.display === 'none' ||
        style.visibility === 'hidden' ||
        parseFloat(style.opacity) === 0;

      if (isHidden) {
        // `current` hides itself and therefore everything walked below it.
        return cacheWalked(false);
      }
    } catch {
      // If we can't get the computed style, treat the element as visible.
      return cacheWalked(true);
    }

    current = current.parentElement;
  }

  // Reached the top without finding a hidden ancestor: the whole path is visible.
  return cacheWalked(true);
}
/**
* Find an input field based on common patterns in its attributes.
*/

View File

@@ -105,11 +105,11 @@ The following websites have been known to cause issues in the past (but should b
| Website | Reason |
| --- | --- |
| https://www.paprika-shopping.nl/nieuwsbrief/newsletter-register-landing.html | Popup CSS style conflicts |
| https://bloshing.com/inschrijven-nieuwsbrief | Popup CSS style conflicts |
| https://gamefaqs.gamespot.com/user | Popup buttons not working |
| https://news.ycombinator.com/login?goto=news | Popup and client favicon not showing due to SVG format |
| https://vault.bitwarden.com/#/login | Autofill password not detected (input not long enough), manually typing in works |
| https://login.microsoftonline.com/ | Password gets reset after autofill |
| https://mijn.ing.nl/login/ | Autofill doesn't detect input fields and AliasVault autofill icon placement is off |
| [Paprika Shopping](https://www.paprika-shopping.nl/nieuwsbrief/newsletter-register-landing.html) | Popup CSS style conflicts |
| [Bloshing](https://bloshing.com/inschrijven-nieuwsbrief) | Popup CSS style conflicts |
| [GameFAQs](https://gamefaqs.gamespot.com/user) | Popup buttons not working |
| [Hacker News](https://news.ycombinator.com/login?goto=news) | Popup and client favicon not showing due to SVG format |
| [Bitwarden](https://vault.bitwarden.com/#/login) | Autofill password not detected (input not long enough), manually typing in works |
| [Microsoft Online](https://login.microsoftonline.com/) | Password gets reset after autofill |
| [ING Bank](https://mijn.ing.nl/login/) | Autofill doesn't detect input fields and AliasVault autofill icon placement is off |