Files
Compass/common/src/util/matrix.ts
2025-10-09 17:59:10 +02:00

100 lines
3.6 KiB
TypeScript

import { max, sumBy } from 'lodash'
// each row has [column, value] pairs
type SparseMatrix = [number, number][][]
// Code originally from: https://github.com/johnpaulada/matrix-factorization-js/blob/master/src/matrix-factorization.js
// Used to implement recommendations through collaborative filtering: https://towardsdatascience.com/recommender-systems-matrix-factorization-using-pytorch-bd52f46aa199
// See also: https://en.wikipedia.org/wiki/Matrix_factorization_(recommender_systems)
/**
* Gets the factors of a sparse matrix
*
* @param TARGET_MATRIX target matrix, where each row specifies a subset of all columns.
* @param FEATURES Number of latent features
* @param ITERS Number of times to move towards the real factors
* @param LEARNING_RATE Learning rate
* @param REGULARIZATION_RATE Regularization amount, i.e. amount of bias reduction
* @returns An array containing the two factor matrices
*/
export function factorizeMatrix(
TARGET_MATRIX: SparseMatrix,
FEATURES = 5,
ITERS = 5000,
LEARNING_RATE = 0.0002,
REGULARIZATION_RATE = 0.02,
THRESHOLD = 0.001
) {
const initCell = () => (2 * Math.random()) / FEATURES
const m = TARGET_MATRIX.length
const n = (max(TARGET_MATRIX.flatMap((r) => r.map(([j]) => j))) ?? -1) + 1
const points = sumBy(TARGET_MATRIX, (r) => r.length)
const mFeatures = fillMatrix(m, FEATURES, initCell)
const nFeatures = fillMatrix(n, FEATURES, initCell)
console.debug('rows', m, 'columns', n, 'numPoints', points)
const updateFeature = (a: number, b: number, error: number) =>
a + LEARNING_RATE * (2 * error * b - REGULARIZATION_RATE * a)
const dotProduct = (i: number, j: number) => {
let result = 0
for (let k = 0; k < FEATURES; k++) {
result += mFeatures[i * FEATURES + k] * nFeatures[j * FEATURES + k]
}
return result
}
// Iteratively figure out correct factors.
for (let iter = 0; iter < ITERS; iter++) {
for (let i = 0; i < m; i++) {
for (const [j, targetValue] of TARGET_MATRIX[i]) {
// to approximate the value for target_ij, we take the dot product of the features for m[i] and n[j]
const error = targetValue - dotProduct(i, j)
// update factor matrices
for (let k = 0; k < FEATURES; k++) {
const a = mFeatures[i * FEATURES + k]
const b = nFeatures[j * FEATURES + k]
mFeatures[i * FEATURES + k] = updateFeature(a, b, error)
nFeatures[j * FEATURES + k] = updateFeature(b, a, error)
}
}
}
if (iter % 50 === 0 || iter === ITERS - 1) {
let totalError = 0
for (let i = 0; i < m; i++) {
for (const [j, targetValue] of TARGET_MATRIX[i]) {
// add up squared error of current approximated value
totalError += (targetValue - dotProduct(i, j)) ** 2
// mqp: idk what this part of the error means lol
for (let k = 0; k < FEATURES; k++) {
const a = mFeatures[i * FEATURES + k]
const b = nFeatures[j * FEATURES + k]
totalError += (REGULARIZATION_RATE / 2) * (a ** 2 + b ** 2)
}
}
}
console.debug(iter, 'error', totalError / points)
// Complete factorization process if total error falls below a certain threshold
if (totalError / points < THRESHOLD) break
}
}
return [mFeatures, nFeatures, dotProduct] as const
}
/**
* Creates an m x n matrix filled with the result of given fill function.
*/
function fillMatrix(m: number, n: number, fill: () => number) {
const matrix = new Float64Array(m * n)
for (let i = 0; i < m; i++) {
for (let j = 0; j < n; j++) {
matrix[i * n + j] = fill()
}
}
return matrix
}