From d25e2c18e48d2d6af11adbed119e744e17ec2976 Mon Sep 17 00:00:00 2001 From: Lain Date: Mon, 17 Mar 2025 21:54:01 -0700 Subject: [PATCH] libobs/graphics: Optimize certain matrix4 functions Optimizes certain 4x4 matrix functions by avoiding unnecessary math operations. This ends up optimizing the gs_matrix_transform() and gs_matrix_rotate() functions, making positional/rotational operations a bit more efficient and requiring much less math than they would otherwise. --- libobs/graphics/matrix4.c | 103 ++++++++++++++++++++++++++++---------- 1 file changed, 76 insertions(+), 27 deletions(-) diff --git a/libobs/graphics/matrix4.c b/libobs/graphics/matrix4.c index 8168ecbce..33476d26b 100644 --- a/libobs/graphics/matrix4.c +++ b/libobs/graphics/matrix4.c @@ -59,20 +59,57 @@ void matrix4_from_axisang(struct matrix4 *dst, const struct axisang *aa) void matrix4_mul(struct matrix4 *dst, const struct matrix4 *m1, const struct matrix4 *m2) { - const struct vec4 *m1v = (const struct vec4 *)m1; - const float *m2f = (const float *)m2; - struct vec4 out[4]; - int i, j; + struct matrix4 transposed; + struct matrix4 out; - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - struct vec4 temp; - vec4_set(&temp, m2f[j], m2f[j + 4], m2f[j + 8], m2f[j + 12]); - out[i].ptr[j] = vec4_dot(&m1v[i], &temp); - } - } + matrix4_transpose(&transposed, m2); - matrix4_copy(dst, (struct matrix4 *)out); + out.x.x = vec4_dot(&m1->x, &transposed.x); + out.x.y = vec4_dot(&m1->x, &transposed.y); + out.x.z = vec4_dot(&m1->x, &transposed.z); + out.x.w = vec4_dot(&m1->x, &transposed.t); + out.y.x = vec4_dot(&m1->y, &transposed.x); + out.y.y = vec4_dot(&m1->y, &transposed.y); + out.y.z = vec4_dot(&m1->y, &transposed.z); + out.y.w = vec4_dot(&m1->y, &transposed.t); + out.z.x = vec4_dot(&m1->z, &transposed.x); + out.z.y = vec4_dot(&m1->z, &transposed.y); + out.z.z = vec4_dot(&m1->z, &transposed.z); + out.z.w = vec4_dot(&m1->z, &transposed.t); + out.t.x = vec4_dot(&m1->t, &transposed.x); + out.t.y = 
vec4_dot(&m1->t, &transposed.y); + out.t.z = vec4_dot(&m1->t, &transposed.z); + out.t.w = vec4_dot(&m1->t, &transposed.t); + + matrix4_copy(dst, &out); +} + +void matrix4_mul_4x3_only(struct matrix4 *dst, const struct matrix4 *m1, const struct matrix4 *m2) +{ + struct matrix4 transposed; + struct vec4 x; + struct vec4 y; + struct vec4 z; + + matrix4_transpose(&transposed, m2); + + x.x = vec4_dot(&m1->x, &transposed.x); + x.y = vec4_dot(&m1->x, &transposed.y); + x.z = vec4_dot(&m1->x, &transposed.z); + x.w = vec4_dot(&m1->x, &transposed.t); + y.x = vec4_dot(&m1->y, &transposed.x); + y.y = vec4_dot(&m1->y, &transposed.y); + y.z = vec4_dot(&m1->y, &transposed.z); + y.w = vec4_dot(&m1->y, &transposed.t); + z.x = vec4_dot(&m1->z, &transposed.x); + z.y = vec4_dot(&m1->z, &transposed.y); + z.z = vec4_dot(&m1->z, &transposed.z); + z.w = vec4_dot(&m1->z, &transposed.t); + + vec4_copy(&dst->x, &x); + vec4_copy(&dst->y, &y); + vec4_copy(&dst->z, &z); + vec4_copy(&dst->t, &m2->t); } static inline void get_3x3_submatrix(float *dst, const struct matrix4 *m, int i, int j) @@ -172,38 +209,50 @@ void matrix4_scale(struct matrix4 *dst, const struct matrix4 *m, const struct ve void matrix4_translate3v_i(struct matrix4 *dst, const struct vec3 *v, const struct matrix4 *m) { - struct matrix4 temp; - vec4_set(&temp.x, 1.0f, 0.0f, 0.0f, 0.0f); - vec4_set(&temp.y, 0.0f, 1.0f, 0.0f, 0.0f); - vec4_set(&temp.z, 0.0f, 0.0f, 1.0f, 0.0f); - vec4_from_vec3(&temp.t, v); + struct matrix4 transposed; + struct vec4 v4; + struct vec4 t; - matrix4_mul(dst, &temp, m); + vec4_from_vec3(&v4, v); + matrix4_transpose(&transposed, m); + t.x = vec4_dot(&v4, &transposed.x); + t.y = vec4_dot(&v4, &transposed.y); + t.z = vec4_dot(&v4, &transposed.z); + t.w = vec4_dot(&v4, &transposed.t); + vec4_copy(&dst->x, &m->x); + vec4_copy(&dst->y, &m->y); + vec4_copy(&dst->z, &m->z); + vec4_copy(&dst->t, &t); } void matrix4_translate4v_i(struct matrix4 *dst, const struct vec4 *v, const struct matrix4 *m) { - struct matrix4 
temp; - vec4_set(&temp.x, 1.0f, 0.0f, 0.0f, 0.0f); - vec4_set(&temp.y, 0.0f, 1.0f, 0.0f, 0.0f); - vec4_set(&temp.z, 0.0f, 0.0f, 1.0f, 0.0f); - vec4_copy(&temp.t, v); + struct matrix4 transposed; + struct vec4 t; - matrix4_mul(dst, &temp, m); + matrix4_transpose(&transposed, m); + t.x = vec4_dot(v, &transposed.x); + t.y = vec4_dot(v, &transposed.y); + t.z = vec4_dot(v, &transposed.z); + t.w = vec4_dot(v, &transposed.t); + vec4_copy(&dst->x, &m->x); + vec4_copy(&dst->y, &m->y); + vec4_copy(&dst->z, &m->z); + vec4_copy(&dst->t, &t); } void matrix4_rotate_i(struct matrix4 *dst, const struct quat *q, const struct matrix4 *m) { struct matrix4 temp; matrix4_from_quat(&temp, q); - matrix4_mul(dst, &temp, m); + matrix4_mul_4x3_only(dst, &temp, m); } void matrix4_rotate_aa_i(struct matrix4 *dst, const struct axisang *aa, const struct matrix4 *m) { struct matrix4 temp; matrix4_from_axisang(&temp, aa); - matrix4_mul(dst, &temp, m); + matrix4_mul_4x3_only(dst, &temp, m); } void matrix4_scale_i(struct matrix4 *dst, const struct vec3 *v, const struct matrix4 *m) @@ -213,7 +262,7 @@ void matrix4_scale_i(struct matrix4 *dst, const struct vec3 *v, const struct mat vec4_set(&temp.y, 0.0f, v->y, 0.0f, 0.0f); vec4_set(&temp.z, 0.0f, 0.0f, v->z, 0.0f); vec4_set(&temp.t, 0.0f, 0.0f, 0.0f, 1.0f); - matrix4_mul(dst, &temp, m); + matrix4_mul_4x3_only(dst, &temp, m); } bool matrix4_inv(struct matrix4 *dst, const struct matrix4 *m)