libobs/graphics: Optimize certain matrix4 functions

Optimizes certain 4x4 matrix functions by eliminating unnecessary math
operations. This ends up optimizing the gs_matrix_transform() and
gs_matrix_rotate() functions, making positional/rotational operations
somewhat faster because they require much less math than they
otherwise would.
This commit is contained in:
Lain
2025-03-17 21:54:01 -07:00
committed by Ryan Foster
parent 9fa3b045a3
commit d25e2c18e4

View File

@@ -59,20 +59,57 @@ void matrix4_from_axisang(struct matrix4 *dst, const struct axisang *aa)
/**
 * Multiplies two 4x4 matrices: dst = m1 * m2.
 *
 * Every element of the product is the dot product of one row of m1 with one
 * column of m2. m2 is transposed once up front so that its columns can be
 * handed to vec4_dot() as ordinary rows.
 *
 * The product is assembled in a local matrix and copied into dst only at the
 * end, so dst may alias m1 and/or m2.
 *
 * @param dst  Receives the product
 * @param m1   Left-hand matrix (its rows are used)
 * @param m2   Right-hand matrix (its columns are used)
 */
void matrix4_mul(struct matrix4 *dst, const struct matrix4 *m1, const struct matrix4 *m2)
{
	struct matrix4 transposed;
	struct matrix4 out;

	/* Rows of 'transposed' are the columns of m2. */
	matrix4_transpose(&transposed, m2);

	out.x.x = vec4_dot(&m1->x, &transposed.x);
	out.x.y = vec4_dot(&m1->x, &transposed.y);
	out.x.z = vec4_dot(&m1->x, &transposed.z);
	out.x.w = vec4_dot(&m1->x, &transposed.t);

	out.y.x = vec4_dot(&m1->y, &transposed.x);
	out.y.y = vec4_dot(&m1->y, &transposed.y);
	out.y.z = vec4_dot(&m1->y, &transposed.z);
	out.y.w = vec4_dot(&m1->y, &transposed.t);

	out.z.x = vec4_dot(&m1->z, &transposed.x);
	out.z.y = vec4_dot(&m1->z, &transposed.y);
	out.z.z = vec4_dot(&m1->z, &transposed.z);
	out.z.w = vec4_dot(&m1->z, &transposed.t);

	out.t.x = vec4_dot(&m1->t, &transposed.x);
	out.t.y = vec4_dot(&m1->t, &transposed.y);
	out.t.z = vec4_dot(&m1->t, &transposed.z);
	out.t.w = vec4_dot(&m1->t, &transposed.t);

	/* Write dst last so the inputs stay intact while they are being read. */
	matrix4_copy(dst, &out);
}
void matrix4_mul_4x3_only(struct matrix4 *dst, const struct matrix4 *m1, const struct matrix4 *m2)
{
struct matrix4 transposed;
struct vec4 x;
struct vec4 y;
struct vec4 z;
matrix4_transpose(&transposed, m2);
x.x = vec4_dot(&m1->x, &transposed.x);
x.y = vec4_dot(&m1->x, &transposed.y);
x.z = vec4_dot(&m1->x, &transposed.z);
x.w = vec4_dot(&m1->x, &transposed.t);
y.x = vec4_dot(&m1->y, &transposed.x);
y.y = vec4_dot(&m1->y, &transposed.y);
y.z = vec4_dot(&m1->y, &transposed.z);
y.w = vec4_dot(&m1->y, &transposed.t);
z.x = vec4_dot(&m1->z, &transposed.x);
z.y = vec4_dot(&m1->z, &transposed.y);
z.z = vec4_dot(&m1->z, &transposed.z);
z.w = vec4_dot(&m1->z, &transposed.t);
vec4_copy(&dst->x, &x);
vec4_copy(&dst->y, &y);
vec4_copy(&dst->z, &z);
vec4_copy(&dst->t, &m2->t);
}
static inline void get_3x3_submatrix(float *dst, const struct matrix4 *m, int i, int j)
@@ -172,38 +209,50 @@ void matrix4_scale(struct matrix4 *dst, const struct matrix4 *m, const struct ve
/**
 * Left-multiplies m by a translation built from a 3-component vector.
 *
 * This is the product T * m, where T has identity x/y/z rows and a t row
 * equal to v widened to a vec4 by vec4_from_vec3(). Because T's first three
 * rows are identity, the x, y and z rows of the product are simply m's own
 * rows; only the t row has to be computed.
 *
 * The new t row is computed into a local before dst is written, so dst may
 * alias m.
 *
 * @param dst  Receives the translated matrix
 * @param v    Translation vector
 * @param m    Matrix to translate
 */
void matrix4_translate3v_i(struct matrix4 *dst, const struct vec3 *v, const struct matrix4 *m)
{
	struct matrix4 transposed;
	struct vec4 v4;
	struct vec4 t;

	vec4_from_vec3(&v4, v);

	/* Rows of 'transposed' are the columns of m. */
	matrix4_transpose(&transposed, m);

	t.x = vec4_dot(&v4, &transposed.x);
	t.y = vec4_dot(&v4, &transposed.y);
	t.z = vec4_dot(&v4, &transposed.z);
	t.w = vec4_dot(&v4, &transposed.t);

	vec4_copy(&dst->x, &m->x);
	vec4_copy(&dst->y, &m->y);
	vec4_copy(&dst->z, &m->z);
	vec4_copy(&dst->t, &t);
}
/**
 * Left-multiplies m by a translation built from a 4-component vector.
 *
 * This is the product T * m, where T has identity x/y/z rows and a t row
 * equal to v (all four components used as given). Because T's first three
 * rows are identity, the x, y and z rows of the product are simply m's own
 * rows; only the t row has to be computed.
 *
 * The new t row is computed into a local before dst is written, so dst may
 * alias m.
 *
 * @param dst  Receives the translated matrix
 * @param v    Translation row, used verbatim as the t row of T
 * @param m    Matrix to translate
 */
void matrix4_translate4v_i(struct matrix4 *dst, const struct vec4 *v, const struct matrix4 *m)
{
	struct matrix4 transposed;
	struct vec4 t;

	/* Rows of 'transposed' are the columns of m. */
	matrix4_transpose(&transposed, m);

	t.x = vec4_dot(v, &transposed.x);
	t.y = vec4_dot(v, &transposed.y);
	t.z = vec4_dot(v, &transposed.z);
	t.w = vec4_dot(v, &transposed.t);

	vec4_copy(&dst->x, &m->x);
	vec4_copy(&dst->y, &m->y);
	vec4_copy(&dst->z, &m->z);
	vec4_copy(&dst->t, &t);
}
/**
 * Left-multiplies m by the rotation described by quaternion q.
 *
 * The rotation matrix is built with matrix4_from_quat() and applied exactly
 * once through matrix4_mul_4x3_only(), which skips computing the t row.
 *
 * NOTE(review): this relies on matrix4_from_quat() producing a t row of
 * (0, 0, 0, 1), since the reduced multiply ignores that row -- confirm.
 *
 * @param dst  Receives the rotated matrix
 * @param q    Rotation quaternion
 * @param m    Matrix to rotate
 */
void matrix4_rotate_i(struct matrix4 *dst, const struct quat *q, const struct matrix4 *m)
{
	struct matrix4 temp;

	matrix4_from_quat(&temp, q);
	matrix4_mul_4x3_only(dst, &temp, m);
}
/**
 * Left-multiplies m by the rotation described by axis-angle aa.
 *
 * The rotation matrix is built with matrix4_from_axisang() and applied
 * exactly once through matrix4_mul_4x3_only(), which skips computing the
 * t row.
 *
 * NOTE(review): this relies on matrix4_from_axisang() producing a t row of
 * (0, 0, 0, 1), since the reduced multiply ignores that row -- confirm.
 *
 * @param dst  Receives the rotated matrix
 * @param aa   Rotation as an axis and angle
 * @param m    Matrix to rotate
 */
void matrix4_rotate_aa_i(struct matrix4 *dst, const struct axisang *aa, const struct matrix4 *m)
{
	struct matrix4 temp;

	matrix4_from_axisang(&temp, aa);
	matrix4_mul_4x3_only(dst, &temp, m);
}
void matrix4_scale_i(struct matrix4 *dst, const struct vec3 *v, const struct matrix4 *m)
@@ -213,7 +262,7 @@ void matrix4_scale_i(struct matrix4 *dst, const struct vec3 *v, const struct mat
vec4_set(&temp.y, 0.0f, v->y, 0.0f, 0.0f);
vec4_set(&temp.z, 0.0f, 0.0f, v->z, 0.0f);
vec4_set(&temp.t, 0.0f, 0.0f, 0.0f, 1.0f);
matrix4_mul(dst, &temp, m);
matrix4_mul_4x3_only(dst, &temp, m);
}
bool matrix4_inv(struct matrix4 *dst, const struct matrix4 *m)