simd, sse: reduce some computation in glm_mul_rot_sse2()

This commit is contained in:
Recep Aslantas
2021-05-01 23:16:03 +03:00
parent e1b142bce7
commit 28705be5a3
2 changed files with 3 additions and 7 deletions

View File

@@ -55,30 +55,26 @@ void
glm_mul_rot_sse2(mat4 m1, mat4 m2, mat4 dest) {
/* D = R * L (Column-Major) */
glmm_128 l, r0, r1, r2, r3, v0, v1, v2, v3;
glmm_128 l, r0, r1, r2, v0, v1, v2;
l = glmm_load(m1[0]);
r0 = glmm_load(m2[0]);
r1 = glmm_load(m2[1]);
r2 = glmm_load(m2[2]);
r3 = glmm_load(m2[3]);
v0 = _mm_mul_ps(glmm_splat_x(r0), l);
v1 = _mm_mul_ps(glmm_splat_x(r1), l);
v2 = _mm_mul_ps(glmm_splat_x(r2), l);
v3 = _mm_mul_ps(glmm_splat_x(r3), l);
l = glmm_load(m1[1]);
v0 = glmm_fmadd(glmm_splat_y(r0), l, v0);
v1 = glmm_fmadd(glmm_splat_y(r1), l, v1);
v2 = glmm_fmadd(glmm_splat_y(r2), l, v2);
v3 = glmm_fmadd(glmm_splat_y(r3), l, v3);
l = glmm_load(m1[2]);
v0 = glmm_fmadd(glmm_splat_z(r0), l, v0);
v1 = glmm_fmadd(glmm_splat_z(r1), l, v1);
v2 = glmm_fmadd(glmm_splat_z(r2), l, v2);
v3 = glmm_fmadd(glmm_splat_z(r3), l, v3);
glmm_store(dest[0], v0);
glmm_store(dest[1], v1);

View File

@@ -67,9 +67,9 @@ glm_mat3_mul_sse2(mat3 m1, mat3 m2, mat3 dest) {
x3 = _mm_movelh_ps(x9, r2); /* 0.f b22 b21 b20 */
x2 = glmm_vdots(x2, x3);
_mm_storeu_ps(dest[0], x0);
_mm_storeu_ps(&dest[0][0], x0);
_mm_storeu_ps(&dest[1][1], x1);
_mm_store_ss(&dest[2][2], x2);
_mm_store_ss (&dest[2][2], x2);
}
#endif