mirror of
https://github.com/recp/cglm.git
synced 2025-12-24 20:34:58 +00:00
opitimize mat3 operations with fma
This commit is contained in:
@@ -30,24 +30,17 @@ glm_mat3_mul_sse2(mat3 m1, mat3 m2, mat3 dest) {
|
||||
x1 = glmm_shuff2(l0, l1, 1, 0, 3, 3, 0, 3, 2, 0);
|
||||
x2 = glmm_shuff2(l1, l2, 0, 0, 3, 2, 0, 2, 1, 0);
|
||||
|
||||
x0 = _mm_add_ps(_mm_mul_ps(glmm_shuff1(l0, 0, 2, 1, 0),
|
||||
glmm_shuff1(r0, 3, 0, 0, 0)),
|
||||
_mm_mul_ps(x1, glmm_shuff2(r0, r1, 0, 0, 1, 1, 2, 0, 0, 0)));
|
||||
|
||||
x0 = _mm_add_ps(x0,
|
||||
_mm_mul_ps(x2, glmm_shuff2(r0, r1, 1, 1, 2, 2, 2, 0, 0, 0)));
|
||||
x0 = glmm_fmadd(glmm_shuff1(l0, 0, 2, 1, 0), glmm_shuff1(r0, 3, 0, 0, 0),
|
||||
glmm_fmadd(x1, glmm_shuff2(r0, r1, 0, 0, 1, 1, 2, 0, 0, 0),
|
||||
_mm_mul_ps(x2, glmm_shuff2(r0, r1, 1, 1, 2, 2, 2, 0, 0, 0))));
|
||||
|
||||
_mm_storeu_ps(dest[0], x0);
|
||||
|
||||
x0 = _mm_add_ps(_mm_mul_ps(glmm_shuff1(l0, 1, 0, 2, 1),
|
||||
_mm_shuffle_ps(r0, r1, _MM_SHUFFLE(2, 2, 3, 3))),
|
||||
_mm_mul_ps(glmm_shuff1(x1, 1, 0, 2, 1),
|
||||
glmm_shuff1(r1, 3, 3, 0, 0)));
|
||||
|
||||
x0 = _mm_add_ps(x0,
|
||||
_mm_mul_ps(glmm_shuff1(x2, 1, 0, 2, 1),
|
||||
_mm_shuffle_ps(r1, r2, _MM_SHUFFLE(0, 0, 1, 1))));
|
||||
|
||||
x0 = glmm_fmadd(glmm_shuff1(l0, 1, 0, 2, 1), _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(2, 2, 3, 3)),
|
||||
glmm_fmadd(glmm_shuff1(x1, 1, 0, 2, 1), glmm_shuff1(r1, 3, 3, 0, 0),
|
||||
_mm_mul_ps(glmm_shuff1(x2, 1, 0, 2, 1),
|
||||
_mm_shuffle_ps(r1, r2, _MM_SHUFFLE(0, 0, 1, 1)))));
|
||||
|
||||
_mm_storeu_ps(&dest[1][1], x0);
|
||||
|
||||
dest[2][2] = m1[0][2] * m2[2][0]
|
||||
|
||||
Reference in New Issue
Block a user