/*
 * Patch overview (comment preamble placed ahead of the first diff header).
 *
 * include/cglm-mat-simd.h: adds the macro CGLM_MAT_TRANSP_SSE_4x4f(M, D).
 *   It loads M[0]..M[3] into four __m128 locals, runs _MM_TRANSPOSE4_PS on
 *   them, then stores the four results to D[0]..D[3]. Every load is issued
 *   before the first store.
 *   NOTE(review): M and D are expanded unparenthesized, four times each, so
 *   callers should pass plain identifiers.
 *   NOTE(review): _mm_load_ps/_mm_store_ps are presumably the aligned
 *   variants, so each row of M and D would need 16-byte alignment; confirm
 *   against the mat4 typedef, which is not visible in this patch.
 *
 * include/cglm-mat.h: adds two inline functions.
 *   glm_mat_transpose(m, dest) stores the transpose of m in dest, through
 *   the macro above when __SSE__ or __SSE2__ is defined and through sixteen
 *   scalar assignments (dest[i][j] = m[j][i]) otherwise. The scalar
 *   assignments keep reading m after dest has started to be written, so on
 *   that path dest must not be the same matrix as m.
 *   glm_mat_transpose_self(m) transposes m in place: with SSE it passes m
 *   as both macro arguments; otherwise it transposes into a local mat4 and
 *   copies that back over m with glm__memcpy (defined outside this patch;
 *   presumably memcpy-like, confirm).
 */
diff --git a/include/cglm-mat-simd.h b/include/cglm-mat-simd.h
index 65e9f4d..3ca9cc6 100644
--- a/include/cglm-mat-simd.h
+++ b/include/cglm-mat-simd.h
@@ -28,4 +28,24 @@
     _mm_store_ps(D + 12, _mm_madd4_ps(L + 12, r0, r1, r2, r3));              \
   } while (0)
 
+#define CGLM_MAT_TRANSP_SSE_4x4f(M, D)                                        \
+  do {                                                                        \
+    __m128 r0;                                                                \
+    __m128 r1;                                                                \
+    __m128 r2;                                                                \
+    __m128 r3;                                                                \
+                                                                              \
+    r0 = _mm_load_ps(M[0]); /* all four rows are read before any store */     \
+    r1 = _mm_load_ps(M[1]);                                                   \
+    r2 = _mm_load_ps(M[2]);                                                   \
+    r3 = _mm_load_ps(M[3]);                                                   \
+                                                                              \
+    _MM_TRANSPOSE4_PS(r0, r1, r2, r3); /* transposes r0..r3 in registers */   \
+                                                                              \
+    _mm_store_ps(D[0], r0);                                                   \
+    _mm_store_ps(D[1], r1);                                                   \
+    _mm_store_ps(D[2], r2);                                                   \
+    _mm_store_ps(D[3], r3);                                                   \
+  } while (0)
+
 #endif /* cglm_mat_sse_h */
diff --git a/include/cglm-mat.h b/include/cglm-mat.h
index aad0a29..81ce791 100644
--- a/include/cglm-mat.h
+++ b/include/cglm-mat.h
@@ -82,4 +82,46 @@ glm_mat_mul4N(mat4 * __restrict matrices[], int len, mat4 dest) {
                dest);
 }
 
+CGLM_INLINE
+void
+glm_mat_transpose(mat4 m, mat4 dest) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  CGLM_MAT_TRANSP_SSE_4x4f(m, dest);
+#else
+  dest[0][0] = m[0][0]; /* scalar fallback: dest[i][j] = m[j][i] */
+  dest[0][1] = m[1][0];
+  dest[0][2] = m[2][0];
+  dest[0][3] = m[3][0];
+
+  dest[1][0] = m[0][1]; /* m is still being read here: dest must not be m */
+  dest[1][1] = m[1][1];
+  dest[1][2] = m[2][1];
+  dest[1][3] = m[3][1];
+
+  dest[2][0] = m[0][2];
+  dest[2][1] = m[1][2];
+  dest[2][2] = m[2][2];
+  dest[2][3] = m[3][2];
+
+  dest[3][0] = m[0][3];
+  dest[3][1] = m[1][3];
+  dest[3][2] = m[2][3];
+  dest[3][3] = m[3][3];
+#endif
+}
+
+CGLM_INLINE
+void
+glm_mat_transpose_self(mat4 m) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  CGLM_MAT_TRANSP_SSE_4x4f(m, m); /* same source and destination: the macro loads every row first */
+#else
+  mat4 d; /* scratch result, so m is never read after being overwritten */
+
+  glm_mat_transpose(m, d);
+
+  glm__memcpy(m, d, sizeof(mat4)); /* copy the transposed scratch back over m */
+#endif
+}
+
 #endif /* cglm_mat_h */