From 5e05eec6d6e04e044108d6f4ddf3e22ef9590790 Mon Sep 17 00:00:00 2001 From: myfreeer Date: Sat, 1 Apr 2023 19:03:48 +0800 Subject: [PATCH] simd128: inline _MM_TRANSPOSE4_PS --- include/cglm/simd/wasm.h | 16 ---------------- include/cglm/simd/wasm/affine.h | 11 ++++++++++- include/cglm/simd/wasm/mat4.h | 12 ++++++++++-- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/include/cglm/simd/wasm.h b/include/cglm/simd/wasm.h index 1567d0e..82bc4ae 100644 --- a/include/cglm/simd/wasm.h +++ b/include/cglm/simd/wasm.h @@ -34,22 +34,6 @@ _mm_movelh_ps(glmm_128 __a, glmm_128 __b) return wasm_i32x4_shuffle(__a, __b, 0, 1, 4, 5); } -#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - glmm_128 __row0 = (row0); \ - glmm_128 __row1 = (row1); \ - glmm_128 __row2 = (row2); \ - glmm_128 __row3 = (row3); \ - glmm_128 __tmp0 = wasm_i32x4_shuffle(__row0, __row1, 0, 4, 1, 5); \ - glmm_128 __tmp1 = wasm_i32x4_shuffle(__row0, __row1, 2, 6, 3, 7); \ - glmm_128 __tmp2 = wasm_i32x4_shuffle(__row2, __row3, 0, 4, 1, 5); \ - glmm_128 __tmp3 = wasm_i32x4_shuffle(__row2, __row3, 2, 6, 3, 7); \ - (row0) = _mm_movelh_ps(__tmp0, __tmp2); \ - (row1) = _mm_movehl_ps(__tmp2, __tmp0); \ - (row2) = _mm_movelh_ps(__tmp1, __tmp3); \ - (row3) = _mm_movehl_ps(__tmp3, __tmp1); \ - } while (0) - static inline glmm_128 glmm_abs(glmm_128 x) { diff --git a/include/cglm/simd/wasm/affine.h b/include/cglm/simd/wasm/affine.h index ebf05fc..4fa2c44 100644 --- a/include/cglm/simd/wasm/affine.h +++ b/include/cglm/simd/wasm/affine.h @@ -92,7 +92,16 @@ glm_inv_tr_wasm(mat4 mat) { r2 = glmm_load(mat[2]); r3 = glmm_load(mat[3]); x1 = wasm_f32x4_const(0.0f, 0.0f, 0.0f, 1.0f); - _MM_TRANSPOSE4_PS(r0, r1, r2, x1); + + // _MM_TRANSPOSE4_PS(r0, r1, r2, x1); + x2 = wasm_i32x4_shuffle(r0, r1, 0, 4, 1, 5); + x3 = wasm_i32x4_shuffle(r0, r1, 2, 6, 3, 7); + x4 = wasm_i32x4_shuffle(r2, x1, 0, 4, 1, 5); + x5 = wasm_i32x4_shuffle(r2, x1, 2, 6, 3, 7); + r0 = _mm_movelh_ps(x2, x4); + r1 = _mm_movehl_ps(x4, x2); + r2 = _mm_movelh_ps(x3, x5); + x1 = _mm_movehl_ps(x5, x3); x2 = glmm_shuff1(r3, 0, 0, 0, 0); x3 = glmm_shuff1(r3, 1, 1, 1, 1); diff --git a/include/cglm/simd/wasm/mat4.h b/include/cglm/simd/wasm/mat4.h index ea1e90e..b90daee 100644 --- a/include/cglm/simd/wasm/mat4.h +++ b/include/cglm/simd/wasm/mat4.h @@ -29,14 +29,22 @@ glm_mat4_scale_wasm(mat4 m, float s) { CGLM_INLINE void glm_mat4_transp_wasm(mat4 m, mat4 dest) { - glmm_128 r0, r1, r2, r3; + glmm_128 r0, r1, r2, r3, tmp0, tmp1, tmp2, tmp3; r0 = glmm_load(m[0]); r1 = glmm_load(m[1]); r2 = glmm_load(m[2]); r3 = glmm_load(m[3]); - _MM_TRANSPOSE4_PS(r0, r1, r2, r3); + // _MM_TRANSPOSE4_PS(r0, r1, r2, r3); + tmp0 = wasm_i32x4_shuffle(r0, r1, 0, 4, 1, 5); + tmp1 = wasm_i32x4_shuffle(r0, r1, 2, 6, 3, 7); + tmp2 = wasm_i32x4_shuffle(r2, r3, 0, 4, 1, 5); + tmp3 = wasm_i32x4_shuffle(r2, r3, 2, 6, 3, 7); + r0 = _mm_movelh_ps(tmp0, tmp2); + r1 = _mm_movehl_ps(tmp2, tmp0); + r2 = _mm_movelh_ps(tmp1, tmp3); + r3 = _mm_movehl_ps(tmp3, tmp1); glmm_store(dest[0], r0); glmm_store(dest[1], r1);