simd128: enable in mat3

This commit is contained in:
myfreeer
2023-03-06 16:50:37 +08:00
parent a0dd85f3d1
commit fdef58bd1a
3 changed files with 24 additions and 3 deletions

View File

@@ -42,6 +42,10 @@
# include "simd/sse2/mat3.h"
#endif
#ifdef CGLM_SIMD_WASM
# include "simd/wasm/mat3.h"
#endif
/* Brace initializer for a 3x3 float matrix holding the identity:
 * 1.0f on the diagonal, 0.0f everywhere else. */
#define GLM_MAT3_IDENTITY_INIT {{1.0f, 0.0f, 0.0f}, \
{0.0f, 1.0f, 0.0f}, \
{0.0f, 0.0f, 1.0f}}
@@ -150,6 +154,8 @@ void
glm_mat3_mul(mat3 m1, mat3 m2, mat3 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glm_mat3_mul_sse2(m1, m2, dest);
#elif defined(__wasm__) && defined(__wasm_simd128__)
glm_mat3_mul_wasm(m1, m2, dest);
#else
float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2],
a10 = m1[1][0], a11 = m1[1][1], a12 = m1[1][2],

View File

@@ -92,7 +92,22 @@ _mm_sqrt_ss(glmm_128 __a)
/*
 * WebAssembly stand-in for the SSE _mm_rcp_ps intrinsic.
 *
 * Returns 1.0f / __a per lane, computed as a division of a splatted 1.0f
 * vector by __a.
 */
static __inline__ glmm_128 __attribute__((__always_inline__, __nodebug__))
_mm_rcp_ps(glmm_128 __a)
{
  return (glmm_128)wasm_f32x4_div((glmm_128)wasm_f32x4_splat(1.0f), (glmm_128)__a);
}
/*
 * WebAssembly stand-in for the SSE _mm_storeu_ps intrinsic.
 *
 * Writes the whole vector __a to the memory starting at __p. The write goes
 * through a packed, may_alias wrapper struct so the compiler neither assumes
 * vector alignment for __p nor applies strict-aliasing assumptions to it.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_storeu_ps(float *__p, glmm_128 __a)
{
  struct __glmm_unaligned_vec {
    glmm_128 __lanes;
  } __attribute__((__packed__, __may_alias__));
  struct __glmm_unaligned_vec *__dst = (struct __glmm_unaligned_vec *)__p;

  __dst->__lanes = __a;
}
/*
 * WebAssembly stand-in for the SSE _mm_store_ss intrinsic.
 *
 * Stores only the 32-bit lane 0 of __a to *__p; the remaining lanes are not
 * written.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_store_ss(float *__p, glmm_128 __a)
{
  /* float * converts implicitly to the void * destination parameter. */
  wasm_v128_store32_lane(__p, __a, 0);
}
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \

View File

@@ -27,13 +27,13 @@ glm_mat3_mul_wasm(mat3 m1, mat3 m2, mat3 dest) {
x1 = glmm_shuff1(r0, 3, 0, 0, 0); /* b10 b00 b00 b00 */
x2 = _mm_shuffle_ps(l0, l1, _MM_SHUFFLE(1, 0, 3, 3)); /* a12 a11 a10 a10 */
x3 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(2, 0, 3, 1)); /* b20 b11 b10 b01 */
x0 = _mm_mul_ps(x8, x1);
x0 = wasm_f32x4_mul(x8, x1);
x6 = glmm_shuff1(l0, 1, 0, 2, 1); /* a01 a00 a02 a01 */
x7 = glmm_shuff1(x3, 3, 3, 1, 1); /* b20 b20 b10 b10 */
l2 = wasm_v128_load32_zero(&m1[2][2]);
r2 = wasm_v128_load32_zero(&m2[2][2]);
x1 = _mm_mul_ps(x6, x7);
x1 = wasm_f32x4_mul(x6, x7);
l2 = glmm_shuff1(l2, 0, 0, 1, 0); /* a22 a22 0.f a22 */
r2 = glmm_shuff1(r2, 0, 0, 1, 0); /* b22 b22 0.f b22 */