diff --git a/include/cglm/quat.h b/include/cglm/quat.h
index eeee5a9..5ce27ff 100644
--- a/include/cglm/quat.h
+++ b/include/cglm/quat.h
@@ -262,7 +262,7 @@ glm_quat_normalize_to(versor q, versor dest) {
   x0   = glmm_load(q);
   xdot = glmm_vdot(x0, x0);
-  // dot  = _mm_cvtss_f32(xdot);
+  /* dot  = _mm_cvtss_f32(xdot); */
   dot  = wasm_f32x4_extract_lane(xdot, 0);

   if (dot <= 0.0f) {
diff --git a/include/cglm/simd/wasm.h b/include/cglm/simd/wasm.h
index 78f4605..133f8ac 100644
--- a/include/cglm/simd/wasm.h
+++ b/include/cglm/simd/wasm.h
@@ -41,7 +41,7 @@ glmm_vhadds(glmm_128 v) {
   glmm_128 shuf, sums;
   shuf = glmm_shuff1(v, 2, 3, 0, 1);
   sums = wasm_f32x4_add(v, shuf);
-  // shuf = _mm_movehl_ps(shuf, sums);
+  /* shuf = _mm_movehl_ps(shuf, sums); */
   shuf = wasm_i32x4_shuffle(shuf, sums, 6, 7, 2, 3);
   sums = wasm_i32x4_shuffle(sums, wasm_f32x4_add(sums, shuf), 4, 1, 2, 3);
   return sums;
@@ -76,7 +76,7 @@ glmm_vhmax(glmm_128 v) {
   x0 = glmm_shuff1(v, 2, 3, 2, 3);   /* [2, 3, 2, 3] */
   x1 = wasm_f32x4_pmax(x0, v);       /* [0|2, 1|3, 2|2, 3|3] */
   x2 = glmm_splat(x1, 1);            /* [1|3, 1|3, 1|3, 1|3] */
-  // _mm_max_ss
+  /* _mm_max_ss */
   return wasm_i32x4_shuffle(x1, wasm_f32x4_pmax(x1, x2), 4, 1, 2, 3);
 }
diff --git a/include/cglm/simd/wasm/affine.h b/include/cglm/simd/wasm/affine.h
index 518cf64..59b36ac 100644
--- a/include/cglm/simd/wasm/affine.h
+++ b/include/cglm/simd/wasm/affine.h
@@ -93,18 +93,18 @@ glm_inv_tr_wasm(mat4 mat) {
   r3 = glmm_load(mat[3]);
   x1 = wasm_f32x4_const(0.0f, 0.0f, 0.0f, 1.0f);

-  // _MM_TRANSPOSE4_PS(r0, r1, r2, x1);
+  /* _MM_TRANSPOSE4_PS(r0, r1, r2, x1); */
   x2 = wasm_i32x4_shuffle(r0, r1, 0, 4, 1, 5);
   x3 = wasm_i32x4_shuffle(r0, r1, 2, 6, 3, 7);
   x4 = wasm_i32x4_shuffle(r2, x1, 0, 4, 1, 5);
   x5 = wasm_i32x4_shuffle(r2, x1, 2, 6, 3, 7);
-  // r0 = _mm_movelh_ps(x2, x4);
+  /* r0 = _mm_movelh_ps(x2, x4); */
   r0 = wasm_i32x4_shuffle(x2, x4, 0, 1, 4, 5);
-  // r1 = _mm_movehl_ps(x4, x2);
+  /* r1 = _mm_movehl_ps(x4, x2); */
   r1 = wasm_i32x4_shuffle(x4, x2, 6, 7, 2, 3);
-  // r2 = _mm_movelh_ps(x3, x5);
+  /* r2 = _mm_movelh_ps(x3, x5); */
   r2 = wasm_i32x4_shuffle(x3, x5, 0, 1, 4, 5);
-  // x1 = _mm_movehl_ps(x5, x3);
+  /* x1 = _mm_movehl_ps(x5, x3); */
   x1 = wasm_i32x4_shuffle(x5, x3, 6, 7, 2, 3);

   x2 = glmm_shuff1(r3, 0, 0, 0, 0);
diff --git a/include/cglm/simd/wasm/mat2.h b/include/cglm/simd/wasm/mat2.h
index 9caefd1..80ce0fb 100644
--- a/include/cglm/simd/wasm/mat2.h
+++ b/include/cglm/simd/wasm/mat2.h
@@ -22,9 +22,9 @@ glm_mat2_mul_wasm(mat2 m1, mat2 m2, mat2 dest) {
   x3 = glmm_shuff1(x2, 2, 2, 0, 0);
   x4 = glmm_shuff1(x2, 3, 3, 1, 1);

-  // x0 = _mm_movelh_ps(x1, x1);
+  /* x0 = _mm_movelh_ps(x1, x1); */
   x0 = wasm_i32x4_shuffle(x1, x1, 0, 1, 4, 5);
-  // x2 = _mm_movehl_ps(x1, x1);
+  /* x2 = _mm_movehl_ps(x1, x1); */
   x2 = wasm_i32x4_shuffle(x1, x1, 6, 7, 2, 3);

   /*
diff --git a/include/cglm/simd/wasm/mat3.h b/include/cglm/simd/wasm/mat3.h
index 835f5a3..dfe192d 100644
--- a/include/cglm/simd/wasm/mat3.h
+++ b/include/cglm/simd/wasm/mat3.h
@@ -42,22 +42,22 @@ glm_mat3_mul_wasm(mat3 m1, mat3 m2, mat3 dest) {
   x6 = glmm_shuff1(x3, 2, 0, 0, 0);              /* b11 b01 b01 b01 */
   x2 = glmm_shuff1(r1, 3, 3, 0, 0);              /* b21 b21 b11 b11 */

-  // x8 = _mm_unpackhi_ps(x8, x4);
-  // x9 = _mm_unpackhi_ps(x7, x2);
+  /* x8 = _mm_unpackhi_ps(x8, x4); */
+  /* x9 = _mm_unpackhi_ps(x7, x2); */
   x8 = wasm_i32x4_shuffle(x8, x4, 2, 6, 3, 7);   /* a10 a00 a12 a02 */
   x9 = wasm_i32x4_shuffle(x7, x2, 2, 6, 3, 7);   /* b21 b20 b21 b20 */

   x0 = glmm_fmadd(x4, x6, x0);
   x1 = glmm_fmadd(x5, x2, x1);

-  // x2 = _mm_movehl_ps(l2, l1);
+  /* x2 = _mm_movehl_ps(l2, l1); */
   x2 = wasm_i32x4_shuffle(l2, l1, 6, 7, 2, 3);   /* a22 a22 a21 a20 */
   x3 = glmm_shuff1(x2, 0, 2, 1, 0);              /* a20 a22 a21 a20 */
   x2 = glmm_shuff1(x2, 1, 0, 2, 1);              /* a21 a20 a22 a21 */
   x4 = wasm_i32x4_shuffle(r0, r1, 2, 2, 5, 5);   /* b12 b12 b02 b02 */
   x5 = glmm_shuff1(x4, 3, 0, 0, 0);              /* b12 b02 b02 b02 */
-  // x4 = _mm_movehl_ps(r2, x4);
+  /* x4 = _mm_movehl_ps(r2, x4); */
   x4 = wasm_i32x4_shuffle(r2, x4, 6, 7, 2, 3);   /* b22 b22 b12 b12 */

   x0 = glmm_fmadd(x3, x5, x0);
   x1 = glmm_fmadd(x2, x4, x1);
@@ -67,17 +67,17 @@ glm_mat3_mul_wasm(mat3 m1, mat3 m2, mat3 dest) {
      a12 * b21 + a22 * b22 + 0 * 00 */

-  // x2 = _mm_movelh_ps(x8, l2);
-  // x3 = _mm_movelh_ps(x9, r2);
+  /* x2 = _mm_movelh_ps(x8, l2); */
+  /* x3 = _mm_movelh_ps(x9, r2); */
   x2 = wasm_i32x4_shuffle(x8, l2, 0, 1, 4, 5);   /* 0.f a22 a12 a02 */
   x3 = wasm_i32x4_shuffle(x9, r2, 0, 1, 4, 5);   /* 0.f b22 b21 b20 */
   x2 = glmm_vdots(x2, x3);

-  // _mm_storeu_ps(&dest[0][0], x0);
+  /* _mm_storeu_ps(&dest[0][0], x0); */
   wasm_v128_store(&dest[0][0], x0);
-  // _mm_storeu_ps(&dest[1][1], x1);
+  /* _mm_storeu_ps(&dest[1][1], x1); */
   wasm_v128_store(&dest[1][1], x1);
-  // _mm_store_ss (&dest[2][2], x2);
+  /* _mm_store_ss (&dest[2][2], x2); */
   wasm_v128_store32_lane(&dest[2][2], x2, 0);
 }
diff --git a/include/cglm/simd/wasm/mat4.h b/include/cglm/simd/wasm/mat4.h
index f86a4ff..3711843 100644
--- a/include/cglm/simd/wasm/mat4.h
+++ b/include/cglm/simd/wasm/mat4.h
@@ -36,18 +36,18 @@ glm_mat4_transp_wasm(mat4 m, mat4 dest) {
   r2 = glmm_load(m[2]);
   r3 = glmm_load(m[3]);

-  // _MM_TRANSPOSE4_PS(r0, r1, r2, r3);
+  /* _MM_TRANSPOSE4_PS(r0, r1, r2, r3); */
   tmp0 = wasm_i32x4_shuffle(r0, r1, 0, 4, 1, 5);
   tmp1 = wasm_i32x4_shuffle(r0, r1, 2, 6, 3, 7);
   tmp2 = wasm_i32x4_shuffle(r2, r3, 0, 4, 1, 5);
   tmp3 = wasm_i32x4_shuffle(r2, r3, 2, 6, 3, 7);
-  // r0 = _mm_movelh_ps(tmp0, tmp2);
+  /* r0 = _mm_movelh_ps(tmp0, tmp2); */
   r0 = wasm_i32x4_shuffle(tmp0, tmp2, 0, 1, 4, 5);
-  // r1 = _mm_movehl_ps(tmp2, tmp0);
+  /* r1 = _mm_movehl_ps(tmp2, tmp0); */
   r1 = wasm_i32x4_shuffle(tmp2, tmp0, 6, 7, 2, 3);
-  // r2 = _mm_movelh_ps(tmp1, tmp3);
+  /* r2 = _mm_movelh_ps(tmp1, tmp3); */
   r2 = wasm_i32x4_shuffle(tmp1, tmp3, 0, 1, 4, 5);
-  // r3 = _mm_movehl_ps(tmp3, tmp1);
+  /* r3 = _mm_movehl_ps(tmp3, tmp1); */
   r3 = wasm_i32x4_shuffle(tmp3, tmp1, 6, 7, 2, 3);

   glmm_store(dest[0], r0);
@@ -186,9 +186,9 @@ glm_mat4_inv_fast_wasm(mat4 mat, mat4 dest) {
   r1 = glmm_load(mat[1]); /* h g f e */
   r2 = glmm_load(mat[2]); /* l k j i */
   r3 = glmm_load(mat[3]); /* p o n m */
-  // x0 = _mm_movehl_ps(r3, r2);
+  /* x0 = _mm_movehl_ps(r3, r2); */
   x0 = wasm_i32x4_shuffle(r3, r2, 6, 7, 2, 3); /* p o l k */
-  // x3 = _mm_movelh_ps(r2, r3);
+  /* x3 = _mm_movelh_ps(r2, r3); */
   x3 = wasm_i32x4_shuffle(r2, r3, 0, 1, 4, 5); /* n m j i */
   x1 = glmm_shuff1(x0, 1, 3, 3 ,3); /* l p p p */
   x2 = glmm_shuff1(x0, 0, 2, 2, 2); /* k o o o */
@@ -242,9 +242,9 @@ glm_mat4_inv_fast_wasm(mat4 mat, mat4 dest) {
      t2[5] = e * n - m * f;
      t3[5] = e * j - i * f; */
   t5 = glmm_fnmadd(x7, x5, t5);

-  // x4 = _mm_movelh_ps(r0, r1);
+  /* x4 = _mm_movelh_ps(r0, r1); */
   x4 = wasm_i32x4_shuffle(r0, r1, 0, 1, 4, 5); /* f e b a */
-  // x5 = _mm_movehl_ps(r1, r0);
+  /* x5 = _mm_movehl_ps(r1, r0); */
   x5 = wasm_i32x4_shuffle(r1, r0, 6, 7, 2, 3); /* h g d c */

   x0 = glmm_shuff1(x4, 0, 0, 0, 2); /* a a a e */
@@ -300,7 +300,7 @@ glm_mat4_inv_fast_wasm(mat4 mat, mat4 dest) {
   x1 = wasm_i32x4_shuffle(v2, v3, 0, 0, 4, 4);
   x0 = wasm_i32x4_shuffle(x0, x1, 0, 2, 4, 6);

-  // x0 = _mm_rcp_ps(glmm_vhadd(wasm_f32x4_mul(x0, r0)));
+  /* x0 = _mm_rcp_ps(glmm_vhadd(wasm_f32x4_mul(x0, r0))); */
   x0 = wasm_f32x4_div(wasm_f32x4_const_splat(1.0f),
                       glmm_vhadd(wasm_f32x4_mul(x0, r0)));
@@ -326,9 +326,9 @@ glm_mat4_inv_wasm(mat4 mat, mat4 dest) {
   r1 = glmm_load(mat[1]); /* h g f e */
   r2 = glmm_load(mat[2]); /* l k j i */
   r3 = glmm_load(mat[3]); /* p o n m */
-  // x0 = _mm_movehl_ps(r3, r2);
+  /* x0 = _mm_movehl_ps(r3, r2); */
   x0 = wasm_i32x4_shuffle(r3, r2, 6, 7, 2, 3); /* p o l k */
-  // x3 = _mm_movelh_ps(r2, r3);
+  /* x3 = _mm_movelh_ps(r2, r3); */
   x3 = wasm_i32x4_shuffle(r2, r3, 0, 1, 4, 5); /* n m j i */
   x1 = glmm_shuff1(x0, 1, 3, 3 ,3); /* l p p p */
   x2 = glmm_shuff1(x0, 0, 2, 2, 2); /* k o o o */
@@ -382,9 +382,9 @@ glm_mat4_inv_wasm(mat4 mat, mat4 dest) {
      t2[5] = e * n - m * f;
      t3[5] = e * j - i * f; */
   t5 = glmm_fnmadd(x7, x5, t5);

-  // x4 = _mm_movelh_ps(r0, r1);
+  /* x4 = _mm_movelh_ps(r0, r1); */
   x4 = wasm_i32x4_shuffle(r0, r1, 0, 1, 4, 5); /* f e b a */
-  // x5 = _mm_movehl_ps(r1, r0);
+  /* x5 = _mm_movehl_ps(r1, r0); */
   x5 = wasm_i32x4_shuffle(r1, r0, 6, 7, 2, 3); /* h g d c */

   x0 = glmm_shuff1(x4, 0, 0, 0, 2); /* a a a e */
diff --git a/include/cglm/simd/wasm/quat.h b/include/cglm/simd/wasm/quat.h
index f8434f1..927ea21 100644
--- a/include/cglm/simd/wasm/quat.h
+++ b/include/cglm/simd/wasm/quat.h
@@ -28,7 +28,7 @@ glm_quat_mul_wasm(versor p, versor q, versor dest) {
   xq = glmm_load(q);
   x1 = wasm_f32x4_const(0.f, -0.f, 0.f, -0.f); /* TODO: _mm_set1_ss() + shuff ? */
   r  = wasm_f32x4_mul(glmm_splat_w(xp), xq);
-  // x2 = _mm_unpackhi_ps(x1, x1);
+  /* x2 = _mm_unpackhi_ps(x1, x1); */
   x2 = wasm_i32x4_shuffle(x1, x1, 2, 6, 3, 7);
   x3 = glmm_shuff1(x1, 3, 2, 0, 1);
   x  = glmm_splat_x(xp);
diff --git a/include/cglm/vec4.h b/include/cglm/vec4.h
index 73a5662..d6ee080 100644
--- a/include/cglm/vec4.h
+++ b/include/cglm/vec4.h
@@ -163,7 +163,7 @@ CGLM_INLINE
 void
 glm_vec4_ucopy(vec4 v, vec4 dest) {
 #if defined(__wasm__) && defined(__wasm_simd128__)
-  // note here wasm v128.load/v128.store support unaligned loads and stores
+  /* note here wasm v128.load/v128.store support unaligned loads and stores */
   wasm_v128_store(dest, wasm_v128_load(v));
 #else
   dest[0] = v[0];
@@ -761,7 +761,7 @@ glm_vec4_normalize_to(vec4 v, vec4 dest) {
   x0   = glmm_load(v);
   xdot = glmm_vdot(x0, x0);
-  // dot  = _mm_cvtss_f32(xdot);
+  /* dot  = _mm_cvtss_f32(xdot); */
   dot  = wasm_f32x4_extract_lane(xdot, 0);

   if (dot == 0.0f) {
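
Note (not part of the patch itself): every hunk above converts a leftover SSE intrinsic reference from a C++-style `//` comment to the C-style `/* */` form, presumably to keep strict C89/C90 builds clean, while the wasm_simd128 shuffle that replaced it stays untouched. The recurring mapping relies on `wasm_i32x4_shuffle` lane indices 0-3 selecting from the first operand and 4-7 from the second. Below is a minimal reference sketch of that correspondence; the helper names are hypothetical (not cglm API), and it assumes a wasm32 target built with -msimd128 so that <wasm_simd128.h> is available.

  #include <wasm_simd128.h>

  /* _mm_movelh_ps(a, b) yields { a0, a1, b0, b1 } */
  static inline v128_t movelh_sketch(v128_t a, v128_t b) {
    return wasm_i32x4_shuffle(a, b, 0, 1, 4, 5);
  }

  /* _mm_movehl_ps(a, b) yields { b2, b3, a2, a3 }: the result takes the
     second operand's high lanes first, hence indices 6, 7 leading */
  static inline v128_t movehl_sketch(v128_t a, v128_t b) {
    return wasm_i32x4_shuffle(a, b, 6, 7, 2, 3);
  }

  /* _mm_unpackhi_ps(a, b) yields { a2, b2, a3, b3 } */
  static inline v128_t unpackhi_sketch(v128_t a, v128_t b) {
    return wasm_i32x4_shuffle(a, b, 2, 6, 3, 7);
  }

For example, `r1 = _mm_movehl_ps(tmp2, tmp0)` in the transpose hunk becomes `wasm_i32x4_shuffle(tmp2, tmp0, 6, 7, 2, 3)` with the operands passed in the same order.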