From 9725b60d46e8d8d0c1afbbbe2349ba10c34609e1 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Fri, 23 Apr 2021 22:12:57 +0300 Subject: [PATCH] rename glmm_shuff1x() to glmm_splat() * mark glmm_shuff1x() as DEPRECATED --- include/cglm/simd/sse2/affine.h | 45 ++++++++++++++++----------------- include/cglm/simd/sse2/mat4.h | 17 ++++++------- include/cglm/simd/sse2/quat.h | 8 +++--- include/cglm/simd/x86.h | 8 ++++-- include/cglm/vec4-ext.h | 6 ++--- 5 files changed, 43 insertions(+), 41 deletions(-) diff --git a/include/cglm/simd/sse2/affine.h b/include/cglm/simd/sse2/affine.h index 236408c..b5d64f0 100644 --- a/include/cglm/simd/sse2/affine.h +++ b/include/cglm/simd/sse2/affine.h @@ -25,29 +25,28 @@ glm_mul_sse2(mat4 m1, mat4 m2, mat4 dest) { r = glmm_load(m2[0]); glmm_store(dest[0], - glmm_fmadd(glmm_shuff1x(r, 0), l0, - glmm_fmadd(glmm_shuff1x(r, 1), l1, - _mm_mul_ps(glmm_shuff1x(r, 2), l2)))); + glmm_fmadd(glmm_splat(r, 0), l0, + glmm_fmadd(glmm_splat(r, 1), l1, + _mm_mul_ps(glmm_splat(r, 2), l2)))); r = glmm_load(m2[1]); glmm_store(dest[1], - glmm_fmadd(glmm_shuff1x(r, 0), l0, - glmm_fmadd(glmm_shuff1x(r, 1), l1, - _mm_mul_ps(glmm_shuff1x(r, 2), l2)))); + glmm_fmadd(glmm_splat(r, 0), l0, + glmm_fmadd(glmm_splat(r, 1), l1, + _mm_mul_ps(glmm_splat(r, 2), l2)))); r = glmm_load(m2[2]); glmm_store(dest[2], - glmm_fmadd(glmm_shuff1x(r, 0), l0, - glmm_fmadd(glmm_shuff1x(r, 1), l1, - _mm_mul_ps(glmm_shuff1x(r, 2), l2)))); + glmm_fmadd(glmm_splat(r, 0), l0, + glmm_fmadd(glmm_splat(r, 1), l1, + _mm_mul_ps(glmm_splat(r, 2), l2)))); r = glmm_load(m2[3]); glmm_store(dest[3], - glmm_fmadd(glmm_shuff1x(r, 0), l0, - glmm_fmadd(glmm_shuff1x(r, 1), l1, - glmm_fmadd(glmm_shuff1x(r, 2), l2, - _mm_mul_ps(glmm_shuff1x(r, 3), - l3))))); + glmm_fmadd(glmm_splat(r, 0), l0, + glmm_fmadd(glmm_splat(r, 1), l1, + glmm_fmadd(glmm_splat(r, 2), l2, + _mm_mul_ps(glmm_splat(r, 3), l3))))); } CGLM_INLINE @@ -63,22 +62,22 @@ glm_mul_rot_sse2(mat4 m1, mat4 m2, mat4 dest) { r = glmm_load(m2[0]); glmm_store(dest[0], - glmm_fmadd(glmm_shuff1x(r, 0), l0, - glmm_fmadd(glmm_shuff1x(r, 1), l1, - _mm_mul_ps(glmm_shuff1x(r, 2), l2)))); + glmm_fmadd(glmm_splat(r, 0), l0, + glmm_fmadd(glmm_splat(r, 1), l1, + _mm_mul_ps(glmm_splat(r, 2), l2)))); r = glmm_load(m2[1]); glmm_store(dest[1], - glmm_fmadd(glmm_shuff1x(r, 0), l0, - glmm_fmadd(glmm_shuff1x(r, 1), l1, - _mm_mul_ps(glmm_shuff1x(r, 2), l2)))); + glmm_fmadd(glmm_splat(r, 0), l0, + glmm_fmadd(glmm_splat(r, 1), l1, + _mm_mul_ps(glmm_splat(r, 2), l2)))); r = glmm_load(m2[2]); glmm_store(dest[2], - glmm_fmadd(glmm_shuff1x(r, 0), l0, - glmm_fmadd(glmm_shuff1x(r, 1), l1, - _mm_mul_ps(glmm_shuff1x(r, 2), l2)))); + glmm_fmadd(glmm_splat(r, 0), l0, + glmm_fmadd(glmm_splat(r, 1), l1, + _mm_mul_ps(glmm_splat(r, 2), l2)))); glmm_store(dest[3], l3); } diff --git a/include/cglm/simd/sse2/mat4.h b/include/cglm/simd/sse2/mat4.h index 78fac21..f5d7135 100644 --- a/include/cglm/simd/sse2/mat4.h +++ b/include/cglm/simd/sse2/mat4.h @@ -60,11 +60,10 @@ glm_mat4_mul_sse2(mat4 m1, mat4 m2, mat4 dest) { \ r = glmm_load(m2[C]); \ glmm_store(dest[C], \ - glmm_fmadd(glmm_shuff1x(r, 0), l0, \ - glmm_fmadd(glmm_shuff1x(r, 1), l1, \ - glmm_fmadd(glmm_shuff1x(r, 2), l2, \ - _mm_mul_ps(glmm_shuff1x(r, 3), \ - l3))))); + glmm_fmadd(glmm_splat(r, 0), l0, \ + glmm_fmadd(glmm_splat(r, 1), l1, \ + glmm_fmadd(glmm_splat(r, 2), l2, \ + _mm_mul_ps(glmm_splat(r, 3), l3))))); XX(0); XX(1); @@ -80,11 +79,11 @@ glm_mat4_mulv_sse2(mat4 m, vec4 v, vec4 dest) { __m128 x0, x1; x0 = glmm_load(v); - x1 = glmm_fmadd(glmm_load(m[0]), glmm_shuff1x(x0, 0), - glmm_fmadd(glmm_load(m[1]), glmm_shuff1x(x0, 1), - glmm_fmadd(glmm_load(m[2]), glmm_shuff1x(x0, 2), + x1 = glmm_fmadd(glmm_load(m[0]), glmm_splat(x0, 0), + glmm_fmadd(glmm_load(m[1]), glmm_splat(x0, 1), + glmm_fmadd(glmm_load(m[2]), glmm_splat(x0, 2), _mm_mul_ps(glmm_load(m[3]), - glmm_shuff1x(x0, 3))))); + glmm_splat(x0, 3))))); glmm_store(dest, x1); } diff --git a/include/cglm/simd/sse2/quat.h b/include/cglm/simd/sse2/quat.h index 0173f94..894d492 100644 --- a/include/cglm/simd/sse2/quat.h +++ b/include/cglm/simd/sse2/quat.h @@ -27,15 +27,15 @@ glm_quat_mul_sse2(versor p, versor q, versor dest) { xp = glmm_load(p); /* 3 2 1 0 */ xq = glmm_load(q); - r = _mm_mul_ps(glmm_shuff1x(xp, 3), xq); + r = _mm_mul_ps(glmm_splat(xp, 3), xq); - x0 = _mm_xor_ps(glmm_shuff1x(xp, 0), _mm_set_ps(-0.f, 0.f, -0.f, 0.f)); + x0 = _mm_xor_ps(glmm_splat(xp, 0), _mm_set_ps(-0.f, 0.f, -0.f, 0.f)); r = _mm_add_ps(r, _mm_mul_ps(x0, glmm_shuff1(xq, 0, 1, 2, 3))); - x0 = _mm_xor_ps(glmm_shuff1x(xp, 1), _mm_set_ps(-0.f, -0.f, 0.f, 0.f)); + x0 = _mm_xor_ps(glmm_splat(xp, 1), _mm_set_ps(-0.f, -0.f, 0.f, 0.f)); r = _mm_add_ps(r, _mm_mul_ps(x0, glmm_shuff1(xq, 1, 0, 3, 2))); - x0 = _mm_xor_ps(glmm_shuff1x(xp, 2), _mm_set_ps(-0.f, 0.f, 0.f, -0.f)); + x0 = _mm_xor_ps(glmm_splat(xp, 2), _mm_set_ps(-0.f, 0.f, 0.f, -0.f)); r = _mm_add_ps(r, _mm_mul_ps(x0, glmm_shuff1(xq, 2, 3, 0, 1))); glmm_store(dest, r); diff --git a/include/cglm/simd/x86.h b/include/cglm/simd/x86.h index 5848b7b..43cb413 100644 --- a/include/cglm/simd/x86.h +++ b/include/cglm/simd/x86.h @@ -27,7 +27,11 @@ _mm_shuffle_ps(xmm, xmm, _MM_SHUFFLE(z, y, x, w)) #endif +#define glmm_splat(x, lane) glmm_shuff1(x, lane, lane, lane, lane) + +/* glmm_shuff1x() is DEPRECATED!, use glmm_splat() */ #define glmm_shuff1x(xmm, x) glmm_shuff1(xmm, x, x, x, x) + #define glmm_shuff2(a, b, z0, y0, x0, w0, z1, y1, x1, w1) \ glmm_shuff1(_mm_shuffle_ps(a, b, _MM_SHUFFLE(z0, y0, x0, w0)), \ z1, y1, x1, w1) @@ -89,7 +93,7 @@ glmm_vhmin(__m128 v) { __m128 x0, x1, x2; x0 = _mm_movehl_ps(v, v); /* [2, 3, 2, 3] */ x1 = _mm_min_ps(x0, v); /* [0|2, 1|3, 2|2, 3|3] */ - x2 = glmm_shuff1x(x1, 1); /* [1|3, 1|3, 1|3, 1|3] */ + x2 = glmm_splat(x1, 1); /* [1|3, 1|3, 1|3, 1|3] */ return _mm_min_ss(x1, x2); } @@ -105,7 +109,7 @@ glmm_vhmax(__m128 v) { __m128 x0, x1, x2; x0 = _mm_movehl_ps(v, v); /* [2, 3, 2, 3] */ x1 = _mm_max_ps(x0, v); /* [0|2, 1|3, 2|2, 3|3] */ - x2 = glmm_shuff1x(x1, 1); /* [1|3, 1|3, 1|3, 1|3] */ + x2 = glmm_splat(x1, 1); /* [1|3, 1|3, 1|3, 1|3] */ return _mm_max_ss(x1, x2); } diff --git a/include/cglm/vec4-ext.h b/include/cglm/vec4-ext.h index dfc6a4e..e4e20cb 100644 --- a/include/cglm/vec4-ext.h +++ b/include/cglm/vec4-ext.h @@ -224,10 +224,10 @@ glm_vec4_sign(vec4 v, vec4 dest) { x0 = glmm_load(v); x1 = _mm_set_ps(0.0f, 0.0f, 1.0f, -1.0f); - x2 = glmm_shuff1x(x1, 2); + x2 = glmm_splat(x1, 2); - x3 = _mm_and_ps(_mm_cmpgt_ps(x0, x2), glmm_shuff1x(x1, 1)); - x4 = _mm_and_ps(_mm_cmplt_ps(x0, x2), glmm_shuff1x(x1, 0)); + x3 = _mm_and_ps(_mm_cmpgt_ps(x0, x2), glmm_splat(x1, 1)); + x4 = _mm_and_ps(_mm_cmplt_ps(x0, x2), glmm_splat(x1, 0)); glmm_store(dest, _mm_or_ps(x3, x4)); #else