From 32a477ef07141ef9cd1055615960a11c449b0716 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Sun, 31 Mar 2024 23:35:15 +0300 Subject: [PATCH] separate SSE and SSE2 --- include/cglm/simd/intrin.h | 14 ++++++++++++-- include/cglm/simd/x86.h | 23 +++++++++++++++++------ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/include/cglm/simd/intrin.h b/include/cglm/simd/intrin.h index 137df65..11c46e5 100644 --- a/include/cglm/simd/intrin.h +++ b/include/cglm/simd/intrin.h @@ -10,6 +10,9 @@ #if defined( _MSC_VER ) # if (defined(_M_AMD64) || defined(_M_X64)) || _M_IX86_FP == 2 +# ifndef __SSE__ +# define __SSE__ +# endif # ifndef __SSE2__ # define __SSE2__ # endif @@ -24,15 +27,22 @@ # endif #endif -#if defined( __SSE__ ) || defined( __SSE2__ ) +#if defined(__SSE__) # include -# include # define CGLM_SSE_FP 1 # ifndef CGLM_SIMD_x86 # define CGLM_SIMD_x86 # endif #endif +#if defined(__SSE2__) +# include +# define CGLM_SSE2_FP 1 +# ifndef CGLM_SIMD_x86 +# define CGLM_SIMD_x86 +# endif +#endif + #if defined(__SSE3__) # include # ifndef CGLM_SIMD_x86 diff --git a/include/cglm/simd/x86.h b/include/cglm/simd/x86.h index 8fd5a72..eb227e7 100644 --- a/include/cglm/simd/x86.h +++ b/include/cglm/simd/x86.h @@ -21,7 +21,7 @@ #define glmm_set1(x) _mm_set1_ps(x) #define glmm_128 __m128 -#ifdef CGLM_USE_INT_DOMAIN +#if defined(CGLM_USE_INT_DOMAIN) && defined(__SSE2__) # define glmm_shuff1(xmm, z, y, x, w) \ _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \ _MM_SHUFFLE(z, y, x, w))) @@ -56,16 +56,27 @@ /* Note that `0x80000000` corresponds to `INT_MIN` for a 32-bit int. 
*/ #define GLMM_NEGZEROf ((int)0x80000000) /* 0x80000000 ---> -0.0f */ +#define GLMM_POSZEROf ((int)0x00000000) /* 0x00000000 ---> +0.0f */ -#define GLMM__SIGNMASKf(X, Y, Z, W) \ +#if defined(__SSE2__) +# define GLMM__SIGNMASKf(X, Y, Z, W) \ _mm_castsi128_ps(_mm_set_epi32(X, Y, Z, W)) /* _mm_set_ps(X, Y, Z, W); */ +#else +# define GLMM__SIGNMASKf(X, Y, Z, W) _mm_set_ps(X, Y, Z, W) /* NOTE(review): the arguments are the int constants GLMM_NEGZEROf/GLMM_POSZEROf, so _mm_set_ps receives them converted by value ((int)0x80000000 becomes -2147483648.0f), not reinterpreted as bit patterns like the SSE2 branch - confirm the non-SSE2 mask is correct */ +#endif -#define glmm_float32x4_SIGNMASK_PNPN GLMM__SIGNMASKf(0, GLMM_NEGZEROf, 0, GLMM_NEGZEROf) -#define glmm_float32x4_SIGNMASK_NPNP GLMM__SIGNMASKf(GLMM_NEGZEROf, 0, GLMM_NEGZEROf, 0) -#define glmm_float32x4_SIGNMASK_NPPN GLMM__SIGNMASKf(GLMM_NEGZEROf, 0, 0, GLMM_NEGZEROf) +#define glmm_float32x4_SIGNMASK_PNPN GLMM__SIGNMASKf(GLMM_POSZEROf, GLMM_NEGZEROf, GLMM_POSZEROf, GLMM_NEGZEROf) +#define glmm_float32x4_SIGNMASK_NPNP GLMM__SIGNMASKf(GLMM_NEGZEROf, GLMM_POSZEROf, GLMM_NEGZEROf, GLMM_POSZEROf) +#define glmm_float32x4_SIGNMASK_NPPN GLMM__SIGNMASKf(GLMM_NEGZEROf, GLMM_POSZEROf, GLMM_POSZEROf, GLMM_NEGZEROf) + +/* fast math prevents -0.0f from working, so the SSE2 path builds the mask from the integer bit pattern */ +#if defined(__SSE2__) +# define glmm_float32x4_SIGNMASK_NEG _mm_castsi128_ps(_mm_set1_epi32(GLMM_NEGZEROf)) /* _mm_set1_ps(-0.0f) */ +#else +# define glmm_float32x4_SIGNMASK_NEG _mm_set1_ps(GLMM_NEGZEROf) /* NOTE(review): GLMM_NEGZEROf is an int here, so this is a value conversion to float rather than the -0.0f bit pattern - confirm */ +#endif -#define glmm_float32x4_SIGNMASK_NEG _mm_castsi128_ps(_mm_set1_epi32(GLMM_NEGZEROf)) /* _mm_set1_ps(-0.0f) */ #define glmm_float32x8_SIGNMASK_NEG _mm256_castsi256_ps(_mm256_set1_epi32(GLMM_NEGZEROf)) static inline