# Patch summary (free text ahead of the first "diff --git" header):
#
# Touches include/cglm/simd/arm.h and include/cglm/simd/x86.h and adds the
# same set of macro names to both, so callers can use one spelling for
# either SIMD backend:
#
#   glmm_set1(x)     - arm.h: vdupq_n_f32(x)      x86.h: _mm_set1_ps(x)
#   glmm_128         - arm.h: float32x4_t         x86.h: __m128
#   glmm_splat_x(x)  - arm.h: vdupq_lane_f32(vget_low_f32(x), 0)
#                      x86.h: glmm_splat(x, 0)
#   glmm_splat_y(x)  - arm.h: vdupq_lane_f32(vget_low_f32(x), 1)
#                      x86.h: glmm_splat(x, 1)
#   glmm_splat_z(x)  - arm.h: vdupq_lane_f32(vget_high_f32(x), 0)
#                      x86.h: glmm_splat(x, 2)
#   glmm_splat_w(x)  - arm.h: vdupq_lane_f32(vget_high_f32(x), 1)
#                      x86.h: glmm_splat(x, 3)
#
# In arm.h the x/y variants read lanes 0/1 of the low half of the vector and
# the z/w variants read lanes 0/1 of the high half. In x86.h the four
# variants forward to the existing glmm_splat(x, lane) macro with lane 0..3;
# glmm_splat itself expands to glmm_shuff1(x, lane, lane, lane, lane).
#
# NOTE(review): the patch text below sits on a single line; the per-line
# structure of the hunks (including blank context lines and any column
# alignment) appears to have been flattened and would need to be restored
# before this can be applied with a patch tool - confirm against the
# original commit.
diff --git a/include/cglm/simd/arm.h b/include/cglm/simd/arm.h index 1153694..4c8a3b4 100644 --- a/include/cglm/simd/arm.h +++ b/include/cglm/simd/arm.h @@ -13,6 +13,14 @@ #define glmm_load(p) vld1q_f32(p) #define glmm_store(p, a) vst1q_f32(p, a) +#define glmm_set1(x) vdupq_n_f32(x) +#define glmm_128 float32x4_t + +#define glmm_splat_x(x) vdupq_lane_f32(vget_low_f32(x), 0) +#define glmm_splat_y(x) vdupq_lane_f32(vget_low_f32(x), 1) +#define glmm_splat_z(x) vdupq_lane_f32(vget_high_f32(x), 0) +#define glmm_splat_w(x) vdupq_lane_f32(vget_high_f32(x), 1) + static inline float32x4_t glmm_abs(float32x4_t v) { diff --git a/include/cglm/simd/x86.h b/include/cglm/simd/x86.h index 43cb413..df32491 100644 --- a/include/cglm/simd/x86.h +++ b/include/cglm/simd/x86.h @@ -18,6 +18,9 @@ # define glmm_store(p, a) _mm_store_ps(p, a) #endif +#define glmm_set1(x) _mm_set1_ps(x) +#define glmm_128 __m128 + #ifdef CGLM_USE_INT_DOMAIN # define glmm_shuff1(xmm, z, y, x, w) \ _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \ @@ -29,6 +32,11 @@ #define glmm_splat(x, lane) glmm_shuff1(x, lane, lane, lane, lane) +#define glmm_splat_x(x) glmm_splat(x, 0) +#define glmm_splat_y(x) glmm_splat(x, 1) +#define glmm_splat_z(x) glmm_splat(x, 2) +#define glmm_splat_w(x) glmm_splat(x, 3) + /* glmm_shuff1x() is DEPRECATED!, use glmm_splat() */ #define glmm_shuff1x(xmm, x) glmm_shuff1(xmm, x, x, x, x)