From 1fb941a41b2a6e0627633185132afd590eacf6c9 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Sun, 25 Apr 2021 02:35:55 +0300 Subject: [PATCH] drop swizzling helpers for now for simplicity --- CREDITS | 5 ----- include/cglm/simd/arm.h | 44 ----------------------------------------- 2 files changed, 49 deletions(-) diff --git a/CREDITS b/CREDITS index 96341fc..c388348 100644 --- a/CREDITS +++ b/CREDITS @@ -75,11 +75,6 @@ Link to paper: http://webserver2.tecgraf.puc-rio.br/~mgattass/cg/trbRR/Fast%20Mi 14. ARM NEON: Matrix Vector Multiplication https://stackoverflow.com/a/57793352/2676533 -15. ARM NEON Vector Swizzling and Permute - -https://stackoverflow.com/questions/32536265/how-to-convert-mm-shuffle-ps-sse-intrinsic-to-neon-intrinsic -http://github.com/microsoft/DirectXMath - 16. ARM NEON Div http://github.com/microsoft/DirectXMath diff --git a/include/cglm/simd/arm.h b/include/cglm/simd/arm.h index cd4f2b8..a7009e5 100644 --- a/include/cglm/simd/arm.h +++ b/include/cglm/simd/arm.h @@ -25,50 +25,6 @@ #define glmm_splat_z(x) vdupq_lane_f32(vget_high_f32(x), 0) #define glmm_splat_w(x) vdupq_lane_f32(vget_high_f32(x), 1) -#define SWIZZLE(NAME) \ - static inline float32x4_t NAME(glmm_128 v) - -/* TODO: - * I'm not sure if glmm_xxxx(), glmm_yyyy()... is better than glmm_0000()... 
- */ - -/* Memory layout Register layout (glmm) - 0 1 2 3 -> 3 2 1 0 - */ - -SWIZZLE(glmm_0000) { return vdupq_lane_f32(vget_low_f32(v), 0); } -SWIZZLE(glmm_1111) { return vdupq_lane_f32(vget_low_f32(v), 1); } -SWIZZLE(glmm_2222) { return vdupq_lane_f32(vget_high_f32(v), 0); } -SWIZZLE(glmm_3333) { return vdupq_lane_f32(vget_high_f32(v), 1); } - -SWIZZLE(glmm_2301) { return vrev64q_f32(v); } - -SWIZZLE(glmm_1010) { float32x2_t vt = vget_low_f32(v); return vcombine_f32(vt, vt); } -SWIZZLE(glmm_3232) { float32x2_t vt = vget_high_f32(v); return vcombine_f32(vt, vt); } -SWIZZLE(glmm_0101) { float32x2_t vt = vrev64_f32(vget_low_f32(v)); return vcombine_f32(vt, vt); } -SWIZZLE(glmm_2323) { float32x2_t vt = vrev64_f32(vget_high_f32(v)); return vcombine_f32(vt, vt); } - -SWIZZLE(glmm_2310) { return vcombine_f32(vget_low_f32(v), vrev64_f32(vget_high_f32(v))); } -SWIZZLE(glmm_3201) { return vcombine_f32(vrev64_f32(vget_low_f32(v)), vget_high_f32(v)); } -SWIZZLE(glmm_0132) { return vcombine_f32(vget_high_f32(v), vrev64_f32(vget_low_f32(v))); } -SWIZZLE(glmm_1023) { return vcombine_f32(vrev64_f32(vget_high_f32(v)), vget_low_f32(v)); } -SWIZZLE(glmm_0123) { return vcombine_f32(vrev64_f32(vget_high_f32(v)), vrev64_f32(vget_low_f32(v))); } - -SWIZZLE(glmm_2200) { return vtrnq_f32(v, v).val[0]; } -SWIZZLE(glmm_3311) { return vtrnq_f32(v, v).val[1]; } - -SWIZZLE(glmm_1100) { return vzipq_f32(v, v).val[0]; } -SWIZZLE(glmm_3322) { return vzipq_f32(v, v).val[1]; } - -SWIZZLE(glmm_2020) { return vuzpq_f32(v, v).val[0]; } -SWIZZLE(glmm_3131) { return vuzpq_f32(v, v).val[1]; } - -SWIZZLE(glmm_0321) { return vextq_f32(v, v, 1); } -SWIZZLE(glmm_1032) { return vextq_f32(v, v, 2); } -SWIZZLE(glmm_2103) { return vextq_f32(v, v, 3); } - -#undef SWIZZLE - #define glmm_xor(a, b) \ vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(a), \ vreinterpretq_s32_f32(b)))