swizzling functions for NEON

This commit is contained in:
Recep Aslantas
2021-04-24 00:00:00 +03:00
parent f303984aad
commit 65292a94a6
2 changed files with 45 additions and 0 deletions

View File

@@ -21,6 +21,46 @@
/* Broadcast lane 2 of v (lane 0 of its high half) into all four lanes. */
#define glmm_splat_z(v) vdupq_lane_f32(vget_high_f32(v), 0)
/* Broadcast lane 3 of v (lane 1 of its high half) into all four lanes. */
#define glmm_splat_w(v) vdupq_lane_f32(vget_high_f32(v), 1)
/* Expands to the header of a static inline function called FN that takes a
 * single glmm_128 argument named `v` and returns a float32x4_t. The function
 * body is written directly after the macro invocation. */
#define SWIZZLE(FN) \
  static inline float32x4_t FN(glmm_128 v)
/* TODO:
 * Decide whether component-letter names (glmm_xxxx(), glmm_yyyy(), ...)
 * would be better than the lane-index names used here (glmm_0000(), ...).
 */
SWIZZLE(glmm_0000) { return vdupq_lane_f32(vget_low_f32(v), 0); }
SWIZZLE(glmm_1111) { return vdupq_lane_f32(vget_low_f32(v), 1); }
SWIZZLE(glmm_2222) { return vdupq_lane_f32(vget_high_f32(v), 0); }
SWIZZLE(glmm_3333) { return vdupq_lane_f32(vget_high_f32(v), 1); }
SWIZZLE(glmm_1032) { return vrev64q_f32(v); }
SWIZZLE(glmm_0101) { float32x2_t vt = vget_low_f32(v); return vcombine_f32(vt, vt); }
SWIZZLE(glmm_2323) { float32x2_t vt = vget_high_f32(v); return vcombine_f32(vt, vt); }
SWIZZLE(glmm_1010) { float32x2_t vt = vrev64_f32(vget_low_f32(v)); return vcombine_f32(vt, vt); }
SWIZZLE(glmm_3232) { float32x2_t vt = vrev64_f32(vget_high_f32(v)); return vcombine_f32(vt, vt); }
SWIZZLE(glmm_0132) { return vcombine_f32(vget_low_f32(v), vrev64_f32(vget_high_f32(v))); }
SWIZZLE(glmm_1023) { return vcombine_f32(vrev64_f32(vget_low_f32(v)), vget_high_f32(v)); }
SWIZZLE(glmm_2310) { return vcombine_f32(vget_high_f32(v), vrev64_f32(vget_low_f32(v))); }
SWIZZLE(glmm_3201) { return vcombine_f32(vrev64_f32(vget_high_f32(v)), vget_low_f32(v)); }
SWIZZLE(glmm_3210) { return vcombine_f32(vrev64_f32(vget_high_f32(v)), vrev64_f32(vget_low_f32(v))); }
SWIZZLE(glmm_0022) { return vtrnq_f32(v, v).val[0]; }
SWIZZLE(glmm_1133) { return vtrnq_f32(v, v).val[1]; }
SWIZZLE(glmm_0011) { return vzipq_f32(v, v).val[0]; }
SWIZZLE(glmm_2233) { return vzipq_f32(v, v).val[1]; }
SWIZZLE(glmm_0202) { return vuzpq_f32(v, v).val[0]; }
SWIZZLE(glmm_1313) { return vuzpq_f32(v, v).val[1]; }
SWIZZLE(glmm_1230) { return vextq_f32(v, v, 1); }
SWIZZLE(glmm_2301) { return vextq_f32(v, v, 2); }
SWIZZLE(glmm_3012) { return vextq_f32(v, v, 3); }
#undef SWIZZLE
static inline
float32x4_t
glmm_abs(float32x4_t v) {