mirror of
https://github.com/recp/cglm.git
synced 2025-12-25 04:44:58 +00:00
simd: optimize vec4_distance with sse and neon
This commit is contained in:
@@ -707,10 +707,25 @@ glm_vec4_normalize(vec4 v) {
|
||||
CGLM_INLINE
|
||||
float
|
||||
glm_vec4_distance(vec4 a, vec4 b) {
|
||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
||||
__m128 x0;
|
||||
x0 = _mm_sub_ps(glmm_load(b), glmm_load(a));
|
||||
x0 = _mm_mul_ps(x0, x0);
|
||||
x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2));
|
||||
return _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(x0,
|
||||
glmm_shuff1(x0, 0, 1, 0, 1))));
|
||||
#elif defined(CGLM_NEON_FP)
|
||||
float32x4_t v0;
|
||||
float32_t r;
|
||||
v0 = vsubq_f32(vld1q_f32(a), vld1q_f32(b));
|
||||
r = vaddvq_f32(vmulq_f32(v0, v0));
|
||||
return sqrtf(r);
|
||||
#else
|
||||
return sqrtf(glm_pow2(b[0] - a[0])
|
||||
+ glm_pow2(b[1] - a[1])
|
||||
+ glm_pow2(b[2] - a[2])
|
||||
+ glm_pow2(b[3] - a[3]));
|
||||
#endif
|
||||
}
|
||||
|
||||
/*!
|
||||
|
||||
Reference in New Issue
Block a user