mirror of
https://github.com/recp/cglm.git
synced 2026-01-05 06:40:06 +00:00
simd128: handle both sse2 and simd128 enabled by Emscripten
https://github.com/recp/cglm/pull/286#issuecomment-1492985403
This commit is contained in:
@@ -137,9 +137,9 @@ glm_vec4_copy3(vec4 a, vec3 dest) {
 CGLM_INLINE
 void
 glm_vec4_copy(vec4 v, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(dest, glmm_load(v));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#elif defined( __SSE__ ) || defined( __SSE2__ )
 glmm_store(dest, glmm_load(v));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(dest, vld1q_f32(v));
@@ -181,10 +181,10 @@ glm_vec4_ucopy(vec4 v, vec4 dest) {
 CGLM_INLINE
 void
 glm_vec4_zero(vec4 v) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(v, _mm_setzero_ps());
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(v, wasm_f32x4_const_splat(0.f));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(v, _mm_setzero_ps());
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(v, vdupq_n_f32(0.0f));
 #else
@@ -203,10 +203,10 @@ glm_vec4_zero(vec4 v) {
 CGLM_INLINE
 void
 glm_vec4_one(vec4 v) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(v, _mm_set1_ps(1.0f));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(v, wasm_f32x4_const_splat(1.0f));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(v, _mm_set1_ps(1.0f));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(v, vdupq_n_f32(1.0f));
 #else
@@ -331,10 +331,10 @@ glm_vec4_norm_inf(vec4 v) {
 CGLM_INLINE
 void
 glm_vec4_add(vec4 a, vec4 b, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(dest, _mm_add_ps(glmm_load(a), glmm_load(b)));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(dest, wasm_f32x4_add(glmm_load(a), glmm_load(b)));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(dest, _mm_add_ps(glmm_load(a), glmm_load(b)));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(dest, vaddq_f32(vld1q_f32(a), vld1q_f32(b)));
 #else
@@ -355,10 +355,10 @@ glm_vec4_add(vec4 a, vec4 b, vec4 dest) {
 CGLM_INLINE
 void
 glm_vec4_adds(vec4 v, float s, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(dest, _mm_add_ps(glmm_load(v), _mm_set1_ps(s)));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(dest, wasm_f32x4_add(glmm_load(v), wasm_f32x4_splat(s)));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(dest, _mm_add_ps(glmm_load(v), _mm_set1_ps(s)));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(dest, vaddq_f32(vld1q_f32(v), vdupq_n_f32(s)));
 #else
@@ -379,10 +379,10 @@ glm_vec4_adds(vec4 v, float s, vec4 dest) {
 CGLM_INLINE
 void
 glm_vec4_sub(vec4 a, vec4 b, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(dest, _mm_sub_ps(glmm_load(a), glmm_load(b)));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(dest, wasm_f32x4_sub(glmm_load(a), glmm_load(b)));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(dest, _mm_sub_ps(glmm_load(a), glmm_load(b)));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(dest, vsubq_f32(vld1q_f32(a), vld1q_f32(b)));
 #else
@@ -403,10 +403,10 @@ glm_vec4_sub(vec4 a, vec4 b, vec4 dest) {
 CGLM_INLINE
 void
 glm_vec4_subs(vec4 v, float s, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(dest, _mm_sub_ps(glmm_load(v), _mm_set1_ps(s)));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(dest, wasm_f32x4_sub(glmm_load(v), wasm_f32x4_splat(s)));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(dest, _mm_sub_ps(glmm_load(v), _mm_set1_ps(s)));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(dest, vsubq_f32(vld1q_f32(v), vdupq_n_f32(s)));
 #else
@@ -427,10 +427,10 @@ glm_vec4_subs(vec4 v, float s, vec4 dest) {
 CGLM_INLINE
 void
 glm_vec4_mul(vec4 a, vec4 b, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(dest, _mm_mul_ps(glmm_load(a), glmm_load(b)));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(dest, wasm_f32x4_mul(glmm_load(a), glmm_load(b)));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(dest, _mm_mul_ps(glmm_load(a), glmm_load(b)));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(dest, vmulq_f32(vld1q_f32(a), vld1q_f32(b)));
 #else
@@ -451,10 +451,10 @@ glm_vec4_mul(vec4 a, vec4 b, vec4 dest) {
 CGLM_INLINE
 void
 glm_vec4_scale(vec4 v, float s, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(dest, _mm_mul_ps(glmm_load(v), _mm_set1_ps(s)));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(dest, wasm_f32x4_mul(glmm_load(v), wasm_f32x4_splat(s)));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(dest, _mm_mul_ps(glmm_load(v), _mm_set1_ps(s)));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(dest, vmulq_f32(vld1q_f32(v), vdupq_n_f32(s)));
 #else
@@ -516,10 +516,10 @@ glm_vec4_div(vec4 a, vec4 b, vec4 dest) {
 CGLM_INLINE
 void
 glm_vec4_divs(vec4 v, float s, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(dest, _mm_div_ps(glmm_load(v), _mm_set1_ps(s)));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(dest, wasm_f32x4_div(glmm_load(v), wasm_f32x4_splat(s)));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(dest, _mm_div_ps(glmm_load(v), _mm_set1_ps(s)));
 #else
 glm_vec4_scale(v, 1.0f / s, dest);
 #endif
@@ -537,14 +537,14 @@ glm_vec4_divs(vec4 v, float s, vec4 dest) {
 CGLM_INLINE
 void
 glm_vec4_addadd(vec4 a, vec4 b, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(dest, _mm_add_ps(glmm_load(dest),
-_mm_add_ps(glmm_load(a),
-glmm_load(b))));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(dest, wasm_f32x4_add(
 glmm_load(dest),
 wasm_f32x4_add(glmm_load(a), glmm_load(b))));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(dest, _mm_add_ps(glmm_load(dest),
+_mm_add_ps(glmm_load(a),
+glmm_load(b))));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
 vaddq_f32(vld1q_f32(a),
@@ -569,14 +569,14 @@ glm_vec4_addadd(vec4 a, vec4 b, vec4 dest) {
 CGLM_INLINE
 void
 glm_vec4_subadd(vec4 a, vec4 b, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(dest, _mm_add_ps(glmm_load(dest),
-_mm_sub_ps(glmm_load(a),
-glmm_load(b))));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(dest, wasm_f32x4_add(
 glmm_load(dest),
 wasm_f32x4_sub(glmm_load(a), glmm_load(b))));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(dest, _mm_add_ps(glmm_load(dest),
+_mm_sub_ps(glmm_load(a),
+glmm_load(b))));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
 vsubq_f32(vld1q_f32(a),
@@ -645,14 +645,14 @@ glm_vec4_muladds(vec4 a, float s, vec4 dest) {
 CGLM_INLINE
 void
 glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(dest, _mm_add_ps(glmm_load(dest),
-_mm_max_ps(glmm_load(a),
-glmm_load(b))));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(dest, wasm_f32x4_add(
 glmm_load(dest),
 wasm_f32x4_max(glmm_load(a), glmm_load(b))));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(dest, _mm_add_ps(glmm_load(dest),
+_mm_max_ps(glmm_load(a),
+glmm_load(b))));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
 vmaxq_f32(vld1q_f32(a),
@@ -677,14 +677,14 @@ glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) {
 CGLM_INLINE
 void
 glm_vec4_minadd(vec4 a, vec4 b, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(dest, _mm_add_ps(glmm_load(dest),
-_mm_min_ps(glmm_load(a),
-glmm_load(b))));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(dest, wasm_f32x4_add(
 glmm_load(dest),
 wasm_f32x4_min(glmm_load(a), glmm_load(b))));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(dest, _mm_add_ps(glmm_load(dest),
+_mm_min_ps(glmm_load(a),
+glmm_load(b))));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
 vminq_f32(vld1q_f32(a),
@@ -706,11 +706,11 @@ glm_vec4_minadd(vec4 a, vec4 b, vec4 dest) {
 CGLM_INLINE
 void
 glm_vec4_negate_to(vec4 v, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(dest, _mm_xor_ps(glmm_load(v), _mm_set1_ps(-0.0f)));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(dest, wasm_v128_xor(glmm_load(v),
 wasm_f32x4_const_splat(-0.0f)));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(dest, _mm_xor_ps(glmm_load(v), _mm_set1_ps(-0.0f)));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(dest, vnegq_f32(vld1q_f32(v)));
 #else
@@ -741,21 +741,7 @@ glm_vec4_negate(vec4 v) {
 CGLM_INLINE
 void
 glm_vec4_normalize_to(vec4 v, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-__m128 xdot, x0;
-float dot;
-
-x0 = glmm_load(v);
-xdot = glmm_vdot(x0, x0);
-dot = _mm_cvtss_f32(xdot);
-
-if (dot == 0.0f) {
-glmm_store(dest, _mm_setzero_ps());
-return;
-}
-
-glmm_store(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_128 xdot, x0;
 float dot;

@@ -770,6 +756,20 @@ glm_vec4_normalize_to(vec4 v, vec4 dest) {
 }

 glmm_store(dest, wasm_f32x4_div(x0, wasm_f32x4_sqrt(xdot)));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+__m128 xdot, x0;
+float dot;
+
+x0 = glmm_load(v);
+xdot = glmm_vdot(x0, x0);
+dot = _mm_cvtss_f32(xdot);
+
+if (dot == 0.0f) {
+glmm_store(dest, _mm_setzero_ps());
+return;
+}
+
+glmm_store(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
 #else
 float norm;

@@ -805,10 +805,10 @@ glm_vec4_normalize(vec4 v) {
 CGLM_INLINE
 float
 glm_vec4_distance(vec4 a, vec4 b) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-return glmm_norm(_mm_sub_ps(glmm_load(a), glmm_load(b)));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 return glmm_norm(wasm_f32x4_sub(glmm_load(a), glmm_load(b)));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+return glmm_norm(_mm_sub_ps(glmm_load(a), glmm_load(b)));
 #elif defined(CGLM_NEON_FP)
 return glmm_norm(vsubq_f32(glmm_load(a), glmm_load(b)));
 #else
@@ -829,10 +829,10 @@ glm_vec4_distance(vec4 a, vec4 b) {
 CGLM_INLINE
 float
 glm_vec4_distance2(vec4 a, vec4 b) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-return glmm_norm2(_mm_sub_ps(glmm_load(a), glmm_load(b)));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 return glmm_norm2(wasm_f32x4_sub(glmm_load(a), glmm_load(b)));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+return glmm_norm2(_mm_sub_ps(glmm_load(a), glmm_load(b)));
 #elif defined(CGLM_NEON_FP)
 return glmm_norm2(vsubq_f32(glmm_load(a), glmm_load(b)));
 #else
@@ -853,10 +853,10 @@ glm_vec4_distance2(vec4 a, vec4 b) {
 CGLM_INLINE
 void
 glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(dest, _mm_max_ps(glmm_load(a), glmm_load(b)));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(dest, wasm_f32x4_max(glmm_load(a), glmm_load(b)));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(dest, _mm_max_ps(glmm_load(a), glmm_load(b)));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(dest, vmaxq_f32(vld1q_f32(a), vld1q_f32(b)));
 #else
@@ -877,10 +877,10 @@ glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) {
 CGLM_INLINE
 void
 glm_vec4_minv(vec4 a, vec4 b, vec4 dest) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(dest, _mm_min_ps(glmm_load(a), glmm_load(b)));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(dest, wasm_f32x4_min(glmm_load(a), glmm_load(b)));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(dest, _mm_min_ps(glmm_load(a), glmm_load(b)));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(dest, vminq_f32(vld1q_f32(a), vld1q_f32(b)));
 #else
@@ -901,13 +901,13 @@ glm_vec4_minv(vec4 a, vec4 b, vec4 dest) {
 CGLM_INLINE
 void
 glm_vec4_clamp(vec4 v, float minVal, float maxVal) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(v, _mm_min_ps(_mm_max_ps(glmm_load(v), _mm_set1_ps(minVal)),
-_mm_set1_ps(maxVal)));
-#elif defined(__wasm__) && defined(__wasm_simd128__)
+#if defined(__wasm__) && defined(__wasm_simd128__)
 glmm_store(v, wasm_f32x4_min(
 wasm_f32x4_max(glmm_load(v), wasm_f32x4_splat(minVal)),
 wasm_f32x4_splat(maxVal)));
+#elif defined( __SSE__ ) || defined( __SSE2__ )
+glmm_store(v, _mm_min_ps(_mm_max_ps(glmm_load(v), _mm_set1_ps(minVal)),
+_mm_set1_ps(maxVal)));
 #elif defined(CGLM_NEON_FP)
 vst1q_f32(v, vminq_f32(vmaxq_f32(vld1q_f32(v), vdupq_n_f32(minVal)),
 vdupq_n_f32(maxVal)));
Reference in New Issue
Block a user