mirror of
https://github.com/recp/cglm.git
synced 2025-10-04 01:00:46 +00:00
simd128: inline _mm_cvtss_f32
This commit is contained in:
@@ -262,7 +262,8 @@ glm_quat_normalize_to(versor q, versor dest) {
|
||||
|
||||
x0 = glmm_load(q);
|
||||
xdot = glmm_vdot(x0, x0);
|
||||
- dot = _mm_cvtss_f32(xdot);
|
||||
+ // dot = _mm_cvtss_f32(xdot);
|
||||
+ dot = wasm_f32x4_extract_lane(xdot, 0);
|
||||
|
||||
if (dot <= 0.0f) {
|
||||
glm_quat_identity(dest);
|
||||
|
@@ -20,8 +20,6 @@
|
||||
#define glmm_splat_z(x) glmm_splat(x, 2)
|
||||
#define glmm_splat_w(x) glmm_splat(x, 3)
|
||||
|
||||
- #define _mm_cvtss_f32(v) wasm_f32x4_extract_lane(v, 0)
|
||||
|
||||
static inline
|
||||
glmm_128
|
||||
glmm_abs(glmm_128 x) {
|
||||
@@ -52,7 +50,7 @@ glmm_vhadds(glmm_128 v) {
|
||||
static inline
|
||||
float
|
||||
glmm_hadd(glmm_128 v) {
|
||||
- return _mm_cvtss_f32(glmm_vhadds(v));
|
||||
+ return wasm_f32x4_extract_lane(glmm_vhadds(v), 0);
|
||||
}
|
||||
|
||||
static inline
|
||||
@@ -68,7 +66,7 @@ glmm_vhmin(glmm_128 v) {
|
||||
static inline
|
||||
float
|
||||
glmm_hmin(glmm_128 v) {
|
||||
- return _mm_cvtss_f32(glmm_vhmin(v));
|
||||
+ return wasm_f32x4_extract_lane(glmm_vhmin(v), 0);
|
||||
}
|
||||
|
||||
static inline
|
||||
@@ -106,7 +104,7 @@ glmm_vdot(glmm_128 a, glmm_128 b) {
|
||||
static inline
|
||||
float
|
||||
glmm_dot(glmm_128 a, glmm_128 b) {
|
||||
- return _mm_cvtss_f32(glmm_vdots(a, b));
|
||||
+ return wasm_f32x4_extract_lane(glmm_vdots(a, b), 0);
|
||||
}
|
||||
|
||||
static inline
|
||||
@@ -114,25 +112,26 @@ float
|
||||
glmm_norm(glmm_128 a) {
|
||||
glmm_128 x0;
|
||||
x0 = glmm_vhadds(wasm_f32x4_mul(a, a));
|
||||
- return _mm_cvtss_f32(wasm_i32x4_shuffle(x0, wasm_f32x4_sqrt(x0),4, 1, 2, 3));
|
||||
+ return wasm_f32x4_extract_lane(
|
||||
+ wasm_i32x4_shuffle(x0, wasm_f32x4_sqrt(x0),4, 1, 2, 3), 0);
|
||||
}
|
||||
|
||||
static inline
|
||||
float
|
||||
glmm_norm2(glmm_128 a) {
|
||||
- return _mm_cvtss_f32(glmm_vhadds(wasm_f32x4_mul(a, a)));
|
||||
+ return wasm_f32x4_extract_lane(glmm_vhadds(wasm_f32x4_mul(a, a)), 0);
|
||||
}
|
||||
|
||||
static inline
|
||||
float
|
||||
glmm_norm_one(glmm_128 a) {
|
||||
- return _mm_cvtss_f32(glmm_vhadds(glmm_abs(a)));
|
||||
+ return wasm_f32x4_extract_lane(glmm_vhadds(glmm_abs(a)), 0);
|
||||
}
|
||||
|
||||
static inline
|
||||
float
|
||||
glmm_norm_inf(glmm_128 a) {
|
||||
- return _mm_cvtss_f32(glmm_vhmax(glmm_abs(a)));
|
||||
+ return wasm_f32x4_extract_lane(glmm_vhmax(glmm_abs(a)), 0);
|
||||
}
|
||||
|
||||
static inline
|
||||
|
@@ -542,9 +542,9 @@ glm_vec4_addadd(vec4 a, vec4 b, vec4 dest) {
|
||||
_mm_add_ps(glmm_load(a),
|
||||
glmm_load(b))));
|
||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
||||
- glmm_store(dest, wasm_f32x4_add(glmm_load(dest),
|
||||
- wasm_f32x4_add(glmm_load(a),
|
||||
- glmm_load(b))));
|
||||
+ glmm_store(dest, wasm_f32x4_add(
|
||||
+ glmm_load(dest),
|
||||
+ wasm_f32x4_add(glmm_load(a), glmm_load(b))));
|
||||
#elif defined(CGLM_NEON_FP)
|
||||
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
|
||||
vaddq_f32(vld1q_f32(a),
|
||||
@@ -574,9 +574,9 @@ glm_vec4_subadd(vec4 a, vec4 b, vec4 dest) {
|
||||
_mm_sub_ps(glmm_load(a),
|
||||
glmm_load(b))));
|
||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
||||
- glmm_store(dest, wasm_f32x4_add(glmm_load(dest),
|
||||
- wasm_f32x4_sub(glmm_load(a),
|
||||
- glmm_load(b))));
|
||||
+ glmm_store(dest, wasm_f32x4_add(
|
||||
+ glmm_load(dest),
|
||||
+ wasm_f32x4_sub(glmm_load(a), glmm_load(b))));
|
||||
#elif defined(CGLM_NEON_FP)
|
||||
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
|
||||
vsubq_f32(vld1q_f32(a),
|
||||
@@ -650,9 +650,9 @@ glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) {
|
||||
_mm_max_ps(glmm_load(a),
|
||||
glmm_load(b))));
|
||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
||||
- glmm_store(dest, wasm_f32x4_add(glmm_load(dest),
|
||||
- wasm_f32x4_max(glmm_load(a),
|
||||
- glmm_load(b))));
|
||||
+ glmm_store(dest, wasm_f32x4_add(
|
||||
+ glmm_load(dest),
|
||||
+ wasm_f32x4_max(glmm_load(a), glmm_load(b))));
|
||||
#elif defined(CGLM_NEON_FP)
|
||||
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
|
||||
vmaxq_f32(vld1q_f32(a),
|
||||
@@ -682,9 +682,9 @@ glm_vec4_minadd(vec4 a, vec4 b, vec4 dest) {
|
||||
_mm_min_ps(glmm_load(a),
|
||||
glmm_load(b))));
|
||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
||||
- glmm_store(dest, wasm_f32x4_add(glmm_load(dest),
|
||||
- wasm_f32x4_min(glmm_load(a),
|
||||
- glmm_load(b))));
|
||||
+ glmm_store(dest, wasm_f32x4_add(
|
||||
+ glmm_load(dest),
|
||||
+ wasm_f32x4_min(glmm_load(a), glmm_load(b))));
|
||||
#elif defined(CGLM_NEON_FP)
|
||||
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
|
||||
vminq_f32(vld1q_f32(a),
|
||||
@@ -709,7 +709,8 @@ glm_vec4_negate_to(vec4 v, vec4 dest) {
|
||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
||||
glmm_store(dest, _mm_xor_ps(glmm_load(v), _mm_set1_ps(-0.0f)));
|
||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
||||
- glmm_store(dest, wasm_v128_xor(glmm_load(v), wasm_f32x4_const_splat(-0.0f)));
|
||||
+ glmm_store(dest, wasm_v128_xor(glmm_load(v),
|
||||
+ wasm_f32x4_const_splat(-0.0f)));
|
||||
#elif defined(CGLM_NEON_FP)
|
||||
vst1q_f32(dest, vnegq_f32(vld1q_f32(v)));
|
||||
#else
|
||||
@@ -760,7 +761,8 @@ glm_vec4_normalize_to(vec4 v, vec4 dest) {
|
||||
|
||||
x0 = glmm_load(v);
|
||||
xdot = glmm_vdot(x0, x0);
|
||||
- dot = _mm_cvtss_f32(xdot);
|
||||
+ // dot = _mm_cvtss_f32(xdot);
|
||||
+ dot = wasm_f32x4_extract_lane(xdot, 0);
|
||||
|
||||
if (dot == 0.0f) {
|
||||
glmm_store(dest, wasm_f32x4_const_splat(0.f));
|
||||
@@ -903,8 +905,9 @@ glm_vec4_clamp(vec4 v, float minVal, float maxVal) {
|
||||
glmm_store(v, _mm_min_ps(_mm_max_ps(glmm_load(v), _mm_set1_ps(minVal)),
|
||||
_mm_set1_ps(maxVal)));
|
||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
||||
- glmm_store(v, wasm_f32x4_min(wasm_f32x4_max(glmm_load(v), wasm_f32x4_splat(minVal)),
|
||||
- wasm_f32x4_splat(maxVal)));
|
||||
+ glmm_store(v, wasm_f32x4_min(
|
||||
+ wasm_f32x4_max(glmm_load(v), wasm_f32x4_splat(minVal)),
|
||||
+ wasm_f32x4_splat(maxVal)));
|
||||
#elif defined(CGLM_NEON_FP)
|
||||
vst1q_f32(v, vminq_f32(vmaxq_f32(vld1q_f32(v), vdupq_n_f32(minVal)),
|
||||
vdupq_n_f32(maxVal)));
|
||||
|
Reference in New Issue
Block a user