vec: some useful functions (#103)

These functions are added:
- abs(): absolute value
- fract(): fractional part
- norm_one(): L1 norm
- norm_inf(): infinity norm
- hadd(): horizontal add
- hmax(): horizontal max
This commit is contained in:
Luigi Castelli
2019-08-31 23:30:15 +02:00
committed by Recep Aslantas
parent 6af1f5af04
commit 27cc9c3351
14 changed files with 552 additions and 0 deletions

View File

@@ -55,6 +55,14 @@ glmc_vec3_norm(vec3 v);
CGLM_EXPORT
float
glmc_vec3_norm2(vec3 v);
/* L1 norm of v (sum of absolute component values); forwards to glm_vec3_norm_one */
CGLM_EXPORT
float
glmc_vec3_norm_one(vec3 v);
/* infinity norm of v (largest absolute component value); forwards to glm_vec3_norm_inf */
CGLM_EXPORT
float
glmc_vec3_norm_inf(vec3 v);
CGLM_EXPORT
void
@@ -281,6 +289,18 @@ glmc_vec3_isvalid(vec3 v);
CGLM_EXPORT
void
glmc_vec3_sign(vec3 v, vec3 dest);
/* writes the absolute value of each component of v into dest */
CGLM_EXPORT
void
glmc_vec3_abs(vec3 v, vec3 dest);
/* writes the fractional part of each component of v into dest */
CGLM_EXPORT
void
glmc_vec3_fract(vec3 v, vec3 dest);
/* horizontal add: returns v[0] + v[1] + v[2] */
CGLM_EXPORT
float
glmc_vec3_hadd(vec3 v);
CGLM_EXPORT
void

View File

@@ -56,6 +56,14 @@ glmc_vec4_norm(vec4 v);
CGLM_EXPORT
float
glmc_vec4_norm2(vec4 v);
/* L1 norm of v (sum of absolute component values); forwards to glm_vec4_norm_one */
CGLM_EXPORT
float
glmc_vec4_norm_one(vec4 v);
/* infinity norm of v (largest absolute component value); forwards to glm_vec4_norm_inf */
CGLM_EXPORT
float
glmc_vec4_norm_inf(vec4 v);
CGLM_EXPORT
void
@@ -258,6 +266,18 @@ glmc_vec4_isvalid(vec4 v);
CGLM_EXPORT
void
glmc_vec4_sign(vec4 v, vec4 dest);
/* writes the absolute value of each component of v into dest */
CGLM_EXPORT
void
glmc_vec4_abs(vec4 v, vec4 dest);
/* writes the fractional part of each component of v into dest */
CGLM_EXPORT
void
glmc_vec4_fract(vec4 v, vec4 dest);
/* horizontal add: returns the sum of the four components of v */
CGLM_EXPORT
float
glmc_vec4_hadd(vec4 v);
CGLM_EXPORT
void

View File

@@ -13,6 +13,12 @@
/* glmm_load(p): expands to the NEON vld1q_f32 load of four floats starting at p */
#define glmm_load(p) vld1q_f32(p)
/* glmm_store(p, a): expands to the NEON vst1q_f32 store of vector a to p */
#define glmm_store(p, a) vst1q_f32(p, a)
/* Lane-wise absolute value of a four-float NEON vector (vabsq_f32). */
static inline
float32x4_t
glmm_abs(float32x4_t v) {
  float32x4_t magnitude = vabsq_f32(v);
  return magnitude;
}
static inline
float
glmm_hadd(float32x4_t v) {
@@ -25,6 +31,22 @@ glmm_hadd(float32x4_t v) {
#endif
}
/*
 * Horizontal minimum: returns the smallest of the four lanes of v.
 *
 * vpmin_f32 operates on and returns 64-bit float32x2_t values, so the
 * intermediate results live in their own float32x2_t temporary; they cannot
 * be written back into the float32x4_t parameter (incompatible vector types).
 */
static inline
float
glmm_hmin(float32x4_t v) {
  float32x2_t t;
  t = vpmin_f32(vget_low_f32(v), vget_high_f32(v)); /* [min(0,1), min(2,3)] */
  t = vpmin_f32(t, t);                              /* both lanes: min(0..3) */
  return vget_lane_f32(t, 0);
}
/*
 * Horizontal maximum: returns the largest of the four lanes of v.
 *
 * vpmax_f32 operates on and returns 64-bit float32x2_t values, so the
 * intermediate results live in their own float32x2_t temporary; they cannot
 * be written back into the float32x4_t parameter (incompatible vector types).
 */
static inline
float
glmm_hmax(float32x4_t v) {
  float32x2_t t;
  t = vpmax_f32(vget_low_f32(v), vget_high_f32(v)); /* [max(0,1), max(2,3)] */
  t = vpmax_f32(t, t);                              /* both lanes: max(0..3) */
  return vget_lane_f32(t, 0);
}
static inline
float
glmm_dot(float32x4_t a, float32x4_t b) {
@@ -43,5 +65,17 @@ glmm_norm2(float32x4_t a) {
return glmm_dot(a, a);
}
/* L1 norm of a: horizontal sum (glmm_hadd) of the lane-wise absolute values. */
static inline
float
glmm_norm_one(float32x4_t a) {
  float32x4_t magnitudes = glmm_abs(a);
  return glmm_hadd(magnitudes);
}
/* Infinity norm of a: horizontal max (glmm_hmax) of the lane-wise absolute values. */
static inline
float
glmm_norm_inf(float32x4_t a) {
  float32x4_t magnitudes = glmm_abs(a);
  return glmm_hmax(magnitudes);
}
#endif
#endif /* cglm_simd_arm_h */

View File

@@ -42,6 +42,12 @@
# endif
#endif
/*
 * Lane-wise absolute value of x.
 * -0.0f has only the sign bit set, so and-not with it clears the sign bit
 * of every lane and leaves the remaining bits untouched.
 */
static inline
__m128
glmm_abs(__m128 x) {
  const __m128 sign_mask = _mm_set1_ps(-0.0f);
  return _mm_andnot_ps(sign_mask, x); /* (~sign_mask) & x */
}
static inline
__m128
glmm_vhadds(__m128 v) {
@@ -68,6 +74,38 @@ glmm_hadd(__m128 v) {
return _mm_cvtss_f32(glmm_vhadds(v));
}
/*
 * Horizontal minimum kept in vector form: lane 0 of the result holds the
 * minimum of the four lanes of v (glmm_hmin extracts that lane).
 * The bracket comments list, per lane, which input lanes have been combined.
 */
static inline
__m128
glmm_vhmin(__m128 v) {
  __m128 upper = _mm_movehl_ps(v, v);      /* [2, 3, 2, 3]             */
  __m128 pairs = _mm_min_ps(upper, v);     /* [0|2, 1|3, 2|2, 3|3]     */
  __m128 lane1 = glmm_shuff1x(pairs, 1);   /* [1|3, 1|3, 1|3, 1|3]     */

  return _mm_min_ss(pairs, lane1);
}
/* Horizontal minimum of v as a scalar: lowest lane of glmm_vhmin(v). */
static inline
float
glmm_hmin(__m128 v) {
  __m128 reduced = glmm_vhmin(v);
  return _mm_cvtss_f32(reduced);
}
/*
 * Horizontal maximum kept in vector form: lane 0 of the result holds the
 * maximum of the four lanes of v (glmm_hmax extracts that lane).
 * The bracket comments list, per lane, which input lanes have been combined.
 */
static inline
__m128
glmm_vhmax(__m128 v) {
  __m128 upper = _mm_movehl_ps(v, v);      /* [2, 3, 2, 3]             */
  __m128 pairs = _mm_max_ps(upper, v);     /* [0|2, 1|3, 2|2, 3|3]     */
  __m128 lane1 = glmm_shuff1x(pairs, 1);   /* [1|3, 1|3, 1|3, 1|3]     */

  return _mm_max_ss(pairs, lane1);
}
/* Horizontal maximum of v as a scalar: lowest lane of glmm_vhmax(v). */
static inline
float
glmm_hmax(__m128 v) {
  __m128 reduced = glmm_vhmax(v);
  return _mm_cvtss_f32(reduced);
}
static inline
__m128
glmm_vdots(__m128 a, __m128 b) {
@@ -119,6 +157,18 @@ glmm_norm2(__m128 a) {
return _mm_cvtss_f32(glmm_vhadds(_mm_mul_ps(a, a)));
}
/* L1 norm of a: horizontal sum (glmm_vhadds) of the lane-wise absolute values. */
static inline
float
glmm_norm_one(__m128 a) {
  __m128 magnitudes = glmm_abs(a);
  __m128 summed     = glmm_vhadds(magnitudes);
  return _mm_cvtss_f32(summed);
}
/* Infinity norm of a: horizontal max (glmm_vhmax) of the lane-wise absolute values. */
static inline
float
glmm_norm_inf(__m128 a) {
  __m128 magnitudes = glmm_abs(a);
  __m128 largest    = glmm_vhmax(magnitudes);
  return _mm_cvtss_f32(largest);
}
static inline
__m128
glmm_load3(float v[3]) {

View File

@@ -24,6 +24,9 @@
CGLM_INLINE bool glms_vec3_isinf(vec3s v);
CGLM_INLINE bool glms_vec3_isvalid(vec3s v);
CGLM_INLINE vec3s glms_vec3_sign(vec3s v);
CGLM_INLINE vec3s glms_vec3_abs(vec3s v);
CGLM_INLINE vec3s glms_vec3_fract(vec3s v);
CGLM_INLINE float glms_vec3_hadd(vec3s v);
CGLM_INLINE vec3s glms_vec3_sqrt(vec3s v);
*/
@@ -196,6 +199,47 @@ glms_vec3_sign(vec3s v) {
return r;
}
/*!
 * @brief component-wise absolute value (struct API over glm_vec3_abs)
 *
 * @param[in]  v  input vector
 * @return        new vector filled in by glm_vec3_abs
 */
CGLM_INLINE
vec3s
glms_vec3_abs(vec3s v) {
  vec3s out;

  glm_vec3_abs(v.raw, out.raw);

  return out;
}
/*!
 * @brief component-wise fractional part (struct API over glm_vec3_fract)
 *
 * @param[in]  v  input vector
 * @return        new vector filled in by glm_vec3_fract
 */
CGLM_INLINE
vec3s
glms_vec3_fract(vec3s v) {
  vec3s out;

  glm_vec3_fract(v.raw, out.raw);

  return out;
}
/*!
 * @brief horizontal add: reduces the vector to the sum of its components
 *        (struct API over glm_vec3_hadd)
 * @warning could overflow
 *
 * @param[in]  v  input vector
 * @return        value returned by glm_vec3_hadd for v
 */
CGLM_INLINE
float
glms_vec3_hadd(vec3s v) {
  float sum = glm_vec3_hadd(v.raw);
  return sum;
}
/*!
* @brief square root of each vector item
*

View File

@@ -24,6 +24,8 @@
CGLM_INLINE float glms_vec3_dot(vec3s a, vec3s b);
CGLM_INLINE float glms_vec3_norm2(vec3s v);
CGLM_INLINE float glms_vec3_norm(vec3s v);
CGLM_INLINE float glms_vec3_norm_one(vec3s v);
CGLM_INLINE float glms_vec3_norm_inf(vec3s v);
CGLM_INLINE vec3s glms_vec3_add(vec3s a, vec3s b);
CGLM_INLINE vec3s glms_vec3_adds(vec3s a, float s);
CGLM_INLINE vec3s glms_vec3_sub(vec3s a, vec3s b);
@@ -212,6 +214,45 @@ glms_vec3_norm(vec3s v) {
return glm_vec3_norm(v.raw);
}
/*!
 * @brief L1 norm of vec3 (struct API over glm_vec3_norm_one)
 *
 * Also called the Manhattan or Taxicab norm: the sum of the absolute
 * values of the components, each component weighted equally.
 *
 *   L1 norm = |v[0]| + |v[1]| + |v[2]|
 *
 * @param[in]  v  input vector
 *
 * @return L1 norm
 */
CGLM_INLINE
float
glms_vec3_norm_one(vec3s v) {
  float norm = glm_vec3_norm_one(v.raw);
  return norm;
}
/*!
 * @brief infinity norm of vec3 (struct API over glm_vec3_norm_inf)
 *
 * Also called the maximum norm: the largest absolute value among the
 * components.
 *
 *   inf norm = max(|v[0]|, |v[1]|, |v[2]|)
 *
 * @param[in]  v  input vector
 *
 * @return infinity norm
 */
CGLM_INLINE
float
glms_vec3_norm_inf(vec3s v) {
  float norm = glm_vec3_norm_inf(v.raw);
  return norm;
}
/*!
* @brief add a vector to b vector store result in dest
*

View File

@@ -24,6 +24,9 @@
CGLM_INLINE bool glms_vec4_isinf(vec4s v);
CGLM_INLINE bool glms_vec4_isvalid(vec4s v);
CGLM_INLINE vec4s glms_vec4_sign(vec4s v);
CGLM_INLINE vec4s glms_vec4_abs(vec4s v);
CGLM_INLINE vec4s glms_vec4_fract(vec4s v);
CGLM_INLINE float glms_vec4_hadd(vec4s v);
CGLM_INLINE vec4s glms_vec4_sqrt(vec4s v);
*/
@@ -196,6 +199,47 @@ glms_vec4_sign(vec4s v) {
return r;
}
/*!
 * @brief component-wise absolute value (struct API over glm_vec4_abs)
 *
 * @param[in]  v  input vector
 * @return        new vector filled in by glm_vec4_abs
 */
CGLM_INLINE
vec4s
glms_vec4_abs(vec4s v) {
  vec4s out;

  glm_vec4_abs(v.raw, out.raw);

  return out;
}
/*!
 * @brief component-wise fractional part (struct API over glm_vec4_fract)
 *
 * @param[in]  v  input vector
 * @return        new vector filled in by glm_vec4_fract
 */
CGLM_INLINE
vec4s
glms_vec4_fract(vec4s v) {
  vec4s out;

  glm_vec4_fract(v.raw, out.raw);

  return out;
}
/*!
 * @brief horizontal add: reduces the vector to the sum of its components
 *        (struct API over glm_vec4_hadd)
 * @warning could overflow
 *
 * @param[in]  v  input vector
 * @return        value returned by glm_vec4_hadd for v
 */
CGLM_INLINE
float
glms_vec4_hadd(vec4s v) {
  float sum = glm_vec4_hadd(v.raw);
  return sum;
}
/*!
* @brief square root of each vector item
*

View File

@@ -24,6 +24,8 @@
CGLM_INLINE float glms_vec4_dot(vec4s a, vec4s b);
CGLM_INLINE float glms_vec4_norm2(vec4s v);
CGLM_INLINE float glms_vec4_norm(vec4s v);
CGLM_INLINE float glms_vec4_norm_one(vec4s v);
CGLM_INLINE float glms_vec4_norm_inf(vec4s v);
CGLM_INLINE vec4s glms_vec4_add(vec4s a, vec4s b);
CGLM_INLINE vec4s glms_vec4_adds(vec4s v, float s);
CGLM_INLINE vec4s glms_vec4_sub(vec4s a, vec4s b);
@@ -241,6 +243,45 @@ glms_vec4_norm(vec4s v) {
return glm_vec4_norm(v.raw);
}
/*!
 * @brief L1 norm of vec4 (struct API over glm_vec4_norm_one)
 *
 * Also called the Manhattan or Taxicab norm: the sum of the absolute
 * values of the components, each component weighted equally.
 *
 *   L1 norm = |v[0]| + |v[1]| + |v[2]| + |v[3]|
 *
 * @param[in]  v  input vector
 *
 * @return L1 norm
 */
CGLM_INLINE
float
glms_vec4_norm_one(vec4s v) {
  float norm = glm_vec4_norm_one(v.raw);
  return norm;
}
/*!
 * @brief infinity norm of vec4 (struct API over glm_vec4_norm_inf)
 *
 * Also called the maximum norm: the largest absolute value among the
 * components.
 *
 *   inf norm = max(|v[0]|, |v[1]|, |v[2]|, |v[3]|)
 *
 * @param[in]  v  input vector
 *
 * @return infinity norm
 */
CGLM_INLINE
float
glms_vec4_norm_inf(vec4s v) {
  float norm = glm_vec4_norm_inf(v.raw);
  return norm;
}
/*!
* @brief add b vector to a vector store result in dest
*

View File

@@ -24,6 +24,9 @@
CGLM_INLINE bool glm_vec3_isinf(vec3 v);
CGLM_INLINE bool glm_vec3_isvalid(vec3 v);
CGLM_INLINE void glm_vec3_sign(vec3 v, vec3 dest);
CGLM_INLINE void glm_vec3_abs(vec3 v, vec3 dest);
CGLM_INLINE void glm_vec3_fract(vec3 v, vec3 dest);
CGLM_INLINE float glm_vec3_hadd(vec3 v);
CGLM_INLINE void glm_vec3_sqrt(vec3 v, vec3 dest);
*/
@@ -211,6 +214,47 @@ glm_vec3_sign(vec3 v, vec3 dest) {
dest[2] = glm_signf(v[2]);
}
/*!
 * @brief component-wise absolute value: dest[i] = |v[i]|
 *
 * @param[in]  v    input vector
 * @param[out] dest receives the absolute values
 */
CGLM_INLINE
void
glm_vec3_abs(vec3 v, vec3 dest) {
  int i;

  for (i = 0; i < 3; i++) {
    dest[i] = fabsf(v[i]);
  }
}
/*!
 * @brief component-wise fractional part: dest[i] = v[i] - floor(v[i])
 *
 * Each result is capped at 0x1.fffffep-1f (the largest float below 1.0),
 * so a subtraction that rounds up to 1.0 cannot escape the [0, 1) range.
 *
 * @param[in]  v    input vector
 * @param[out] dest receives the fractional parts
 */
CGLM_INLINE
void
glm_vec3_fract(vec3 v, vec3 dest) {
  int i;

  for (i = 0; i < 3; i++) {
    dest[i] = fminf(v[i] - floorf(v[i]), 0x1.fffffep-1f);
  }
}
/*!
 * @brief horizontal add: reduces the vector to the sum of its components
 * @warning could overflow
 *
 * @param[in]  v  input vector
 * @return        v[0] + v[1] + v[2], added left to right
 */
CGLM_INLINE
float
glm_vec3_hadd(vec3 v) {
  return (v[0] + v[1]) + v[2];
}
/*!
* @brief square root of each vector item
*

View File

@@ -23,6 +23,8 @@
CGLM_INLINE float glm_vec3_dot(vec3 a, vec3 b);
CGLM_INLINE float glm_vec3_norm2(vec3 v);
CGLM_INLINE float glm_vec3_norm(vec3 v);
CGLM_INLINE float glm_vec3_norm_one(vec3 v);
CGLM_INLINE float glm_vec3_norm_inf(vec3 v);
CGLM_INLINE void glm_vec3_add(vec3 a, vec3 b, vec3 dest);
CGLM_INLINE void glm_vec3_adds(vec3 a, float s, vec3 dest);
CGLM_INLINE void glm_vec3_sub(vec3 a, vec3 b, vec3 dest);
@@ -213,6 +215,49 @@ glm_vec3_norm(vec3 v) {
return sqrtf(glm_vec3_norm2(v));
}
/*!
 * @brief L1 norm of vec3
 *
 * Also called the Manhattan or Taxicab norm: the sum of the absolute
 * values of the components, each component weighted equally.
 *
 *   L1 norm = |v[0]| + |v[1]| + |v[2]|
 *
 * @param[in]  v  input vector
 *
 * @return L1 norm
 */
CGLM_INLINE
float
glm_vec3_norm_one(vec3 v) {
  vec3 magnitudes;

  glm_vec3_abs(v, magnitudes);      /* |v[i]| per component     */

  return glm_vec3_hadd(magnitudes); /* then sum the components  */
}
/*!
 * @brief infinity norm of vec3
 *
 * Also called the maximum norm: the largest absolute value among the
 * components.
 *
 *   inf norm = max(|v[0]|, |v[1]|, |v[2]|)
 *
 * @param[in]  v  input vector
 *
 * @return infinity norm
 */
CGLM_INLINE
float
glm_vec3_norm_inf(vec3 v) {
  vec3 magnitudes;

  glm_vec3_abs(v, magnitudes);     /* |v[i]| per component          */

  return glm_vec3_max(magnitudes); /* reduced by glm_vec3_max       */
}
/*!
* @brief add a vector to b vector store result in dest
*

View File

@@ -24,6 +24,9 @@
CGLM_INLINE bool glm_vec4_isinf(vec4 v);
CGLM_INLINE bool glm_vec4_isvalid(vec4 v);
CGLM_INLINE void glm_vec4_sign(vec4 v, vec4 dest);
CGLM_INLINE void glm_vec4_abs(vec4 v, vec4 dest);
CGLM_INLINE void glm_vec4_fract(vec4 v, vec4 dest);
CGLM_INLINE float glm_vec4_hadd(vec4 v);
CGLM_INLINE void glm_vec4_sqrt(vec4 v, vec4 dest);
*/
@@ -237,6 +240,59 @@ glm_vec4_sign(vec4 v, vec4 dest) {
#endif
}
/*!
 * @brief component-wise absolute value: dest[i] = |v[i]|
 *
 * Uses the SSE or NEON vector path when available, otherwise a scalar
 * fabsf per component.
 *
 * @param[in]  v    input vector
 * @param[out] dest receives the absolute values
 */
CGLM_INLINE
void
glm_vec4_abs(vec4 v, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
  glmm_store(dest, glmm_abs(glmm_load(v)));
#elif defined(CGLM_NEON_FP)
  /* load from the parameter `v`; there is no `a` in this function */
  vst1q_f32(dest, vabsq_f32(vld1q_f32(v)));
#else
  dest[0] = fabsf(v[0]);
  dest[1] = fabsf(v[1]);
  dest[2] = fabsf(v[2]);
  dest[3] = fabsf(v[3]);
#endif
}
/*!
 * @brief component-wise fractional part: dest[i] = v[i] - floor(v[i])
 *
 * Each result is capped at 0x1.fffffep-1f (the largest float below 1.0),
 * so a subtraction that rounds up to 1.0 cannot escape the [0, 1) range.
 *
 * @param[in]  v    input vector
 * @param[out] dest receives the fractional parts
 */
CGLM_INLINE
void
glm_vec4_fract(vec4 v, vec4 dest) {
  int i;

  for (i = 0; i < 4; i++) {
    dest[i] = fminf(v[i] - floorf(v[i]), 0x1.fffffep-1f);
  }
}
/*!
 * @brief horizontal add: reduces the vector to the sum of its components
 * @warning could overflow
 *
 * On SSE builds the reduction is delegated to glmm_hadd; otherwise the
 * components are added left to right.
 *
 * @param[in]  v  input vector
 * @return        sum of the four components of v
 */
CGLM_INLINE
float
glm_vec4_hadd(vec4 v) {
#if defined( __SSE__ ) || defined( __SSE2__ )
  return glmm_hadd(glmm_load(v));
#else
  return ((v[0] + v[1]) + v[2]) + v[3];
#endif
}
/*!
* @brief square root of each vector item
*

View File

@@ -22,6 +22,8 @@
CGLM_INLINE float glm_vec4_dot(vec4 a, vec4 b);
CGLM_INLINE float glm_vec4_norm2(vec4 v);
CGLM_INLINE float glm_vec4_norm(vec4 v);
CGLM_INLINE float glm_vec4_norm_one(vec4 v);
CGLM_INLINE float glm_vec4_norm_inf(vec4 v);
CGLM_INLINE void glm_vec4_add(vec4 a, vec4 b, vec4 dest);
CGLM_INLINE void glm_vec4_adds(vec4 v, float s, vec4 dest);
CGLM_INLINE void glm_vec4_sub(vec4 a, vec4 b, vec4 dest);
@@ -257,6 +259,57 @@ glm_vec4_norm(vec4 v) {
#endif
}
/*!
 * @brief L1 norm of vec4
 *
 * Also called the Manhattan or Taxicab norm: the sum of the absolute
 * values of the components, each component weighted equally.
 *
 *   L1 norm = |v[0]| + |v[1]| + |v[2]| + |v[3]|
 *
 * With CGLM_SIMD defined the work is done by glmm_norm_one; otherwise it is
 * composed from glm_vec4_abs and glm_vec4_hadd.
 *
 * @param[in]  v  input vector
 *
 * @return L1 norm
 */
CGLM_INLINE
float
glm_vec4_norm_one(vec4 v) {
#if defined(CGLM_SIMD)
  return glmm_norm_one(glmm_load(v));
#else
  vec4 magnitudes;

  glm_vec4_abs(v, magnitudes);

  return glm_vec4_hadd(magnitudes);
#endif
}
/*!
 * @brief infinity norm of vec4
 *
 * Also called the maximum norm: the largest absolute value among the
 * components.
 *
 *   inf norm = max(|v[0]|, |v[1]|, |v[2]|, |v[3]|)
 *
 * With CGLM_SIMD defined the work is done by glmm_norm_inf; otherwise it is
 * composed from glm_vec4_abs and glm_vec4_max.
 *
 * @param[in]  v  input vector
 *
 * @return infinity norm
 */
CGLM_INLINE
float
glm_vec4_norm_inf(vec4 v) {
#if defined(CGLM_SIMD)
  return glmm_norm_inf(glmm_load(v));
#else
  vec4 magnitudes;

  glm_vec4_abs(v, magnitudes);

  return glm_vec4_max(magnitudes);
#endif
}
/*!
* @brief add b vector to a vector store result in dest
*

View File

@@ -74,6 +74,18 @@ glmc_vec3_norm2(vec3 v) {
return glm_vec3_norm2(v);
}
/* Exported entry point: returns glm_vec3_norm_one(v), the L1 norm of v. */
CGLM_EXPORT
float
glmc_vec3_norm_one(vec3 v) {
  float norm = glm_vec3_norm_one(v);
  return norm;
}
/* Exported entry point: returns glm_vec3_norm_inf(v), the infinity norm of v. */
CGLM_EXPORT
float
glmc_vec3_norm_inf(vec3 v) {
  float norm = glm_vec3_norm_inf(v);
  return norm;
}
CGLM_EXPORT
void
glmc_vec3_add(vec3 a, vec3 b, vec3 dest) {
@@ -382,6 +394,24 @@ glmc_vec3_sign(vec3 v, vec3 dest) {
glm_vec3_sign(v, dest);
}
/* Exported entry point: forwards to glm_vec3_abs, which writes |v[i]| into dest. */
CGLM_EXPORT
void
glmc_vec3_abs(vec3 v, vec3 dest) {
glm_vec3_abs(v, dest);
}
/* Exported entry point: forwards to glm_vec3_fract, which writes the fractional part of each component of v into dest. */
CGLM_EXPORT
void
glmc_vec3_fract(vec3 v, vec3 dest) {
glm_vec3_fract(v, dest);
}
/* Exported entry point: returns glm_vec3_hadd(v), the sum of the components of v. */
CGLM_EXPORT
float
glmc_vec3_hadd(vec3 v) {
  float sum = glm_vec3_hadd(v);
  return sum;
}
CGLM_EXPORT
void
glmc_vec3_sqrt(vec3 v, vec3 dest) {

View File

@@ -74,6 +74,18 @@ glmc_vec4_norm2(vec4 v) {
return glm_vec4_norm2(v);
}
/* Exported entry point: returns glm_vec4_norm_one(v), the L1 norm of v. */
CGLM_EXPORT
float
glmc_vec4_norm_one(vec4 v) {
  float norm = glm_vec4_norm_one(v);
  return norm;
}
/* Exported entry point: returns glm_vec4_norm_inf(v), the infinity norm of v. */
CGLM_EXPORT
float
glmc_vec4_norm_inf(vec4 v) {
  float norm = glm_vec4_norm_inf(v);
  return norm;
}
CGLM_EXPORT
void
glmc_vec4_add(vec4 a, vec4 b, vec4 dest) {
@@ -346,6 +358,24 @@ glmc_vec4_sign(vec4 v, vec4 dest) {
glm_vec4_sign(v, dest);
}
/* Exported entry point: forwards to glm_vec4_abs, which writes |v[i]| into dest. */
CGLM_EXPORT
void
glmc_vec4_abs(vec4 v, vec4 dest) {
glm_vec4_abs(v, dest);
}
/* Exported entry point: forwards to glm_vec4_fract, which writes the fractional part of each component of v into dest. */
CGLM_EXPORT
void
glmc_vec4_fract(vec4 v, vec4 dest) {
glm_vec4_fract(v, dest);
}
/* Exported entry point: returns glm_vec4_hadd(v), the sum of the components of v. */
CGLM_EXPORT
float
glmc_vec4_hadd(vec4 v) {
  float sum = glm_vec4_hadd(v);
  return sum;
}
CGLM_EXPORT
void
glmc_vec4_sqrt(vec4 v, vec4 dest) {