Merge branch 'master' into optimizations

This commit is contained in:
Recep Aslantas
2018-05-10 13:59:10 +03:00
committed by GitHub
64 changed files with 4736 additions and 770 deletions

View File

@@ -16,6 +16,7 @@
#include "common.h"
#include "mat4.h"
#include "mat3.h"
#ifdef CGLM_SSE_FP
# include "simd/sse2/affine.h"
@@ -81,6 +82,59 @@ glm_mul(mat4 m1, mat4 m2, mat4 dest) {
#endif
}
/*!
* @brief this is similar to glm_mat4_mul but specialized to affine transform
*
* Right Matrix format should be:
* R R R 0
* R R R 0
* R R R 0
* 0 0 0 1
*
* this reduces some multiplications. It should be faster than mat4_mul.
* if you are not sure about matrix format then DON'T use this! use mat4_mul
*
* @param[in] m1 affine matrix 1
* @param[in] m2 affine matrix 2
* @param[out] dest result matrix
*/
CGLM_INLINE
void
glm_mul_rot(mat4 m1, mat4 m2, mat4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glm_mul_rot_sse2(m1, m2, dest);
#else
float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2], a03 = m1[0][3],
a10 = m1[1][0], a11 = m1[1][1], a12 = m1[1][2], a13 = m1[1][3],
a20 = m1[2][0], a21 = m1[2][1], a22 = m1[2][2], a23 = m1[2][3],
a30 = m1[3][0], a31 = m1[3][1], a32 = m1[3][2], a33 = m1[3][3],
b00 = m2[0][0], b01 = m2[0][1], b02 = m2[0][2],
b10 = m2[1][0], b11 = m2[1][1], b12 = m2[1][2],
b20 = m2[2][0], b21 = m2[2][1], b22 = m2[2][2];
dest[0][0] = a00 * b00 + a10 * b01 + a20 * b02;
dest[0][1] = a01 * b00 + a11 * b01 + a21 * b02;
dest[0][2] = a02 * b00 + a12 * b01 + a22 * b02;
dest[0][3] = a03 * b00 + a13 * b01 + a23 * b02;
dest[1][0] = a00 * b10 + a10 * b11 + a20 * b12;
dest[1][1] = a01 * b10 + a11 * b11 + a21 * b12;
dest[1][2] = a02 * b10 + a12 * b11 + a22 * b12;
dest[1][3] = a03 * b10 + a13 * b11 + a23 * b12;
dest[2][0] = a00 * b20 + a10 * b21 + a20 * b22;
dest[2][1] = a01 * b20 + a11 * b21 + a21 * b22;
dest[2][2] = a02 * b20 + a12 * b21 + a22 * b22;
dest[2][3] = a03 * b20 + a13 * b21 + a23 * b22;
dest[3][0] = a30;
dest[3][1] = a31;
dest[3][2] = a32;
dest[3][3] = a33;
#endif
}
/*!
* @brief inverse orthonormal rotation + translation matrix (ridig-body)
*

View File

@@ -16,15 +16,14 @@
CGLM_INLINE void glm_scale_to(mat4 m, vec3 v, mat4 dest);
CGLM_INLINE void glm_scale_make(mat4 m, vec3 v);
CGLM_INLINE void glm_scale(mat4 m, vec3 v);
CGLM_INLINE void glm_scale1(mat4 m, float s);
CGLM_INLINE void glm_scale_uni(mat4 m, float s);
CGLM_INLINE void glm_rotate_x(mat4 m, float angle, mat4 dest);
CGLM_INLINE void glm_rotate_y(mat4 m, float angle, mat4 dest);
CGLM_INLINE void glm_rotate_z(mat4 m, float angle, mat4 dest);
CGLM_INLINE void glm_rotate_ndc_make(mat4 m, float angle, vec3 axis_ndc);
CGLM_INLINE void glm_rotate_make(mat4 m, float angle, vec3 axis);
CGLM_INLINE void glm_rotate_ndc(mat4 m, float angle, vec3 axis);
CGLM_INLINE void glm_rotate(mat4 m, float angle, vec3 axis);
CGLM_INLINE void glm_rotate_at(mat4 m, vec3 pivot, float angle, vec3 axis);
CGLM_INLINE void glm_rotate_atm(mat4 m, vec3 pivot, float angle, vec3 axis);
CGLM_INLINE void glm_decompose_scalev(mat4 m, vec3 s);
CGLM_INLINE bool glm_uniscaled(mat4 m);
CGLM_INLINE void glm_decompose_rs(mat4 m, mat4 r, vec3 s);
@@ -35,9 +34,15 @@
#define cglm_affine_h
#include "common.h"
#include "vec4.h"
#include "affine-mat.h"
#include "util.h"
#include "vec3.h"
#include "vec4.h"
#include "mat4.h"
#include "affine-mat.h"
CGLM_INLINE
void
glm_mat4_mul(mat4 m1, mat4 m2, mat4 dest);
/*!
* @brief translate existing transform matrix by v vector
@@ -53,19 +58,19 @@ glm_translate_to(mat4 m, vec3 v, mat4 dest) {
mat4 t = GLM_MAT4_IDENTITY_INIT;
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_load_ps(t[0]),
_mm_set1_ps(v[0])),
_mm_mul_ps(_mm_load_ps(t[1]),
_mm_set1_ps(v[1]))),
_mm_add_ps(_mm_mul_ps(_mm_load_ps(t[2]),
_mm_set1_ps(v[2])),
_mm_load_ps(t[3]))))
glmm_store(dest[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_load(t[0]),
_mm_set1_ps(v[0])),
_mm_mul_ps(glmm_load(t[1]),
_mm_set1_ps(v[1]))),
_mm_add_ps(_mm_mul_ps(glmm_load(t[2]),
_mm_set1_ps(v[2])),
glmm_load(t[3]))))
;
_mm_store_ps(dest[0], _mm_load_ps(m[0]));
_mm_store_ps(dest[1], _mm_load_ps(m[1]));
_mm_store_ps(dest[2], _mm_load_ps(m[2]));
glmm_store(dest[0], glmm_load(m[0]));
glmm_store(dest[1], glmm_load(m[1]));
glmm_store(dest[2], glmm_load(m[2]));
#else
vec4 v1, v2, v3;
@@ -92,14 +97,14 @@ CGLM_INLINE
void
glm_translate(mat4 m, vec3 v) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(m[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_load_ps(m[0]),
_mm_set1_ps(v[0])),
_mm_mul_ps(_mm_load_ps(m[1]),
_mm_set1_ps(v[1]))),
_mm_add_ps(_mm_mul_ps(_mm_load_ps(m[2]),
_mm_set1_ps(v[2])),
_mm_load_ps(m[3]))))
glmm_store(m[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_load(m[0]),
_mm_set1_ps(v[0])),
_mm_mul_ps(glmm_load(m[1]),
_mm_set1_ps(v[1]))),
_mm_add_ps(_mm_mul_ps(glmm_load(m[2]),
_mm_set1_ps(v[2])),
glmm_load(m[3]))))
;
#else
vec4 v1, v2, v3;
@@ -124,10 +129,10 @@ CGLM_INLINE
void
glm_translate_x(mat4 m, float x) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(m[3],
_mm_add_ps(_mm_mul_ps(_mm_load_ps(m[0]),
_mm_set1_ps(x)),
_mm_load_ps(m[3])))
glmm_store(m[3],
_mm_add_ps(_mm_mul_ps(glmm_load(m[0]),
_mm_set1_ps(x)),
glmm_load(m[3])))
;
#else
vec4 v1;
@@ -146,10 +151,10 @@ CGLM_INLINE
void
glm_translate_y(mat4 m, float y) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(m[3],
_mm_add_ps(_mm_mul_ps(_mm_load_ps(m[1]),
_mm_set1_ps(y)),
_mm_load_ps(m[3])))
glmm_store(m[3],
_mm_add_ps(_mm_mul_ps(glmm_load(m[1]),
_mm_set1_ps(y)),
glmm_load(m[3])))
;
#else
vec4 v1;
@@ -168,10 +173,10 @@ CGLM_INLINE
void
glm_translate_z(mat4 m, float z) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(m[3],
_mm_add_ps(_mm_mul_ps(_mm_load_ps(m[2]),
_mm_set1_ps(z)),
_mm_load_ps(m[3])))
glmm_store(m[3],
_mm_add_ps(_mm_mul_ps(glmm_load(m[2]),
_mm_set1_ps(z)),
glmm_load(m[3])))
;
#else
vec4 v1;
@@ -237,16 +242,6 @@ glm_scale(mat4 m, vec3 v) {
glm_scale_to(m, v, m);
}
/*!
* @brief DEPRECATED! Use glm_scale_uni
*/
CGLM_INLINE
void
glm_scale1(mat4 m, float s) {
vec3 v = { s, s, s };
glm_scale_to(m, v, m);
}
/*!
* @brief applies uniform scale to existing transform matrix v = [s, s, s]
* and stores result in same matrix
@@ -272,19 +267,18 @@ glm_scale_uni(mat4 m, float s) {
CGLM_INLINE
void
glm_rotate_x(mat4 m, float angle, mat4 dest) {
float cosVal;
float sinVal;
mat4 t = GLM_MAT4_IDENTITY_INIT;
float c, s;
cosVal = cosf(angle);
sinVal = sinf(angle);
c = cosf(angle);
s = sinf(angle);
t[1][1] = cosVal;
t[1][2] = sinVal;
t[2][1] = -sinVal;
t[2][2] = cosVal;
t[1][1] = c;
t[1][2] = s;
t[2][1] = -s;
t[2][2] = c;
glm_mat4_mul(m, t, dest);
glm_mul_rot(m, t, dest);
}
/*!
@@ -298,19 +292,18 @@ glm_rotate_x(mat4 m, float angle, mat4 dest) {
CGLM_INLINE
void
glm_rotate_y(mat4 m, float angle, mat4 dest) {
float cosVal;
float sinVal;
mat4 t = GLM_MAT4_IDENTITY_INIT;
float c, s;
cosVal = cosf(angle);
sinVal = sinf(angle);
c = cosf(angle);
s = sinf(angle);
t[0][0] = cosVal;
t[0][2] = -sinVal;
t[2][0] = sinVal;
t[2][2] = cosVal;
t[0][0] = c;
t[0][2] = -s;
t[2][0] = s;
t[2][2] = c;
glm_mat4_mul(m, t, dest);
glm_mul_rot(m, t, dest);
}
/*!
@@ -324,61 +317,18 @@ glm_rotate_y(mat4 m, float angle, mat4 dest) {
CGLM_INLINE
void
glm_rotate_z(mat4 m, float angle, mat4 dest) {
float cosVal;
float sinVal;
mat4 t = GLM_MAT4_IDENTITY_INIT;
cosVal = cosf(angle);
sinVal = sinf(angle);
t[0][0] = cosVal;
t[0][1] = sinVal;
t[1][0] = -sinVal;
t[1][1] = cosVal;
glm_mat4_mul(m, t, dest);
}
/*!
* @brief creates NEW rotation matrix by angle and axis
*
* this name may change in the future. axis must be is normalized
*
* @param[out] m affine transfrom
* @param[in] angle angle (radians)
* @param[in] axis_ndc normalized axis
*/
CGLM_INLINE
void
glm_rotate_ndc_make(mat4 m, float angle, vec3 axis_ndc) {
/* https://www.opengl.org/sdk/docs/man2/xhtml/glRotate.xml */
vec3 v, vs;
float c;
float c, s;
c = cosf(angle);
s = sinf(angle);
glm_vec_scale(axis_ndc, 1.0f - c, v);
glm_vec_scale(axis_ndc, sinf(angle), vs);
t[0][0] = c;
t[0][1] = s;
t[1][0] = -s;
t[1][1] = c;
glm_vec_scale(axis_ndc, v[0], m[0]);
glm_vec_scale(axis_ndc, v[1], m[1]);
glm_vec_scale(axis_ndc, v[2], m[2]);
m[0][0] += c;
m[0][1] += vs[2];
m[0][2] -= vs[1];
m[1][0] -= vs[2];
m[1][1] += c;
m[1][2] += vs[0];
m[2][0] += vs[1];
m[2][1] -= vs[0];
m[2][2] += c;
m[0][3] = m[1][3] = m[2][3] = m[3][0] = m[3][1] = m[3][2] = 0.0f;
m[3][3] = 1.0f;
glm_mul_rot(m, t, dest);
}
/*!
@@ -393,53 +343,29 @@ glm_rotate_ndc_make(mat4 m, float angle, vec3 axis_ndc) {
CGLM_INLINE
void
glm_rotate_make(mat4 m, float angle, vec3 axis) {
vec3 axis_ndc;
vec3 axisn, v, vs;
float c;
glm_vec_normalize_to(axis, axis_ndc);
glm_rotate_ndc_make(m, angle, axis_ndc);
c = cosf(angle);
glm_vec_normalize_to(axis, axisn);
glm_vec_scale(axisn, 1.0f - c, v);
glm_vec_scale(axisn, sinf(angle), vs);
glm_vec_scale(axisn, v[0], m[0]);
glm_vec_scale(axisn, v[1], m[1]);
glm_vec_scale(axisn, v[2], m[2]);
m[0][0] += c; m[1][0] -= vs[2]; m[2][0] += vs[1];
m[0][1] += vs[2]; m[1][1] += c; m[2][1] -= vs[0];
m[0][2] -= vs[1]; m[1][2] += vs[0]; m[2][2] += c;
m[0][3] = m[1][3] = m[2][3] = m[3][0] = m[3][1] = m[3][2] = 0.0f;
m[3][3] = 1.0f;
}
/*!
* @brief rotate existing transform matrix around Z axis by angle and axis
*
* this name may change in the future, axis must be normalized.
*
* @param[in, out] m affine transfrom
* @param[in] angle angle (radians)
* @param[in] axis_ndc normalized axis
*/
CGLM_INLINE
void
glm_rotate_ndc(mat4 m, float angle, vec3 axis_ndc) {
mat4 rot, tmp;
glm_rotate_ndc_make(rot, angle, axis_ndc);
glm_vec4_scale(m[0], rot[0][0], tmp[1]);
glm_vec4_scale(m[1], rot[0][1], tmp[0]);
glm_vec4_add(tmp[1], tmp[0], tmp[1]);
glm_vec4_scale(m[2], rot[0][2], tmp[0]);
glm_vec4_add(tmp[1], tmp[0], tmp[1]);
glm_vec4_scale(m[0], rot[1][0], tmp[2]);
glm_vec4_scale(m[1], rot[1][1], tmp[0]);
glm_vec4_add(tmp[2], tmp[0], tmp[2]);
glm_vec4_scale(m[2], rot[1][2], tmp[0]);
glm_vec4_add(tmp[2], tmp[0], tmp[2]);
glm_vec4_scale(m[0], rot[2][0], tmp[3]);
glm_vec4_scale(m[1], rot[2][1], tmp[0]);
glm_vec4_add(tmp[3], tmp[0], tmp[3]);
glm_vec4_scale(m[2], rot[2][2], tmp[0]);
glm_vec4_add(tmp[3], tmp[0], tmp[3]);
glm_vec4_copy(tmp[1], m[0]);
glm_vec4_copy(tmp[2], m[1]);
glm_vec4_copy(tmp[3], m[2]);
}
/*!
* @brief rotate existing transform matrix around Z axis by angle and axis
* @brief rotate existing transform matrix around given axis by angle
*
* @param[in, out] m affine transfrom
* @param[in] angle angle (radians)
@@ -448,10 +374,56 @@ glm_rotate_ndc(mat4 m, float angle, vec3 axis_ndc) {
CGLM_INLINE
void
glm_rotate(mat4 m, float angle, vec3 axis) {
vec3 axis_ndc;
mat4 rot;
glm_rotate_make(rot, angle, axis);
glm_mul_rot(m, rot, m);
}
glm_vec_normalize_to(axis, axis_ndc);
glm_rotate_ndc(m, angle, axis_ndc);
/*!
* @brief rotate existing transform
* around given axis by angle at given pivot point (rotation center)
*
* @param[in, out] m affine transfrom
* @param[in] pivot rotation center
* @param[in] angle angle (radians)
* @param[in] axis axis
*/
CGLM_INLINE
void
glm_rotate_at(mat4 m, vec3 pivot, float angle, vec3 axis) {
vec3 pivotInv;
glm_vec_inv_to(pivot, pivotInv);
glm_translate(m, pivot);
glm_rotate(m, angle, axis);
glm_translate(m, pivotInv);
}
/*!
* @brief creates NEW rotation matrix by angle and axis at given point
*
* this creates rotation matrix, it assumes you don't have a matrix
*
* this should work faster than glm_rotate_at because it reduces
* one glm_translate.
*
* @param[out] m affine transfrom
* @param[in] pivot rotation center
* @param[in] angle angle (radians)
* @param[in] axis axis
*/
CGLM_INLINE
void
glm_rotate_atm(mat4 m, vec3 pivot, float angle, vec3 axis) {
vec3 pivotInv;
glm_vec_inv_to(pivot, pivotInv);
glm_mat4_identity(m);
glm_vec_copy(pivot, m[3]);
glm_rotate(m, angle, axis);
glm_translate(m, pivotInv);
}
/*!
@@ -469,7 +441,7 @@ glm_decompose_scalev(mat4 m, vec3 s) {
}
/*!
* @brief returns true if matrix is uniform scaled. This is helpful for
* @brief returns true if matrix is uniform scaled. This is helpful for
* creating normal matrix.
*
* @param[in] m m

View File

@@ -11,6 +11,7 @@
#include "common.h"
#include "vec3.h"
#include "vec4.h"
#include "util.h"
/*!
* @brief apply transform to Axis-Aligned Bounding Box

View File

@@ -13,6 +13,10 @@ extern "C" {
#include "../cglm.h"
CGLM_EXPORT
void
glmc_translate_make(mat4 m, vec3 v);
CGLM_EXPORT
void
glmc_translate_to(mat4 m, vec3 v, mat4 dest);
@@ -33,6 +37,10 @@ CGLM_EXPORT
void
glmc_translate_z(mat4 m, float to);
CGLM_EXPORT
void
glmc_scale_make(mat4 m, vec3 v);
CGLM_EXPORT
void
glmc_scale_to(mat4 m, vec3 v, mat4 dest);
@@ -43,7 +51,7 @@ glmc_scale(mat4 m, vec3 v);
CGLM_EXPORT
void
glmc_scale1(mat4 m, float s);
glmc_scale_uni(mat4 m, float s);
CGLM_EXPORT
void
@@ -57,26 +65,30 @@ CGLM_EXPORT
void
glmc_rotate_z(mat4 m, float rad, mat4 dest);
CGLM_EXPORT
void
glmc_rotate_ndc_make(mat4 m, float angle, vec3 axis_ndc);
CGLM_EXPORT
void
glmc_rotate_make(mat4 m, float angle, vec3 axis);
CGLM_EXPORT
void
glmc_rotate_ndc(mat4 m, float angle, vec3 axis_ndc);
CGLM_EXPORT
void
glmc_rotate(mat4 m, float angle, vec3 axis);
CGLM_EXPORT
void
glmc_rotate_at(mat4 m, vec3 pivot, float angle, vec3 axis);
CGLM_EXPORT
void
glmc_rotate_atm(mat4 m, vec3 pivot, float angle, vec3 axis);
CGLM_EXPORT
void
glmc_decompose_scalev(mat4 m, vec3 s);
CGLM_EXPORT
bool
glmc_uniscaled(mat4 m);
CGLM_EXPORT
void
glmc_decompose_rs(mat4 m, mat4 r, vec3 s);

View File

@@ -47,12 +47,16 @@ glmc_mat4_mul(mat4 m1, mat4 m2, mat4 dest);
CGLM_EXPORT
void
glmc_mat4_mulN(mat4 * __restrict matrices[], int len, mat4 dest);
glmc_mat4_mulN(mat4 * __restrict matrices[], uint32_t len, mat4 dest);
CGLM_EXPORT
void
glmc_mat4_mulv(mat4 m, vec4 v, vec4 dest);
CGLM_EXPORT
void
glmc_mat4_quat(mat4 m, versor dest);
CGLM_EXPORT
void
glmc_mat4_transpose_to(mat4 m, mat4 dest);

View File

@@ -19,33 +19,79 @@ glmc_quat_identity(versor q);
CGLM_EXPORT
void
glmc_quat(versor q,
float angle,
float x,
float y,
float z);
glmc_quat_init(versor q, float x, float y, float z, float w);
CGLM_EXPORT
void
glmc_quatv(versor q,
float angle,
vec3 v);
glmc_quat(versor q, float angle, float x, float y, float z);
CGLM_EXPORT
void
glmc_quatv(versor q, float angle, vec3 axis);
CGLM_EXPORT
void
glmc_quat_copy(versor q, versor dest);
CGLM_EXPORT
float
glmc_quat_norm(versor q);
CGLM_EXPORT
void
glmc_quat_normalize_to(versor q, versor dest);
CGLM_EXPORT
void
glmc_quat_normalize(versor q);
CGLM_EXPORT
float
glmc_quat_dot(versor q, versor r);
glmc_quat_dot(versor p, versor q);
CGLM_EXPORT
void
glmc_quat_mulv(versor q1, versor q2, versor dest);
glmc_quat_conjugate(versor q, versor dest);
CGLM_EXPORT
void
glmc_quat_inv(versor q, versor dest);
CGLM_EXPORT
void
glmc_quat_add(versor p, versor q, versor dest);
CGLM_EXPORT
void
glmc_quat_sub(versor p, versor q, versor dest);
CGLM_EXPORT
float
glmc_quat_real(versor q);
CGLM_EXPORT
void
glmc_quat_imag(versor q, vec3 dest);
CGLM_EXPORT
void
glmc_quat_imagn(versor q, vec3 dest);
CGLM_EXPORT
float
glmc_quat_imaglen(versor q);
CGLM_EXPORT
float
glmc_quat_angle(versor q);
CGLM_EXPORT
void
glmc_quat_axis(versor q, versor dest);
CGLM_EXPORT
void
glmc_quat_mul(versor p, versor q, versor dest);
CGLM_EXPORT
void
@@ -53,10 +99,51 @@ glmc_quat_mat4(versor q, mat4 dest);
CGLM_EXPORT
void
glmc_quat_slerp(versor q,
versor r,
float t,
versor dest);
glmc_quat_mat4t(versor q, mat4 dest);
CGLM_EXPORT
void
glmc_quat_mat3(versor q, mat3 dest);
CGLM_EXPORT
void
glmc_quat_mat3t(versor q, mat3 dest);
CGLM_EXPORT
void
glmc_quat_lerp(versor from, versor to, float t, versor dest);
CGLM_EXPORT
void
glmc_quat_slerp(versor q, versor r, float t, versor dest);
CGLM_EXPORT
void
glmc_quat_look(vec3 eye, versor ori, mat4 dest);
CGLM_EXPORT
void
glmc_quat_for(vec3 dir, vec3 fwd, vec3 up, versor dest);
CGLM_EXPORT
void
glmc_quat_forp(vec3 from, vec3 to, vec3 fwd, vec3 up, versor dest);
CGLM_EXPORT
void
glmc_quat_rotatev(versor from, vec3 to, vec3 dest);
CGLM_EXPORT
void
glmc_quat_rotate(mat4 m, versor q, mat4 dest);
CGLM_EXPORT
void
glmc_quat_rotate_at(mat4 model, versor q, vec3 pivot);
CGLM_EXPORT
void
glmc_quat_rotate_atm(mat4 m, versor q, vec3 pivot);
#ifdef __cplusplus
}

View File

@@ -16,10 +16,22 @@ extern "C" {
/* DEPRECATED! use _copy, _ucopy versions */
#define glmc_vec_dup(v, dest) glmc_vec_copy(v, dest)
CGLM_EXPORT
void
glmc_vec3(vec4 v4, vec3 dest);
CGLM_EXPORT
void
glmc_vec_copy(vec3 a, vec3 dest);
CGLM_EXPORT
void
glmc_vec_zero(vec3 v);
CGLM_EXPORT
void
glmc_vec_one(vec3 v);
CGLM_EXPORT
float
glmc_vec_dot(vec3 a, vec3 b);
@@ -50,7 +62,19 @@ glmc_vec_add(vec3 v1, vec3 v2, vec3 dest);
CGLM_EXPORT
void
glmc_vec_sub(vec3 v1, vec3 v2, vec3 dest);
glmc_vec_adds(vec3 v, float s, vec3 dest);
CGLM_EXPORT
void
glmc_vec_sub(vec3 a, vec3 b, vec3 dest);
CGLM_EXPORT
void
glmc_vec_subs(vec3 v, float s, vec3 dest);
CGLM_EXPORT
void
glmc_vec_mul(vec3 a, vec3 b, vec3 d);
CGLM_EXPORT
void
@@ -60,10 +84,38 @@ CGLM_EXPORT
void
glmc_vec_scale_as(vec3 v, float s, vec3 dest);
CGLM_EXPORT
void
glmc_vec_div(vec3 a, vec3 b, vec3 dest);
CGLM_EXPORT
void
glmc_vec_divs(vec3 a, float s, vec3 dest);
CGLM_EXPORT
void
glmc_vec_addadd(vec3 a, vec3 b, vec3 dest);
CGLM_EXPORT
void
glmc_vec_subadd(vec3 a, vec3 b, vec3 dest);
CGLM_EXPORT
void
glmc_vec_muladd(vec3 a, vec3 b, vec3 dest);
CGLM_EXPORT
void
glmc_vec_muladds(vec3 a, float s, vec3 dest);
CGLM_EXPORT
void
glmc_vec_flipsign(vec3 v);
CGLM_EXPORT
void
glmc_vec_flipsign_to(vec3 v, vec3 dest);
CGLM_EXPORT
void
glmc_vec_inv(vec3 v);
@@ -108,6 +160,72 @@ CGLM_EXPORT
void
glmc_vec_clamp(vec3 v, float minVal, float maxVal);
CGLM_EXPORT
void
glmc_vec_ortho(vec3 v, vec3 dest);
CGLM_EXPORT
void
glmc_vec_lerp(vec3 from, vec3 to, float t, vec3 dest);
/* ext */
CGLM_EXPORT
void
glmc_vec_mulv(vec3 a, vec3 b, vec3 d);
CGLM_EXPORT
void
glmc_vec_broadcast(float val, vec3 d);
CGLM_EXPORT
bool
glmc_vec_eq(vec3 v, float val);
CGLM_EXPORT
bool
glmc_vec_eq_eps(vec3 v, float val);
CGLM_EXPORT
bool
glmc_vec_eq_all(vec3 v);
CGLM_EXPORT
bool
glmc_vec_eqv(vec3 v1, vec3 v2);
CGLM_EXPORT
bool
glmc_vec_eqv_eps(vec3 v1, vec3 v2);
CGLM_EXPORT
float
glmc_vec_max(vec3 v);
CGLM_EXPORT
float
glmc_vec_min(vec3 v);
CGLM_EXPORT
bool
glmc_vec_isnan(vec3 v);
CGLM_EXPORT
bool
glmc_vec_isinf(vec3 v);
CGLM_EXPORT
bool
glmc_vec_isvalid(vec3 v);
CGLM_EXPORT
void
glmc_vec_sign(vec3 v, vec3 dest);
CGLM_EXPORT
void
glmc_vec_sqrt(vec3 v, vec3 dest);
#ifdef __cplusplus
}
#endif

View File

@@ -17,6 +17,18 @@ extern "C" {
#define glmc_vec4_dup3(v, dest) glmc_vec4_copy3(v, dest)
#define glmc_vec4_dup(v, dest) glmc_vec4_copy(v, dest)
CGLM_EXPORT
void
glmc_vec4(vec3 v3, float last, vec4 dest);
CGLM_EXPORT
void
glmc_vec4_zero(vec4 v);
CGLM_EXPORT
void
glmc_vec4_one(vec4 v);
CGLM_EXPORT
void
glmc_vec4_copy3(vec4 a, vec3 dest);
@@ -47,11 +59,23 @@ glmc_vec4_normalize(vec4 v);
CGLM_EXPORT
void
glmc_vec4_add(vec4 v1, vec4 v2, vec4 dest);
glmc_vec4_add(vec4 a, vec4 b, vec4 dest);
CGLM_EXPORT
void
glmc_vec4_sub(vec4 v1, vec4 v2, vec4 dest);
glmc_vec4_adds(vec4 v, float s, vec4 dest);
CGLM_EXPORT
void
glmc_vec4_sub(vec4 a, vec4 b, vec4 dest);
CGLM_EXPORT
void
glmc_vec4_subs(vec4 v, float s, vec4 dest);
CGLM_EXPORT
void
glmc_vec4_mul(vec4 a, vec4 b, vec4 d);
CGLM_EXPORT
void
@@ -61,10 +85,38 @@ CGLM_EXPORT
void
glmc_vec4_scale_as(vec3 v, float s, vec3 dest);
CGLM_EXPORT
void
glmc_vec4_div(vec4 a, vec4 b, vec4 dest);
CGLM_EXPORT
void
glmc_vec4_divs(vec4 v, float s, vec4 dest);
CGLM_EXPORT
void
glmc_vec4_addadd(vec4 a, vec4 b, vec4 dest);
CGLM_EXPORT
void
glmc_vec4_subadd(vec4 a, vec4 b, vec4 dest);
CGLM_EXPORT
void
glmc_vec4_muladd(vec4 a, vec4 b, vec4 dest);
CGLM_EXPORT
void
glmc_vec4_muladds(vec4 a, float s, vec4 dest);
CGLM_EXPORT
void
glmc_vec4_flipsign(vec4 v);
CGLM_EXPORT
void
glmc_vec4_flipsign_to(vec4 v, vec4 dest);
CGLM_EXPORT
void
glmc_vec4_inv(vec4 v);
@@ -89,6 +141,68 @@ CGLM_EXPORT
void
glmc_vec4_clamp(vec4 v, float minVal, float maxVal);
CGLM_EXPORT
void
glmc_vec4_lerp(vec4 from, vec4 to, float t, vec4 dest);
/* ext */
CGLM_EXPORT
void
glmc_vec4_mulv(vec4 a, vec4 b, vec4 d);
CGLM_EXPORT
void
glmc_vec4_broadcast(float val, vec4 d);
CGLM_EXPORT
bool
glmc_vec4_eq(vec4 v, float val);
CGLM_EXPORT
bool
glmc_vec4_eq_eps(vec4 v, float val);
CGLM_EXPORT
bool
glmc_vec4_eq_all(vec4 v);
CGLM_EXPORT
bool
glmc_vec4_eqv(vec4 v1, vec4 v2);
CGLM_EXPORT
bool
glmc_vec4_eqv_eps(vec4 v1, vec4 v2);
CGLM_EXPORT
float
glmc_vec4_max(vec4 v);
CGLM_EXPORT
float
glmc_vec4_min(vec4 v);
CGLM_EXPORT
bool
glmc_vec4_isnan(vec4 v);
CGLM_EXPORT
bool
glmc_vec4_isinf(vec4 v);
CGLM_EXPORT
bool
glmc_vec4_isvalid(vec4 v);
CGLM_EXPORT
void
glmc_vec4_sign(vec4 v, vec4 dest);
CGLM_EXPORT
void
glmc_vec4_sqrt(vec4 v, vec4 dest);
#ifdef __cplusplus
}
#endif

View File

@@ -14,7 +14,7 @@
#include <math.h>
#include <float.h>
#if defined(_WIN32)
#if defined(_MSC_VER)
# ifdef CGLM_DLL
# define CGLM_EXPORT __declspec(dllexport)
# else

View File

@@ -10,6 +10,9 @@
#include "common.h"
#include "plane.h"
#include "vec3.h"
#include "vec4.h"
#include "mat4.h"
#define GLM_LBN 0 /* left bottom near */
#define GLM_LTN 1 /* left top near */

View File

@@ -31,6 +31,7 @@
#define cglm_mat3_h
#include "common.h"
#include "vec3.h"
#ifdef CGLM_SSE_FP
# include "simd/sse2/mat3.h"
@@ -186,6 +187,56 @@ glm_mat3_mulv(mat3 m, vec3 v, vec3 dest) {
dest[2] = m[0][2] * v[0] + m[1][2] * v[1] + m[2][2] * v[2];
}
/*!
* @brief convert mat4's rotation part to quaternion
*
* @param[in] m left matrix
* @param[out] dest destination quaternion
*/
CGLM_INLINE
void
glm_mat3_quat(mat3 m, versor dest) {
float trace, r, rinv;
/* it seems using like m12 instead of m[1][2] causes extra instructions */
trace = m[0][0] + m[1][1] + m[2][2];
if (trace >= 0.0f) {
r = sqrtf(1.0f + trace);
rinv = 0.5f / r;
dest[0] = rinv * (m[1][2] - m[2][1]);
dest[1] = rinv * (m[2][0] - m[0][2]);
dest[2] = rinv * (m[0][1] - m[1][0]);
dest[3] = r * 0.5f;
} else if (m[0][0] >= m[1][1] && m[0][0] >= m[2][2]) {
r = sqrtf(1.0f - m[1][1] - m[2][2] + m[0][0]);
rinv = 0.5f / r;
dest[0] = r * 0.5f;
dest[1] = rinv * (m[0][1] + m[1][0]);
dest[2] = rinv * (m[0][2] + m[2][0]);
dest[3] = rinv * (m[1][2] - m[2][1]);
} else if (m[1][1] >= m[2][2]) {
r = sqrtf(1.0f - m[0][0] - m[2][2] + m[1][1]);
rinv = 0.5f / r;
dest[0] = rinv * (m[0][1] + m[1][0]);
dest[1] = r * 0.5f;
dest[2] = rinv * (m[1][2] + m[2][1]);
dest[3] = rinv * (m[2][0] - m[0][2]);
} else {
r = sqrtf(1.0f - m[0][0] - m[1][1] + m[2][2]);
rinv = 0.5f / r;
dest[0] = rinv * (m[0][2] + m[2][0]);
dest[1] = rinv * (m[1][2] + m[2][1]);
dest[2] = r * 0.5f;
dest[3] = rinv * (m[0][1] - m[1][0]);
}
}
/*!
* @brief scale (multiply with scalar) matrix
*

View File

@@ -45,6 +45,8 @@
#define cglm_mat_h
#include "common.h"
#include "vec4.h"
#include "vec3.h"
#ifdef CGLM_SSE_FP
# include "simd/sse2/mat4.h"
@@ -58,7 +60,9 @@
# include "simd/neon/mat4.h"
#endif
#include <assert.h>
#ifdef DEBUG
# include <assert.h>
#endif
#define GLM_MAT4_IDENTITY_INIT {{1.0f, 0.0f, 0.0f, 0.0f}, \
{0.0f, 1.0f, 0.0f, 0.0f}, \
@@ -106,13 +110,13 @@ CGLM_INLINE
void
glm_mat4_copy(mat4 mat, mat4 dest) {
#ifdef __AVX__
_mm256_store_ps(dest[0], _mm256_load_ps(mat[0]));
_mm256_store_ps(dest[2], _mm256_load_ps(mat[2]));
glmm_store256(dest[0], glmm_load256(mat[0]));
glmm_store256(dest[2], glmm_load256(mat[2]));
#elif defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest[0], _mm_load_ps(mat[0]));
_mm_store_ps(dest[1], _mm_load_ps(mat[1]));
_mm_store_ps(dest[2], _mm_load_ps(mat[2]));
_mm_store_ps(dest[3], _mm_load_ps(mat[3]));
glmm_store(dest[0], glmm_load(mat[0]));
glmm_store(dest[1], glmm_load(mat[1]));
glmm_store(dest[2], glmm_load(mat[2]));
glmm_store(dest[3], glmm_load(mat[3]));
#else
glm_mat4_ucopy(mat, dest);
#endif
@@ -281,19 +285,17 @@ glm_mat4_mul(mat4 m1, mat4 m2, mat4 dest) {
*/
CGLM_INLINE
void
glm_mat4_mulN(mat4 * __restrict matrices[], int len, mat4 dest) {
int i;
glm_mat4_mulN(mat4 * __restrict matrices[], uint32_t len, mat4 dest) {
uint32_t i;
#ifdef DEBUG
assert(len > 1 && "there must be least 2 matrices to go!");
#endif
glm_mat4_mul(*matrices[0],
*matrices[1],
dest);
glm_mat4_mul(*matrices[0], *matrices[1], dest);
for (i = 2; i < len; i++)
glm_mat4_mul(dest,
*matrices[i],
dest);
glm_mat4_mul(dest, *matrices[i], dest);
}
/*!
@@ -318,6 +320,55 @@ glm_mat4_mulv(mat4 m, vec4 v, vec4 dest) {
#endif
}
/*!
* @brief convert mat4's rotation part to quaternion
*
* @param[in] m left matrix
* @param[out] dest destination quaternion
*/
CGLM_INLINE
void
glm_mat4_quat(mat4 m, versor dest) {
float trace, r, rinv;
/* it seems using like m12 instead of m[1][2] causes extra instructions */
trace = m[0][0] + m[1][1] + m[2][2];
if (trace >= 0.0f) {
r = sqrtf(1.0f + trace);
rinv = 0.5f / r;
dest[0] = rinv * (m[1][2] - m[2][1]);
dest[1] = rinv * (m[2][0] - m[0][2]);
dest[2] = rinv * (m[0][1] - m[1][0]);
dest[3] = r * 0.5f;
} else if (m[0][0] >= m[1][1] && m[0][0] >= m[2][2]) {
r = sqrtf(1.0f - m[1][1] - m[2][2] + m[0][0]);
rinv = 0.5f / r;
dest[0] = r * 0.5f;
dest[1] = rinv * (m[0][1] + m[1][0]);
dest[2] = rinv * (m[0][2] + m[2][0]);
dest[3] = rinv * (m[1][2] - m[2][1]);
} else if (m[1][1] >= m[2][2]) {
r = sqrtf(1.0f - m[0][0] - m[2][2] + m[1][1]);
rinv = 0.5f / r;
dest[0] = rinv * (m[0][1] + m[1][0]);
dest[1] = r * 0.5f;
dest[2] = rinv * (m[1][2] + m[2][1]);
dest[3] = rinv * (m[2][0] - m[0][2]);
} else {
r = sqrtf(1.0f - m[0][0] - m[1][1] + m[2][2]);
rinv = 0.5f / r;
dest[0] = rinv * (m[0][2] + m[2][0]);
dest[1] = rinv * (m[1][2] + m[2][1]);
dest[2] = r * 0.5f;
dest[3] = rinv * (m[0][1] - m[1][0]);
}
}
/*!
* @brief multiply vector with mat4's mat3 part(rotation)
*
@@ -568,5 +619,4 @@ glm_mat4_swap_row(mat4 mat, int row1, int row2) {
mat[3][row2] = tmp[3];
}
#else
#endif /* cglm_mat_h */

View File

@@ -9,9 +9,7 @@
#define cglm_plane_h
#include "common.h"
#include "mat4.h"
#include "vec4.h"
#include "vec3.h"
/*
Plane equation: Ax + By + Cz + D = 0;

View File

@@ -8,9 +8,9 @@
#ifndef cglm_project_h
#define cglm_project_h
#include "mat4.h"
#include "vec3.h"
#include "vec4.h"
#include "mat4.h"
/*!
* @brief maps the specified viewport coordinates into specified space [1]

View File

@@ -11,41 +11,84 @@
GLM_QUAT_IDENTITY
Functions:
CGLM_INLINE void glm_quat_identity(versor q);
CGLM_INLINE void glm_quat(versor q, float angle, float x, float y, float z);
CGLM_INLINE void glm_quatv(versor q, float angle, vec3 v);
CGLM_INLINE void glm_quat_identity(versor q);
CGLM_INLINE void glm_quat_init(versor q, float x, float y, float z, float w);
CGLM_INLINE void glm_quat(versor q, float angle, float x, float y, float z);
CGLM_INLINE void glm_quatv(versor q, float angle, vec3 axis);
CGLM_INLINE void glm_quat_copy(versor q, versor dest);
CGLM_INLINE float glm_quat_norm(versor q);
CGLM_INLINE void glm_quat_normalize(versor q);
CGLM_INLINE float glm_quat_dot(versor q, versor r);
CGLM_INLINE void glm_quat_mulv(versor q1, versor q2, versor dest);
CGLM_INLINE void glm_quat_mat4(versor q, mat4 dest);
CGLM_INLINE void glm_quat_slerp(versor q, versor r, float t, versor dest);
CGLM_INLINE void glm_quat_normalize(versor q);
CGLM_INLINE void glm_quat_normalize_to(versor q, versor dest);
CGLM_INLINE float glm_quat_dot(versor q1, versor q2);
CGLM_INLINE void glm_quat_conjugate(versor q, versor dest);
CGLM_INLINE void glm_quat_inv(versor q, versor dest);
CGLM_INLINE void glm_quat_add(versor p, versor q, versor dest);
CGLM_INLINE void glm_quat_sub(versor p, versor q, versor dest);
CGLM_INLINE float glm_quat_real(versor q);
CGLM_INLINE void glm_quat_imag(versor q, vec3 dest);
CGLM_INLINE void glm_quat_imagn(versor q, vec3 dest);
CGLM_INLINE float glm_quat_imaglen(versor q);
CGLM_INLINE float glm_quat_angle(versor q);
CGLM_INLINE void glm_quat_axis(versor q, versor dest);
CGLM_INLINE void glm_quat_mul(versor p, versor q, versor dest);
CGLM_INLINE void glm_quat_mat4(versor q, mat4 dest);
CGLM_INLINE void glm_quat_mat4t(versor q, mat4 dest);
CGLM_INLINE void glm_quat_mat3(versor q, mat3 dest);
CGLM_INLINE void glm_quat_mat3t(versor q, mat3 dest);
CGLM_INLINE void glm_quat_lerp(versor from, versor to, float t, versor dest);
CGLM_INLINE void glm_quat_slerp(versor q, versor r, float t, versor dest);
CGLM_INLINE void glm_quat_look(vec3 eye, versor ori, mat4 dest);
CGLM_INLINE void glm_quat_for(vec3 dir, vec3 fwd, vec3 up, versor dest);
CGLM_INLINE void glm_quat_forp(vec3 from,
vec3 to,
vec3 fwd,
vec3 up,
versor dest);
CGLM_INLINE void glm_quat_rotatev(versor q, vec3 v, vec3 dest);
CGLM_INLINE void glm_quat_rotate(mat4 m, versor q, mat4 dest);
*/
#ifndef cglm_quat_h
#define cglm_quat_h
#include "common.h"
#include "vec3.h"
#include "vec4.h"
#include "mat4.h"
#include "mat3.h"
#include "affine-mat.h"
#ifdef CGLM_SSE_FP
# include "simd/sse2/quat.h"
#endif
CGLM_INLINE
void
glm_mat4_identity(mat4 mat);
CGLM_INLINE
void
glm_mat4_mulv(mat4 m, vec4 v, vec4 dest);
CGLM_INLINE
void
glm_mul_rot(mat4 m1, mat4 m2, mat4 dest);
CGLM_INLINE
void
glm_translate(mat4 m, vec3 v);
/*
* IMPORTANT! cglm stores quat as [w, x, y, z]
* IMPORTANT:
* ----------------------------------------------------------------------------
* cglm stores quat as [x, y, z, w] since v0.3.6
*
* Possible changes (these may be changed in the future):
* - versor is identity quat, we can define new type for quat.
* it can't be quat or quaternion becuase someone can use that name for
* variable name. maybe just vec4.
* - it stores [w, x, y, z] but it may change to [x, y, z, w] if we get enough
* feedback to change it.
* - in general we use last param as dest, but this header used first param
* as dest this may be changed but decided yet
* it was [w, x, y, z] before v0.3.6 it has been changed to [x, y, z, w]
* with v0.3.6 version.
* ----------------------------------------------------------------------------
*/
#define GLM_QUAT_IDENTITY_INIT {1.0f, 0.0f, 0.0f, 0.0f}
#define GLM_QUAT_IDENTITY_INIT {0.0f, 0.0f, 0.0f, 1.0f}
#define GLM_QUAT_IDENTITY ((versor)GLM_QUAT_IDENTITY_INIT)
/*!
@@ -60,6 +103,49 @@ glm_quat_identity(versor q) {
glm_vec4_copy(v, q);
}
/*!
* @brief inits quaterion with raw values
*
* @param[out] q quaternion
* @param[in] x x
* @param[in] y y
* @param[in] z z
* @param[in] w w (real part)
*/
CGLM_INLINE
void
glm_quat_init(versor q, float x, float y, float z, float w) {
q[0] = x;
q[1] = y;
q[2] = z;
q[3] = w;
}
/*!
* @brief creates NEW quaternion with axis vector
*
* @param[out] q quaternion
* @param[in] angle angle (radians)
* @param[in] axis axis
*/
CGLM_INLINE
void
glm_quatv(versor q, float angle, vec3 axis) {
vec3 k;
float a, c, s;
a = angle * 0.5f;
c = cosf(a);
s = sinf(a);
glm_normalize_to(axis, k);
q[0] = s * k[0];
q[1] = s * k[1];
q[2] = s * k[2];
q[3] = c;
}
/*!
* @brief creates NEW quaternion with individual axis components
*
@@ -71,45 +157,21 @@ glm_quat_identity(versor q) {
*/
CGLM_INLINE
void
glm_quat(versor q,
float angle,
float x,
float y,
float z) {
float a, c, s;
a = angle * 0.5f;
c = cosf(a);
s = sinf(a);
q[0] = c;
q[1] = s * x;
q[2] = s * y;
q[3] = s * z;
glm_quat(versor q, float angle, float x, float y, float z) {
vec3 axis = {x, y, z};
glm_quatv(q, angle, axis);
}
/*!
* @brief creates NEW quaternion with axis vector
* @brief copy quaternion to another one
*
* @param[out] q quaternion
* @param[in] angle angle (radians)
* @param[in] v axis
* @param[in] q quaternion
* @param[out] dest destination
*/
CGLM_INLINE
void
glm_quatv(versor q,
float angle,
vec3 v) {
float a, c, s;
a = angle * 0.5f;
c = cosf(a);
s = sinf(a);
q[0] = c;
q[1] = s * v[0];
q[2] = s * v[1];
q[3] = s * v[2];
glm_quat_copy(versor q, versor dest) {
glm_vec4_copy(q, dest);
}
/*!
@@ -123,6 +185,43 @@ glm_quat_norm(versor q) {
return glm_vec4_norm(q);
}
/*!
* @brief normalize quaternion and store result in dest
*
* @param[in] q quaternion to normalze
* @param[out] dest destination quaternion
*/
CGLM_INLINE
void
glm_quat_normalize_to(versor q, versor dest) {
#if defined( __SSE2__ ) || defined( __SSE2__ )
__m128 xdot, x0;
float dot;
x0 = glmm_load(q);
xdot = glmm_dot(x0, x0);
dot = _mm_cvtss_f32(xdot);
if (dot <= 0.0f) {
glm_quat_identity(dest);
return;
}
glmm_store(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
#else
float dot;
dot = glm_vec4_norm2(q);
if (dot <= 0.0f) {
glm_quat_identity(q);
return;
}
glm_vec4_scale(q, 1.0f / sqrtf(dot), dest);
#endif
}
/*!
* @brief normalize quaternion
*
@@ -131,45 +230,178 @@ glm_quat_norm(versor q) {
CGLM_INLINE
void
glm_quat_normalize(versor q) {
float sum;
sum = q[0] * q[0] + q[1] * q[1]
+ q[2] * q[2] + q[3] * q[3];
if (fabs(1.0f - sum) < 0.0001f)
return;
glm_vec4_scale(q, 1.0f / sqrtf(sum), q);
glm_quat_normalize_to(q, q);
}
/*!
* @brief dot product of two quaternion
*
* @param[in] q quaternion 1
* @param[in] r quaternion 2
* @param[in] p quaternion 1
* @param[in] q quaternion 2
*/
CGLM_INLINE
float
glm_quat_dot(versor q, versor r) {
return glm_vec4_dot(q, r);
glm_quat_dot(versor p, versor q) {
return glm_vec4_dot(p, q);
}
/*!
* @brief conjugate of quaternion
*
* @param[in] q quaternion
* @param[out] dest conjugate
*/
CGLM_INLINE
void
glm_quat_conjugate(versor q, versor dest) {
glm_vec4_flipsign_to(q, dest);
dest[3] = -dest[3];
}
/*!
* @brief inverse of non-zero quaternion
*
* @param[in] q quaternion
* @param[out] dest inverse quaternion
*/
CGLM_INLINE
void
glm_quat_inv(versor q, versor dest) {
versor conj;
glm_quat_conjugate(q, conj);
glm_vec4_scale(conj, 1.0f / glm_vec4_norm2(q), dest);
}
/*!
* @brief add (componentwise) two quaternions and store result in dest
*
* @param[in] p quaternion 1
* @param[in] q quaternion 2
* @param[out] dest result quaternion
*/
CGLM_INLINE
void
glm_quat_add(versor p, versor q, versor dest) {
glm_vec4_add(p, q, dest);
}
/*!
* @brief subtract (componentwise) two quaternions and store result in dest
*
* @param[in] p quaternion 1
* @param[in] q quaternion 2
* @param[out] dest result quaternion
*/
CGLM_INLINE
void
glm_quat_sub(versor p, versor q, versor dest) {
glm_vec4_sub(p, q, dest);
}
/*!
* @brief returns real part of quaternion
*
* @param[in] q quaternion
*/
CGLM_INLINE
float
glm_quat_real(versor q) {
return q[3];
}
/*!
* @brief returns imaginary part of quaternion
*
* @param[in] q quaternion
* @param[out] dest imag
*/
CGLM_INLINE
void
glm_quat_imag(versor q, vec3 dest) {
dest[0] = q[0];
dest[1] = q[1];
dest[2] = q[2];
}
/*!
* @brief returns normalized imaginary part of quaternion
*
* @param[in] q quaternion
*/
CGLM_INLINE
void
glm_quat_imagn(versor q, vec3 dest) {
glm_normalize_to(q, dest);
}
/*!
* @brief returns length of imaginary part of quaternion
*
* @param[in] q quaternion
*/
CGLM_INLINE
float
glm_quat_imaglen(versor q) {
return glm_vec_norm(q);
}
/*!
* @brief returns angle of quaternion
*
* @param[in] q quaternion
*/
CGLM_INLINE
float
glm_quat_angle(versor q) {
/*
sin(theta / 2) = length(x*x + y*y + z*z)
cos(theta / 2) = w
theta = 2 * atan(sin(theta / 2) / cos(theta / 2))
*/
return 2.0f * atan2f(glm_quat_imaglen(q), glm_quat_real(q));
}
/*!
* @brief axis of quaternion
*
* @param[in] q quaternion
* @param[out] dest axis of quaternion
*/
CGLM_INLINE
void
glm_quat_axis(versor q, versor dest) {
glm_quat_imagn(q, dest);
}
/*!
* @brief multiplies two quaternion and stores result in dest
* this is also called Hamilton Product
*
* @param[in] q1 quaternion 1
* @param[in] q2 quaternion 2
* According to WikiPedia:
* The product of two rotation quaternions [clarification needed] will be
* equivalent to the rotation q followed by the rotation p
*
* @param[in] p quaternion 1
* @param[in] q quaternion 2
* @param[out] dest result quaternion
*/
CGLM_INLINE
void
glm_quat_mulv(versor q1, versor q2, versor dest) {
dest[0] = q2[0] * q1[0] - q2[1] * q1[1] - q2[2] * q1[2] - q2[3] * q1[3];
dest[1] = q2[0] * q1[1] + q2[1] * q1[0] - q2[2] * q1[3] + q2[3] * q1[2];
dest[2] = q2[0] * q1[2] + q2[1] * q1[3] + q2[2] * q1[0] - q2[3] * q1[1];
dest[3] = q2[0] * q1[3] - q2[1] * q1[2] + q2[2] * q1[1] + q2[3] * q1[0];
glm_quat_normalize(dest);
glm_quat_mul(versor p, versor q, versor dest) {
/*
+ (a1 b2 + b1 a2 + c1 d2 d1 c2)i
+ (a1 c2 b1 d2 + c1 a2 + d1 b2)j
+ (a1 d2 + b1 c2 c1 b2 + d1 a2)k
a1 a2 b1 b2 c1 c2 d1 d2
*/
#if defined( __SSE__ ) || defined( __SSE2__ )
glm_quat_mul_sse2(p, q, dest);
#else
dest[0] = p[3] * q[0] + p[0] * q[3] + p[1] * q[2] - p[2] * q[1];
dest[1] = p[3] * q[1] - p[0] * q[2] + p[1] * q[3] + p[2] * q[0];
dest[2] = p[3] * q[2] + p[0] * q[1] - p[1] * q[0] + p[2] * q[3];
dest[3] = p[3] * q[3] - p[0] * q[0] - p[1] * q[1] - p[2] * q[2];
#endif
}
/*!
@@ -181,19 +413,22 @@ glm_quat_mulv(versor q1, versor q2, versor dest) {
CGLM_INLINE
void
glm_quat_mat4(versor q, mat4 dest) {
float w, x, y, z;
float xx, yy, zz;
float xy, yz, xz;
float wx, wy, wz;
float w, x, y, z,
xx, yy, zz,
xy, yz, xz,
wx, wy, wz, norm, s;
w = q[0];
x = q[1];
y = q[2];
z = q[3];
norm = glm_quat_norm(q);
s = norm > 0.0f ? 2.0f / norm : 0.0f;
xx = 2.0f * x * x; xy = 2.0f * x * y; wx = 2.0f * w * x;
yy = 2.0f * y * y; yz = 2.0f * y * z; wy = 2.0f * w * y;
zz = 2.0f * z * z; xz = 2.0f * x * z; wz = 2.0f * w * z;
x = q[0];
y = q[1];
z = q[2];
w = q[3];
xx = s * x * x; xy = s * x * y; wx = s * w * x;
yy = s * y * y; yz = s * y * z; wy = s * w * y;
zz = s * z * z; xz = s * x * z; wz = s * w * z;
dest[0][0] = 1.0f - yy - zz;
dest[1][1] = 1.0f - xx - zz;
@@ -207,8 +442,8 @@ glm_quat_mat4(versor q, mat4 dest) {
dest[2][1] = yz - wx;
dest[0][2] = xz - wy;
dest[1][3] = 0.0f;
dest[0][3] = 0.0f;
dest[1][3] = 0.0f;
dest[2][3] = 0.0f;
dest[3][0] = 0.0f;
dest[3][1] = 0.0f;
@@ -216,69 +451,347 @@ glm_quat_mat4(versor q, mat4 dest) {
dest[3][3] = 1.0f;
}
/*!
* @brief convert quaternion to mat4 (transposed)
*
* @param[in] q quaternion
* @param[out] dest result matrix as transposed
*/
CGLM_INLINE
void
glm_quat_mat4t(versor q, mat4 dest) {
float w, x, y, z,
xx, yy, zz,
xy, yz, xz,
wx, wy, wz, norm, s;
norm = glm_quat_norm(q);
s = norm > 0.0f ? 2.0f / norm : 0.0f;
x = q[0];
y = q[1];
z = q[2];
w = q[3];
xx = s * x * x; xy = s * x * y; wx = s * w * x;
yy = s * y * y; yz = s * y * z; wy = s * w * y;
zz = s * z * z; xz = s * x * z; wz = s * w * z;
dest[0][0] = 1.0f - yy - zz;
dest[1][1] = 1.0f - xx - zz;
dest[2][2] = 1.0f - xx - yy;
dest[1][0] = xy + wz;
dest[2][1] = yz + wx;
dest[0][2] = xz + wy;
dest[0][1] = xy - wz;
dest[1][2] = yz - wx;
dest[2][0] = xz - wy;
dest[0][3] = 0.0f;
dest[1][3] = 0.0f;
dest[2][3] = 0.0f;
dest[3][0] = 0.0f;
dest[3][1] = 0.0f;
dest[3][2] = 0.0f;
dest[3][3] = 1.0f;
}
/*!
* @brief convert quaternion to mat3
*
* @param[in] q quaternion
* @param[out] dest result matrix
*/
CGLM_INLINE
void
glm_quat_mat3(versor q, mat3 dest) {
float w, x, y, z,
xx, yy, zz,
xy, yz, xz,
wx, wy, wz, norm, s;
norm = glm_quat_norm(q);
s = norm > 0.0f ? 2.0f / norm : 0.0f;
x = q[0];
y = q[1];
z = q[2];
w = q[3];
xx = s * x * x; xy = s * x * y; wx = s * w * x;
yy = s * y * y; yz = s * y * z; wy = s * w * y;
zz = s * z * z; xz = s * x * z; wz = s * w * z;
dest[0][0] = 1.0f - yy - zz;
dest[1][1] = 1.0f - xx - zz;
dest[2][2] = 1.0f - xx - yy;
dest[0][1] = xy + wz;
dest[1][2] = yz + wx;
dest[2][0] = xz + wy;
dest[1][0] = xy - wz;
dest[2][1] = yz - wx;
dest[0][2] = xz - wy;
}
/*!
* @brief convert quaternion to mat3 (transposed)
*
* @param[in] q quaternion
* @param[out] dest result matrix
*/
CGLM_INLINE
void
glm_quat_mat3t(versor q, mat3 dest) {
float w, x, y, z,
xx, yy, zz,
xy, yz, xz,
wx, wy, wz, norm, s;
norm = glm_quat_norm(q);
s = norm > 0.0f ? 2.0f / norm : 0.0f;
x = q[0];
y = q[1];
z = q[2];
w = q[3];
xx = s * x * x; xy = s * x * y; wx = s * w * x;
yy = s * y * y; yz = s * y * z; wy = s * w * y;
zz = s * z * z; xz = s * x * z; wz = s * w * z;
dest[0][0] = 1.0f - yy - zz;
dest[1][1] = 1.0f - xx - zz;
dest[2][2] = 1.0f - xx - yy;
dest[1][0] = xy + wz;
dest[2][1] = yz + wx;
dest[0][2] = xz + wy;
dest[0][1] = xy - wz;
dest[1][2] = yz - wx;
dest[2][0] = xz - wy;
}
/*!
* @brief interpolates between two quaternions
* using linear interpolation (LERP)
*
* @param[in] from from
* @param[in] to to
* @param[in] t interpolant (amount) clamped between 0 and 1
* @param[out] dest result quaternion
*/
CGLM_INLINE
void
glm_quat_lerp(versor from, versor to, float t, versor dest) {
glm_vec4_lerp(from, to, t, dest);
}
/*!
* @brief interpolates between two quaternions
* using spherical linear interpolation (SLERP)
*
* @param[in] q from
* @param[in] r to
* @param[in] from from
* @param[in] to to
* @param[in] t amout
* @param[out] dest result quaternion
*/
CGLM_INLINE
void
glm_quat_slerp(versor q,
versor r,
float t,
versor dest) {
/* https://en.wikipedia.org/wiki/Slerp */
#if defined( __SSE__ ) || defined( __SSE2__ )
glm_quat_slerp_sse2(q, r, t, dest);
#else
float cosTheta, sinTheta, angle, a, b, c;
glm_quat_slerp(versor from, versor to, float t, versor dest) {
vec4 q1, q2;
float cosTheta, sinTheta, angle;
cosTheta = glm_quat_dot(q, r);
if (cosTheta < 0.0f) {
q[0] *= -1.0f;
q[1] *= -1.0f;
q[2] *= -1.0f;
q[3] *= -1.0f;
cosTheta = glm_quat_dot(from, to);
glm_quat_copy(from, q1);
cosTheta = -cosTheta;
}
if (fabs(cosTheta) >= 1.0f) {
dest[0] = q[0];
dest[1] = q[1];
dest[2] = q[2];
dest[3] = q[3];
if (fabsf(cosTheta) >= 1.0f) {
glm_quat_copy(q1, dest);
return;
}
sinTheta = sqrt(1.0f - cosTheta * cosTheta);
if (cosTheta < 0.0f) {
glm_vec4_flipsign(q1);
cosTheta = -cosTheta;
}
c = 1.0f - t;
sinTheta = sqrtf(1.0f - cosTheta * cosTheta);
/* LERP */
/* TODO: FLT_EPSILON vs 0.001? */
if (sinTheta < 0.001f) {
dest[0] = c * q[0] + t * r[0];
dest[1] = c * q[1] + t * r[1];
dest[2] = c * q[2] + t * r[2];
dest[3] = c * q[3] + t * r[3];
/* LERP to avoid zero division */
if (fabsf(sinTheta) < 0.001f) {
glm_quat_lerp(from, to, t, dest);
return;
}
/* SLERP */
angle = acosf(cosTheta);
a = sinf(c * angle);
b = sinf(t * angle);
glm_vec4_scale(q1, sinf((1.0f - t) * angle), q1);
glm_vec4_scale(to, sinf(t * angle), q2);
dest[0] = (q[0] * a + r[0] * b) / sinTheta;
dest[1] = (q[1] * a + r[1] * b) / sinTheta;
dest[2] = (q[2] * a + r[2] * b) / sinTheta;
dest[3] = (q[3] * a + r[3] * b) / sinTheta;
#endif
glm_vec4_add(q1, q2, q1);
glm_vec4_scale(q1, 1.0f / sinTheta, dest);
}
/*!
* @brief creates view matrix using quaternion as camera orientation
*
* @param[in] eye eye
* @param[in] ori orientation in world space as quaternion
* @param[out] dest view matrix
*/
CGLM_INLINE
void
glm_quat_look(vec3 eye, versor ori, mat4 dest) {
vec4 t;
/* orientation */
glm_quat_mat4t(ori, dest);
/* translate */
glm_vec4(eye, 1.0f, t);
glm_mat4_mulv(dest, t, t);
glm_vec_flipsign_to(t, dest[3]);
}
/*!
* @brief creates look rotation quaternion
*
* @param[in] dir direction to look
* @param[in] fwd forward vector
* @param[in] up up vector
* @param[out] dest destination quaternion
*/
CGLM_INLINE
void
glm_quat_for(vec3 dir, vec3 fwd, vec3 up, versor dest) {
vec3 axis;
float dot, angle;
dot = glm_vec_dot(dir, fwd);
if (fabsf(dot + 1.0f) < 0.000001f) {
glm_quat_init(dest, up[0], up[1], up[2], CGLM_PI);
return;
}
if (fabsf(dot - 1.0f) < 0.000001f) {
glm_quat_identity(dest);
return;
}
angle = acosf(dot);
glm_cross(fwd, dir, axis);
glm_normalize(axis);
glm_quatv(dest, angle, axis);
}
/*!
* @brief creates look rotation quaternion using source and
* destination positions p suffix stands for position
*
* @param[in] from source point
* @param[in] to destination point
* @param[in] fwd forward vector
* @param[in] up up vector
* @param[out] dest destination quaternion
*/
CGLM_INLINE
void
glm_quat_forp(vec3 from, vec3 to, vec3 fwd, vec3 up, versor dest) {
vec3 dir;
glm_vec_sub(to, from, dir);
glm_quat_for(dir, fwd, up, dest);
}
/*!
* @brief rotate vector using using quaternion
*
* @param[in] q quaternion
* @param[in] v vector to rotate
* @param[out] dest rotated vector
*/
CGLM_INLINE
void
glm_quat_rotatev(versor q, vec3 v, vec3 dest) {
versor p;
vec3 u, v1, v2;
float s;
glm_quat_normalize_to(q, p);
glm_quat_imag(p, u);
s = glm_quat_real(p);
glm_vec_scale(u, 2.0f * glm_vec_dot(u, v), v1);
glm_vec_scale(v, s * s - glm_vec_dot(u, u), v2);
glm_vec_add(v1, v2, v1);
glm_vec_cross(u, v, v2);
glm_vec_scale(v2, 2.0f * s, v2);
glm_vec_add(v1, v2, dest);
}
/*!
* @brief rotate existing transform matrix using quaternion
*
* @param[in] m existing transform matrix
* @param[in] q quaternion
* @param[out] dest rotated matrix/transform
*/
CGLM_INLINE
void
glm_quat_rotate(mat4 m, versor q, mat4 dest) {
mat4 rot;
glm_quat_mat4(q, rot);
glm_mul_rot(m, rot, dest);
}
/*!
* @brief rotate existing transform matrix using quaternion at pivot point
*
* @param[in, out] m existing transform matrix
* @param[in] q quaternion
* @param[out] pivot pivot
*/
CGLM_INLINE
void
glm_quat_rotate_at(mat4 m, versor q, vec3 pivot) {
vec3 pivotInv;
glm_vec_inv_to(pivot, pivotInv);
glm_translate(m, pivot);
glm_quat_rotate(m, q, m);
glm_translate(m, pivotInv);
}
/*!
* @brief rotate NEW transform matrix using quaternion at pivot point
*
* this creates rotation matrix, it assumes you don't have a matrix
*
* this should work faster than glm_quat_rotate_at because it reduces
* one glm_translate.
*
* @param[out] m existing transform matrix
* @param[in] q quaternion
* @param[in] pivot pivot
*/
CGLM_INLINE
void
glm_quat_rotate_atm(mat4 m, versor q, vec3 pivot) {
vec3 pivotInv;
glm_vec_inv_to(pivot, pivotInv);
glm_mat4_identity(m);
glm_vec_copy(pivot, m[3]);
glm_quat_rotate(m, q, m);
glm_translate(m, pivotInv);
}
#endif /* cglm_quat_h */

View File

@@ -21,11 +21,11 @@ glm_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
__m256 y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
y0 = _mm256_load_ps(m2[0]); /* h g f e d c b a */
y1 = _mm256_load_ps(m2[2]); /* p o n m l k j i */
y0 = glmm_load256(m2[0]); /* h g f e d c b a */
y1 = glmm_load256(m2[2]); /* p o n m l k j i */
y2 = _mm256_load_ps(m1[0]); /* h g f e d c b a */
y3 = _mm256_load_ps(m1[2]); /* p o n m l k j i */
y2 = glmm_load256(m1[0]); /* h g f e d c b a */
y3 = glmm_load256(m1[2]); /* p o n m l k j i */
y4 = _mm256_permute2f128_ps(y2, y2, 0b00000011); /* d c b a h g f e */
y5 = _mm256_permute2f128_ps(y3, y3, 0b00000000); /* l k j i l k j i */
@@ -37,10 +37,10 @@ glm_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
y6 = _mm256_permutevar_ps(y0, _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0));
y8 = _mm256_permutevar_ps(y0, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
_mm256_store_ps(dest[0],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y4, y8)),
_mm256_mul_ps(y5, y7)));
glmm_store256(dest[0],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y4, y8)),
_mm256_mul_ps(y5, y7)));
/* n n n n i i i i */
@@ -52,11 +52,11 @@ glm_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
y8 = _mm256_permutevar_ps(y1, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
y9 = _mm256_permutevar_ps(y1, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
_mm256_store_ps(dest[2],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y3, y7)),
_mm256_add_ps(_mm256_mul_ps(y4, y8),
_mm256_mul_ps(y5, y9))));
glmm_store256(dest[2],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y3, y7)),
_mm256_add_ps(_mm256_mul_ps(y4, y8),
_mm256_mul_ps(y5, y9))));
}
#endif

View File

@@ -21,11 +21,11 @@ glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
__m256 y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
y0 = _mm256_load_ps(m2[0]); /* h g f e d c b a */
y1 = _mm256_load_ps(m2[2]); /* p o n m l k j i */
y0 = glmm_load256(m2[0]); /* h g f e d c b a */
y1 = glmm_load256(m2[2]); /* p o n m l k j i */
y2 = _mm256_load_ps(m1[0]); /* h g f e d c b a */
y3 = _mm256_load_ps(m1[2]); /* p o n m l k j i */
y2 = glmm_load256(m1[0]); /* h g f e d c b a */
y3 = glmm_load256(m1[2]); /* p o n m l k j i */
y4 = _mm256_permute2f128_ps(y2, y2, 0b00000011); /* d c b a h g f e */
y5 = _mm256_permute2f128_ps(y3, y3, 0b00000011); /* l k j i p o n m */
@@ -39,11 +39,11 @@ glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
y8 = _mm256_permutevar_ps(y0, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
y9 = _mm256_permutevar_ps(y0, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
_mm256_store_ps(dest[0],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y3, y7)),
_mm256_add_ps(_mm256_mul_ps(y4, y8),
_mm256_mul_ps(y5, y9))));
glmm_store256(dest[0],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y3, y7)),
_mm256_add_ps(_mm256_mul_ps(y4, y8),
_mm256_mul_ps(y5, y9))));
/* n n n n i i i i */
/* p p p p k k k k */
@@ -54,11 +54,11 @@ glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
y8 = _mm256_permutevar_ps(y1, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
y9 = _mm256_permutevar_ps(y1, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
_mm256_store_ps(dest[2],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y3, y7)),
_mm256_add_ps(_mm256_mul_ps(y4, y8),
_mm256_mul_ps(y5, y9))));
glmm_store256(dest[2],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y3, y7)),
_mm256_add_ps(_mm256_mul_ps(y4, y8),
_mm256_mul_ps(y5, y9))));
}
#endif

View File

@@ -8,11 +8,19 @@
#ifndef cglm_intrin_h
#define cglm_intrin_h
#if defined( _WIN32 )
#if defined( _MSC_VER )
# if (defined(_M_AMD64) || defined(_M_X64)) || _M_IX86_FP == 2
# define __SSE2__
# ifndef __SSE2__
# define __SSE2__
# endif
# elif _M_IX86_FP == 1
# define __SSE__
# ifndef __SSE__
# define __SSE__
# endif
# endif
/* do not use alignment for older visual studio versions */
# if _MSC_VER < 1913 /* Visual Studio 2017 version 15.6 */
# define CGLM_ALL_UNALIGNED
# endif
#endif
@@ -36,6 +44,49 @@
# define _mm_shuffle2_ps(a, b, z0, y0, x0, w0, z1, y1, x1, w1) \
_mm_shuffle1_ps(_mm_shuffle_ps(a, b, _MM_SHUFFLE(z0, y0, x0, w0)), \
z1, y1, x1, w1)
static inline
__m128
glmm_dot(__m128 a, __m128 b) {
__m128 x0;
x0 = _mm_mul_ps(a, b);
x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 3, 2));
return _mm_add_ps(x0, _mm_shuffle1_ps(x0, 0, 1, 0, 1));
}
static inline
__m128
glmm_norm(__m128 a) {
return _mm_sqrt_ps(glmm_dot(a, a));
}
static inline
__m128
glmm_load3(float v[3]) {
__m128i xy;
__m128 z;
xy = _mm_loadl_epi64((const __m128i *)v);
z = _mm_load_ss(&v[2]);
return _mm_movelh_ps(_mm_castsi128_ps(xy), z);
}
static inline
void
glmm_store3(__m128 vx, float v[3]) {
_mm_storel_pi((__m64 *)&v[0], vx);
_mm_store_ss(&v[2], _mm_shuffle1_ps(vx, 2, 2, 2, 2));
}
#ifdef CGLM_ALL_UNALIGNED
# define glmm_load(p) _mm_loadu_ps(p)
# define glmm_store(p, a) _mm_storeu_ps(p, a)
#else
# define glmm_load(p) _mm_load_ps(p)
# define glmm_store(p, a) _mm_store_ps(p, a)
#endif
#endif
/* x86, x64 */
@@ -45,6 +96,15 @@
#ifdef __AVX__
# define CGLM_AVX_FP 1
#ifdef CGLM_ALL_UNALIGNED
# define glmm_load256(p) _mm256_loadu_ps(p)
# define glmm_store256(p, a) _mm256_storeu_ps(p, a)
#else
# define glmm_load256(p) _mm256_load_ps(p)
# define glmm_store256(p, a) _mm256_store_ps(p, a)
#endif
#endif
/* ARM Neon */

View File

@@ -18,35 +18,67 @@ glm_mul_sse2(mat4 m1, mat4 m2, mat4 dest) {
/* D = R * L (Column-Major) */
__m128 l0, l1, l2, l3, r;
l0 = _mm_load_ps(m1[0]);
l1 = _mm_load_ps(m1[1]);
l2 = _mm_load_ps(m1[2]);
l3 = _mm_load_ps(m1[3]);
l0 = glmm_load(m1[0]);
l1 = glmm_load(m1[1]);
l2 = glmm_load(m1[2]);
l3 = glmm_load(m1[3]);
r = _mm_load_ps(m2[0]);
_mm_store_ps(dest[0],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
r = glmm_load(m2[0]);
glmm_store(dest[0],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
r = _mm_load_ps(m2[1]);
_mm_store_ps(dest[1],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
r = glmm_load(m2[1]);
glmm_store(dest[1],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
r = _mm_load_ps(m2[2]);
_mm_store_ps(dest[2],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
r = glmm_load(m2[2]);
glmm_store(dest[2],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
r = _mm_load_ps(m2[3]);
_mm_store_ps(dest[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
r = glmm_load(m2[3]);
glmm_store(dest[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
}
CGLM_INLINE
void
glm_mul_rot_sse2(mat4 m1, mat4 m2, mat4 dest) {
/* D = R * L (Column-Major) */
__m128 l0, l1, l2, l3, r;
l0 = glmm_load(m1[0]);
l1 = glmm_load(m1[1]);
l2 = glmm_load(m1[2]);
l3 = glmm_load(m1[3]);
r = glmm_load(m2[0]);
glmm_store(dest[0],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
r = glmm_load(m2[1]);
glmm_store(dest[1],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
r = glmm_load(m2[2]);
glmm_store(dest[2],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
glmm_store(dest[3], l3);
}
CGLM_INLINE
@@ -54,11 +86,11 @@ void
glm_inv_tr_sse2(mat4 mat) {
__m128 r0, r1, r2, r3, x0, x1;
r0 = _mm_load_ps(mat[0]);
r1 = _mm_load_ps(mat[1]);
r2 = _mm_load_ps(mat[2]);
r3 = _mm_load_ps(mat[3]);
x1 = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
r0 = glmm_load(mat[0]);
r1 = glmm_load(mat[1]);
r2 = glmm_load(mat[2]);
r3 = glmm_load(mat[3]);
x1 = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
_MM_TRANSPOSE4_PS(r0, r1, r2, x1);
@@ -69,10 +101,10 @@ glm_inv_tr_sse2(mat4 mat) {
x0 = _mm_add_ps(x0, x1);
_mm_store_ps(mat[0], r0);
_mm_store_ps(mat[1], r1);
_mm_store_ps(mat[2], r2);
_mm_store_ps(mat[3], x0);
glmm_store(mat[0], r0);
glmm_store(mat[1], r1);
glmm_store(mat[2], r2);
glmm_store(mat[3], x0);
}
#endif

View File

@@ -20,10 +20,10 @@ glm_mat4_scale_sse2(mat4 m, float s){
__m128 x0;
x0 = _mm_set1_ps(s);
_mm_store_ps(m[0], _mm_mul_ps(_mm_load_ps(m[0]), x0));
_mm_store_ps(m[1], _mm_mul_ps(_mm_load_ps(m[1]), x0));
_mm_store_ps(m[2], _mm_mul_ps(_mm_load_ps(m[2]), x0));
_mm_store_ps(m[3], _mm_mul_ps(_mm_load_ps(m[3]), x0));
glmm_store(m[0], _mm_mul_ps(glmm_load(m[0]), x0));
glmm_store(m[1], _mm_mul_ps(glmm_load(m[1]), x0));
glmm_store(m[2], _mm_mul_ps(glmm_load(m[2]), x0));
glmm_store(m[3], _mm_mul_ps(glmm_load(m[3]), x0));
}
CGLM_INLINE
@@ -31,17 +31,17 @@ void
glm_mat4_transp_sse2(mat4 m, mat4 dest){
__m128 r0, r1, r2, r3;
r0 = _mm_load_ps(m[0]);
r1 = _mm_load_ps(m[1]);
r2 = _mm_load_ps(m[2]);
r3 = _mm_load_ps(m[3]);
r0 = glmm_load(m[0]);
r1 = glmm_load(m[1]);
r2 = glmm_load(m[2]);
r3 = glmm_load(m[3]);
_MM_TRANSPOSE4_PS(r0, r1, r2, r3);
_mm_store_ps(dest[0], r0);
_mm_store_ps(dest[1], r1);
_mm_store_ps(dest[2], r2);
_mm_store_ps(dest[3], r3);
glmm_store(dest[0], r0);
glmm_store(dest[1], r1);
glmm_store(dest[2], r2);
glmm_store(dest[3], r3);
}
CGLM_INLINE
@@ -51,36 +51,36 @@ glm_mat4_mul_sse2(mat4 m1, mat4 m2, mat4 dest) {
__m128 l0, l1, l2, l3, r;
l0 = _mm_load_ps(m1[0]);
l1 = _mm_load_ps(m1[1]);
l2 = _mm_load_ps(m1[2]);
l3 = _mm_load_ps(m1[3]);
l0 = glmm_load(m1[0]);
l1 = glmm_load(m1[1]);
l2 = glmm_load(m1[2]);
l3 = glmm_load(m1[3]);
r = _mm_load_ps(m2[0]);
_mm_store_ps(dest[0],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
r = _mm_load_ps(m2[1]);
_mm_store_ps(dest[1],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
r = _mm_load_ps(m2[2]);
_mm_store_ps(dest[2],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
r = glmm_load(m2[0]);
glmm_store(dest[0],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
r = glmm_load(m2[1]);
glmm_store(dest[1],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
r = glmm_load(m2[2]);
glmm_store(dest[2],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
r = _mm_load_ps(m2[3]);
_mm_store_ps(dest[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
r = glmm_load(m2[3]);
glmm_store(dest[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
}
CGLM_INLINE
@@ -88,18 +88,18 @@ void
glm_mat4_mulv_sse2(mat4 m, vec4 v, vec4 dest) {
__m128 x0, x1, x2;
x0 = _mm_load_ps(v);
x1 = _mm_add_ps(_mm_mul_ps(_mm_load_ps(m[0]),
x0 = glmm_load(v);
x1 = _mm_add_ps(_mm_mul_ps(glmm_load(m[0]),
_mm_shuffle1_ps1(x0, 0)),
_mm_mul_ps(_mm_load_ps(m[1]),
_mm_mul_ps(glmm_load(m[1]),
_mm_shuffle1_ps1(x0, 1)));
x2 = _mm_add_ps(_mm_mul_ps(_mm_load_ps(m[2]),
x2 = _mm_add_ps(_mm_mul_ps(glmm_load(m[2]),
_mm_shuffle1_ps1(x0, 2)),
_mm_mul_ps(_mm_load_ps(m[3]),
_mm_mul_ps(glmm_load(m[3]),
_mm_shuffle1_ps1(x0, 3)));
_mm_store_ps(dest, _mm_add_ps(x1, x2));
glmm_store(dest, _mm_add_ps(x1, x2));
}
CGLM_INLINE
@@ -108,10 +108,10 @@ glm_mat4_det_sse2(mat4 mat) {
__m128 r0, r1, r2, r3, x0, x1, x2;
/* 127 <- 0, [square] det(A) = det(At) */
r0 = _mm_load_ps(mat[0]); /* d c b a */
r1 = _mm_load_ps(mat[1]); /* h g f e */
r2 = _mm_load_ps(mat[2]); /* l k j i */
r3 = _mm_load_ps(mat[3]); /* p o n m */
r0 = glmm_load(mat[0]); /* d c b a */
r1 = glmm_load(mat[1]); /* h g f e */
r2 = glmm_load(mat[2]); /* l k j i */
r3 = glmm_load(mat[3]); /* p o n m */
/*
t[1] = j * p - n * l;
@@ -166,10 +166,10 @@ glm_mat4_inv_fast_sse2(mat4 mat, mat4 dest) {
x0, x1, x2, x3, x4, x5, x6, x7;
/* 127 <- 0 */
r0 = _mm_load_ps(mat[0]); /* d c b a */
r1 = _mm_load_ps(mat[1]); /* h g f e */
r2 = _mm_load_ps(mat[2]); /* l k j i */
r3 = _mm_load_ps(mat[3]); /* p o n m */
r0 = glmm_load(mat[0]); /* d c b a */
r1 = glmm_load(mat[1]); /* h g f e */
r2 = glmm_load(mat[2]); /* l k j i */
r3 = glmm_load(mat[3]); /* p o n m */
x0 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(3, 2, 3, 2)); /* p o l k */
x1 = _mm_shuffle1_ps(x0, 1, 3, 3, 3); /* l p p p */
@@ -275,10 +275,10 @@ glm_mat4_inv_fast_sse2(mat4 mat, mat4 dest) {
x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 0, 1));
x0 = _mm_rcp_ps(x0);
_mm_store_ps(dest[0], _mm_mul_ps(v0, x0));
_mm_store_ps(dest[1], _mm_mul_ps(v1, x0));
_mm_store_ps(dest[2], _mm_mul_ps(v2, x0));
_mm_store_ps(dest[3], _mm_mul_ps(v3, x0));
glmm_store(dest[0], _mm_mul_ps(v0, x0));
glmm_store(dest[1], _mm_mul_ps(v1, x0));
glmm_store(dest[2], _mm_mul_ps(v2, x0));
glmm_store(dest[3], _mm_mul_ps(v3, x0));
}
CGLM_INLINE
@@ -290,10 +290,10 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
x0, x1, x2, x3, x4, x5, x6, x7;
/* 127 <- 0 */
r0 = _mm_load_ps(mat[0]); /* d c b a */
r1 = _mm_load_ps(mat[1]); /* h g f e */
r2 = _mm_load_ps(mat[2]); /* l k j i */
r3 = _mm_load_ps(mat[3]); /* p o n m */
r0 = glmm_load(mat[0]); /* d c b a */
r1 = glmm_load(mat[1]); /* h g f e */
r2 = glmm_load(mat[2]); /* l k j i */
r3 = glmm_load(mat[3]); /* p o n m */
x0 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(3, 2, 3, 2)); /* p o l k */
x1 = _mm_shuffle1_ps(x0, 1, 3, 3, 3); /* l p p p */
@@ -399,10 +399,10 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 0, 1));
x0 = _mm_div_ps(_mm_set1_ps(1.0f), x0);
_mm_store_ps(dest[0], _mm_mul_ps(v0, x0));
_mm_store_ps(dest[1], _mm_mul_ps(v1, x0));
_mm_store_ps(dest[2], _mm_mul_ps(v2, x0));
_mm_store_ps(dest[3], _mm_mul_ps(v3, x0));
glmm_store(dest[0], _mm_mul_ps(v0, x0));
glmm_store(dest[1], _mm_mul_ps(v1, x0));
glmm_store(dest[2], _mm_mul_ps(v2, x0));
glmm_store(dest[3], _mm_mul_ps(v3, x0));
}
#endif

View File

@@ -14,56 +14,33 @@
CGLM_INLINE
void
glm_quat_slerp_sse2(versor q,
versor r,
float t,
versor dest) {
/* https://en.wikipedia.org/wiki/Slerp */
float cosTheta, sinTheta, angle, a, b, c;
glm_quat_mul_sse2(versor p, versor q, versor dest) {
/*
+ (a1 b2 + b1 a2 + c1 d2 d1 c2)i
+ (a1 c2 b1 d2 + c1 a2 + d1 b2)j
+ (a1 d2 + b1 c2 c1 b2 + d1 a2)k
a1 a2 b1 b2 c1 c2 d1 d2
*/
__m128 xmm_q;
__m128 xp, xq, x0, r;
xmm_q = _mm_load_ps(q);
xp = glmm_load(p); /* 3 2 1 0 */
xq = glmm_load(q);
cosTheta = glm_vec4_dot(q, r);
if (cosTheta < 0.0f) {
_mm_store_ps(q,
_mm_xor_ps(xmm_q,
_mm_set1_ps(-0.f))) ;
r = _mm_mul_ps(_mm_shuffle1_ps1(xp, 3), xq);
cosTheta = -cosTheta;
}
x0 = _mm_xor_ps(_mm_shuffle1_ps1(xp, 0), _mm_set_ps(-0.f, 0.f, -0.f, 0.f));
r = _mm_add_ps(r, _mm_mul_ps(x0, _mm_shuffle1_ps(xq, 0, 1, 2, 3)));
if (cosTheta >= 1.0f) {
_mm_store_ps(dest, xmm_q);
return;
}
x0 = _mm_xor_ps(_mm_shuffle1_ps1(xp, 1), _mm_set_ps(-0.f, -0.f, 0.f, 0.f));
r = _mm_add_ps(r, _mm_mul_ps(x0, _mm_shuffle1_ps(xq, 1, 0, 3, 2)));
sinTheta = sqrtf(1.0f - cosTheta * cosTheta);
x0 = _mm_xor_ps(_mm_shuffle1_ps1(xp, 2), _mm_set_ps(-0.f, 0.f, 0.f, -0.f));
r = _mm_add_ps(r, _mm_mul_ps(x0, _mm_shuffle1_ps(xq, 2, 3, 0, 1)));
c = 1.0f - t;
/* LERP */
if (sinTheta < 0.001f) {
_mm_store_ps(dest, _mm_add_ps(_mm_mul_ps(_mm_set1_ps(c),
xmm_q),
_mm_mul_ps(_mm_set1_ps(t),
_mm_load_ps(r))));
return;
}
/* SLERP */
angle = acosf(cosTheta);
a = sinf(c * angle);
b = sinf(t * angle);
_mm_store_ps(dest,
_mm_div_ps(_mm_add_ps(_mm_mul_ps(_mm_set1_ps(a),
xmm_q),
_mm_mul_ps(_mm_set1_ps(b),
_mm_load_ps(r))),
_mm_set1_ps(sinTheta)));
glmm_store(dest, r);
}
#endif
#endif /* cglm_quat_simd_h */

View File

@@ -9,23 +9,35 @@
#define cglm_types_h
#if defined(_MSC_VER)
# define CGLM_ALIGN(X) /* __declspec(align(X)) */
/* do not use alignment for older visual studio versions */
#if _MSC_VER < 1913 /* Visual Studio 2017 version 15.6 */
# define CGLM_ALL_UNALIGNED
# define CGLM_ALIGN(X) /* no alignment */
#else
# define CGLM_ALIGN(X) __declspec(align(X))
#endif
#else
# define CGLM_ALIGN(X) __attribute((aligned(X)))
#endif
typedef float vec2[2];
typedef float vec3[3];
typedef int ivec3[3];
typedef CGLM_ALIGN(16) float vec4[4];
#ifndef CGLM_ALL_UNALIGNED
# define CGLM_ALIGN_IF(X) CGLM_ALIGN(X)
#else
# define CGLM_ALIGN_IF(X) /* no alignment */
#endif
typedef vec3 mat3[3];
typedef vec4 mat4[4];
typedef float vec2[2];
typedef CGLM_ALIGN_IF(8) float vec3[3];
typedef int ivec3[3];
typedef CGLM_ALIGN_IF(16) float vec4[4];
typedef vec4 versor;
typedef vec3 mat3[3];
typedef CGLM_ALIGN_IF(16) vec4 mat4[4];
#define CGLM_PI (float)M_PI
#define CGLM_PI_2 (float)M_PI_2
#define CGLM_PI_4 (float)M_PI_4
typedef vec4 versor;
#define CGLM_PI ((float)M_PI)
#define CGLM_PI_2 ((float)M_PI_2)
#define CGLM_PI_4 ((float)M_PI_4)
#endif /* cglm_types_h */

View File

@@ -143,4 +143,19 @@ glm_clamp(float val, float minVal, float maxVal) {
return glm_min(glm_max(val, minVal), maxVal);
}
/*!
* @brief linear interpolation between two number
*
* formula: from + s * (to - from)
*
* @param[in] from from value
* @param[in] to to value
* @param[in] t interpolant (amount) clamped between 0 and 1
*/
CGLM_INLINE
float
glm_lerp(float from, float to, float t) {
return from + glm_clamp(t, 0.0f, 1.0f) * (to - from);
}
#endif /* cglm_util_h */

View File

@@ -26,12 +26,13 @@
#define cglm_vec3_ext_h
#include "common.h"
#include "util.h"
#include <stdbool.h>
#include <math.h>
#include <float.h>
/*!
* @brief multiplies individual items, just for convenient like SIMD
* @brief DEPRECATED! use glm_vec_mul
*
* @param[in] a vec1
* @param[in] b vec2
@@ -160,4 +161,69 @@ glm_vec_min(vec3 v) {
return min;
}
/*!
* @brief check if all items are NaN (not a number)
* you should only use this in DEBUG mode or very critical asserts
*
* @param[in] v vector
*/
CGLM_INLINE
bool
glm_vec_isnan(vec3 v) {
return isnan(v[0]) || isnan(v[1]) || isnan(v[2]);
}
/*!
* @brief check if all items are INFINITY
* you should only use this in DEBUG mode or very critical asserts
*
* @param[in] v vector
*/
CGLM_INLINE
bool
glm_vec_isinf(vec3 v) {
return isinf(v[0]) || isinf(v[1]) || isinf(v[2]);
}
/*!
* @brief check if all items are valid number
* you should only use this in DEBUG mode or very critical asserts
*
* @param[in] v vector
*/
CGLM_INLINE
bool
glm_vec_isvalid(vec3 v) {
return !glm_vec_isnan(v) && !glm_vec_isinf(v);
}
/*!
* @brief get sign of 32 bit float as +1, -1, 0
*
* Important: It returns 0 for zero/NaN input
*
* @param v vector
*/
CGLM_INLINE
void
glm_vec_sign(vec3 v, vec3 dest) {
dest[0] = glm_signf(v[0]);
dest[1] = glm_signf(v[1]);
dest[2] = glm_signf(v[2]);
}
/*!
* @brief square root of each vector item
*
* @param[in] v vector
* @param[out] dest destination vector
*/
CGLM_INLINE
void
glm_vec_sqrt(vec3 v, vec3 dest) {
dest[0] = sqrtf(v[0]);
dest[1] = sqrtf(v[1]);
dest[2] = sqrtf(v[2]);
}
#endif /* cglm_vec3_ext_h */

View File

@@ -28,10 +28,18 @@
CGLM_INLINE void glm_vec_cross(vec3 a, vec3 b, vec3 d);
CGLM_INLINE float glm_vec_norm2(vec3 v);
CGLM_INLINE float glm_vec_norm(vec3 vec);
CGLM_INLINE void glm_vec_add(vec3 v1, vec3 v2, vec3 dest);
CGLM_INLINE void glm_vec_sub(vec3 v1, vec3 v2, vec3 dest);
CGLM_INLINE void glm_vec_add(vec3 a, vec3 b, vec3 dest);
CGLM_INLINE void glm_vec_adds(vec3 a, float s, vec3 dest);
CGLM_INLINE void glm_vec_sub(vec3 a, vec3 b, vec3 dest);
CGLM_INLINE void glm_vec_subs(vec3 a, float s, vec3 dest);
CGLM_INLINE void glm_vec_mul(vec3 a, vec3 b, vec3 dest);
CGLM_INLINE void glm_vec_scale(vec3 v, float s, vec3 dest);
CGLM_INLINE void glm_vec_scale_as(vec3 v, float s, vec3 dest);
CGLM_INLINE void glm_vec_div(vec3 a, vec3 b, vec3 dest);
CGLM_INLINE void glm_vec_divs(vec3 a, float s, vec3 dest);
CGLM_INLINE void glm_vec_addadd(vec3 a, vec3 b, vec3 dest);
CGLM_INLINE void glm_vec_subadd(vec3 a, vec3 b, vec3 dest);
CGLM_INLINE void glm_vec_muladd(vec3 a, vec3 b, vec3 dest);
CGLM_INLINE void glm_vec_flipsign(vec3 v);
CGLM_INLINE void glm_vec_inv(vec3 v);
CGLM_INLINE void glm_vec_inv_to(vec3 v, vec3 dest);
@@ -59,6 +67,7 @@
#define cglm_vec3_h
#include "common.h"
#include "vec4.h"
#include "vec3-ext.h"
#include "util.h"
@@ -103,6 +112,32 @@ glm_vec_copy(vec3 a, vec3 dest) {
dest[2] = a[2];
}
/*!
* @brief make vector zero
*
* @param[in, out] v vector
*/
CGLM_INLINE
void
glm_vec_zero(vec3 v) {
v[0] = 0.0f;
v[1] = 0.0f;
v[2] = 0.0f;
}
/*!
* @brief make vector one
*
* @param[in, out] v vector
*/
CGLM_INLINE
void
glm_vec_one(vec3 v) {
v[0] = 1.0f;
v[1] = 1.0f;
v[2] = 1.0f;
}
/*!
* @brief vec3 dot product
*
@@ -147,7 +182,7 @@ glm_vec_cross(vec3 a, vec3 b, vec3 d) {
CGLM_INLINE
float
glm_vec_norm2(vec3 v) {
return v[0] * v[0] + v[1] * v[1] + v[2] * v[2];
return glm_vec_dot(v, v);
}
/*!
@@ -164,33 +199,78 @@ glm_vec_norm(vec3 vec) {
}
/*!
* @brief add v2 vector to v1 vector store result in dest
* @brief add a vector to b vector store result in dest
*
* @param[in] v1 vector1
* @param[in] v2 vector2
* @param[in] a vector1
* @param[in] b vector2
* @param[out] dest destination vector
*/
CGLM_INLINE
void
glm_vec_add(vec3 v1, vec3 v2, vec3 dest) {
dest[0] = v1[0] + v2[0];
dest[1] = v1[1] + v2[1];
dest[2] = v1[2] + v2[2];
glm_vec_add(vec3 a, vec3 b, vec3 dest) {
dest[0] = a[0] + b[0];
dest[1] = a[1] + b[1];
dest[2] = a[2] + b[2];
}
/*!
* @brief add scalar to v vector store result in dest (d = v + s)
*
* @param[in] v vector
* @param[in] s scalar
* @param[out] dest destination vector
*/
CGLM_INLINE
void
glm_vec_adds(vec3 v, float s, vec3 dest) {
dest[0] = v[0] + s;
dest[1] = v[1] + s;
dest[2] = v[2] + s;
}
/*!
* @brief subtract v2 vector from v1 vector store result in dest
*
* @param[in] v1 vector1
* @param[in] v2 vector2
* @param[in] a vector1
* @param[in] b vector2
* @param[out] dest destination vector
*/
CGLM_INLINE
void
glm_vec_sub(vec3 v1, vec3 v2, vec3 dest) {
dest[0] = v1[0] - v2[0];
dest[1] = v1[1] - v2[1];
dest[2] = v1[2] - v2[2];
glm_vec_sub(vec3 a, vec3 b, vec3 dest) {
dest[0] = a[0] - b[0];
dest[1] = a[1] - b[1];
dest[2] = a[2] - b[2];
}
/*!
* @brief subtract scalar from v vector store result in dest (d = v - s)
*
* @param[in] v vector
* @param[in] s scalar
* @param[out] dest destination vector
*/
CGLM_INLINE
void
glm_vec_subs(vec3 v, float s, vec3 dest) {
dest[0] = v[0] - s;
dest[1] = v[1] - s;
dest[2] = v[2] - s;
}
/*!
* @brief multiply two vector (component-wise multiplication)
*
* @param a v1
* @param b v2
* @param d v3 = (a[0] * b[0], a[1] * b[1], a[2] * b[2])
*/
CGLM_INLINE
void
glm_vec_mul(vec3 a, vec3 b, vec3 d) {
d[0] = a[0] * b[0];
d[1] = a[1] * b[1];
d[2] = a[2] * b[2];
}
/*!
@@ -221,14 +301,112 @@ glm_vec_scale_as(vec3 v, float s, vec3 dest) {
float norm;
norm = glm_vec_norm(v);
if (norm == 0) {
glm_vec_copy(v, dest);
if (norm == 0.0f) {
glm_vec_zero(dest);
return;
}
glm_vec_scale(v, s / norm, dest);
}
/*!
* @brief div vector with another component-wise division: d = a / b
*
* @param[in] a vector 1
* @param[in] b vector 2
* @param[out] dest result = (a[0]/b[0], a[1]/b[1], a[2]/b[2])
*/
CGLM_INLINE
void
glm_vec_div(vec3 a, vec3 b, vec3 dest) {
dest[0] = a[0] / b[0];
dest[1] = a[1] / b[1];
dest[2] = a[2] / b[2];
}
/*!
* @brief div vector with scalar: d = v / s
*
* @param[in] v vector
* @param[in] s scalar
* @param[out] dest result = (a[0]/s, a[1]/s, a[2]/s)
*/
CGLM_INLINE
void
glm_vec_divs(vec3 v, float s, vec3 dest) {
dest[0] = v[0] / s;
dest[1] = v[1] / s;
dest[2] = v[2] / s;
}
/*!
* @brief add two vectors and add result to sum
*
* it applies += operator so dest must be initialized
*
* @param[in] a vector 1
* @param[in] b vector 2
* @param[out] dest dest += (a + b)
*/
CGLM_INLINE
void
glm_vec_addadd(vec3 a, vec3 b, vec3 dest) {
dest[0] += a[0] + b[0];
dest[1] += a[1] + b[1];
dest[2] += a[2] + b[2];
}
/*!
* @brief sub two vectors and add result to dest
*
* it applies += operator so dest must be initialized
*
* @param[in] a vector 1
* @param[in] b vector 2
* @param[out] dest dest += (a + b)
*/
CGLM_INLINE
void
glm_vec_subadd(vec3 a, vec3 b, vec3 dest) {
dest[0] += a[0] - b[0];
dest[1] += a[1] - b[1];
dest[2] += a[2] - b[2];
}
/*!
* @brief mul two vectors and add result to dest
*
* it applies += operator so dest must be initialized
*
* @param[in] a vector 1
* @param[in] b vector 2
* @param[out] dest dest += (a * b)
*/
CGLM_INLINE
void
glm_vec_muladd(vec3 a, vec3 b, vec3 dest) {
dest[0] += a[0] * b[0];
dest[1] += a[1] * b[1];
dest[2] += a[2] * b[2];
}
/*!
* @brief mul vector with scalar and add result to sum
*
* it applies += operator so dest must be initialized
*
* @param[in] a vector
* @param[in] s scalar
* @param[out] dest dest += (a * b)
*/
CGLM_INLINE
void
glm_vec_muladds(vec3 a, float s, vec3 dest) {
dest[0] += a[0] * s;
dest[1] += a[1] * s;
dest[2] += a[2] * s;
}
/*!
* @brief flip sign of all vec3 members
*
@@ -242,6 +420,20 @@ glm_vec_flipsign(vec3 v) {
v[2] = -v[2];
}
/*!
* @brief flip sign of all vec3 members and store result in dest
*
* @param[in] v vector
* @param[out] dest result vector
*/
CGLM_INLINE
void
glm_vec_flipsign_to(vec3 v, vec3 dest) {
dest[0] = -v[0];
dest[1] = -v[1];
dest[2] = -v[2];
}
/*!
* @brief make vector as inverse/opposite of itself
*
@@ -300,7 +492,7 @@ glm_vec_normalize_to(vec3 vec, vec3 dest) {
norm = glm_vec_norm(vec);
if (norm == 0.0f) {
dest[0] = dest[1] = dest[2] = 0.0f;
glm_vec_zero(dest);
return;
}
@@ -325,12 +517,6 @@ glm_vec_angle(vec3 v1, vec3 v2) {
return acosf(glm_vec_dot(v1, v2) * norm);
}
CGLM_INLINE
void
glm_quatv(versor q,
float angle,
vec3 v);
/*!
* @brief rotate vec3 around axis by angle using Rodrigues' rotation formula
*
@@ -341,31 +527,55 @@ glm_quatv(versor q,
CGLM_INLINE
void
glm_vec_rotate(vec3 v, float angle, vec3 axis) {
versor q;
vec3 v1, v2, v3;
vec3 v1, v2, k;
float c, s;
c = cosf(angle);
s = sinf(angle);
glm_vec_normalize_to(axis, k);
/* Right Hand, Rodrigues' rotation formula:
v = v*cos(t) + (kxv)sin(t) + k*(k.v)(1 - cos(t))
*/
/* quaternion */
glm_quatv(q, angle, v);
glm_vec_scale(v, c, v1);
glm_vec_cross(axis, v, v2);
glm_vec_cross(k, v, v2);
glm_vec_scale(v2, s, v2);
glm_vec_scale(axis,
glm_vec_dot(axis, v) * (1.0f - c),
v3);
glm_vec_add(v1, v2, v1);
glm_vec_add(v1, v3, v);
glm_vec_scale(k, glm_vec_dot(k, v) * (1.0f - c), v2);
glm_vec_add(v1, v2, v);
}
/*!
* @brief apply rotation matrix to vector
*
* matrix format should be (no perspective):
* a b c x
* e f g y
* i j k z
* 0 0 0 w
*
* @param[in] m affine matrix or rot matrix
* @param[in] v vector
* @param[out] dest rotated vector
*/
CGLM_INLINE
void
glm_vec_rotate_m4(mat4 m, vec3 v, vec3 dest) {
vec4 x, y, z, res;
glm_vec4_normalize_to(m[0], x);
glm_vec4_normalize_to(m[1], y);
glm_vec4_normalize_to(m[2], z);
glm_vec4_scale(x, v[0], res);
glm_vec4_muladds(y, v[1], res);
glm_vec4_muladds(z, v[2], res);
glm_vec3(res, dest);
}
/*!
@@ -377,18 +587,22 @@ glm_vec_rotate(vec3 v, float angle, vec3 axis) {
*/
CGLM_INLINE
void
glm_vec_rotate_m4(mat4 m, vec3 v, vec3 dest) {
vec3 res, x, y, z;
glm_vec_rotate_m3(mat3 m, vec3 v, vec3 dest) {
vec4 res, x, y, z;
glm_vec_normalize_to(m[0], x);
glm_vec_normalize_to(m[1], y);
glm_vec_normalize_to(m[2], z);
glm_vec4(m[0], 0.0f, x);
glm_vec4(m[1], 0.0f, y);
glm_vec4(m[2], 0.0f, z);
res[0] = x[0] * v[0] + y[0] * v[1] + z[0] * v[2];
res[1] = x[1] * v[0] + y[1] * v[1] + z[1] * v[2];
res[2] = x[2] * v[0] + y[2] * v[1] + z[2] * v[2];
glm_vec4_normalize(x);
glm_vec4_normalize(y);
glm_vec4_normalize(z);
glm_vec_copy(res, dest);
glm_vec4_scale(x, v[0], res);
glm_vec4_muladds(y, v[1], res);
glm_vec4_muladds(z, v[2], res);
glm_vec3(res, dest);
}
/*!
@@ -494,6 +708,28 @@ glm_vec_clamp(vec3 v, float minVal, float maxVal) {
v[2] = glm_clamp(v[2], minVal, maxVal);
}
/*!
* @brief linear interpolation between two vector
*
* formula: from + s * (to - from)
*
* @param[in] from from value
* @param[in] to to value
* @param[in] t interpolant (amount) clamped between 0 and 1
* @param[out] dest destination
*/
CGLM_INLINE
void
glm_vec_lerp(vec3 from, vec3 to, float t, vec3 dest) {
vec3 s, v;
/* from + s * (to - from) */
glm_vec_broadcast(glm_clamp(t, 0.0f, 1.0f), s);
glm_vec_sub(to, from, v);
glm_vec_mulv(s, v, v);
glm_vec_add(from, v, dest);
}
/*!
* @brief vec3 cross product
*

View File

@@ -32,7 +32,7 @@
#include <float.h>
/*!
* @brief multiplies individual items, just for convenient like SIMD
* @brief DEPRECATED! use glm_vec4_mul
*
* @param a v1
* @param b v2
@@ -42,7 +42,7 @@ CGLM_INLINE
void
glm_vec4_mulv(vec4 a, vec4 b, vec4 d) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(d, _mm_mul_ps(_mm_load_ps(a), _mm_load_ps(b)));
glmm_store(d, _mm_mul_ps(glmm_load(a), glmm_load(b)));
#else
d[0] = a[0] * b[0];
d[1] = a[1] * b[1];
@@ -61,7 +61,7 @@ CGLM_INLINE
void
glm_vec4_broadcast(float val, vec4 d) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(d, _mm_set1_ps(val));
glmm_store(d, _mm_set1_ps(val));
#else
d[0] = d[1] = d[2] = d[3] = val;
#endif
@@ -174,5 +174,88 @@ glm_vec4_min(vec4 v) {
return min;
}
#endif /* cglm_vec4_ext_h */
/*!
* @brief check if one of items is NaN (not a number)
* you should only use this in DEBUG mode or very critical asserts
*
* @param[in] v vector
*/
CGLM_INLINE
bool
glm_vec4_isnan(vec4 v) {
return isnan(v[0]) || isnan(v[1]) || isnan(v[2]) || isnan(v[3]);
}
/*!
* @brief check if one of items is INFINITY
* you should only use this in DEBUG mode or very critical asserts
*
* @param[in] v vector
*/
CGLM_INLINE
bool
glm_vec4_isinf(vec4 v) {
return isinf(v[0]) || isinf(v[1]) || isinf(v[2]) || isinf(v[3]);
}
/*!
* @brief check if all items are valid number
* you should only use this in DEBUG mode or very critical asserts
*
* @param[in] v vector
*/
CGLM_INLINE
bool
glm_vec4_isvalid(vec4 v) {
return !glm_vec4_isnan(v) && !glm_vec4_isinf(v);
}
/*!
* @brief get sign of 32 bit float as +1, -1, 0
*
* Important: It returns 0 for zero/NaN input
*
* @param v vector
*/
CGLM_INLINE
void
glm_vec4_sign(vec4 v, vec4 dest) {
#if defined( __SSE2__ ) || defined( __SSE2__ )
__m128 x0, x1, x2, x3, x4;
x0 = glmm_load(v);
x1 = _mm_set_ps(0.0f, 0.0f, 1.0f, -1.0f);
x2 = _mm_shuffle1_ps1(x1, 2);
x3 = _mm_and_ps(_mm_cmpgt_ps(x0, x2), _mm_shuffle1_ps1(x1, 1));
x4 = _mm_and_ps(_mm_cmplt_ps(x0, x2), _mm_shuffle1_ps1(x1, 0));
glmm_store(dest, _mm_or_ps(x3, x4));
#else
dest[0] = glm_signf(v[0]);
dest[1] = glm_signf(v[1]);
dest[2] = glm_signf(v[2]);
dest[3] = glm_signf(v[3]);
#endif
}
/*!
* @brief square root of each vector item
*
* @param[in] v vector
* @param[out] dest destination vector
*/
CGLM_INLINE
void
glm_vec4_sqrt(vec4 v, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_sqrt_ps(glmm_load(v)));
#else
dest[0] = sqrtf(v[0]);
dest[1] = sqrtf(v[1]);
dest[2] = sqrtf(v[2]);
dest[3] = sqrtf(v[3]);
#endif
}
#endif /* cglm_vec4_ext_h */

View File

@@ -28,10 +28,18 @@
CGLM_INLINE float glm_vec4_dot(vec4 a, vec4 b);
CGLM_INLINE float glm_vec4_norm2(vec4 v);
CGLM_INLINE float glm_vec4_norm(vec4 vec);
CGLM_INLINE void glm_vec4_add(vec4 v1, vec4 v2, vec4 dest);
CGLM_INLINE void glm_vec4_sub(vec4 v1, vec4 v2, vec4 dest);
CGLM_INLINE void glm_vec4_add(vec4 a, vec4 b, vec4 dest);
CGLM_INLINE void glm_vec4_adds(vec4 v, float s, vec4 dest);
CGLM_INLINE void glm_vec4_sub(vec4 a, vec4 b, vec4 dest);
CGLM_INLINE void glm_vec4_subs(vec4 v, float s, vec4 dest);
CGLM_INLINE void glm_vec4_mul(vec4 a, vec4 b, vec4 dest);
CGLM_INLINE void glm_vec4_scale(vec4 v, float s, vec4 dest);
CGLM_INLINE void glm_vec4_scale_as(vec4 v, float s, vec4 dest);
CGLM_INLINE void glm_vec4_div(vec4 a, vec4 b, vec4 dest);
CGLM_INLINE void glm_vec4_divs(vec4 v, float s, vec4 dest);
CGLM_INLINE void glm_vec4_addadd(vec4 a, vec4 b, vec4 dest);
CGLM_INLINE void glm_vec4_subadd(vec4 a, vec4 b, vec4 dest);
CGLM_INLINE void glm_vec4_muladd(vec4 a, vec4 b, vec4 dest);
CGLM_INLINE void glm_vec4_flipsign(vec4 v);
CGLM_INLINE void glm_vec4_inv(vec4 v);
CGLM_INLINE void glm_vec4_inv_to(vec4 v, vec4 dest);
@@ -41,6 +49,7 @@
CGLM_INLINE void glm_vec4_maxv(vec4 v1, vec4 v2, vec4 dest);
CGLM_INLINE void glm_vec4_minv(vec4 v1, vec4 v2, vec4 dest);
CGLM_INLINE void glm_vec4_clamp(vec4 v, float minVal, float maxVal);
CGLM_INLINE void glm_vec4_lerp(vec4 from, vec4 to, float t, vec4 dest)
*/
#ifndef cglm_vec4_h
@@ -102,7 +111,7 @@ CGLM_INLINE
void
glm_vec4_copy(vec4 v, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_load_ps(v));
glmm_store(dest, glmm_load(v));
#else
dest[0] = v[0];
dest[1] = v[1];
@@ -111,6 +120,42 @@ glm_vec4_copy(vec4 v, vec4 dest) {
#endif
}
/*!
* @brief make vector zero
*
* @param[in, out] v vector
*/
CGLM_INLINE
void
glm_vec4_zero(vec4 v) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(v, _mm_setzero_ps());
#else
v[0] = 0.0f;
v[1] = 0.0f;
v[2] = 0.0f;
v[3] = 0.0f;
#endif
}
/*!
* @brief make vector one
*
* @param[in, out] v vector
*/
CGLM_INLINE
void
glm_vec4_one(vec4 v) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(v, _mm_set1_ps(1.0f));
#else
v[0] = 1.0f;
v[1] = 1.0f;
v[2] = 1.0f;
v[3] = 1.0f;
#endif
}
/*!
* @brief vec4 dot product
*
@@ -122,7 +167,14 @@ glm_vec4_copy(vec4 v, vec4 dest) {
CGLM_INLINE
float
glm_vec4_dot(vec4 a, vec4 b) {
#if defined( __SSE__ ) || defined( __SSE2__ )
__m128 x0;
x0 = _mm_mul_ps(glmm_load(a), glmm_load(b));
x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 3, 2));
return _mm_cvtss_f32(_mm_add_ss(x0, _mm_shuffle1_ps(x0, 0, 1, 0, 1)));
#else
return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
#endif
}
/*!
@@ -139,7 +191,15 @@ glm_vec4_dot(vec4 a, vec4 b) {
CGLM_INLINE
float
glm_vec4_norm2(vec4 v) {
#if defined( __SSE__ ) || defined( __SSE2__ )
__m128 x0;
x0 = glmm_load(v);
x0 = _mm_mul_ps(x0, x0);
x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 3, 2));
return _mm_cvtss_f32(_mm_add_ss(x0, _mm_shuffle1_ps(x0, 0, 1, 0, 1)));
#else
return v[0] * v[0] + v[1] * v[1] + v[2] * v[2] + v[3] * v[3];
#endif
}
/*!
@@ -152,50 +212,112 @@ glm_vec4_norm2(vec4 v) {
CGLM_INLINE
float
glm_vec4_norm(vec4 vec) {
#if defined( __SSE__ ) || defined( __SSE2__ )
__m128 x0;
x0 = glmm_load(vec);
return _mm_cvtss_f32(_mm_sqrt_ss(glmm_dot(x0, x0)));
#else
return sqrtf(glm_vec4_norm2(vec));
#endif
}
/*!
* @brief add v2 vector to v1 vector store result in dest
*
* @param[in] v1 vector1
* @param[in] v2 vector2
* @param[in] a vector1
* @param[in] b vector2
* @param[out] dest destination vector
*/
CGLM_INLINE
void
glm_vec4_add(vec4 v1, vec4 v2, vec4 dest) {
glm_vec4_add(vec4 a, vec4 b, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest,
_mm_add_ps(_mm_load_ps(v1),
_mm_load_ps(v2)));
glmm_store(dest, _mm_add_ps(glmm_load(a), glmm_load(b)));
#else
dest[0] = v1[0] + v2[0];
dest[1] = v1[1] + v2[1];
dest[2] = v1[2] + v2[2];
dest[3] = v1[3] + v2[3];
dest[0] = a[0] + b[0];
dest[1] = a[1] + b[1];
dest[2] = a[2] + b[2];
dest[3] = a[3] + b[3];
#endif
}
/*!
* @brief subtract v2 vector from v1 vector store result in dest
* @brief add scalar to v vector store result in dest (d = v + vec(s))
*
* @param[in] v1 vector1
* @param[in] v2 vector2
* @param[in] v vector
* @param[in] s scalar
* @param[out] dest destination vector
*/
CGLM_INLINE
void
glm_vec4_sub(vec4 v1, vec4 v2, vec4 dest) {
glm_vec4_adds(vec4 v, float s, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest,
_mm_sub_ps(_mm_load_ps(v1),
_mm_load_ps(v2)));
glmm_store(dest, _mm_add_ps(glmm_load(v), _mm_set1_ps(s)));
#else
dest[0] = v1[0] - v2[0];
dest[1] = v1[1] - v2[1];
dest[2] = v1[2] - v2[2];
dest[3] = v1[3] - v2[3];
dest[0] = v[0] + s;
dest[1] = v[1] + s;
dest[2] = v[2] + s;
dest[3] = v[3] + s;
#endif
}
/*!
* @brief subtract b vector from a vector store result in dest (d = v1 - v2)
*
* @param[in] a vector1
* @param[in] b vector2
* @param[out] dest destination vector
*/
CGLM_INLINE
void
glm_vec4_sub(vec4 a, vec4 b, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_sub_ps(glmm_load(a), glmm_load(b)));
#else
dest[0] = a[0] - b[0];
dest[1] = a[1] - b[1];
dest[2] = a[2] - b[2];
dest[3] = a[3] - b[3];
#endif
}
/*!
* @brief subtract scalar from v vector store result in dest (d = v - vec(s))
*
* @param[in] v vector
* @param[in] s scalar
* @param[out] dest destination vector
*/
CGLM_INLINE
void
glm_vec4_subs(vec4 v, float s, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_sub_ps(glmm_load(v), _mm_set1_ps(s)));
#else
dest[0] = v[0] - s;
dest[1] = v[1] - s;
dest[2] = v[2] - s;
dest[3] = v[3] - s;
#endif
}
/*!
* @brief multiply two vector (component-wise multiplication)
*
* @param a v1
* @param b v2
* @param d v3 = (a[0] * b[0], a[1] * b[1], a[2] * b[2], a[3] * b[3])
*/
CGLM_INLINE
void
glm_vec4_mul(vec4 a, vec4 b, vec4 d) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(d, _mm_mul_ps(glmm_load(a), glmm_load(b)));
#else
d[0] = a[0] * b[0];
d[1] = a[1] * b[1];
d[2] = a[2] * b[2];
d[3] = a[3] * b[3];
#endif
}
@@ -210,9 +332,7 @@ CGLM_INLINE
void
glm_vec4_scale(vec4 v, float s, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest,
_mm_mul_ps(_mm_load_ps(v),
_mm_set1_ps(s)));
glmm_store(dest, _mm_mul_ps(glmm_load(v), _mm_set1_ps(s)));
#else
dest[0] = v[0] * s;
dest[1] = v[1] * s;
@@ -234,14 +354,148 @@ glm_vec4_scale_as(vec4 v, float s, vec4 dest) {
float norm;
norm = glm_vec4_norm(v);
if (norm == 0) {
glm_vec4_copy(v, dest);
if (norm == 0.0f) {
glm_vec4_zero(dest);
return;
}
glm_vec4_scale(v, s / norm, dest);
}
/*!
* @brief div vector with another component-wise division: d = v1 / v2
*
* @param[in] a vector 1
* @param[in] b vector 2
* @param[out] dest result = (a[0]/b[0], a[1]/b[1], a[2]/b[2], a[3]/b[3])
*/
CGLM_INLINE
void
glm_vec4_div(vec4 a, vec4 b, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_div_ps(glmm_load(a), glmm_load(b)));
#else
dest[0] = a[0] / b[0];
dest[1] = a[1] / b[1];
dest[2] = a[2] / b[2];
dest[3] = a[3] / b[3];
#endif
}
/*!
* @brief div vec4 vector with scalar: d = v / s
*
* @param[in] v vector
* @param[in] s scalar
* @param[out] dest destination vector
*/
CGLM_INLINE
void
glm_vec4_divs(vec4 v, float s, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_div_ps(glmm_load(v), _mm_set1_ps(s)));
#else
glm_vec4_scale(v, 1.0f / s, dest);
#endif
}
/*!
* @brief add two vectors and add result to sum
*
* it applies += operator so dest must be initialized
*
* @param[in] a vector 1
* @param[in] b vector 2
* @param[out] dest dest += (a + b)
*/
CGLM_INLINE
void
glm_vec4_addadd(vec4 a, vec4 b, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_add_ps(glmm_load(dest),
_mm_add_ps(glmm_load(a),
glmm_load(b))));
#else
dest[0] += a[0] + b[0];
dest[1] += a[1] + b[1];
dest[2] += a[2] + b[2];
dest[3] += a[3] + b[3];
#endif
}
/*!
* @brief sub two vectors and add result to dest
*
* it applies += operator so dest must be initialized
*
* @param[in] a vector 1
* @param[in] b vector 2
* @param[out] dest dest += (a - b)
*/
CGLM_INLINE
void
glm_vec4_subadd(vec4 a, vec4 b, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_add_ps(glmm_load(dest),
_mm_sub_ps(glmm_load(a),
glmm_load(b))));
#else
dest[0] += a[0] - b[0];
dest[1] += a[1] - b[1];
dest[2] += a[2] - b[2];
dest[3] += a[3] - b[3];
#endif
}
/*!
* @brief mul two vectors and add result to dest
*
* it applies += operator so dest must be initialized
*
* @param[in] a vector 1
* @param[in] b vector 2
* @param[out] dest dest += (a * b)
*/
CGLM_INLINE
void
glm_vec4_muladd(vec4 a, vec4 b, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_add_ps(glmm_load(dest),
_mm_mul_ps(glmm_load(a),
glmm_load(b))));
#else
dest[0] += a[0] * b[0];
dest[1] += a[1] * b[1];
dest[2] += a[2] * b[2];
dest[3] += a[3] * b[3];
#endif
}
/*!
* @brief mul vector with scalar and add result to sum
*
* it applies += operator so dest must be initialized
*
* @param[in] a vector
* @param[in] s scalar
* @param[out] dest dest += (a * b)
*/
CGLM_INLINE
void
glm_vec4_muladds(vec4 a, float s, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_add_ps(glmm_load(dest),
_mm_mul_ps(glmm_load(a),
_mm_set1_ps(s))));
#else
dest[0] += a[0] * s;
dest[1] += a[1] * s;
dest[2] += a[2] * s;
dest[3] += a[3] * s;
#endif
}
/*!
* @brief flip sign of all vec4 members
*
@@ -251,8 +505,7 @@ CGLM_INLINE
void
glm_vec4_flipsign(vec4 v) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(v, _mm_xor_ps(_mm_load_ps(v),
_mm_set1_ps(-0.0f)));
glmm_store(v, _mm_xor_ps(glmm_load(v), _mm_set1_ps(-0.0f)));
#else
v[0] = -v[0];
v[1] = -v[1];
@@ -261,6 +514,25 @@ glm_vec4_flipsign(vec4 v) {
#endif
}
/*!
* @brief flip sign of all vec4 members and store result in dest
*
* @param[in] v vector
* @param[out] dest vector
*/
CGLM_INLINE
void
glm_vec4_flipsign_to(vec4 v, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_xor_ps(glmm_load(v), _mm_set1_ps(-0.0f)));
#else
dest[0] = -v[0];
dest[1] = -v[1];
dest[2] = -v[2];
dest[3] = -v[3];
#endif
}
/*!
* @brief make vector as inverse/opposite of itself
*
@@ -285,26 +557,6 @@ glm_vec4_inv_to(vec4 v, vec4 dest) {
glm_vec4_flipsign(dest);
}
/*!
* @brief normalize vec4 and store result in same vec
*
* @param[in, out] v vector
*/
CGLM_INLINE
void
glm_vec4_normalize(vec4 v) {
float norm;
norm = glm_vec4_norm(v);
if (norm == 0.0f) {
v[0] = v[1] = v[2] = v[3] = 0.0f;
return;
}
glm_vec4_scale(v, 1.0f / norm, v);
}
/*!
* @brief normalize vec4 to dest
*
@@ -314,16 +566,43 @@ glm_vec4_normalize(vec4 v) {
CGLM_INLINE
void
glm_vec4_normalize_to(vec4 vec, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
__m128 xdot, x0;
float dot;
x0 = glmm_load(vec);
xdot = glmm_dot(x0, x0);
dot = _mm_cvtss_f32(xdot);
if (dot == 0.0f) {
glmm_store(dest, _mm_setzero_ps());
return;
}
glmm_store(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
#else
float norm;
norm = glm_vec4_norm(vec);
if (norm == 0.0f) {
dest[0] = dest[1] = dest[2] = dest[3] = 0.0f;
glm_vec4_zero(dest);
return;
}
glm_vec4_scale(vec, 1.0f / norm, dest);
#endif
}
/*!
* @brief normalize vec4 and store result in same vec
*
* @param[in, out] v vector
*/
CGLM_INLINE
void
glm_vec4_normalize(vec4 v) {
glm_vec4_normalize_to(v, v);
}
/**
@@ -352,10 +631,14 @@ glm_vec4_distance(vec4 v1, vec4 v2) {
CGLM_INLINE
void
glm_vec4_maxv(vec4 v1, vec4 v2, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_max_ps(glmm_load(v1), glmm_load(v2)));
#else
dest[0] = glm_max(v1[0], v2[0]);
dest[1] = glm_max(v1[1], v2[1]);
dest[2] = glm_max(v1[2], v2[2]);
dest[3] = glm_max(v1[3], v2[3]);
#endif
}
/*!
@@ -368,10 +651,14 @@ glm_vec4_maxv(vec4 v1, vec4 v2, vec4 dest) {
CGLM_INLINE
void
glm_vec4_minv(vec4 v1, vec4 v2, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_min_ps(glmm_load(v1), glmm_load(v2)));
#else
dest[0] = glm_min(v1[0], v2[0]);
dest[1] = glm_min(v1[1], v2[1]);
dest[2] = glm_min(v1[2], v2[2]);
dest[3] = glm_min(v1[3], v2[3]);
#endif
}
/*!
@@ -384,10 +671,37 @@ glm_vec4_minv(vec4 v1, vec4 v2, vec4 dest) {
CGLM_INLINE
void
glm_vec4_clamp(vec4 v, float minVal, float maxVal) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(v, _mm_min_ps(_mm_max_ps(glmm_load(v), _mm_set1_ps(minVal)),
_mm_set1_ps(maxVal)));
#else
v[0] = glm_clamp(v[0], minVal, maxVal);
v[1] = glm_clamp(v[1], minVal, maxVal);
v[2] = glm_clamp(v[2], minVal, maxVal);
v[3] = glm_clamp(v[3], minVal, maxVal);
#endif
}
/*!
* @brief linear interpolation between two vector
*
* formula: from + s * (to - from)
*
* @param[in] from from value
* @param[in] to to value
* @param[in] t interpolant (amount) clamped between 0 and 1
* @param[out] dest destination
*/
CGLM_INLINE
void
glm_vec4_lerp(vec4 from, vec4 to, float t, vec4 dest) {
vec4 s, v;
/* from + s * (to - from) */
glm_vec4_broadcast(glm_clamp(t, 0.0f, 1.0f), s);
glm_vec4_sub(to, from, v);
glm_vec4_mulv(s, v, v);
glm_vec4_add(from, v, dest);
}
#endif /* cglm_vec4_h */

View File

@@ -9,7 +9,7 @@
#define cglm_version_h
#define CGLM_VERSION_MAJOR 0
#define CGLM_VERSION_MINOR 3
#define CGLM_VERSION_PATCH 6
#define CGLM_VERSION_MINOR 4
#define CGLM_VERSION_PATCH 5
#endif /* cglm_version_h */