diff --git a/.gitignore b/.gitignore index d500b97..195a82c 100644 --- a/.gitignore +++ b/.gitignore @@ -69,3 +69,4 @@ win/cglm_test_* win/x64 win/x85 win/Debug +cglm-test-ios* diff --git a/CREDITS b/CREDITS index 0488bad..7272ddc 100644 --- a/CREDITS +++ b/CREDITS @@ -52,3 +52,12 @@ https://gamedev.stackexchange.com/questions/28395/rotating-vector3-by-a-quaterni 9. Sphere AABB intersect https://github.com/erich666/GraphicsGems/blob/master/gems/BoxSphere.c + +10. Horizontal add +https://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-float-vector-sum-on-x86 + +11. de casteljau implementation and comments +https://forums.khronos.org/showthread.php/10264-Animations-in-1-4-1-release-notes-revision-A/page2?highlight=bezier +https://forums.khronos.org/showthread.php/10644-Animation-Bezier-interpolation +https://forums.khronos.org/showthread.php/10387-2D-Tangents-in-Bezier-Splines?p=34164&viewfull=1#post34164 +https://forums.khronos.org/showthread.php/10651-Animation-TCB-Spline-Interpolation-in-COLLADA?highlight=bezier diff --git a/README.md b/README.md index dffa54c..d03a03f 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,11 @@ Currently *cglm* uses default clip space configuration (-1, 1) for camera functi - inline or pre-compiled function call - frustum (extract view frustum planes, corners...) - bounding box (AABB in Frustum (culling), crop, merge...) +- bounding sphere - project, unproject +- easing functions +- curves +- curve interpolation helpers (S*M*C, deCasteljau...) - and other...
diff --git a/configure.ac b/configure.ac index 1349b90..d29b9bb 100644 --- a/configure.ac +++ b/configure.ac @@ -29,6 +29,7 @@ LT_INIT # Checks for libraries. AC_CHECK_LIB([m], [floor]) +m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) AC_SYS_LARGEFILE # Checks for header files. diff --git a/docs/source/api.rst b/docs/source/api.rst index e88b426..408eae7 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -46,3 +46,5 @@ Follow the :doc:`build` documentation for this io call sphere + curve + bezier diff --git a/docs/source/bezier.rst b/docs/source/bezier.rst new file mode 100644 index 0000000..8b29751 --- /dev/null +++ b/docs/source/bezier.rst @@ -0,0 +1,89 @@ +.. default-domain:: C + +Bezier +================================================================================ + +Header: cglm/bezier.h + +Common helpers for cubic bezier and similar curves. + +Table of contents (click to go): +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Functions: + +1. :c:func:`glm_bezier` +2. :c:func:`glm_hermite` +3. :c:func:`glm_decasteljau` + +Functions documentation +~~~~~~~~~~~~~~~~~~~~~~~ + +.. c:function:: float glm_bezier(float s, float p0, float c0, float c1, float p1) + + | cubic bezier interpolation + | formula: + + .. code-block:: text + + B(s) = P0*(1-s)^3 + 3*C0*s*(1-s)^2 + 3*C1*s^2*(1-s) + P1*s^3 + + | similar result using matrix: + + .. code-block:: text + + B(s) = glm_smc(s, GLM_BEZIER_MAT, (vec4){p0, c0, c1, p1}) + + | glm_eq(glm_smc(...), glm_bezier(...)) should return TRUE + + Parameters: + | *[in]* **s** parameter between 0 and 1 + | *[in]* **p0** begin point + | *[in]* **c0** control point 1 + | *[in]* **c1** control point 2 + | *[in]* **p1** end point + + Returns: + B(s) + +.. c:function:: float glm_hermite(float s, float p0, float t0, float t1, float p1) + + | cubic hermite interpolation + | formula: + + .. 
code-block:: text + + H(s) = P0*(2*s^3 - 3*s^2 + 1) + T0*(s^3 - 2*s^2 + s) + P1*(-2*s^3 + 3*s^2) + T1*(s^3 - s^2) + + | similar result using matrix: + + .. code-block:: text + + H(s) = glm_smc(s, GLM_HERMITE_MAT, (vec4){p0, p1, t0, t1}) + + | glm_eq(glm_smc(...), glm_hermite(...)) should return TRUE + + + Parameters: + | *[in]* **s** parameter between 0 and 1 + | *[in]* **p0** begin point + | *[in]* **t0** tangent 1 + | *[in]* **t1** tangent 2 + | *[in]* **p1** end point + + Returns: + H(s) + +.. c:function:: float glm_decasteljau(float prm, float p0, float c0, float c1, float p1) + + | iterative way to solve cubic equation + + Parameters: + | *[in]* **prm** parameter between 0 and 1 + | *[in]* **p0** begin point + | *[in]* **c0** control point 1 + | *[in]* **c1** control point 2 + | *[in]* **p1** end point + + Returns: + parameter to use in cubic equation diff --git a/docs/source/curve.rst b/docs/source/curve.rst new file mode 100644 index 0000000..26c9b75 --- /dev/null +++ b/docs/source/curve.rst @@ -0,0 +1,41 @@ +.. default-domain:: C + +Curve +================================================================================ + +Header: cglm/curve.h + +Common helpers for common curves. For specific curve see its header/doc +e.g. bezier + +Table of contents (click to go): +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Functions: + +1. :c:func:`glm_smc` + +Functions documentation +~~~~~~~~~~~~~~~~~~~~~~~ + +.. c:function:: float glm_smc(float s, mat4 m, vec4 c) + + | helper function to calculate **S** * **M** * **C** multiplication for curves + + | this function does not encourage you to use SMC, instead it is a helper if you use SMC. + + | if you want to specify S as vector then use more generic glm_mat4_rmc() func. + + | Example usage: + + .. 
code-block:: c + + Bs = glm_smc(s, GLM_BEZIER_MAT, (vec4){p0, c0, c1, p1}) + + Parameters: + | *[in]* **s** parameter between 0 and 1 (this will be [s3, s2, s, 1]) + | *[in]* **m** basis matrix + | *[in]* **c** position/control vector + + Returns: + scalar value e.g. Bs diff --git a/docs/source/mat4.rst b/docs/source/mat4.rst index 30f48fe..294f8f4 100644 --- a/docs/source/mat4.rst +++ b/docs/source/mat4.rst @@ -45,6 +45,7 @@ Functions: #. :c:func:`glm_mat4_inv_fast` #. :c:func:`glm_mat4_swap_col` #. :c:func:`glm_mat4_swap_row` +#. :c:func:`glm_mat4_rmc` Functions documentation ~~~~~~~~~~~~~~~~~~~~~~~ @@ -270,3 +271,20 @@ Functions documentation | *[in, out]* **mat** matrix | *[in]* **row1** row1 | *[in]* **row2** row2 + +.. c:function:: float glm_mat4_rmc(vec4 r, mat4 m, vec4 c) + + | **rmc** stands for **Row** * **Matrix** * **Column** + + | helper for R (row vector) * M (matrix) * C (column vector) + + | the result is scalar because R * M = Matrix1x4 (row vector), + | then Matrix1x4 * Vec4 (column vector) = Matrix1x1 (Scalar) + + Parameters: + | *[in]* **r** row vector or matrix1x4 + | *[in]* **m** matrix4x4 + | *[in]* **c** column vector or matrix4x1 + + Returns: + scalar value e.g. Matrix1x1 diff --git a/docs/source/vec4.rst b/docs/source/vec4.rst index 5bb1ac7..f497868 100644 --- a/docs/source/vec4.rst +++ b/docs/source/vec4.rst @@ -58,11 +58,7 @@ Functions: #. :c:func:`glm_vec4_minv` #. :c:func:`glm_vec4_clamp` #. :c:func:`glm_vec4_lerp` -#. :c:func:`glm_vec4_isnan` -#. :c:func:`glm_vec4_isinf` -#. :c:func:`glm_vec4_isvalid` -#. :c:func:`glm_vec4_sign` -#. :c:func:`glm_vec4_sqrt` +#. :c:func:`glm_vec4_cubic` Functions documentation ~~~~~~~~~~~~~~~~~~~~~~~ @@ -401,3 +397,11 @@ Functions documentation | *[in]* **to** to value | *[in]* **t** interpolant (amount) clamped between 0 and 1 | *[out]* **dest** destination + +.. 
c:function:: void glm_vec4_cubic(float s, vec4 dest) + + helper to fill vec4 as [S^3, S^2, S, 1] + + Parameters: + | *[in]* **s** parameter + | *[out]* **dest** destination diff --git a/include/cglm/bezier.h b/include/cglm/bezier.h new file mode 100644 index 0000000..2560594 --- /dev/null +++ b/include/cglm/bezier.h @@ -0,0 +1,152 @@ +/* + * Copyright (c), Recep Aslantas. + * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#ifndef cglm_bezier_h +#define cglm_bezier_h + +#define GLM_BEZIER_MAT_INIT {{-1.0f, 3.0f, -3.0f, 1.0f}, \ + { 3.0f, -6.0f, 3.0f, 0.0f}, \ + {-3.0f, 3.0f, 0.0f, 0.0f}, \ + { 1.0f, 0.0f, 0.0f, 0.0f}} +#define GLM_HERMITE_MAT_INIT {{ 2.0f, -3.0f, 0.0f, 1.0f}, \ + {-2.0f, 3.0f, 0.0f, 0.0f}, \ + { 1.0f, -2.0f, 1.0f, 0.0f}, \ + { 1.0f, -1.0f, 0.0f, 0.0f}} +/* for C only */ +#define GLM_BEZIER_MAT ((mat4)GLM_BEZIER_MAT_INIT) +#define GLM_HERMITE_MAT ((mat4)GLM_HERMITE_MAT_INIT) + +#define CGLM_DECASTEL_EPS 1e-9 +#define CGLM_DECASTEL_MAX 1000 +#define CGLM_DECASTEL_SMALL 1e-20 + +/*! + * @brief cubic bezier interpolation + * + * Formula: + * B(s) = P0*(1-s)^3 + 3*C0*s*(1-s)^2 + 3*C1*s^2*(1-s) + P1*s^3 + * + * similar result using matrix: + * B(s) = glm_smc(s, GLM_BEZIER_MAT, (vec4){p0, c0, c1, p1}) + * + * glm_eq(glm_smc(...), glm_bezier(...)) should return TRUE + * + * @param[in] s parameter between 0 and 1 + * @param[in] p0 begin point + * @param[in] c0 control point 1 + * @param[in] c1 control point 2 + * @param[in] p1 end point + * + * @return B(s) + */ +CGLM_INLINE +float +glm_bezier(float s, float p0, float c0, float c1, float p1) { + float x, xx, ss, xs3, a; + + x = 1.0f - s; + xx = x * x; + ss = s * s; + xs3 = (s - ss) * 3.0f; + a = p0 * xx + c0 * xs3; + + return a + s * (c1 * xs3 + p1 * ss - a); +} + +/*! 
+ * @brief cubic hermite interpolation + * + * Formula: + * H(s) = P0*(2*s^3 - 3*s^2 + 1) + T0*(s^3 - 2*s^2 + s) + * + P1*(-2*s^3 + 3*s^2) + T1*(s^3 - s^2) + * + * similar result using matrix: + * H(s) = glm_smc(s, GLM_HERMITE_MAT, (vec4){p0, p1, t0, t1}) + * + * glm_eq(glm_smc(...), glm_hermite(...)) should return TRUE + * + * @param[in] s parameter between 0 and 1 + * @param[in] p0 begin point + * @param[in] t0 tangent 1 + * @param[in] t1 tangent 2 + * @param[in] p1 end point + * + * @return H(s) + */ +CGLM_INLINE +float +glm_hermite(float s, float p0, float t0, float t1, float p1) { + float ss, d, a, b, c, e, f; + + ss = s * s; + a = ss + ss; + c = a + ss; + b = a * s; + d = s * ss; + f = d - ss; + e = b - c; + + return p0 * (e + 1.0f) + t0 * (f - ss + s) + t1 * f - p1 * e; +} + +/*! + * @brief iterative way to solve cubic equation + * + * @param[in] prm parameter between 0 and 1 + * @param[in] p0 begin point + * @param[in] c0 control point 1 + * @param[in] c1 control point 2 + * @param[in] p1 end point + * + * @return parameter to use in cubic equation + */ +CGLM_INLINE +float +glm_decasteljau(float prm, float p0, float c0, float c1, float p1) { + float u, v, a, b, c, d, e, f; + int i; + + if (prm - p0 < CGLM_DECASTEL_SMALL) + return 0.0f; + + if (p1 - prm < CGLM_DECASTEL_SMALL) + return 1.0f; + + u = 0.0f; + v = 1.0f; + + for (i = 0; i < CGLM_DECASTEL_MAX; i++) { + /* de Casteljau Subdivision */ + a = (p0 + c0) * 0.5f; + b = (c0 + c1) * 0.5f; + c = (c1 + p1) * 0.5f; + d = (a + b) * 0.5f; + e = (b + c) * 0.5f; + f = (d + e) * 0.5f; /* this one is on the curve! 
*/ + + /* The curve point is close enough to our wanted t */ + if (fabsf(f - prm) < CGLM_DECASTEL_EPS) + return glm_clamp_zo((u + v) * 0.5f); + + /* dichotomy */ + if (f < prm) { + p0 = f; + c0 = e; + c1 = c; + u = (u + v) * 0.5f; + } else { + c0 = a; + c1 = d; + p1 = f; + v = (u + v) * 0.5f; + } + } + + return glm_clamp_zo((u + v) * 0.5f); +} + +#endif /* cglm_bezier_h */ diff --git a/include/cglm/call.h b/include/cglm/call.h index b7fa6e1..7cbd501 100644 --- a/include/cglm/call.h +++ b/include/cglm/call.h @@ -27,6 +27,8 @@ extern "C" { #include "call/project.h" #include "call/sphere.h" #include "call/ease.h" +#include "call/curve.h" +#include "call/bezier.h" #ifdef __cplusplus } diff --git a/include/cglm/call/bezier.h b/include/cglm/call/bezier.h new file mode 100644 index 0000000..a6a0eb4 --- /dev/null +++ b/include/cglm/call/bezier.h @@ -0,0 +1,31 @@ +/* + * Copyright (c), Recep Aslantas. + * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#ifndef cglmc_bezier_h +#define cglmc_bezier_h +#ifdef __cplusplus +extern "C" { +#endif + +#include "../cglm.h" + +CGLM_EXPORT +float +glmc_bezier(float s, float p0, float c0, float c1, float p1); + +CGLM_EXPORT +float +glmc_hermite(float s, float p0, float t0, float t1, float p1); + +CGLM_EXPORT +float +glmc_decasteljau(float prm, float p0, float c0, float c1, float p1); + +#ifdef __cplusplus +} +#endif +#endif /* cglmc_bezier_h */ diff --git a/include/cglm/call/curve.h b/include/cglm/call/curve.h new file mode 100644 index 0000000..061fdb9 --- /dev/null +++ b/include/cglm/call/curve.h @@ -0,0 +1,23 @@ +/* + * Copyright (c), Recep Aslantas. 
+ * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#ifndef cglmc_curve_h +#define cglmc_curve_h +#ifdef __cplusplus +extern "C" { +#endif + +#include "../cglm.h" + +CGLM_EXPORT +float +glmc_smc(float s, mat4 m, vec4 c); + +#ifdef __cplusplus +} +#endif +#endif /* cglmc_curve_h */ diff --git a/include/cglm/call/ease.h b/include/cglm/call/ease.h index 9f1757e..87e39ca 100644 --- a/include/cglm/call/ease.h +++ b/include/cglm/call/ease.h @@ -137,4 +137,7 @@ CGLM_EXPORT float glmc_ease_bounce_inout(float t); +#ifdef __cplusplus +} +#endif #endif /* cglmc_ease_h */ diff --git a/include/cglm/call/mat4.h b/include/cglm/call/mat4.h index 7e76f73..54fbcbe 100644 --- a/include/cglm/call/mat4.h +++ b/include/cglm/call/mat4.h @@ -113,6 +113,10 @@ CGLM_EXPORT void glmc_mat4_swap_row(mat4 mat, int row1, int row2); +CGLM_EXPORT +float +glmc_mat4_rmc(vec4 r, mat4 m, vec4 c); + #ifdef __cplusplus } #endif diff --git a/include/cglm/call/sphere.h b/include/cglm/call/sphere.h index 02c3d55..9b96546 100644 --- a/include/cglm/call/sphere.h +++ b/include/cglm/call/sphere.h @@ -33,4 +33,7 @@ CGLM_EXPORT bool glmc_sphere_point(vec4 s, vec3 point); +#ifdef __cplusplus +} +#endif #endif /* cglmc_sphere_h */ diff --git a/include/cglm/call/vec4.h b/include/cglm/call/vec4.h index 9a72510..936bb52 100644 --- a/include/cglm/call/vec4.h +++ b/include/cglm/call/vec4.h @@ -153,6 +153,10 @@ CGLM_EXPORT void glmc_vec4_lerp(vec4 from, vec4 to, float t, vec4 dest); +CGLM_EXPORT +void +glmc_vec4_cubic(float s, vec4 dest); + /* ext */ CGLM_EXPORT diff --git a/include/cglm/cglm.h b/include/cglm/cglm.h index 8b37162..7c301bf 100644 --- a/include/cglm/cglm.h +++ b/include/cglm/cglm.h @@ -26,5 +26,7 @@ #include "project.h" #include "sphere.h" #include "ease.h" +#include "curve.h" +#include "bezier.h" #endif /* cglm_h */ diff --git a/include/cglm/curve.h b/include/cglm/curve.h new file mode 100644 index 0000000..5033be5 --- /dev/null +++ 
b/include/cglm/curve.h @@ -0,0 +1,40 @@ +/* + * Copyright (c), Recep Aslantas. + * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#ifndef cglm_curve_h +#define cglm_curve_h + +#include "common.h" +#include "vec4.h" +#include "mat4.h" + +/*! + * @brief helper function to calculate S*M*C multiplication for curves + * + * This function does not encourage you to use SMC, + * instead it is a helper if you use SMC. + * + * if you want to specify S as vector then use more generic glm_mat4_rmc() func. + * + * Example usage: + * B(s) = glm_smc(s, GLM_BEZIER_MAT, (vec4){p0, c0, c1, p1}) + * + * @param[in] s parameter between 0 and 1 (this will be [s3, s2, s, 1]) + * @param[in] m basis matrix + * @param[in] c position/control vector + * + * @return B(s) + */ +CGLM_INLINE +float +glm_smc(float s, mat4 m, vec4 c) { + vec4 vs; + glm_vec4_cubic(s, vs); + return glm_mat4_rmc(vs, m, c); +} + +#endif /* cglm_curve_h */ diff --git a/include/cglm/mat4.h b/include/cglm/mat4.h index ea3b34e..74e876a 100644 --- a/include/cglm/mat4.h +++ b/include/cglm/mat4.h @@ -118,6 +118,11 @@ glm_mat4_copy(mat4 mat, mat4 dest) { glmm_store(dest[1], glmm_load(mat[1])); glmm_store(dest[2], glmm_load(mat[2])); glmm_store(dest[3], glmm_load(mat[3])); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest[0], vld1q_f32(mat[0])); + vst1q_f32(dest[1], vld1q_f32(mat[1])); + vst1q_f32(dest[2], vld1q_f32(mat[2])); + vst1q_f32(dest[3], vld1q_f32(mat[3])); #else glm_mat4_ucopy(mat, dest); #endif @@ -252,7 +257,7 @@ glm_mat4_mul(mat4 m1, mat4 m2, mat4 dest) { glm_mat4_mul_avx(m1, m2, dest); #elif defined( __SSE__ ) || defined( __SSE2__ ) glm_mat4_mul_sse2(m1, m2, dest); -#elif defined( __ARM_NEON_FP ) +#elif defined(CGLM_NEON_FP) glm_mat4_mul_neon(m1, m2, dest); #else float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2], a03 = m1[0][3], @@ -506,6 +511,13 @@ void glm_mat4_scale(mat4 m, float s) { #if defined( __SSE__ ) || defined( __SSE2__ ) 
glm_mat4_scale_sse2(m, s); +#elif defined(CGLM_NEON_FP) + float32x4_t v0; + v0 = vdupq_n_f32(s); + vst1q_f32(m[0], vmulq_f32(vld1q_f32(m[0]), v0)); + vst1q_f32(m[1], vmulq_f32(vld1q_f32(m[1]), v0)); + vst1q_f32(m[2], vmulq_f32(vld1q_f32(m[2]), v0)); + vst1q_f32(m[3], vmulq_f32(vld1q_f32(m[3]), v0)); #else glm_mat4_scale_p(m, s); #endif @@ -665,4 +677,26 @@ glm_mat4_swap_row(mat4 mat, int row1, int row2) { mat[3][row2] = tmp[3]; } +/*! + * @brief helper for R (row vector) * M (matrix) * C (column vector) + * + * rmc stands for Row * Matrix * Column + * + * the result is scalar because S * M = Matrix1x4 (row vector), + * then Matrix1x4 * Vec4 (column vector) = Matrix1x1 (Scalar) + * + * @param[in] r row vector or matrix1x4 + * @param[in] m matrix4x4 + * @param[in] c column vector or matrix4x1 + * + * @return scalar value e.g. B(s) + */ +CGLM_INLINE +float +glm_mat4_rmc(vec4 r, mat4 m, vec4 c) { + vec4 tmp; + glm_mat4_mulv(m, c, tmp); + return glm_vec4_dot(r, tmp); +} + #endif /* cglm_mat_h */ diff --git a/include/cglm/quat.h b/include/cglm/quat.h index 1db0161..f5f29af 100644 --- a/include/cglm/quat.h +++ b/include/cglm/quat.h @@ -218,7 +218,7 @@ glm_quat_normalize_to(versor q, versor dest) { float dot; x0 = glmm_load(q); - xdot = glmm_dot(x0, x0); + xdot = glmm_vdot(x0, x0); dot = _mm_cvtss_f32(xdot); if (dot <= 0.0f) { diff --git a/include/cglm/simd/arm.h b/include/cglm/simd/arm.h new file mode 100644 index 0000000..5412461 --- /dev/null +++ b/include/cglm/simd/arm.h @@ -0,0 +1,41 @@ +/* + * Copyright (c), Recep Aslantas. 
+ * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#ifndef cglm_simd_arm_h +#define cglm_simd_arm_h +#include "intrin.h" +#ifdef CGLM_SIMD_ARM + +#define glmm_load(p) vld1q_f32(p) +#define glmm_store(p, a) vst1q_f32(p, a) + +static inline +float +glmm_hadd(float32x4_t v) { +#if defined(__aarch64__) + return vaddvq_f32(v); +#else + v = vaddq_f32(v, vrev64q_f32(v)); + v = vaddq_f32(v, vcombine_f32(vget_high_f32(v), vget_low_f32(v))); + return vgetq_lane_f32(v, 0); +#endif +} + +static inline +float +glmm_dot(float32x4_t a, float32x4_t b) { + return glmm_hadd(vmulq_f32(a, b)); +} + +static inline +float +glmm_norm(float32x4_t a) { + return sqrtf(glmm_dot(a, a)); +} + +#endif +#endif /* cglm_simd_arm_h */ diff --git a/include/cglm/simd/intrin.h b/include/cglm/simd/intrin.h index f4854bd..a44b905 100644 --- a/include/cglm/simd/intrin.h +++ b/include/cglm/simd/intrin.h @@ -27,90 +27,64 @@ #if defined( __SSE__ ) || defined( __SSE2__ ) # include # include - -/* OPTIONAL: You may save some instructions but latency (not sure) */ -#ifdef CGLM_USE_INT_DOMAIN -# define glmm_shuff1(xmm, z, y, x, w) \ - _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \ - _MM_SHUFFLE(z, y, x, w))) -#else -# define glmm_shuff1(xmm, z, y, x, w) \ - _mm_shuffle_ps(xmm, xmm, _MM_SHUFFLE(z, y, x, w)) -#endif - -#define glmm_shuff1x(xmm, x) glmm_shuff1(xmm, x, x, x, x) -#define glmm_shuff2(a, b, z0, y0, x0, w0, z1, y1, x1, w1) \ - glmm_shuff1(_mm_shuffle_ps(a, b, _MM_SHUFFLE(z0, y0, x0, w0)), \ - z1, y1, x1, w1) - -static inline -__m128 -glmm_dot(__m128 a, __m128 b) { - __m128 x0; - x0 = _mm_mul_ps(a, b); - x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2)); - return _mm_add_ps(x0, glmm_shuff1(x0, 0, 1, 0, 1)); -} - -static inline -__m128 -glmm_norm(__m128 a) { - return _mm_sqrt_ps(glmm_dot(a, a)); -} - -static inline -__m128 -glmm_load3(float v[3]) { - __m128i xy; - __m128 z; - - xy = _mm_loadl_epi64((const __m128i *)v); - z = 
_mm_load_ss(&v[2]); - - return _mm_movelh_ps(_mm_castsi128_ps(xy), z); -} - -static inline -void -glmm_store3(__m128 vx, float v[3]) { - _mm_storel_pi((__m64 *)&v[0], vx); - _mm_store_ss(&v[2], glmm_shuff1(vx, 2, 2, 2, 2)); -} - -#ifdef CGLM_ALL_UNALIGNED -# define glmm_load(p) _mm_loadu_ps(p) -# define glmm_store(p, a) _mm_storeu_ps(p, a) -#else -# define glmm_load(p) _mm_load_ps(p) -# define glmm_store(p, a) _mm_store_ps(p, a) -#endif - -#endif - -/* x86, x64 */ -#if defined( __SSE__ ) || defined( __SSE2__ ) # define CGLM_SSE_FP 1 +# ifndef CGLM_SIMD_x86 +# define CGLM_SIMD_x86 +# endif +#endif + +#if defined(__SSE3__) +# include +# ifndef CGLM_SIMD_x86 +# define CGLM_SIMD_x86 +# endif +#endif + +#if defined(__SSE4_1__) +# include +# ifndef CGLM_SIMD_x86 +# define CGLM_SIMD_x86 +# endif +#endif + +#if defined(__SSE4_2__) +# include +# ifndef CGLM_SIMD_x86 +# define CGLM_SIMD_x86 +# endif #endif #ifdef __AVX__ +# include # define CGLM_AVX_FP 1 - -#ifdef CGLM_ALL_UNALIGNED -# define glmm_load256(p) _mm256_loadu_ps(p) -# define glmm_store256(p, a) _mm256_storeu_ps(p, a) -#else -# define glmm_load256(p) _mm256_load_ps(p) -# define glmm_store256(p, a) _mm256_store_ps(p, a) -#endif - +# ifndef CGLM_SIMD_x86 +# define CGLM_SIMD_x86 +# endif #endif /* ARM Neon */ -#if defined(__ARM_NEON) && defined(__ARM_NEON_FP) +#if defined(__ARM_NEON) # include -# define CGLM_NEON_FP 1 -#else -# undef CGLM_NEON_FP +# if defined(__ARM_NEON_FP) +# define CGLM_NEON_FP 1 +# ifndef CGLM_SIMD_ARM +# define CGLM_SIMD_ARM +# endif +# endif +#endif + +#if defined(CGLM_SIMD_x86) || defined(CGLM_NEON_FP) +# ifndef CGLM_SIMD +# define CGLM_SIMD +# endif +#endif + +#if defined(CGLM_SIMD_x86) +# include "x86.h" +#endif + +#if defined(CGLM_SIMD_ARM) +# include "arm.h" #endif #endif /* cglm_intrin_h */ diff --git a/include/cglm/simd/x86.h b/include/cglm/simd/x86.h new file mode 100644 index 0000000..99d2b8a --- /dev/null +++ b/include/cglm/simd/x86.h @@ -0,0 +1,136 @@ +/* + * Copyright (c), Recep 
Aslantas. + * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#ifndef cglm_simd_x86_h +#define cglm_simd_x86_h +#include "intrin.h" +#ifdef CGLM_SIMD_x86 + +#ifdef CGLM_ALL_UNALIGNED +# define glmm_load(p) _mm_loadu_ps(p) +# define glmm_store(p, a) _mm_storeu_ps(p, a) +#else +# define glmm_load(p) _mm_load_ps(p) +# define glmm_store(p, a) _mm_store_ps(p, a) +#endif + +#ifdef CGLM_USE_INT_DOMAIN +# define glmm_shuff1(xmm, z, y, x, w) \ + _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \ + _MM_SHUFFLE(z, y, x, w))) +#else +# define glmm_shuff1(xmm, z, y, x, w) \ + _mm_shuffle_ps(xmm, xmm, _MM_SHUFFLE(z, y, x, w)) +#endif + +#define glmm_shuff1x(xmm, x) glmm_shuff1(xmm, x, x, x, x) +#define glmm_shuff2(a, b, z0, y0, x0, w0, z1, y1, x1, w1) \ + glmm_shuff1(_mm_shuffle_ps(a, b, _MM_SHUFFLE(z0, y0, x0, w0)), \ + z1, y1, x1, w1) + +#ifdef __AVX__ +# ifdef CGLM_ALL_UNALIGNED +# define glmm_load256(p) _mm256_loadu_ps(p) +# define glmm_store256(p, a) _mm256_storeu_ps(p, a) +# else +# define glmm_load256(p) _mm256_load_ps(p) +# define glmm_store256(p, a) _mm256_store_ps(p, a) +# endif +#endif + +static inline +__m128 +glmm_vhadds(__m128 v) { +#if defined(__SSE3__) + __m128 shuf, sums; + shuf = _mm_movehdup_ps(v); + sums = _mm_add_ps(v, shuf); + shuf = _mm_movehl_ps(shuf, sums); + sums = _mm_add_ss(sums, shuf); + return sums; +#else + __m128 shuf, sums; + shuf = glmm_shuff1(v, 2, 3, 0, 1); + sums = _mm_add_ps(v, shuf); + shuf = _mm_movehl_ps(shuf, sums); + sums = _mm_add_ss(sums, shuf); + return sums; +#endif +} + +static inline +float +glmm_hadd(__m128 v) { + return _mm_cvtss_f32(glmm_vhadds(v)); +} + +static inline +__m128 +glmm_vdots(__m128 a, __m128 b) { +#if (defined(__SSE4_1__) || defined(__SSE4_2__)) && defined(CGLM_SSE4_DOT) + return _mm_dp_ps(a, b, 0xFF); +#elif defined(__SSE3__) && defined(CGLM_SSE3_DOT) + __m128 x0, x1; + x0 = _mm_mul_ps(a, b); + x1 = _mm_hadd_ps(x0, x0); + return 
_mm_hadd_ps(x1, x1); +#else + return glmm_vhadds(_mm_mul_ps(a, b)); +#endif +} + +static inline +__m128 +glmm_vdot(__m128 a, __m128 b) { +#if (defined(__SSE4_1__) || defined(__SSE4_2__)) && defined(CGLM_SSE4_DOT) + return _mm_dp_ps(a, b, 0xFF); +#elif defined(__SSE3__) && defined(CGLM_SSE3_DOT) + __m128 x0, x1; + x0 = _mm_mul_ps(a, b); + x1 = _mm_hadd_ps(x0, x0); + return _mm_hadd_ps(x1, x1); +#else + __m128 x0; + x0 = _mm_mul_ps(a, b); + x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2)); + return _mm_add_ps(x0, glmm_shuff1(x0, 0, 1, 0, 1)); +#endif +} + +static inline +float +glmm_dot(__m128 a, __m128 b) { + return _mm_cvtss_f32(glmm_vdots(a, b)); +} + +static inline +float +glmm_norm(__m128 a) { + return _mm_cvtss_f32(_mm_sqrt_ss(glmm_vhadds(_mm_mul_ps(a, a)))); +} + +static inline +__m128 +glmm_load3(float v[3]) { + __m128i xy; + __m128 z; + + xy = _mm_loadl_epi64((const __m128i *)v); + z = _mm_load_ss(&v[2]); + + return _mm_movelh_ps(_mm_castsi128_ps(xy), z); +} + +static inline +void +glmm_store3(__m128 vx, float v[3]) { + _mm_storel_pi((__m64 *)&v[0], vx); + _mm_store_ss(&v[2], glmm_shuff1(vx, 2, 2, 2, 2)); +} + +#endif +#endif /* cglm_simd_x86_h */ diff --git a/include/cglm/vec4.h b/include/cglm/vec4.h index 0c4f613..bbd1c31 100644 --- a/include/cglm/vec4.h +++ b/include/cglm/vec4.h @@ -122,6 +122,8 @@ void glm_vec4_copy(vec4 v, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, glmm_load(v)); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vld1q_f32(v)); #else dest[0] = v[0]; dest[1] = v[1]; @@ -157,6 +159,8 @@ void glm_vec4_zero(vec4 v) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(v, _mm_setzero_ps()); +#elif defined(CGLM_NEON_FP) + vst1q_f32(v, vdupq_n_f32(0.0f)); #else v[0] = 0.0f; v[1] = 0.0f; @@ -175,6 +179,8 @@ void glm_vec4_one(vec4 v) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(v, _mm_set1_ps(1.0f)); +#elif defined(CGLM_NEON_FP) + vst1q_f32(v, vdupq_n_f32(1.0f)); #else v[0] = 1.0f; v[1] = 1.0f; @@ 
-194,11 +200,8 @@ glm_vec4_one(vec4 v) { CGLM_INLINE float glm_vec4_dot(vec4 a, vec4 b) { -#if defined( __SSE__ ) || defined( __SSE2__ ) - __m128 x0; - x0 = _mm_mul_ps(glmm_load(a), glmm_load(b)); - x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2)); - return _mm_cvtss_f32(_mm_add_ss(x0, glmm_shuff1(x0, 0, 1, 0, 1))); +#if defined(CGLM_SIMD) + return glmm_dot(glmm_load(a), glmm_load(b)); #else return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]; #endif @@ -218,15 +221,7 @@ glm_vec4_dot(vec4 a, vec4 b) { CGLM_INLINE float glm_vec4_norm2(vec4 v) { -#if defined( __SSE__ ) || defined( __SSE2__ ) - __m128 x0; - x0 = glmm_load(v); - x0 = _mm_mul_ps(x0, x0); - x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2)); - return _mm_cvtss_f32(_mm_add_ss(x0, glmm_shuff1(x0, 0, 1, 0, 1))); -#else - return v[0] * v[0] + v[1] * v[1] + v[2] * v[2] + v[3] * v[3]; -#endif + return glm_vec4_dot(v, v); } /*! @@ -239,12 +234,10 @@ glm_vec4_norm2(vec4 v) { CGLM_INLINE float glm_vec4_norm(vec4 v) { -#if defined( __SSE__ ) || defined( __SSE2__ ) - __m128 x0; - x0 = glmm_load(v); - return _mm_cvtss_f32(_mm_sqrt_ss(glmm_dot(x0, x0))); +#if defined(CGLM_SIMD) + return glmm_norm(glmm_load(v)); #else - return sqrtf(glm_vec4_norm2(v)); + return sqrtf(glm_vec4_dot(v, v)); #endif } @@ -260,6 +253,8 @@ void glm_vec4_add(vec4 a, vec4 b, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_add_ps(glmm_load(a), glmm_load(b))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vaddq_f32(vld1q_f32(a), vld1q_f32(b))); #else dest[0] = a[0] + b[0]; dest[1] = a[1] + b[1]; @@ -280,6 +275,8 @@ void glm_vec4_adds(vec4 v, float s, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_add_ps(glmm_load(v), _mm_set1_ps(s))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vaddq_f32(vld1q_f32(v), vdupq_n_f32(s))); #else dest[0] = v[0] + s; dest[1] = v[1] + s; @@ -300,6 +297,8 @@ void glm_vec4_sub(vec4 a, vec4 b, vec4 dest) { #if defined( __SSE__ ) || defined( 
__SSE2__ ) glmm_store(dest, _mm_sub_ps(glmm_load(a), glmm_load(b))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vsubq_f32(vld1q_f32(a), vld1q_f32(b))); #else dest[0] = a[0] - b[0]; dest[1] = a[1] - b[1]; @@ -320,6 +319,8 @@ void glm_vec4_subs(vec4 v, float s, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_sub_ps(glmm_load(v), _mm_set1_ps(s))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vsubq_f32(vld1q_f32(v), vdupq_n_f32(s))); #else dest[0] = v[0] - s; dest[1] = v[1] - s; @@ -340,6 +341,8 @@ void glm_vec4_mul(vec4 a, vec4 b, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_mul_ps(glmm_load(a), glmm_load(b))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vmulq_f32(vld1q_f32(a), vld1q_f32(b))); #else dest[0] = a[0] * b[0]; dest[1] = a[1] * b[1]; @@ -360,6 +363,8 @@ void glm_vec4_scale(vec4 v, float s, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_mul_ps(glmm_load(v), _mm_set1_ps(s))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vmulq_f32(vld1q_f32(v), vdupq_n_f32(s))); #else dest[0] = v[0] * s; dest[1] = v[1] * s; @@ -442,6 +447,10 @@ glm_vec4_addadd(vec4 a, vec4 b, vec4 dest) { glmm_store(dest, _mm_add_ps(glmm_load(dest), _mm_add_ps(glmm_load(a), glmm_load(b)))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vaddq_f32(vld1q_f32(dest), + vaddq_f32(vld1q_f32(a), + vld1q_f32(b)))); #else dest[0] += a[0] + b[0]; dest[1] += a[1] + b[1]; @@ -466,6 +475,10 @@ glm_vec4_subadd(vec4 a, vec4 b, vec4 dest) { glmm_store(dest, _mm_add_ps(glmm_load(dest), _mm_sub_ps(glmm_load(a), glmm_load(b)))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vaddq_f32(vld1q_f32(dest), + vsubq_f32(vld1q_f32(a), + vld1q_f32(b)))); #else dest[0] += a[0] - b[0]; dest[1] += a[1] - b[1]; @@ -490,6 +503,10 @@ glm_vec4_muladd(vec4 a, vec4 b, vec4 dest) { glmm_store(dest, _mm_add_ps(glmm_load(dest), _mm_mul_ps(glmm_load(a), glmm_load(b)))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, 
vaddq_f32(vld1q_f32(dest), + vmulq_f32(vld1q_f32(a), + vld1q_f32(b)))); #else dest[0] += a[0] * b[0]; dest[1] += a[1] * b[1]; @@ -514,6 +531,10 @@ glm_vec4_muladds(vec4 a, float s, vec4 dest) { glmm_store(dest, _mm_add_ps(glmm_load(dest), _mm_mul_ps(glmm_load(a), _mm_set1_ps(s)))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vaddq_f32(vld1q_f32(dest), + vmulq_f32(vld1q_f32(a), + vdupq_n_f32(s)))); #else dest[0] += a[0] * s; dest[1] += a[1] * s; @@ -538,6 +559,10 @@ glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) { glmm_store(dest, _mm_add_ps(glmm_load(dest), _mm_max_ps(glmm_load(a), glmm_load(b)))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vaddq_f32(vld1q_f32(dest), + vmaxq_f32(vld1q_f32(a), + vld1q_f32(b)))); #else dest[0] += glm_max(a[0], b[0]); dest[1] += glm_max(a[1], b[1]); @@ -562,6 +587,10 @@ glm_vec4_minadd(vec4 a, vec4 b, vec4 dest) { glmm_store(dest, _mm_add_ps(glmm_load(dest), _mm_min_ps(glmm_load(a), glmm_load(b)))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vaddq_f32(vld1q_f32(dest), + vminq_f32(vld1q_f32(a), + vld1q_f32(b)))); #else dest[0] += glm_min(a[0], b[0]); dest[1] += glm_min(a[1], b[1]); @@ -581,6 +610,8 @@ void glm_vec4_negate_to(vec4 v, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_xor_ps(glmm_load(v), _mm_set1_ps(-0.0f))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vnegq_f32(vld1q_f32(v))); #else dest[0] = -v[0]; dest[1] = -v[1]; @@ -614,7 +645,7 @@ glm_vec4_normalize_to(vec4 v, vec4 dest) { float dot; x0 = glmm_load(v); - xdot = glmm_dot(x0, x0); + xdot = glmm_vdot(x0, x0); dot = _mm_cvtss_f32(xdot); if (dot == 0.0f) { @@ -658,10 +689,25 @@ glm_vec4_normalize(vec4 v) { CGLM_INLINE float glm_vec4_distance(vec4 a, vec4 b) { +#if defined( __SSE__ ) || defined( __SSE2__ ) + __m128 x0; + x0 = _mm_sub_ps(glmm_load(b), glmm_load(a)); + x0 = _mm_mul_ps(x0, x0); + x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2)); + return _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(x0, + glmm_shuff1(x0, 0, 
1, 0, 1)))); +#elif defined(CGLM_NEON_FP) + float32x4_t v0; + float32_t r; + v0 = vsubq_f32(vld1q_f32(a), vld1q_f32(b)); + r = glmm_hadd(vmulq_f32(v0, v0)); + return sqrtf(r); +#else return sqrtf(glm_pow2(b[0] - a[0]) + glm_pow2(b[1] - a[1]) + glm_pow2(b[2] - a[2]) + glm_pow2(b[3] - a[3])); +#endif } /*! @@ -676,6 +722,8 @@ void glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_max_ps(glmm_load(a), glmm_load(b))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vmaxq_f32(vld1q_f32(a), vld1q_f32(b))); #else dest[0] = glm_max(a[0], b[0]); dest[1] = glm_max(a[1], b[1]); @@ -696,6 +744,8 @@ void glm_vec4_minv(vec4 a, vec4 b, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_min_ps(glmm_load(a), glmm_load(b))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vminq_f32(vld1q_f32(a), vld1q_f32(b))); #else dest[0] = glm_min(a[0], b[0]); dest[1] = glm_min(a[1], b[1]); @@ -717,6 +767,9 @@ glm_vec4_clamp(vec4 v, float minVal, float maxVal) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(v, _mm_min_ps(_mm_max_ps(glmm_load(v), _mm_set1_ps(minVal)), _mm_set1_ps(maxVal))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(v, vminq_f32(vmaxq_f32(vld1q_f32(v), vdupq_n_f32(minVal)), + vdupq_n_f32(maxVal))); #else v[0] = glm_clamp(v[0], minVal, maxVal); v[1] = glm_clamp(v[1], minVal, maxVal); @@ -747,4 +800,23 @@ glm_vec4_lerp(vec4 from, vec4 to, float t, vec4 dest) { glm_vec4_add(from, v, dest); } +/*! 
+ * @brief helper to fill vec4 as [S^3, S^2, S, 1] + * + * @param[in] s parameter + * @param[out] dest destination + */ +CGLM_INLINE +void +glm_vec4_cubic(float s, vec4 dest) { + float ss; + + ss = s * s; /* s^2, computed once and reused for the cubic term */ + + dest[0] = ss * s; /* s^3 */ + dest[1] = ss; /* s^2 */ + dest[2] = s; /* s^1 */ + dest[3] = 1.0f; /* s^0, the constant term */ +} + #endif /* cglm_vec4_h */ diff --git a/makefile.am b/makefile.am index 2e9336c..d6cdc70 100644 --- a/makefile.am +++ b/makefile.am @@ -34,30 +34,32 @@ test_tests_CFLAGS = $(checkCFLAGS) cglmdir=$(includedir)/cglm cglm_HEADERS = include/cglm/version.h \ - include/cglm/cglm.h \ - include/cglm/call.h \ - include/cglm/cam.h \ - include/cglm/io.h \ - include/cglm/mat4.h \ - include/cglm/mat3.h \ - include/cglm/types.h \ - include/cglm/common.h \ - include/cglm/affine.h \ - include/cglm/vec3.h \ - include/cglm/vec3-ext.h \ - include/cglm/vec4.h \ - include/cglm/vec4-ext.h \ - include/cglm/euler.h \ - include/cglm/util.h \ - include/cglm/quat.h \ - include/cglm/affine-mat.h \ - include/cglm/plane.h \ - include/cglm/frustum.h \ - include/cglm/box.h \ - include/cglm/color.h \ - include/cglm/project.h \ - include/cglm/sphere.h \ - include/cglm/ease.h + include/cglm/cglm.h \ + include/cglm/call.h \ + include/cglm/cam.h \ + include/cglm/io.h \ + include/cglm/mat4.h \ + include/cglm/mat3.h \ + include/cglm/types.h \ + include/cglm/common.h \ + include/cglm/affine.h \ + include/cglm/vec3.h \ + include/cglm/vec3-ext.h \ + include/cglm/vec4.h \ + include/cglm/vec4-ext.h \ + include/cglm/euler.h \ + include/cglm/util.h \ + include/cglm/quat.h \ + include/cglm/affine-mat.h \ + include/cglm/plane.h \ + include/cglm/frustum.h \ + include/cglm/box.h \ + include/cglm/color.h \ + include/cglm/project.h \ + include/cglm/sphere.h \ + include/cglm/ease.h \ + include/cglm/curve.h \ + include/cglm/bezier.h cglm_calldir=$(includedir)/cglm/call cglm_call_HEADERS = include/cglm/call/mat4.h \ @@ -74,10 +76,14 @@ cglm_call_HEADERS = include/cglm/call/mat4.h \ include/cglm/call/box.h \ include/cglm/call/project.h \ 
include/cglm/call/sphere.h \ - include/cglm/call/ease.h + include/cglm/call/ease.h \ + include/cglm/call/curve.h \ + include/cglm/call/bezier.h cglm_simddir=$(includedir)/cglm/simd -cglm_simd_HEADERS = include/cglm/simd/intrin.h +cglm_simd_HEADERS = include/cglm/simd/intrin.h \ + include/cglm/simd/x86.h \ + include/cglm/simd/arm.h cglm_simd_sse2dir=$(includedir)/cglm/simd/sse2 cglm_simd_sse2_HEADERS = include/cglm/simd/sse2/affine.h \ @@ -107,7 +113,9 @@ libcglm_la_SOURCES=\ src/box.c \ src/project.c \ src/sphere.c \ - src/ease.c + src/ease.c \ + src/curve.c \ + src/bezier.c test_tests_SOURCES=\ test/src/test_common.c \ @@ -121,7 +129,8 @@ test_tests_SOURCES=\ test/src/test_vec4.c \ test/src/test_vec3.c \ test/src/test_mat3.c \ - test/src/test_affine.c + test/src/test_affine.c \ + test/src/test_bezier.c all-local: sh ./post-build.sh diff --git a/src/bezier.c b/src/bezier.c new file mode 100644 index 0000000..21e6495 --- /dev/null +++ b/src/bezier.c @@ -0,0 +1,27 @@ +/* + * Copyright (c), Recep Aslantas. + * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#include "../include/cglm/cglm.h" +#include "../include/cglm/call.h" + +CGLM_EXPORT +float +glmc_bezier(float s, float p0, float c0, float c1, float p1) { /* call-style wrapper: passes every argument unchanged to glm_bezier and returns its result */ + return glm_bezier(s, p0, c0, c1, p1); +} + +CGLM_EXPORT +float +glmc_hermite(float s, float p0, float t0, float t1, float p1) { /* call-style wrapper: passes every argument unchanged to glm_hermite and returns its result */ + return glm_hermite(s, p0, t0, t1, p1); +} + +CGLM_EXPORT +float +glmc_decasteljau(float prm, float p0, float c0, float c1, float p1) { /* call-style wrapper: passes every argument unchanged to glm_decasteljau and returns its result */ + return glm_decasteljau(prm, p0, c0, c1, p1); +} diff --git a/src/curve.c b/src/curve.c new file mode 100644 index 0000000..74d4702 --- /dev/null +++ b/src/curve.c @@ -0,0 +1,15 @@ +/* + * Copyright (c), Recep Aslantas. 
+ * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#include "../include/cglm/cglm.h" +#include "../include/cglm/call.h" + +CGLM_EXPORT +float +glmc_smc(float s, mat4 m, vec4 c) { /* call-style wrapper: passes s, m and c unchanged to glm_smc and returns its result */ + return glm_smc(s, m, c); +} diff --git a/src/mat4.c b/src/mat4.c index b62420e..c648a6e 100644 --- a/src/mat4.c +++ b/src/mat4.c @@ -151,3 +151,9 @@ void glmc_mat4_swap_row(mat4 mat, int row1, int row2) { glm_mat4_swap_row(mat, row1, row2); } + +CGLM_EXPORT +float +glmc_mat4_rmc(vec4 r, mat4 m, vec4 c) { /* call-style wrapper: passes r, m and c unchanged to glm_mat4_rmc and returns its result */ + return glm_mat4_rmc(r, m, c); +} diff --git a/src/vec4.c b/src/vec4.c index 1a49710..0bb6a6e 100644 --- a/src/vec4.c +++ b/src/vec4.c @@ -206,6 +206,12 @@ glmc_vec4_lerp(vec4 from, vec4 to, float t, vec4 dest) { glm_vec4_lerp(from, to, t, dest); } +CGLM_EXPORT +void +glmc_vec4_cubic(float s, vec4 dest) { /* call-style wrapper: glm_vec4_cubic writes its output into dest, nothing is returned */ + glm_vec4_cubic(s, dest); +} + /* ext */ CGLM_EXPORT diff --git a/test/src/test_bezier.c b/test/src/test_bezier.c new file mode 100644 index 0000000..bd7c796 --- /dev/null +++ b/test/src/test_bezier.c @@ -0,0 +1,65 @@ +/* + * Copyright (c), Recep Aslantas. 
+ * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#include "test_common.h" + +CGLM_INLINE +float +test_bezier_plain(float s, float p0, float c0, float c1, float p1) { + float x, xx, xxx, ss, sss; + + x = 1.0f - s; /* x is the complement (1 - s); xx and xxx are its square and cube */ + xx = x * x; + xxx = xx * x; + ss = s * s; + sss = ss * s; + + return p0 * xxx + 3.0f * (c0 * s * xx + c1 * ss * x) + p1 * sss; /* p0 (1-s)^3 + 3 c0 s (1-s)^2 + 3 c1 s^2 (1-s) + p1 s^3 */ +} + +CGLM_INLINE +float +test_hermite_plain(float s, float p0, float t0, float t1, float p1) { + float ss, sss; + + ss = s * s; + sss = ss * s; + + return p0 * (2.0f * sss - 3.0f * ss + 1.0f) + + t0 * (sss - 2.0f * ss + s) + + p1 * (-2.0f * sss + 3.0f * ss) + + t1 * (sss - ss); /* reference value written out term by term: one cubic weight each for p0, t0, p1 and t1 */ +} + +void +test_bezier(void **state) { + float s, p0, p1, c0, c1, smc, Bs, Bs_plain; + + s = test_rand(); /* the parameter and all four curve values come from test_rand() */ + p0 = test_rand(); + p1 = test_rand(); + c0 = test_rand(); + c1 = test_rand(); + + /* test cubic bezier */ + smc = glm_smc(s, GLM_BEZIER_MAT, (vec4){p0, c0, c1, p1}); + Bs = glm_bezier(s, p0, c0, c1, p1); + Bs_plain = test_bezier_plain(s, p0, c0, c1, p1); + + assert_true(glm_eq(Bs, Bs_plain)); /* glm_bezier, glm_smc and the hand-written formula must agree pairwise */ + assert_true(glm_eq(smc, Bs_plain)); + assert_true(glm_eq(Bs, smc)); + + /* test cubic hermite */ + smc = glm_smc(s, GLM_HERMITE_MAT, (vec4){p0, p1, c0, c1}); /* vec4 order here is p0, p1, c0, c1, unlike the p0, c0, c1, p1 argument order passed to glm_hermite below */ + Bs = glm_hermite(s, p0, c0, c1, p1); + Bs_plain = test_hermite_plain(s, p0, c0, c1, p1); + + assert_true(glm_eq(Bs, Bs_plain)); /* same pairwise agreement check for the hermite forms */ + assert_true(glm_eq(smc, Bs_plain)); + assert_true(glm_eq(Bs, smc)); +} diff --git a/test/src/test_common.c b/test/src/test_common.c index 405000d..d41d3cb 100644 --- a/test/src/test_common.c +++ b/test/src/test_common.c @@ -58,7 +58,7 @@ test_rand_vec4(vec4 dest) { } float -test_rand_angle(void) { +test_rand(void) { /* NOTE(review): the srand() call below seeds rand(), while drand48() is seeded through srand48() in POSIX - confirm which generator is intended */ srand((unsigned int)time(NULL)); return drand48(); } diff --git a/test/src/test_common.h b/test/src/test_common.h index 7881e7a..8a16b0f 100644 --- a/test/src/test_common.h +++ b/test/src/test_common.h @@ -59,7 +59,7 @@ void test_rand_vec4(vec4 dest) ; float -test_rand_angle(void); 
+test_rand(void); void test_rand_quat(versor q); diff --git a/test/src/test_main.c b/test/src/test_main.c index ff77b02..8ce1673 100644 --- a/test/src/test_main.c +++ b/test/src/test_main.c @@ -38,7 +38,10 @@ main(int argc, const char * argv[]) { cmocka_unit_test(test_vec3), /* affine */ - cmocka_unit_test(test_affine) + cmocka_unit_test(test_affine), + + /* bezier */ + cmocka_unit_test(test_bezier) }; return cmocka_run_group_tests(tests, NULL, NULL); diff --git a/test/src/test_tests.h b/test/src/test_tests.h index 7b9cf0a..618cc9f 100644 --- a/test/src/test_tests.h +++ b/test/src/test_tests.h @@ -40,4 +40,7 @@ test_vec3(void **state); void test_affine(void **state); +void +test_bezier(void **state); + #endif /* test_tests_h */ diff --git a/win/cglm.vcxproj b/win/cglm.vcxproj index 5678688..90e58a6 100644 --- a/win/cglm.vcxproj +++ b/win/cglm.vcxproj @@ -20,8 +20,10 @@ + + @@ -39,11 +41,14 @@ + + + @@ -60,6 +65,7 @@ + @@ -69,6 +75,7 @@ + @@ -77,6 +84,7 @@ + diff --git a/win/cglm.vcxproj.filters b/win/cglm.vcxproj.filters index 5e65853..dd66ee1 100644 --- a/win/cglm.vcxproj.filters +++ b/win/cglm.vcxproj.filters @@ -84,6 +84,12 @@ src + + src + + + src + @@ -233,5 +239,23 @@ include\cglm + + include\cglm\simd + + + include\cglm\simd + + + include\cglm\call + + + include\cglm + + + include\cglm + + + include\cglm\call + \ No newline at end of file