From b117f3bf80b8333245b56b5ec152aff90dfdabd0 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Mon, 21 Jan 2019 23:14:04 +0300 Subject: [PATCH 01/19] neon: add neon support for most vec4 operations --- include/cglm/simd/intrin.h | 18 ++++++++-- include/cglm/vec4.h | 71 ++++++++++++++++++++++++++++++++------ 2 files changed, 76 insertions(+), 13 deletions(-) diff --git a/include/cglm/simd/intrin.h b/include/cglm/simd/intrin.h index f4854bd..fb577ea 100644 --- a/include/cglm/simd/intrin.h +++ b/include/cglm/simd/intrin.h @@ -105,10 +105,24 @@ glmm_store3(__m128 vx, float v[3]) { #endif +#if defined(__SSE3__) +# include +#endif + +#if defined(__SSE4_1__) +# include +#endif + +#if defined(__SSE4_2__) +# include +#endif + /* ARM Neon */ -#if defined(__ARM_NEON) && defined(__ARM_NEON_FP) +#if defined(__ARM_NEON) # include -# define CGLM_NEON_FP 1 +# if defined(__ARM_NEON_FP) +# define CGLM_NEON_FP 1 +# endif #else # undef CGLM_NEON_FP #endif diff --git a/include/cglm/vec4.h b/include/cglm/vec4.h index 0c4f613..ad2fb45 100644 --- a/include/cglm/vec4.h +++ b/include/cglm/vec4.h @@ -122,6 +122,8 @@ void glm_vec4_copy(vec4 v, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, glmm_load(v)); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vld1q_f32(v)); #else dest[0] = v[0]; dest[1] = v[1]; @@ -157,6 +159,8 @@ void glm_vec4_zero(vec4 v) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(v, _mm_setzero_ps()); +#elif defined(CGLM_NEON_FP) + vst1q_f32(v, vdupq_n_f32(0.0f)); #else v[0] = 0.0f; v[1] = 0.0f; @@ -175,6 +179,8 @@ void glm_vec4_one(vec4 v) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(v, _mm_set1_ps(1.0f)); +#elif defined(CGLM_NEON_FP) + vst1q_f32(v, vdupq_n_f32(1.0f)); #else v[0] = 1.0f; v[1] = 1.0f; @@ -194,11 +200,24 @@ glm_vec4_one(vec4 v) { CGLM_INLINE float glm_vec4_dot(vec4 a, vec4 b) { -#if defined( __SSE__ ) || defined( __SSE2__ ) +#if (defined(__SSE4_1__) || defined(__SSE4_2__)) && defined(CGLM_SSE4_DOT) + 
return _mm_cvtss_f32(_mm_dp_ps(glmm_load(a), glmm_load(b), 0xFF)); +#elif defined(__SSE3__) && defined(CGLM_SSE3_DOT) + __m128 x0, x1; + x0 = _mm_mul_ps(glmm_load(a), glmm_load(b)); + x1 = _mm_hadd_ps(x0, x0); + return _mm_cvtss_f32(_mm_hadd_ps(x1, x1)); +#elif defined(__SSE__) || defined(__SSE2__) __m128 x0; x0 = _mm_mul_ps(glmm_load(a), glmm_load(b)); x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2)); return _mm_cvtss_f32(_mm_add_ss(x0, glmm_shuff1(x0, 0, 1, 0, 1))); +#elif defined(CGLM_NEON_FP) + float32x4_t v0, v1, v2; + v0 = vmulq_f32(vld1q_f32(a), vld1q_f32(b)); + v1 = vaddq_f32(v0, vrev64q_f32(v0)); + v2 = vaddq_f32(v1, vcombine_f32(vget_high_f32(v1), vget_low_f32(v1))); + return vgetq_lane_f32(v2, 0); #else return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]; #endif @@ -218,15 +237,7 @@ glm_vec4_dot(vec4 a, vec4 b) { CGLM_INLINE float glm_vec4_norm2(vec4 v) { -#if defined( __SSE__ ) || defined( __SSE2__ ) - __m128 x0; - x0 = glmm_load(v); - x0 = _mm_mul_ps(x0, x0); - x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2)); - return _mm_cvtss_f32(_mm_add_ss(x0, glmm_shuff1(x0, 0, 1, 0, 1))); -#else - return v[0] * v[0] + v[1] * v[1] + v[2] * v[2] + v[3] * v[3]; -#endif + return glm_vec4_dot(v, v); } /*! 
@@ -244,7 +255,7 @@ glm_vec4_norm(vec4 v) { x0 = glmm_load(v); return _mm_cvtss_f32(_mm_sqrt_ss(glmm_dot(x0, x0))); #else - return sqrtf(glm_vec4_norm2(v)); + return sqrtf(glm_vec4_dot(v, v)); #endif } @@ -260,6 +271,8 @@ void glm_vec4_add(vec4 a, vec4 b, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_add_ps(glmm_load(a), glmm_load(b))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vaddq_f32(vld1q_f32(a), vld1q_f32(b))); #else dest[0] = a[0] + b[0]; dest[1] = a[1] + b[1]; @@ -280,6 +293,8 @@ void glm_vec4_adds(vec4 v, float s, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_add_ps(glmm_load(v), _mm_set1_ps(s))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vaddq_f32(vld1q_f32(v), vdupq_n_f32(s))); #else dest[0] = v[0] + s; dest[1] = v[1] + s; @@ -300,6 +315,8 @@ void glm_vec4_sub(vec4 a, vec4 b, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_sub_ps(glmm_load(a), glmm_load(b))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vsubq_f32(vld1q_f32(a), vld1q_f32(b))); #else dest[0] = a[0] - b[0]; dest[1] = a[1] - b[1]; @@ -320,6 +337,8 @@ void glm_vec4_subs(vec4 v, float s, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_sub_ps(glmm_load(v), _mm_set1_ps(s))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vsubq_f32(vld1q_f32(v), vdupq_n_f32(s))); #else dest[0] = v[0] - s; dest[1] = v[1] - s; @@ -340,6 +359,8 @@ void glm_vec4_mul(vec4 a, vec4 b, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_mul_ps(glmm_load(a), glmm_load(b))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vmulq_f32(vld1q_f32(a), vld1q_f32(b))); #else dest[0] = a[0] * b[0]; dest[1] = a[1] * b[1]; @@ -360,6 +381,8 @@ void glm_vec4_scale(vec4 v, float s, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_mul_ps(glmm_load(v), _mm_set1_ps(s))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vmulq_f32(vld1q_f32(v), 
vdupq_n_f32(s))); #else dest[0] = v[0] * s; dest[1] = v[1] * s; @@ -442,6 +465,10 @@ glm_vec4_addadd(vec4 a, vec4 b, vec4 dest) { glmm_store(dest, _mm_add_ps(glmm_load(dest), _mm_add_ps(glmm_load(a), glmm_load(b)))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vaddq_f32(vld1q_f32(dest), + vaddq_f32(vld1q_f32(a), + vld1q_f32(b)))); #else dest[0] += a[0] + b[0]; dest[1] += a[1] + b[1]; @@ -466,6 +493,10 @@ glm_vec4_subadd(vec4 a, vec4 b, vec4 dest) { glmm_store(dest, _mm_add_ps(glmm_load(dest), _mm_sub_ps(glmm_load(a), glmm_load(b)))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vaddq_f32(vld1q_f32(dest), + vsubq_f32(vld1q_f32(a), + vld1q_f32(b)))); #else dest[0] += a[0] - b[0]; dest[1] += a[1] - b[1]; @@ -490,6 +521,10 @@ glm_vec4_muladd(vec4 a, vec4 b, vec4 dest) { glmm_store(dest, _mm_add_ps(glmm_load(dest), _mm_mul_ps(glmm_load(a), glmm_load(b)))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vaddq_f32(vld1q_f32(dest), + vmulq_f32(vld1q_f32(a), + vld1q_f32(b)))); #else dest[0] += a[0] * b[0]; dest[1] += a[1] * b[1]; @@ -514,6 +549,10 @@ glm_vec4_muladds(vec4 a, float s, vec4 dest) { glmm_store(dest, _mm_add_ps(glmm_load(dest), _mm_mul_ps(glmm_load(a), _mm_set1_ps(s)))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vaddq_f32(vld1q_f32(dest), + vmulq_f32(vld1q_f32(a), + vdupq_n_f32(s)))); #else dest[0] += a[0] * s; dest[1] += a[1] * s; @@ -538,6 +577,10 @@ glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) { glmm_store(dest, _mm_add_ps(glmm_load(dest), _mm_max_ps(glmm_load(a), glmm_load(b)))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vaddq_f32(vld1q_f32(dest), + vmaxq_f32(vld1q_f32(a), + vld1q_f32(b)))); #else dest[0] += glm_max(a[0], b[0]); dest[1] += glm_max(a[1], b[1]); @@ -562,6 +605,10 @@ glm_vec4_minadd(vec4 a, vec4 b, vec4 dest) { glmm_store(dest, _mm_add_ps(glmm_load(dest), _mm_min_ps(glmm_load(a), glmm_load(b)))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vaddq_f32(vld1q_f32(dest), + vminq_f32(vld1q_f32(a), + vld1q_f32(b)))); #else dest[0] +=
glm_min(a[0], b[0]); dest[1] += glm_min(a[1], b[1]); @@ -581,6 +628,8 @@ void glm_vec4_negate_to(vec4 v, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_xor_ps(glmm_load(v), _mm_set1_ps(-0.0f))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vnegq_f32(vld1q_f32(v))); #else dest[0] = -v[0]; dest[1] = -v[1]; From f0c2a2984e7a8243bb18c6d7f1070c462a1708a0 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Tue, 22 Jan 2019 09:05:38 +0300 Subject: [PATCH 02/19] simd, neon: add missing neon support for vec4 --- include/cglm/vec4.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/cglm/vec4.h b/include/cglm/vec4.h index ad2fb45..5d0e466 100644 --- a/include/cglm/vec4.h +++ b/include/cglm/vec4.h @@ -725,6 +725,8 @@ void glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_max_ps(glmm_load(a), glmm_load(b))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vmaxq_f32(vld1q_f32(a), vld1q_f32(b))); #else dest[0] = glm_max(a[0], b[0]); dest[1] = glm_max(a[1], b[1]); @@ -745,6 +747,8 @@ void glm_vec4_minv(vec4 a, vec4 b, vec4 dest) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(dest, _mm_min_ps(glmm_load(a), glmm_load(b))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest, vminq_f32(vld1q_f32(a), vld1q_f32(b))); #else dest[0] = glm_min(a[0], b[0]); dest[1] = glm_min(a[1], b[1]); @@ -766,6 +770,9 @@ glm_vec4_clamp(vec4 v, float minVal, float maxVal) { #if defined( __SSE__ ) || defined( __SSE2__ ) glmm_store(v, _mm_min_ps(_mm_max_ps(glmm_load(v), _mm_set1_ps(minVal)), _mm_set1_ps(maxVal))); +#elif defined(CGLM_NEON_FP) + vst1q_f32(v, vminq_f32(vmaxq_f32(vld1q_f32(v), vdupq_n_f32(minVal)), + vdupq_n_f32(maxVal))); #else v[0] = glm_clamp(v[0], minVal, maxVal); v[1] = glm_clamp(v[1], minVal, maxVal); From f65f1d491bec03b6de58fb12918e7083f7f5a1ce Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Tue, 22 Jan 2019 09:23:51 +0300 Subject: [PATCH 03/19]
simd: optimize vec4_distance with sse and neon --- include/cglm/vec4.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/cglm/vec4.h b/include/cglm/vec4.h index 5d0e466..03dc405 100644 --- a/include/cglm/vec4.h +++ b/include/cglm/vec4.h @@ -707,10 +707,25 @@ glm_vec4_normalize(vec4 v) { CGLM_INLINE float glm_vec4_distance(vec4 a, vec4 b) { +#if defined( __SSE__ ) || defined( __SSE2__ ) + __m128 x0; + x0 = _mm_sub_ps(glmm_load(b), glmm_load(a)); + x0 = _mm_mul_ps(x0, x0); + x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2)); + return _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(x0, + glmm_shuff1(x0, 0, 1, 0, 1)))); +#elif defined(CGLM_NEON_FP) + float32x4_t v0; + v0 = vsubq_f32(vld1q_f32(a), vld1q_f32(b)); + v0 = vmulq_f32(v0, v0); + v0 = vaddq_f32(v0, vrev64q_f32(v0)); /* pairwise sums; vaddvq_f32 is AArch64-only */ + return sqrtf(vgetq_lane_f32(v0, 0) + vgetq_lane_f32(v0, 2)); +#else return sqrtf(glm_pow2(b[0] - a[0]) + glm_pow2(b[1] - a[1]) + glm_pow2(b[2] - a[2]) + glm_pow2(b[3] - a[3])); +#endif } /*! From be6aa9a89a1a5132fc0defa7ae44c83c636c5937 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Tue, 22 Jan 2019 09:39:57 +0300 Subject: [PATCH 04/19] simd: optimize some mat4 operations with neon --- include/cglm/mat4.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/include/cglm/mat4.h b/include/cglm/mat4.h index ea3b34e..56c6de3 100644 --- a/include/cglm/mat4.h +++ b/include/cglm/mat4.h @@ -118,6 +118,11 @@ glm_mat4_copy(mat4 mat, mat4 dest) { glmm_store(dest[1], glmm_load(mat[1])); glmm_store(dest[2], glmm_load(mat[2])); glmm_store(dest[3], glmm_load(mat[3])); +#elif defined(CGLM_NEON_FP) + vst1q_f32(dest[0], vld1q_f32(mat[0])); + vst1q_f32(dest[1], vld1q_f32(mat[1])); + vst1q_f32(dest[2], vld1q_f32(mat[2])); + vst1q_f32(dest[3], vld1q_f32(mat[3])); #else glm_mat4_ucopy(mat, dest); #endif @@ -252,7 +257,7 @@ glm_mat4_mul(mat4 m1, mat4 m2, mat4 dest) { glm_mat4_mul_avx(m1, m2, dest); #elif defined( __SSE__ ) || defined( __SSE2__ ) glm_mat4_mul_sse2(m1, m2, dest); -#elif defined( __ARM_NEON_FP ) +#elif
defined(CGLM_NEON_FP) glm_mat4_mul_neon(m1, m2, dest); #else float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2], a03 = m1[0][3], @@ -506,6 +511,13 @@ void glm_mat4_scale(mat4 m, float s) { #if defined( __SSE__ ) || defined( __SSE2__ ) glm_mat4_scale_sse2(m, s); +#elif defined(CGLM_NEON_FP) + float32x4_t v0; + v0 = vdupq_n_f32(s); + vst1q_f32(m[0], vmulq_f32(vld1q_f32(m[0]), v0)); + vst1q_f32(m[1], vmulq_f32(vld1q_f32(m[1]), v0)); + vst1q_f32(m[2], vmulq_f32(vld1q_f32(m[2]), v0)); + vst1q_f32(m[3], vmulq_f32(vld1q_f32(m[3]), v0)); #else glm_mat4_scale_p(m, s); #endif From 31bb303c55fde6d93aa52c9083e64fd1b2b7be54 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Thu, 24 Jan 2019 10:17:49 +0300 Subject: [PATCH 05/19] simd: organise SIMD-functions * optimize dot product --- .gitignore | 1 + CREDITS | 3 + include/cglm/quat.h | 2 +- include/cglm/simd/arm.h | 41 +++++++++++ include/cglm/simd/intrin.h | 114 ++++++++++--------------------- include/cglm/simd/x86.h | 136 +++++++++++++++++++++++++++++++++++++ include/cglm/vec4.h | 28 ++------ makefile.am | 52 +++++++------- win/cglm.vcxproj | 2 + win/cglm.vcxproj.filters | 6 ++ 10 files changed, 259 insertions(+), 126 deletions(-) create mode 100644 include/cglm/simd/arm.h create mode 100644 include/cglm/simd/x86.h diff --git a/.gitignore b/.gitignore index d500b97..195a82c 100644 --- a/.gitignore +++ b/.gitignore @@ -69,3 +69,4 @@ win/cglm_test_* win/x64 win/x85 win/Debug +cglm-test-ios* diff --git a/CREDITS b/CREDITS index 0488bad..263dd2d 100644 --- a/CREDITS +++ b/CREDITS @@ -52,3 +52,6 @@ https://gamedev.stackexchange.com/questions/28395/rotating-vector3-by-a-quaterni 9. Sphere AABB intersect https://github.com/erich666/GraphicsGems/blob/master/gems/BoxSphere.c + +10. 
Horizontal add +https://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-float-vector-sum-on-x86 diff --git a/include/cglm/quat.h b/include/cglm/quat.h index 1db0161..f5f29af 100644 --- a/include/cglm/quat.h +++ b/include/cglm/quat.h @@ -218,7 +218,7 @@ glm_quat_normalize_to(versor q, versor dest) { float dot; x0 = glmm_load(q); - xdot = glmm_dot(x0, x0); + xdot = glmm_vdot(x0, x0); dot = _mm_cvtss_f32(xdot); if (dot <= 0.0f) { diff --git a/include/cglm/simd/arm.h b/include/cglm/simd/arm.h new file mode 100644 index 0000000..5412461 --- /dev/null +++ b/include/cglm/simd/arm.h @@ -0,0 +1,41 @@ +/* + * Copyright (c), Recep Aslantas. + * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#ifndef cglm_simd_arm_h +#define cglm_simd_arm_h +#include "intrin.h" +#ifdef CGLM_SIMD_ARM + +#define glmm_load(p) vld1q_f32(p) +#define glmm_store(p, a) vst1q_f32(p, a) + +static inline +float +glmm_hadd(float32x4_t v) { +#if defined(__aarch64__) + return vaddvq_f32(v); +#else + v = vaddq_f32(v, vrev64q_f32(v)); + v = vaddq_f32(v, vcombine_f32(vget_high_f32(v), vget_low_f32(v))); + return vgetq_lane_f32(v, 0); +#endif +} + +static inline +float +glmm_dot(float32x4_t a, float32x4_t b) { + return glmm_hadd(vmulq_f32(a, b)); +} + +static inline +float +glmm_norm(float32x4_t a) { + return sqrtf(glmm_dot(a, a)); +} + +#endif +#endif /* cglm_simd_arm_h */ diff --git a/include/cglm/simd/intrin.h b/include/cglm/simd/intrin.h index fb577ea..a44b905 100644 --- a/include/cglm/simd/intrin.h +++ b/include/cglm/simd/intrin.h @@ -27,94 +27,39 @@ #if defined( __SSE__ ) || defined( __SSE2__ ) # include # include - -/* OPTIONAL: You may save some instructions but latency (not sure) */ -#ifdef CGLM_USE_INT_DOMAIN -# define glmm_shuff1(xmm, z, y, x, w) \ - _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \ - _MM_SHUFFLE(z, y, x, w))) -#else -# define glmm_shuff1(xmm, z, y, x, w) \ - _mm_shuffle_ps(xmm, xmm, 
_MM_SHUFFLE(z, y, x, w)) -#endif - -#define glmm_shuff1x(xmm, x) glmm_shuff1(xmm, x, x, x, x) -#define glmm_shuff2(a, b, z0, y0, x0, w0, z1, y1, x1, w1) \ - glmm_shuff1(_mm_shuffle_ps(a, b, _MM_SHUFFLE(z0, y0, x0, w0)), \ - z1, y1, x1, w1) - -static inline -__m128 -glmm_dot(__m128 a, __m128 b) { - __m128 x0; - x0 = _mm_mul_ps(a, b); - x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2)); - return _mm_add_ps(x0, glmm_shuff1(x0, 0, 1, 0, 1)); -} - -static inline -__m128 -glmm_norm(__m128 a) { - return _mm_sqrt_ps(glmm_dot(a, a)); -} - -static inline -__m128 -glmm_load3(float v[3]) { - __m128i xy; - __m128 z; - - xy = _mm_loadl_epi64((const __m128i *)v); - z = _mm_load_ss(&v[2]); - - return _mm_movelh_ps(_mm_castsi128_ps(xy), z); -} - -static inline -void -glmm_store3(__m128 vx, float v[3]) { - _mm_storel_pi((__m64 *)&v[0], vx); - _mm_store_ss(&v[2], glmm_shuff1(vx, 2, 2, 2, 2)); -} - -#ifdef CGLM_ALL_UNALIGNED -# define glmm_load(p) _mm_loadu_ps(p) -# define glmm_store(p, a) _mm_storeu_ps(p, a) -#else -# define glmm_load(p) _mm_load_ps(p) -# define glmm_store(p, a) _mm_store_ps(p, a) -#endif - -#endif - -/* x86, x64 */ -#if defined( __SSE__ ) || defined( __SSE2__ ) # define CGLM_SSE_FP 1 -#endif - -#ifdef __AVX__ -# define CGLM_AVX_FP 1 - -#ifdef CGLM_ALL_UNALIGNED -# define glmm_load256(p) _mm256_loadu_ps(p) -# define glmm_store256(p, a) _mm256_storeu_ps(p, a) -#else -# define glmm_load256(p) _mm256_load_ps(p) -# define glmm_store256(p, a) _mm256_store_ps(p, a) -#endif - +# ifndef CGLM_SIMD_x86 +# define CGLM_SIMD_x86 +# endif #endif #if defined(__SSE3__) # include +# ifndef CGLM_SIMD_x86 +# define CGLM_SIMD_x86 +# endif #endif #if defined(__SSE4_1__) # include +# ifndef CGLM_SIMD_x86 +# define CGLM_SIMD_x86 +# endif #endif #if defined(__SSE4_2__) # include +# ifndef CGLM_SIMD_x86 +# define CGLM_SIMD_x86 +# endif +#endif + +#ifdef __AVX__ +# include +# define CGLM_AVX_FP 1 +# ifndef CGLM_SIMD_x86 +# define CGLM_SIMD_x86 +# endif #endif /* ARM Neon */ @@ -122,9 +67,24 @@ 
glmm_store3(__m128 vx, float v[3]) { # include # if defined(__ARM_NEON_FP) # define CGLM_NEON_FP 1 +# ifndef CGLM_SIMD_ARM +# define CGLM_SIMD_ARM +# endif # endif -#else -# undef CGLM_NEON_FP +#endif + +#if defined(CGLM_SIMD_x86) || defined(CGLM_NEON_FP) +# ifndef CGLM_SIMD +# define CGLM_SIMD +# endif +#endif + +#if defined(CGLM_SIMD_x86) +# include "x86.h" +#endif + +#if defined(CGLM_SIMD_ARM) +# include "arm.h" #endif #endif /* cglm_intrin_h */ diff --git a/include/cglm/simd/x86.h b/include/cglm/simd/x86.h new file mode 100644 index 0000000..520a834 --- /dev/null +++ b/include/cglm/simd/x86.h @@ -0,0 +1,136 @@ +/* + * Copyright (c), Recep Aslantas. + * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#ifndef cglm_simd_x86_h +#define cglm_simd_x86_h +#include "intrin.h" +#ifdef CGLM_SIMD_x86 + +#ifdef CGLM_ALL_UNALIGNED +# define glmm_load(p) _mm_loadu_ps(p) +# define glmm_store(p, a) _mm_storeu_ps(p, a) +#else +# define glmm_load(p) _mm_load_ps(p) +# define glmm_store(p, a) _mm_store_ps(p, a) +#endif + +#ifdef CGLM_USE_INT_DOMAIN +# define glmm_shuff1(xmm, z, y, x, w) \ + _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \ + _MM_SHUFFLE(z, y, x, w))) +#else +# define glmm_shuff1(xmm, z, y, x, w) \ + _mm_shuffle_ps(xmm, xmm, _MM_SHUFFLE(z, y, x, w)) +#endif + +#define glmm_shuff1x(xmm, x) glmm_shuff1(xmm, x, x, x, x) +#define glmm_shuff2(a, b, z0, y0, x0, w0, z1, y1, x1, w1) \ + glmm_shuff1(_mm_shuffle_ps(a, b, _MM_SHUFFLE(z0, y0, x0, w0)), \ + z1, y1, x1, w1) + +#ifdef __AVX__ +# ifdef CGLM_ALL_UNALIGNED +# define glmm_load256(p) _mm256_loadu_ps(p) +# define glmm_store256(p, a) _mm256_storeu_ps(p, a) +# else +# define glmm_load256(p) _mm256_load_ps(p) +# define glmm_store256(p, a) _mm256_store_ps(p, a) +# endif +#endif + +static inline +__m128 +glmm_vhadds(__m128 v) { +#if defined(__SSE3__) + __m128 shuf, sums; + shuf = _mm_movehdup_ps(v); + sums = _mm_add_ps(v, shuf); + shuf = 
_mm_movehl_ps(shuf, sums); + sums = _mm_add_ss(sums, shuf); + return sums; +#else + __m128 shuf, sums; + shuf = glmm_shuff1(v, 2, 3, 0, 1); + sums = _mm_add_ps(v, shuf); + shuf = _mm_movehl_ps(shuf, sums); + sums = _mm_add_ss(sums, shuf); + return sums; +#endif +} + +static inline +float +glmm_hadd(__m128 v) { + return _mm_cvtss_f32(glmm_vhadds(v)); +} + +static inline +__m128 +glmm_vdots(__m128 a, __m128 b) { +#if (defined(__SSE4_1__) || defined(__SSE4_2__)) && defined(CGLM_SSE4_DOT) + return _mm_dp_ps(glmm_load(a), glmm_load(b), 0xFF); +#elif defined(__SSE3__) && defined(CGLM_SSE3_DOT) + __m128 x0, x1; + x0 = _mm_mul_ps(glmm_load(a), glmm_load(b)); + x1 = _mm_hadd_ps(x0, x0); + return _mm_hadd_ps(x1, x1); +#else + return glmm_vhadds(_mm_mul_ps(a, b)); +#endif +} + +static inline +__m128 +glmm_vdot(__m128 a, __m128 b) { +#if (defined(__SSE4_1__) || defined(__SSE4_2__)) && defined(CGLM_SSE4_DOT) + return _mm_dp_ps(glmm_load(a), glmm_load(b), 0xFF); +#elif defined(__SSE3__) && defined(CGLM_SSE3_DOT) + __m128 x0, x1; + x0 = _mm_mul_ps(glmm_load(a), glmm_load(b)); + x1 = _mm_hadd_ps(x0, x0); + return _mm_hadd_ps(x1, x1); +#else + __m128 x0; + x0 = _mm_mul_ps(a, b); + x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2)); + return _mm_add_ps(x0, glmm_shuff1(x0, 0, 1, 0, 1)); +#endif +} + +static inline +float +glmm_dot(__m128 a, __m128 b) { + return _mm_cvtss_f32(glmm_vdots(a, b)); +} + +static inline +float +glmm_norm(__m128 a) { + return _mm_cvtss_f32(_mm_sqrt_ss(glmm_vhadds(_mm_mul_ps(a, a)))); +} + +static inline +__m128 +glmm_load3(float v[3]) { + __m128i xy; + __m128 z; + + xy = _mm_loadl_epi64((const __m128i *)v); + z = _mm_load_ss(&v[2]); + + return _mm_movelh_ps(_mm_castsi128_ps(xy), z); +} + +static inline +void +glmm_store3(__m128 vx, float v[3]) { + _mm_storel_pi((__m64 *)&v[0], vx); + _mm_store_ss(&v[2], glmm_shuff1(vx, 2, 2, 2, 2)); +} + +#endif +#endif /* cglm_simd_x86_h */ diff --git a/include/cglm/vec4.h b/include/cglm/vec4.h index 03dc405..9da0d96 100644 
--- a/include/cglm/vec4.h +++ b/include/cglm/vec4.h @@ -200,24 +200,8 @@ glm_vec4_one(vec4 v) { CGLM_INLINE float glm_vec4_dot(vec4 a, vec4 b) { -#if (defined(__SSE4_1__) || defined(__SSE4_2__)) && defined(CGLM_SSE4_DOT) - return _mm_cvtss_f32(_mm_dp_ps(glmm_load(a), glmm_load(b), 0xFF)); -#elif defined(__SSE3__) && defined(CGLM_SSE3_DOT) - __m128 x0, x1; - x0 = _mm_mul_ps(glmm_load(a), glmm_load(b)); - x1 = _mm_hadd_ps(x0, x0); - return _mm_cvtss_f32(_mm_hadd_ps(x1, x1)); -#elif defined(__SSE__) || defined(__SSE2__) - __m128 x0; - x0 = _mm_mul_ps(glmm_load(a), glmm_load(b)); - x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2)); - return _mm_cvtss_f32(_mm_add_ss(x0, glmm_shuff1(x0, 0, 1, 0, 1))); -#elif defined(CGLM_NEON_FP) - float32x4_t v0, v1, v2; - v0 = vmulq_f32(vld1q_f32(a), vld1q_f32(b)); - v1 = vaddq_f32(v0, vrev64q_f32(v0)); - v2 = vaddq_f32(v1, vcombine_f32(vget_high_f32(v1), vget_low_f32(v1))); - return vgetq_lane_f32(v2, 0); +#if defined(CGLM_SIMD) + return glmm_dot(glmm_load(a), glmm_load(b)); #else return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]; #endif @@ -250,10 +234,8 @@ glm_vec4_norm2(vec4 v) { CGLM_INLINE float glm_vec4_norm(vec4 v) { -#if defined( __SSE__ ) || defined( __SSE2__ ) - __m128 x0; - x0 = glmm_load(v); - return _mm_cvtss_f32(_mm_sqrt_ss(glmm_dot(x0, x0))); +#if defined(CGLM_SIMD) + return glmm_norm(glmm_load(v)); #else return sqrtf(glm_vec4_dot(v, v)); #endif @@ -663,7 +645,7 @@ glm_vec4_normalize_to(vec4 v, vec4 dest) { float dot; x0 = glmm_load(v); - xdot = glmm_dot(x0, x0); + xdot = glmm_vdot(x0, x0); dot = _mm_cvtss_f32(xdot); if (dot == 0.0f) { diff --git a/makefile.am b/makefile.am index 2e9336c..63fa285 100644 --- a/makefile.am +++ b/makefile.am @@ -34,30 +34,30 @@ test_tests_CFLAGS = $(checkCFLAGS) cglmdir=$(includedir)/cglm cglm_HEADERS = include/cglm/version.h \ - include/cglm/cglm.h \ - include/cglm/call.h \ - include/cglm/cam.h \ - include/cglm/io.h \ - include/cglm/mat4.h \ - include/cglm/mat3.h \ - 
include/cglm/types.h \ - include/cglm/common.h \ - include/cglm/affine.h \ - include/cglm/vec3.h \ - include/cglm/vec3-ext.h \ - include/cglm/vec4.h \ - include/cglm/vec4-ext.h \ - include/cglm/euler.h \ - include/cglm/util.h \ - include/cglm/quat.h \ - include/cglm/affine-mat.h \ - include/cglm/plane.h \ - include/cglm/frustum.h \ - include/cglm/box.h \ - include/cglm/color.h \ - include/cglm/project.h \ - include/cglm/sphere.h \ - include/cglm/ease.h + include/cglm/cglm.h \ + include/cglm/call.h \ + include/cglm/cam.h \ + include/cglm/io.h \ + include/cglm/mat4.h \ + include/cglm/mat3.h \ + include/cglm/types.h \ + include/cglm/common.h \ + include/cglm/affine.h \ + include/cglm/vec3.h \ + include/cglm/vec3-ext.h \ + include/cglm/vec4.h \ + include/cglm/vec4-ext.h \ + include/cglm/euler.h \ + include/cglm/util.h \ + include/cglm/quat.h \ + include/cglm/affine-mat.h \ + include/cglm/plane.h \ + include/cglm/frustum.h \ + include/cglm/box.h \ + include/cglm/color.h \ + include/cglm/project.h \ + include/cglm/sphere.h \ + include/cglm/ease.h cglm_calldir=$(includedir)/cglm/call cglm_call_HEADERS = include/cglm/call/mat4.h \ @@ -77,7 +77,9 @@ cglm_call_HEADERS = include/cglm/call/mat4.h \ include/cglm/call/ease.h cglm_simddir=$(includedir)/cglm/simd -cglm_simd_HEADERS = include/cglm/simd/intrin.h +cglm_simd_HEADERS = include/cglm/simd/intrin.h \ + include/cglm/simd/x86.h \ + include/cglm/simd/arm.h cglm_simd_sse2dir=$(includedir)/cglm/simd/sse2 cglm_simd_sse2_HEADERS = include/cglm/simd/sse2/affine.h \ diff --git a/win/cglm.vcxproj b/win/cglm.vcxproj index 5678688..97d9d08 100644 --- a/win/cglm.vcxproj +++ b/win/cglm.vcxproj @@ -69,6 +69,7 @@ + @@ -77,6 +78,7 @@ + diff --git a/win/cglm.vcxproj.filters b/win/cglm.vcxproj.filters index 5e65853..a668242 100644 --- a/win/cglm.vcxproj.filters +++ b/win/cglm.vcxproj.filters @@ -233,5 +233,11 @@ include\cglm + + include\cglm\simd + + + include\cglm\simd + \ No newline at end of file From 
fc7f958167944c2a568a0748b0ab3c159ec9629d Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Fri, 25 Jan 2019 21:56:17 +0300 Subject: [PATCH 06/19] simd: remove re-load in SSE4 and SSE3 --- include/cglm/simd/x86.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/cglm/simd/x86.h b/include/cglm/simd/x86.h index 520a834..99d2b8a 100644 --- a/include/cglm/simd/x86.h +++ b/include/cglm/simd/x86.h @@ -72,10 +72,10 @@ static inline __m128 glmm_vdots(__m128 a, __m128 b) { #if (defined(__SSE4_1__) || defined(__SSE4_2__)) && defined(CGLM_SSE4_DOT) - return _mm_dp_ps(glmm_load(a), glmm_load(b), 0xFF); + return _mm_dp_ps(a, b, 0xFF); #elif defined(__SSE3__) && defined(CGLM_SSE3_DOT) __m128 x0, x1; - x0 = _mm_mul_ps(glmm_load(a), glmm_load(b)); + x0 = _mm_mul_ps(a, b); x1 = _mm_hadd_ps(x0, x0); return _mm_hadd_ps(x1, x1); #else @@ -87,10 +87,10 @@ static inline __m128 glmm_vdot(__m128 a, __m128 b) { #if (defined(__SSE4_1__) || defined(__SSE4_2__)) && defined(CGLM_SSE4_DOT) - return _mm_dp_ps(glmm_load(a), glmm_load(b), 0xFF); + return _mm_dp_ps(a, b, 0xFF); #elif defined(__SSE3__) && defined(CGLM_SSE3_DOT) __m128 x0, x1; - x0 = _mm_mul_ps(glmm_load(a), glmm_load(b)); + x0 = _mm_mul_ps(a, b); x1 = _mm_hadd_ps(x0, x0); return _mm_hadd_ps(x1, x1); #else From 59b9e54879c30f380c15d9799b6976bb72fadfb6 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Sat, 26 Jan 2019 15:54:10 +0300 Subject: [PATCH 07/19] vec4: helper to fill vec4 as [S^3, S^2, S, 1] --- include/cglm/call/vec4.h | 4 ++++ include/cglm/vec4.h | 19 +++++++++++++++++++ src/vec4.c | 6 ++++++ 3 files changed, 29 insertions(+) diff --git a/include/cglm/call/vec4.h b/include/cglm/call/vec4.h index 9a72510..936bb52 100644 --- a/include/cglm/call/vec4.h +++ b/include/cglm/call/vec4.h @@ -153,6 +153,10 @@ CGLM_EXPORT void glmc_vec4_lerp(vec4 from, vec4 to, float t, vec4 dest); +CGLM_EXPORT +void +glmc_vec4_cubic(float s, vec4 dest); + /* ext */ CGLM_EXPORT diff --git a/include/cglm/vec4.h 
b/include/cglm/vec4.h index 9da0d96..1d68841 100644 --- a/include/cglm/vec4.h +++ b/include/cglm/vec4.h @@ -800,4 +800,23 @@ glm_vec4_lerp(vec4 from, vec4 to, float t, vec4 dest) { glm_vec4_add(from, v, dest); } +/*! + * @brief helper to fill vec4 as [S^3, S^2, S, 1] + * + * @param[in] s parameter + * @param[out] dest destination + */ +CGLM_INLINE +void +glm_vec4_cubic(float s, vec4 dest) { + float ss; + + ss = s * s; + + dest[0] = ss * s; + dest[1] = ss; + dest[2] = s; + dest[3] = 1.0f; +} + #endif /* cglm_vec4_h */ diff --git a/src/vec4.c b/src/vec4.c index 1a49710..0bb6a6e 100644 --- a/src/vec4.c +++ b/src/vec4.c @@ -206,6 +206,12 @@ glmc_vec4_lerp(vec4 from, vec4 to, float t, vec4 dest) { glm_vec4_lerp(from, to, t, dest); } +CGLM_EXPORT +void +glmc_vec4_cubic(float s, vec4 dest) { + glm_vec4_cubic(s, dest); +} + /* ext */ CGLM_EXPORT From 807d5589b4b6c8a775fda77a9e4159ed55f39d06 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Sat, 26 Jan 2019 16:05:11 +0300 Subject: [PATCH 08/19] call: add missing end guard to call headers --- include/cglm/call/ease.h | 3 +++ include/cglm/call/sphere.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/include/cglm/call/ease.h b/include/cglm/call/ease.h index 9f1757e..87e39ca 100644 --- a/include/cglm/call/ease.h +++ b/include/cglm/call/ease.h @@ -137,4 +137,7 @@ CGLM_EXPORT float glmc_ease_bounce_inout(float t); +#ifdef __cplusplus +} +#endif #endif /* cglmc_ease_h */ diff --git a/include/cglm/call/sphere.h b/include/cglm/call/sphere.h index 02c3d55..9b96546 100644 --- a/include/cglm/call/sphere.h +++ b/include/cglm/call/sphere.h @@ -33,4 +33,7 @@ CGLM_EXPORT bool glmc_sphere_point(vec4 s, vec3 point); +#ifdef __cplusplus +} +#endif #endif /* cglmc_sphere_h */ From 32ddf49756461300fce6ee6e24be34a968066c7b Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Sat, 26 Jan 2019 18:05:05 +0300 Subject: [PATCH 09/19] mat4: helper for row * matrix * column --- docs/source/mat4.rst | 18 ++++++++++++++++++ docs/source/vec4.rst |
14 +++++++++----- include/cglm/call/mat4.h | 4 ++++ include/cglm/mat4.h | 22 ++++++++++++++++++++++ src/mat4.c | 6 ++++++ 5 files changed, 59 insertions(+), 5 deletions(-) diff --git a/docs/source/mat4.rst b/docs/source/mat4.rst index 30f48fe..294f8f4 100644 --- a/docs/source/mat4.rst +++ b/docs/source/mat4.rst @@ -45,6 +45,7 @@ Functions: #. :c:func:`glm_mat4_inv_fast` #. :c:func:`glm_mat4_swap_col` #. :c:func:`glm_mat4_swap_row` +#. :c:func:`glm_mat4_rmc` Functions documentation ~~~~~~~~~~~~~~~~~~~~~~~ @@ -270,3 +271,20 @@ Functions documentation | *[in, out]* **mat** matrix | *[in]* **row1** row1 | *[in]* **row2** row2 + +.. c:function:: float glm_mat4_rmc(vec4 r, mat4 m, vec4 c) + + | **rmc** stands for **Row** * **Matrix** * **Column** + + | helper for R (row vector) * M (matrix) * C (column vector) + + | the result is scalar because R * M = Matrix1x4 (row vector), + | then Matrix1x4 * Vec4 (column vector) = Matrix1x1 (Scalar) + + Parameters: + | *[in]* **r** row vector or matrix1x4 + | *[in]* **m** matrix4x4 + | *[in]* **c** column vector or matrix4x1 + + Returns: + scalar value e.g. Matrix1x1 diff --git a/docs/source/vec4.rst b/docs/source/vec4.rst index 5bb1ac7..f497868 100644 --- a/docs/source/vec4.rst +++ b/docs/source/vec4.rst @@ -58,11 +58,7 @@ Functions: #. :c:func:`glm_vec4_minv` #. :c:func:`glm_vec4_clamp` #. :c:func:`glm_vec4_lerp` -#. :c:func:`glm_vec4_isnan` -#. :c:func:`glm_vec4_isinf` -#. :c:func:`glm_vec4_isvalid` -#. :c:func:`glm_vec4_sign` -#. :c:func:`glm_vec4_sqrt` +#. :c:func:`glm_vec4_cubic` Functions documentation ~~~~~~~~~~~~~~~~~~~~~~~ @@ -401,3 +397,11 @@ Functions documentation | *[in]* **to** to value | *[in]* **t** interpolant (amount) clamped between 0 and 1 | *[out]* **dest** destination + +..
c:function:: void glm_vec4_cubic(float s, vec4 dest) + + helper to fill vec4 as [S^3, S^2, S, 1] + + Parameters: + | *[in]* **s** parameter + | *[out]* **dest** destination diff --git a/include/cglm/call/mat4.h b/include/cglm/call/mat4.h index 7e76f73..54fbcbe 100644 --- a/include/cglm/call/mat4.h +++ b/include/cglm/call/mat4.h @@ -113,6 +113,10 @@ CGLM_EXPORT void glmc_mat4_swap_row(mat4 mat, int row1, int row2); +CGLM_EXPORT +float +glmc_mat4_rmc(vec4 r, mat4 m, vec4 c); + #ifdef __cplusplus } #endif diff --git a/include/cglm/mat4.h b/include/cglm/mat4.h index 56c6de3..35d4117 100644 --- a/include/cglm/mat4.h +++ b/include/cglm/mat4.h @@ -677,4 +677,26 @@ glm_mat4_swap_row(mat4 mat, int row1, int row2) { mat[3][row2] = tmp[3]; } +/*! + * @brief helper for R (row vector) * M (matrix) * C (column vector) + * + * rmc stands for Row * Matrix * Column + * + * the result is scalar because R * M = Matrix1x4 (row vector), + * then Matrix1x4 * Vec4 (column vector) = Matrix1x1 (Scalar) + * + * @param[in] r row vector or matrix1x4 + * @param[in] m matrix4x4 + * @param[in] c column vector or matrix4x1 + * + * @return scalar value e.g.
B(s) + */ +CGLM_INLINE +float +glm_mat4_rmc(vec4 r, mat4 m, vec4 c) { + vec4 tmp; + glm_mat4_mulv(m, c, tmp); + return glm_vec4_dot(r, tmp); +} + #endif /* cglm_mat_h */ diff --git a/src/mat4.c b/src/mat4.c index b62420e..c648a6e 100644 --- a/src/mat4.c +++ b/src/mat4.c @@ -151,3 +151,9 @@ void glmc_mat4_swap_row(mat4 mat, int row1, int row2) { glm_mat4_swap_row(mat, row1, row2); } + +CGLM_EXPORT +float +glmc_mat4_rmc(vec4 r, mat4 m, vec4 c) { + return glm_mat4_rmc(r, m, c); +} From 60cb4beb0a7adc8073f559c6ff16df618604a074 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Sat, 26 Jan 2019 18:06:26 +0300 Subject: [PATCH 10/19] curve: helper for calculate result of SMC multiplication --- docs/source/api.rst | 1 + docs/source/curve.rst | 41 +++++++++++++++++++++++++++++++++++++++ include/cglm/call.h | 1 + include/cglm/call/curve.h | 23 ++++++++++++++++++++++ include/cglm/cglm.h | 1 + include/cglm/curve.h | 40 ++++++++++++++++++++++++++++++++++++++ makefile.am | 9 ++++++--- src/curve.c | 15 ++++++++++++++ win/cglm.vcxproj | 3 +++ win/cglm.vcxproj.filters | 9 +++++++++ 10 files changed, 140 insertions(+), 3 deletions(-) create mode 100644 docs/source/curve.rst create mode 100644 include/cglm/call/curve.h create mode 100644 include/cglm/curve.h create mode 100644 src/curve.c diff --git a/docs/source/api.rst b/docs/source/api.rst index e88b426..c7f74a5 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -46,3 +46,4 @@ Follow the :doc:`build` documentation for this io call sphere + curve diff --git a/docs/source/curve.rst b/docs/source/curve.rst new file mode 100644 index 0000000..26c9b75 --- /dev/null +++ b/docs/source/curve.rst @@ -0,0 +1,41 @@ +.. default-domain:: C + +Curve +================================================================================ + +Header: cglm/curve.h + +Common helpers for common curves.
For specific curve see its header/doc +e.g. bezier + +Table of contents (click to go): +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Functions: + +1. :c:func:`glm_smc` + +Functions documentation +~~~~~~~~~~~~~~~~~~~~~~~ + +.. c:function:: float glm_smc(float s, mat4 m, vec4 c) + + | helper function to calculate **S** * **M** * **C** multiplication for curves + + | this function does not encourage you to use SMC, instead it is a helper if you use SMC. + + | if you want to specify S as vector then use more generic glm_mat4_rmc() func. + + | Example usage: + + .. code-block:: c + + Bs = glm_smc(s, GLM_BEZIER_MAT, (vec4){p0, c0, c1, p1}) + + Parameters: + | *[in]* **s** parameter between 0 and 1 (this will be [s3, s2, s, 1]) + | *[in]* **m** basis matrix + | *[in]* **c** position/control vector + + Returns: + scalar value e.g. Bs diff --git a/include/cglm/call.h b/include/cglm/call.h index b7fa6e1..7c1c8d7 100644 --- a/include/cglm/call.h +++ b/include/cglm/call.h @@ -27,6 +27,7 @@ extern "C" { #include "call/project.h" #include "call/sphere.h" #include "call/ease.h" +#include "call/curve.h" #ifdef __cplusplus } diff --git a/include/cglm/call/curve.h b/include/cglm/call/curve.h new file mode 100644 index 0000000..061fdb9 --- /dev/null +++ b/include/cglm/call/curve.h @@ -0,0 +1,23 @@ +/* + * Copyright (c), Recep Aslantas.
+ * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#ifndef cglmc_curve_h +#define cglmc_curve_h +#ifdef __cplusplus +extern "C" { +#endif + +#include "../cglm.h" + +CGLM_EXPORT +float +glmc_smc(float s, mat4 m, vec4 c); + +#ifdef __cplusplus +} +#endif +#endif /* cglmc_curve_h */ diff --git a/include/cglm/cglm.h b/include/cglm/cglm.h index 8b37162..d79a88e 100644 --- a/include/cglm/cglm.h +++ b/include/cglm/cglm.h @@ -26,5 +26,6 @@ #include "project.h" #include "sphere.h" #include "ease.h" +#include "curve.h" #endif /* cglm_h */ diff --git a/include/cglm/curve.h b/include/cglm/curve.h new file mode 100644 index 0000000..5033be5 --- /dev/null +++ b/include/cglm/curve.h @@ -0,0 +1,40 @@ +/* + * Copyright (c), Recep Aslantas. + * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#ifndef cglm_curve_h +#define cglm_curve_h + +#include "common.h" +#include "vec4.h" +#include "mat4.h" + +/*! + * @brief helper function to calculate S*M*C multiplication for curves + * + * This function does not encourage you to use SMC, + * instead it is a helper if you use SMC. + * + * if you want to specify S as vector then use more generic glm_mat4_rmc() func. 
+ * + * Example usage: + * B(s) = glm_smc(s, GLM_BEZIER_MAT, (vec4){p0, c0, c1, p1}) + * + * @param[in] s parameter between 0 and 1 (this will be [s3, s2, s, 1]) + * @param[in] m basis matrix + * @param[in] c position/control vector + * + * @return B(s) + */ +CGLM_INLINE +float +glm_smc(float s, mat4 m, vec4 c) { + vec4 vs; + glm_vec4_cubic(s, vs); + return glm_mat4_rmc(vs, m, c); +} + +#endif /* cglm_curve_h */ diff --git a/makefile.am b/makefile.am index 63fa285..d6498c6 100644 --- a/makefile.am +++ b/makefile.am @@ -57,7 +57,8 @@ cglm_HEADERS = include/cglm/version.h \ include/cglm/color.h \ include/cglm/project.h \ include/cglm/sphere.h \ - include/cglm/ease.h + include/cglm/ease.h \ + include/cglm/curve.h cglm_calldir=$(includedir)/cglm/call cglm_call_HEADERS = include/cglm/call/mat4.h \ @@ -74,7 +75,8 @@ cglm_call_HEADERS = include/cglm/call/mat4.h \ include/cglm/call/box.h \ include/cglm/call/project.h \ include/cglm/call/sphere.h \ - include/cglm/call/ease.h + include/cglm/call/ease.h \ + include/cglm/call/curve.h cglm_simddir=$(includedir)/cglm/simd cglm_simd_HEADERS = include/cglm/simd/intrin.h \ @@ -109,7 +111,8 @@ libcglm_la_SOURCES=\ src/box.c \ src/project.c \ src/sphere.c \ - src/ease.c + src/ease.c \ + src/curve.c test_tests_SOURCES=\ test/src/test_common.c \ diff --git a/src/curve.c b/src/curve.c new file mode 100644 index 0000000..74d4702 --- /dev/null +++ b/src/curve.c @@ -0,0 +1,15 @@ +/* + * Copyright (c), Recep Aslantas. 
+ * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#include "../include/cglm/cglm.h" +#include "../include/cglm/call.h" + +CGLM_EXPORT +float +glmc_smc(float s, mat4 m, vec4 c) { + return glm_smc(s, m, c); +} diff --git a/win/cglm.vcxproj b/win/cglm.vcxproj index 97d9d08..82293f0 100644 --- a/win/cglm.vcxproj +++ b/win/cglm.vcxproj @@ -22,6 +22,7 @@ + @@ -44,6 +45,7 @@ + @@ -60,6 +62,7 @@ + diff --git a/win/cglm.vcxproj.filters b/win/cglm.vcxproj.filters index a668242..7f9735b 100644 --- a/win/cglm.vcxproj.filters +++ b/win/cglm.vcxproj.filters @@ -84,6 +84,9 @@ src + + src + @@ -239,5 +242,11 @@ include\cglm\simd + + include\cglm\call + + + include\cglm + \ No newline at end of file From 11a6e4471e6ffc4868c6cc1ac712671698ba54db Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Mon, 28 Jan 2019 14:26:02 +0300 Subject: [PATCH 11/19] fix vec4_cubic function --- include/cglm/vec4.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/cglm/vec4.h b/include/cglm/vec4.h index 1d68841..bbd1c31 100644 --- a/include/cglm/vec4.h +++ b/include/cglm/vec4.h @@ -813,9 +813,9 @@ glm_vec4_cubic(float s, vec4 dest) { ss = s * s; - dest[0] = s; + dest[0] = ss * s; dest[1] = ss; - dest[2] = ss * s; + dest[2] = s; dest[3] = 1.0f; } From b0e48a56ca4380c80d30f2faf0a896b7a9306554 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Mon, 28 Jan 2019 15:31:03 +0300 Subject: [PATCH 12/19] test: rename test_rand_angle() to test_rand() --- test/src/test_common.c | 2 +- test/src/test_common.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/src/test_common.c b/test/src/test_common.c index 405000d..d41d3cb 100644 --- a/test/src/test_common.c +++ b/test/src/test_common.c @@ -58,7 +58,7 @@ test_rand_vec4(vec4 dest) { } float -test_rand_angle(void) { +test_rand(void) { srand((unsigned int)time(NULL)); return drand48(); } diff --git a/test/src/test_common.h 
b/test/src/test_common.h index 7881e7a..8a16b0f 100644 --- a/test/src/test_common.h +++ b/test/src/test_common.h @@ -59,7 +59,7 @@ void test_rand_vec4(vec4 dest) ; float -test_rand_angle(void); +test_rand(void); void test_rand_quat(versor q); From 730cb1e9f782e0d9fc4ebece06be20f6ecf976e2 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Mon, 28 Jan 2019 15:32:24 +0300 Subject: [PATCH 13/19] add bezier helpers --- include/cglm/bezier.h | 56 ++++++++++++++++++++++++++++++++++++++ include/cglm/call/bezier.h | 23 ++++++++++++++++ include/cglm/cglm.h | 1 + src/bezier.c | 15 ++++++++++ test/src/test_bezier.c | 41 ++++++++++++++++++++++++++++ test/src/test_main.c | 5 +++- test/src/test_tests.h | 3 ++ 7 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 include/cglm/bezier.h create mode 100644 include/cglm/call/bezier.h create mode 100644 src/bezier.c create mode 100644 test/src/test_bezier.c diff --git a/include/cglm/bezier.h b/include/cglm/bezier.h new file mode 100644 index 0000000..f36ac1d --- /dev/null +++ b/include/cglm/bezier.h @@ -0,0 +1,56 @@ +/* + * Copyright (c), Recep Aslantas. + * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#ifndef cglm_bezier_h +#define cglm_bezier_h + +#define GLM_BEZIER_MAT_INIT {{-1.0f, 3.0f, -3.0f, 1.0f}, \ + { 3.0f, -6.0f, 3.0f, 0.0f}, \ + {-3.0f, 3.0f, 0.0f, 0.0f}, \ + { 1.0f, 0.0f, 0.0f, 0.0f}} + +/* for C only */ +#define GLM_BEZIER_MAT ((mat4)GLM_BEZIER_MAT_INIT) + +#define CGLM_DECASTEL_EPS 1e-9 +#define CGLM_DECASTEL_MAX 1000 +#define CGLM_DECASTEL_SMALL 1e-20 + +/*! 
+ * @brief cubic bezier interpolation + * + * Formula: + * B(s) = P0*(1-s)^3 + 3*C0*s*(1-s)^2 + 3*C1*s^2*(1-s) + P1*s^3 + * + * similar result using matrix: + * B(s) = glm_smc(t, GLM_BEZIER_MAT, (vec4){p0, c0, c1, p1}) + * + * glm_eq(glm_smc(...), glm_bezier(...)) should return TRUE + * + * @param[in] s parameter between 0 and 1 + * @param[in] p0 begin point + * @param[in] c0 control point 1 + * @param[in] c1 control point 2 + * @param[in] p1 end point + * + * @return B(s) + */ +CGLM_INLINE +float +glm_bezier(float s, float p0, float c0, float c1, float p1) { + float x, xx, ss, xs3, a; + + x = 1.0f - s; + xx = x * x; + ss = s * s; + xs3 = (s - ss) * 3.0f; + a = p0 * xx + c0 * xs3; + + return a + s * (c1 * xs3 + p1 * ss - a); +} + +#endif /* cglm_bezier_h */ diff --git a/include/cglm/call/bezier.h b/include/cglm/call/bezier.h new file mode 100644 index 0000000..531c15d --- /dev/null +++ b/include/cglm/call/bezier.h @@ -0,0 +1,23 @@ +/* + * Copyright (c), Recep Aslantas. + * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#ifndef cglmc_curve_h +#define cglmc_curve_h +#ifdef __cplusplus +extern "C" { +#endif + +#include "../cglm.h" + +CGLM_EXPORT +float +glmc_bezier(float s, float p0, float c0, float c1, float p1); + +#ifdef __cplusplus +} +#endif +#endif /* cglmc_curve_h */ diff --git a/include/cglm/cglm.h b/include/cglm/cglm.h index d79a88e..7c301bf 100644 --- a/include/cglm/cglm.h +++ b/include/cglm/cglm.h @@ -27,5 +27,6 @@ #include "sphere.h" #include "ease.h" #include "curve.h" +#include "bezier.h" #endif /* cglm_h */ diff --git a/src/bezier.c b/src/bezier.c new file mode 100644 index 0000000..244e580 --- /dev/null +++ b/src/bezier.c @@ -0,0 +1,15 @@ +/* + * Copyright (c), Recep Aslantas. 
+ * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#include "../include/cglm/cglm.h" +#include "../include/cglm/call.h" + +CGLM_EXPORT +float +glmc_bezier(float s, float p0, float c0, float c1, float p1) { + return glm_bezier(s, p0, c0, c1, p1); +} diff --git a/test/src/test_bezier.c b/test/src/test_bezier.c new file mode 100644 index 0000000..32d8c35 --- /dev/null +++ b/test/src/test_bezier.c @@ -0,0 +1,41 @@ +/* + * Copyright (c), Recep Aslantas. + * + * MIT License (MIT), http://opensource.org/licenses/MIT + * Full license can be found in the LICENSE file + */ + +#include "test_common.h" + +CGLM_INLINE +float +test_bezier_plain(float s, float p0, float c0, float c1, float p1) { + float x, xx, xxx, ss, sss; + + x = 1.0f - s; + xx = x * x; + xxx = xx * x; + ss = s * s; + sss = ss * s; + + return p0 * xxx + 3.0f * (c0 * s * xx + c1 * ss * x) + p1 * sss; +} + +void +test_bezier(void **state) { + float s, p0, p1, c0, c1, smc, Bs, Bs_plain; + + s = test_rand(); + p0 = test_rand(); + p1 = test_rand(); + c0 = test_rand(); + c1 = test_rand(); + + smc = glm_smc(s, GLM_BEZIER_MAT, (vec4){p0, c0, c1, p1}); + Bs = glm_bezier(s, p0, c0, c1, p1); + Bs_plain = test_bezier_plain(s, p0, c0, c1, p1); + + assert_true(glm_eq(Bs, Bs_plain)); + assert_true(glm_eq(smc, Bs_plain)); + assert_true(glm_eq(Bs, smc)); +} diff --git a/test/src/test_main.c b/test/src/test_main.c index ff77b02..8ce1673 100644 --- a/test/src/test_main.c +++ b/test/src/test_main.c @@ -38,7 +38,10 @@ main(int argc, const char * argv[]) { cmocka_unit_test(test_vec3), /* affine */ - cmocka_unit_test(test_affine) + cmocka_unit_test(test_affine), + + /* bezier */ + cmocka_unit_test(test_bezier) }; return cmocka_run_group_tests(tests, NULL, NULL); diff --git a/test/src/test_tests.h b/test/src/test_tests.h index 7b9cf0a..618cc9f 100644 --- a/test/src/test_tests.h +++ b/test/src/test_tests.h @@ -40,4 +40,7 @@ test_vec3(void **state); void 
test_affine(void **state); +void +test_bezier(void **state); + #endif /* test_tests_h */ From c22231f29621ad13e2638450cfe1bce56c4bca99 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Mon, 28 Jan 2019 15:52:42 +0300 Subject: [PATCH 14/19] curve: de casteljau implementation for solving cubic bezier --- CREDITS | 6 ++++ include/cglm/bezier.h | 73 ++++++++++++++++++++++++++++++++++++++ include/cglm/call.h | 1 + include/cglm/call/bezier.h | 14 ++++++-- src/bezier.c | 12 +++++++ 5 files changed, 103 insertions(+), 3 deletions(-) diff --git a/CREDITS b/CREDITS index 263dd2d..7272ddc 100644 --- a/CREDITS +++ b/CREDITS @@ -55,3 +55,9 @@ https://github.com/erich666/GraphicsGems/blob/master/gems/BoxSphere.c 10. Horizontal add https://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-float-vector-sum-on-x86 + +11. de casteljau implementation and comments +https://forums.khronos.org/showthread.php/10264-Animations-in-1-4-1-release-notes-revision-A/page2?highlight=bezier +https://forums.khronos.org/showthread.php/10644-Animation-Bezier-interpolation +https://forums.khronos.org/showthread.php/10387-2D-Tangents-in-Bezier-Splines?p=34164&viewfull=1#post34164 +https://forums.khronos.org/showthread.php/10651-Animation-TCB-Spline-Interpolation-in-COLLADA?highlight=bezier diff --git a/include/cglm/bezier.h b/include/cglm/bezier.h index f36ac1d..594da86 100644 --- a/include/cglm/bezier.h +++ b/include/cglm/bezier.h @@ -53,4 +53,77 @@ glm_bezier(float s, float p0, float c0, float c1, float p1) { return a + s * (c1 * xs3 + p1 * ss - a); } +/*! 
+ * @brief iterative way to solve cubic equation + * + * @param[in] s parameter between 0 and 1 + * @param[in] p0 begin point + * @param[in] c0 control point 1 + * @param[in] c1 control point 2 + * @param[in] p1 end point + * + * @return parameter to use in cubic equation + */ +CGLM_INLINE +float +glm_decasteljau(float prm, float p0, float c0, float c1, float p1) { + float u, v, a, b, c, d, e, f; + int i; + + if (prm - p0 < CGLM_DECASTEL_SMALL) + return 0.0f; + + if (p1 - prm < CGLM_DECASTEL_SMALL) + return 1.0f; + + u = 0.0f; + v = 1.0f; + + for (i = 0; i < CGLM_DECASTEL_MAX; i++) { + /* de Casteljau Subdivision */ + a = (p0 + c0) * 0.5f; + b = (c0 + c1) * 0.5f; + c = (c1 + p1) * 0.5f; + d = (a + b) * 0.5f; + e = (b + c) * 0.5f; + f = (d + e) * 0.5f; /* this one is on the curve! */ + + /* The curve point is close enough to our wanted t */ + if (fabsf(f - prm) < CGLM_DECASTEL_EPS) + return glm_clamp_zo((u + v) * 0.5f); + + /* dichotomy */ + if (f < prm) { + p0 = f; + c0 = e; + c1 = c; + u = (u + v) * 0.5f; + } else { + c0 = a; + c1 = d; + p1 = f; + v = (u + v) * 0.5f; + } + } + + return glm_clamp_zo((u + v) * 0.5f); +} + +/*! 
+ * @brief solve cubic bezier equation + * + * @param[in] s parameter between 0 and 1 + * @param[in] p0 begin point + * @param[in] c0 control point 1 + * @param[in] c1 control point 2 + * @param[in] p1 end point + * + * @return parameter to use in cubic equation + */ +CGLM_INLINE +float +glm_bezier_solve(float prm, float p0, float c0, float c1, float p1) { + return glm_decasteljau(prm, p0, c0, c1, p1); +} + #endif /* cglm_bezier_h */ diff --git a/include/cglm/call.h b/include/cglm/call.h index 7c1c8d7..7cbd501 100644 --- a/include/cglm/call.h +++ b/include/cglm/call.h @@ -28,6 +28,7 @@ extern "C" { #include "call/sphere.h" #include "call/ease.h" #include "call/curve.h" +#include "call/bezier.h" #ifdef __cplusplus } diff --git a/include/cglm/call/bezier.h b/include/cglm/call/bezier.h index 531c15d..c90a178 100644 --- a/include/cglm/call/bezier.h +++ b/include/cglm/call/bezier.h @@ -5,8 +5,8 @@ * Full license can be found in the LICENSE file */ -#ifndef cglmc_curve_h -#define cglmc_curve_h +#ifndef cglmc_bezier_h +#define cglmc_bezier_h #ifdef __cplusplus extern "C" { #endif @@ -17,7 +17,15 @@ CGLM_EXPORT float glmc_bezier(float s, float p0, float c0, float c1, float p1); +CGLM_EXPORT +float +glmc_decasteljau(float prm, float p0, float c0, float c1, float p1); + +CGLM_EXPORT +float +glmc_bezier_solve(float prm, float p0, float c0, float c1, float p1); + #ifdef __cplusplus } #endif -#endif /* cglmc_curve_h */ +#endif /* cglmc_bezier_h */ diff --git a/src/bezier.c b/src/bezier.c index 244e580..36d2776 100644 --- a/src/bezier.c +++ b/src/bezier.c @@ -13,3 +13,15 @@ float glmc_bezier(float s, float p0, float c0, float c1, float p1) { return glm_bezier(s, p0, c0, c1, p1); } + +CGLM_EXPORT +float +glmc_decasteljau(float prm, float p0, float c0, float c1, float p1) { + return glm_decasteljau(prm, p0, c0, c1, p1); +} + +CGLM_EXPORT +float +glmc_bezier_solve(float prm, float p0, float c0, float c1, float p1) { + return glm_bezier_solve(prm, p0, c0, c1, p1); +} From 
1e121a485511ddf6e5d249f34d880d3dbb658005 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Tue, 29 Jan 2019 22:11:04 +0300 Subject: [PATCH 15/19] mat4: fix rmc multiplication --- include/cglm/mat4.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/cglm/mat4.h b/include/cglm/mat4.h index 35d4117..74e876a 100644 --- a/include/cglm/mat4.h +++ b/include/cglm/mat4.h @@ -695,8 +695,8 @@ CGLM_INLINE float glm_mat4_rmc(vec4 r, mat4 m, vec4 c) { vec4 tmp; - glm_mat4_mulv(m, r, tmp); - return glm_vec4_dot(c, tmp); + glm_mat4_mulv(m, c, tmp); + return glm_vec4_dot(r, tmp); } #endif /* cglm_mat_h */ From 7848dda1dd9c577a1d107fba34ffcfea03ada05a Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Tue, 29 Jan 2019 22:17:44 +0300 Subject: [PATCH 16/19] curve: cubic hermite intrpolation --- include/cglm/bezier.h | 56 ++++++++++++++++++++++++++++++++------ include/cglm/call/bezier.h | 4 +++ makefile.am | 12 +++++--- src/bezier.c | 6 ++++ test/src/test_bezier.c | 24 ++++++++++++++++ win/cglm.vcxproj | 3 ++ win/cglm.vcxproj.filters | 9 ++++++ 7 files changed, 102 insertions(+), 12 deletions(-) diff --git a/include/cglm/bezier.h b/include/cglm/bezier.h index 594da86..8b1ee6e 100644 --- a/include/cglm/bezier.h +++ b/include/cglm/bezier.h @@ -8,13 +8,17 @@ #ifndef cglm_bezier_h #define cglm_bezier_h -#define GLM_BEZIER_MAT_INIT {{-1.0f, 3.0f, -3.0f, 1.0f}, \ - { 3.0f, -6.0f, 3.0f, 0.0f}, \ - {-3.0f, 3.0f, 0.0f, 0.0f}, \ - { 1.0f, 0.0f, 0.0f, 0.0f}} - +#define GLM_BEZIER_MAT_INIT {{-1.0f, 3.0f, -3.0f, 1.0f}, \ + { 3.0f, -6.0f, 3.0f, 0.0f}, \ + {-3.0f, 3.0f, 0.0f, 0.0f}, \ + { 1.0f, 0.0f, 0.0f, 0.0f}} +#define GLM_HERMITE_MAT_INIT {{ 2.0f, -3.0f, 0.0f, 1.0f}, \ + {-2.0f, 3.0f, 0.0f, 0.0f}, \ + { 1.0f, -2.0f, 1.0f, 0.0f}, \ + { 1.0f, -1.0f, 0.0f, 0.0f}} /* for C only */ -#define GLM_BEZIER_MAT ((mat4)GLM_BEZIER_MAT_INIT) +#define GLM_BEZIER_MAT ((mat4)GLM_BEZIER_MAT_INIT) +#define GLM_HERMITE_MAT ((mat4)GLM_HERMITE_MAT_INIT) #define CGLM_DECASTEL_EPS 1e-9 
#define CGLM_DECASTEL_MAX 1000 @@ -54,10 +58,46 @@ glm_bezier(float s, float p0, float c0, float c1, float p1) { } /*! - * @brief iterative way to solve cubic equation + * @brief cubic hermite interpolation + * + * Formula: + * H(s) = P0*(2*s^3 - 3*s^2 + 1) + T0*(s^3 - 2*s^2 + s) + * + P1*(-2*s^3 + 3*s^2) + T1*(s^3 - s^2) + * + * similar result using matrix: + * H(s) = glm_smc(t, GLM_HERMITE_MAT, (vec4){p0, p1, c0, c1}) + * + * glm_eq(glm_smc(...), glm_hermite(...)) should return TRUE * * @param[in] s parameter between 0 and 1 * @param[in] p0 begin point + * @param[in] t0 tangent 1 + * @param[in] t1 tangent 2 + * @param[in] p1 end point + * + * @return B(s) + */ +CGLM_INLINE +float +glm_hermite(float s, float p0, float t0, float t1, float p1) { + float ss, d, a, b, c, e, f; + + ss = s * s; + a = ss + ss; + c = a + ss; + b = a * s; + d = s * ss; + f = d - ss; + e = b - c; + + return p0 * (e + 1.0f) + t0 * (f - ss + s) + t1 * f - p1 * e; +} + +/*! + * @brief iterative way to solve cubic equation + * + * @param[in] prm parameter between 0 and 1 + * @param[in] p0 begin point * @param[in] c0 control point 1 * @param[in] c1 control point 2 * @param[in] p1 end point @@ -112,7 +152,7 @@ glm_decasteljau(float prm, float p0, float c0, float c1, float p1) { /*! 
* @brief solve cubic bezier equation * - * @param[in] s parameter between 0 and 1 + * @param[in] prm parameter between 0 and 1 * @param[in] p0 begin point * @param[in] c0 control point 1 * @param[in] c1 control point 2 diff --git a/include/cglm/call/bezier.h b/include/cglm/call/bezier.h index c90a178..b37f349 100644 --- a/include/cglm/call/bezier.h +++ b/include/cglm/call/bezier.h @@ -17,6 +17,10 @@ CGLM_EXPORT float glmc_bezier(float s, float p0, float c0, float c1, float p1); +CGLM_EXPORT +float +glmc_hermite(float s, float p0, float t0, float t1, float p1); + CGLM_EXPORT float glmc_decasteljau(float prm, float p0, float c0, float c1, float p1); diff --git a/makefile.am b/makefile.am index d6498c6..d6cdc70 100644 --- a/makefile.am +++ b/makefile.am @@ -58,7 +58,8 @@ cglm_HEADERS = include/cglm/version.h \ include/cglm/project.h \ include/cglm/sphere.h \ include/cglm/ease.h \ - include/cglm/curve.h + include/cglm/curve.h \ + include/cglm/bezier.h cglm_calldir=$(includedir)/cglm/call cglm_call_HEADERS = include/cglm/call/mat4.h \ @@ -76,7 +77,8 @@ cglm_call_HEADERS = include/cglm/call/mat4.h \ include/cglm/call/project.h \ include/cglm/call/sphere.h \ include/cglm/call/ease.h \ - include/cglm/call/curve.h + include/cglm/call/curve.h \ + include/cglm/call/bezier.h cglm_simddir=$(includedir)/cglm/simd cglm_simd_HEADERS = include/cglm/simd/intrin.h \ @@ -112,7 +114,8 @@ libcglm_la_SOURCES=\ src/project.c \ src/sphere.c \ src/ease.c \ - src/curve.c + src/curve.c \ + src/bezier.c test_tests_SOURCES=\ test/src/test_common.c \ @@ -126,7 +129,8 @@ test_tests_SOURCES=\ test/src/test_vec4.c \ test/src/test_vec3.c \ test/src/test_mat3.c \ - test/src/test_affine.c + test/src/test_affine.c \ + test/src/test_bezier.c all-local: sh ./post-build.sh diff --git a/src/bezier.c b/src/bezier.c index 36d2776..0bb16fe 100644 --- a/src/bezier.c +++ b/src/bezier.c @@ -14,6 +14,12 @@ glmc_bezier(float s, float p0, float c0, float c1, float p1) { return glm_bezier(s, p0, c0, c1, p1); } 
+CGLM_EXPORT +float +glmc_hermite(float s, float p0, float t0, float t1, float p1) { + return glm_hermite(s, p0, t0, t1, p1); +} + CGLM_EXPORT float glmc_decasteljau(float prm, float p0, float c0, float c1, float p1) { diff --git a/test/src/test_bezier.c b/test/src/test_bezier.c index 32d8c35..bd7c796 100644 --- a/test/src/test_bezier.c +++ b/test/src/test_bezier.c @@ -21,6 +21,20 @@ test_bezier_plain(float s, float p0, float c0, float c1, float p1) { return p0 * xxx + 3.0f * (c0 * s * xx + c1 * ss * x) + p1 * sss; } +CGLM_INLINE +float +test_hermite_plain(float s, float p0, float t0, float t1, float p1) { + float ss, sss; + + ss = s * s; + sss = ss * s; + + return p0 * (2.0f * sss - 3.0f * ss + 1.0f) + + t0 * (sss - 2.0f * ss + s) + + p1 * (-2.0f * sss + 3.0f * ss) + + t1 * (sss - ss); +} + void test_bezier(void **state) { float s, p0, p1, c0, c1, smc, Bs, Bs_plain; @@ -31,6 +45,7 @@ test_bezier(void **state) { c0 = test_rand(); c1 = test_rand(); + /* test cubic bezier */ smc = glm_smc(s, GLM_BEZIER_MAT, (vec4){p0, c0, c1, p1}); Bs = glm_bezier(s, p0, c0, c1, p1); Bs_plain = test_bezier_plain(s, p0, c0, c1, p1); @@ -38,4 +53,13 @@ test_bezier(void **state) { assert_true(glm_eq(Bs, Bs_plain)); assert_true(glm_eq(smc, Bs_plain)); assert_true(glm_eq(Bs, smc)); + + /* test cubic hermite */ + smc = glm_smc(s, GLM_HERMITE_MAT, (vec4){p0, p1, c0, c1}); + Bs = glm_hermite(s, p0, c0, c1, p1); + Bs_plain = test_hermite_plain(s, p0, c0, c1, p1); + + assert_true(glm_eq(Bs, Bs_plain)); + assert_true(glm_eq(smc, Bs_plain)); + assert_true(glm_eq(Bs, smc)); } diff --git a/win/cglm.vcxproj b/win/cglm.vcxproj index 82293f0..90e58a6 100644 --- a/win/cglm.vcxproj +++ b/win/cglm.vcxproj @@ -20,6 +20,7 @@ + @@ -40,9 +41,11 @@ + + diff --git a/win/cglm.vcxproj.filters b/win/cglm.vcxproj.filters index 7f9735b..dd66ee1 100644 --- a/win/cglm.vcxproj.filters +++ b/win/cglm.vcxproj.filters @@ -87,6 +87,9 @@ src + + src + @@ -248,5 +251,11 @@ include\cglm + + include\cglm + + + 
include\cglm\call + \ No newline at end of file From 4e5879497e28e8932ff527985436fcd5950aca59 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Sat, 2 Feb 2019 15:29:48 +0300 Subject: [PATCH 17/19] update docs --- README.md | 4 ++ docs/source/api.rst | 1 + docs/source/bezier.rst | 89 ++++++++++++++++++++++++++++++++++++++++++ include/cglm/bezier.h | 2 +- 4 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 docs/source/bezier.rst diff --git a/README.md b/README.md index dffa54c..d03a03f 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,11 @@ Currently *cglm* uses default clip space configuration (-1, 1) for camera functi - inline or pre-compiled function call - frustum (extract view frustum planes, corners...) - bounding box (AABB in Frustum (culling), crop, merge...) +- bounding sphere - project, unproject +- easing functions +- curves +- curve interpolation helpers (S*M*C, deCasteljau...) - and other...
diff --git a/docs/source/api.rst b/docs/source/api.rst index c7f74a5..408eae7 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -47,3 +47,4 @@ Follow the :doc:`build` documentation for this call sphere curve + bezier diff --git a/docs/source/bezier.rst b/docs/source/bezier.rst new file mode 100644 index 0000000..8b29751 --- /dev/null +++ b/docs/source/bezier.rst @@ -0,0 +1,89 @@ +.. default-domain:: C + +Bezier +================================================================================ + +Header: cglm/bezier.h + +Common helpers for cubic bezier and similar curves. + +Table of contents (click to go): +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Functions: + +1. :c:func:`glm_bezier` +2. :c:func:`glm_hermite` +3. :c:func:`glm_decasteljau` + +Functions documentation +~~~~~~~~~~~~~~~~~~~~~~~ + +.. c:function:: float glm_bezier(float s, float p0, float c0, float c1, float p1) + + | cubic bezier interpolation + | formula: + + .. code-block:: text + + B(s) = P0*(1-s)^3 + 3*C0*s*(1-s)^2 + 3*C1*s^2*(1-s) + P1*s^3 + + | similar result using matrix: + + .. code-block:: text + + B(s) = glm_smc(t, GLM_BEZIER_MAT, (vec4){p0, c0, c1, p1}) + + | glm_eq(glm_smc(...), glm_bezier(...)) should return TRUE + + Parameters: + | *[in]* **s** parameter between 0 and 1 + | *[in]* **p0** begin point + | *[in]* **c0** control point 1 + | *[in]* **c1** control point 2 + | *[in]* **p1** end point + + Returns: + B(s) + +.. c:function:: float glm_hermite(float s, float p0, float t0, float t1, float p1) + + | cubic hermite interpolation + | formula: + + .. code-block:: text + + H(s) = P0*(2*s^3 - 3*s^2 + 1) + T0*(s^3 - 2*s^2 + s) + P1*(-2*s^3 + 3*s^2) + T1*(s^3 - s^2) + + | similar result using matrix: + + .. 
code-block:: text + + H(s) = glm_smc(s, GLM_HERMITE_MAT, (vec4){p0, p1, t0, t1}) + + | glm_eq(glm_smc(...), glm_hermite(...)) should return TRUE + + + Parameters: + | *[in]* **s** parameter between 0 and 1 + | *[in]* **p0** begin point + | *[in]* **t0** tangent 1 + | *[in]* **t1** tangent 2 + | *[in]* **p1** end point + + Returns: + H(s) + +.. c:function:: float glm_decasteljau(float prm, float p0, float c0, float c1, float p1) + + | iterative way to solve cubic equation + + Parameters: + | *[in]* **prm** parameter between 0 and 1 + | *[in]* **p0** begin point + | *[in]* **c0** control point 1 + | *[in]* **c1** control point 2 + | *[in]* **p1** end point + + Returns: + parameter to use in cubic equation diff --git a/include/cglm/bezier.h b/include/cglm/bezier.h index 8b1ee6e..1d194f5 100644 --- a/include/cglm/bezier.h +++ b/include/cglm/bezier.h @@ -75,7 +75,7 @@ glm_bezier(float s, float p0, float c0, float c1, float p1) { * @param[in] t1 tangent 2 * @param[in] p1 end point * - * @return B(s) + * @return H(s) */ CGLM_INLINE float From 60cfc870094b57f81e0589106396cd1cb298aae5 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Sat, 2 Feb 2019 15:30:05 +0300 Subject: [PATCH 18/19] remove bezier_solve for now --- include/cglm/bezier.h | 17 ----------------- include/cglm/call/bezier.h | 4 ---- src/bezier.c | 6 ------ 3 files changed, 27 deletions(-) diff --git a/include/cglm/bezier.h b/include/cglm/bezier.h index 1d194f5..2560594 100644 --- a/include/cglm/bezier.h +++ b/include/cglm/bezier.h @@ -149,21 +149,4 @@ glm_decasteljau(float prm, float p0, float c0, float c1, float p1) { return glm_clamp_zo((u + v) * 0.5f); } -/*!
- * @brief solve cubic bezier equation - * - * @param[in] prm parameter between 0 and 1 - * @param[in] p0 begin point - * @param[in] c0 control point 1 - * @param[in] c1 control point 2 - * @param[in] p1 end point - * - * @return parameter to use in cubic equation - */ -CGLM_INLINE -float -glm_bezier_solve(float prm, float p0, float c0, float c1, float p1) { - return glm_decasteljau(prm, p0, c0, c1, p1); -} - #endif /* cglm_bezier_h */ diff --git a/include/cglm/call/bezier.h b/include/cglm/call/bezier.h index b37f349..a6a0eb4 100644 --- a/include/cglm/call/bezier.h +++ b/include/cglm/call/bezier.h @@ -25,10 +25,6 @@ CGLM_EXPORT float glmc_decasteljau(float prm, float p0, float c0, float c1, float p1); -CGLM_EXPORT -float -glmc_bezier_solve(float prm, float p0, float c0, float c1, float p1); - #ifdef __cplusplus } #endif diff --git a/src/bezier.c b/src/bezier.c index 0bb16fe..21e6495 100644 --- a/src/bezier.c +++ b/src/bezier.c @@ -25,9 +25,3 @@ float glmc_decasteljau(float prm, float p0, float c0, float c1, float p1) { return glm_decasteljau(prm, p0, c0, c1, p1); } - -CGLM_EXPORT -float -glmc_bezier_solve(float prm, float p0, float c0, float c1, float p1) { - return glm_bezier_solve(prm, p0, c0, c1, p1); -} From 18f06743ed40755910000a76653c26a6d28a9376 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Sat, 2 Feb 2019 15:54:09 +0300 Subject: [PATCH 19/19] build: make automake build slient (less-verbose) --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index 1349b90..d29b9bb 100644 --- a/configure.ac +++ b/configure.ac @@ -29,6 +29,7 @@ LT_INIT # Checks for libraries. AC_CHECK_LIB([m], [floor]) +m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) AC_SYS_LARGEFILE # Checks for header files.