From ad823d96819a2549f854ab7538ac253a3ab4d0a4 Mon Sep 17 00:00:00 2001
From: Recep Aslantas
Date: Wed, 19 Jun 2019 23:35:38 +0300
Subject: [PATCH] mat2: implement some mat2 func

* also implement as SSE
---
 include/cglm/cglm.h           |   1 +
 include/cglm/io.h             |  39 ++++++++
 include/cglm/mat2.h           | 163 ++++++++++++++++++++++++++++++++++
 include/cglm/simd/sse2/mat2.h |  45 ++++++++++
 include/cglm/types.h          |   2 +
 5 files changed, 250 insertions(+)
 create mode 100644 include/cglm/mat2.h
 create mode 100644 include/cglm/simd/sse2/mat2.h

diff --git a/include/cglm/cglm.h b/include/cglm/cglm.h
index f4c203f..cbd2952 100644
--- a/include/cglm/cglm.h
+++ b/include/cglm/cglm.h
@@ -14,6 +14,7 @@
 #include "vec4.h"
 #include "mat4.h"
 #include "mat3.h"
+#include "mat2.h"
 #include "affine.h"
 #include "cam.h"
 #include "frustum.h"
diff --git a/include/cglm/io.h b/include/cglm/io.h
index ac0bfeb..cdcb4fb 100644
--- a/include/cglm/io.h
+++ b/include/cglm/io.h
@@ -83,6 +83,45 @@ glm_mat3_print(mat3 matrix,
 #undef n
 }
 
+/*!
+ * @brief print a mat2 to the given stream with four decimals per element
+ *
+ * output line i lists matrix[0][i] then matrix[1][i], so the first index
+ * is laid out horizontally and the second index selects the output line
+ *
+ * @param[in] matrix  matrix to print
+ * @param[in] ostream stream that receives the text (e.g. stdout)
+ */
+CGLM_INLINE
+void
+glm_mat2_print(mat2              matrix,
+               FILE * __restrict ostream) {
+  int i;
+  int j;
+
+#define m 2
+#define n 2
+
+  fprintf(ostream, "Matrix (float%dx%d):\n", m, n);
+
+  for (i = 0; i < m; i++) {
+    fprintf(ostream, "\t|");
+    for (j = 0; j < n; j++) {
+      fprintf(ostream, "%0.4f", matrix[j][i]);
+
+      if (j != n - 1)
+        fprintf(ostream, "\t");
+    }
+
+    fprintf(ostream, "|\n");
+  }
+
+  fprintf(ostream, "\n");
+
+#undef m
+#undef n
+}
+
 CGLM_INLINE
 void
 glm_vec4_print(vec4 vec,
diff --git a/include/cglm/mat2.h b/include/cglm/mat2.h
new file mode 100644
index 0000000..11001f4
--- /dev/null
+++ b/include/cglm/mat2.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c), Recep Aslantas.
+ *
+ * MIT License (MIT), http://opensource.org/licenses/MIT
+ * Full license can be found in the LICENSE file
+ */
+
+/*
+ Functions: glm_mat2_{copy,identity,identity_array,zero,mul,transpose_to,transpose}
+ */
+
+#ifndef cglm_mat2_h
+#define cglm_mat2_h
+
+#include "common.h"
+#include "vec2.h"
+
+#ifdef CGLM_SSE_FP
+#  include "simd/sse2/mat2.h"
+#endif
+
+#define GLM_MAT2_IDENTITY_INIT  {{1.0f, 0.0f}, {0.0f, 1.0f}}
+#define GLM_MAT2_ZERO_INIT      {{0.0f, 0.0f}, {0.0f, 0.0f}}
+
+
+/* for C only: compound literals, typed as mat2 to match the initializers */
+#define GLM_MAT2_IDENTITY ((mat2)GLM_MAT2_IDENTITY_INIT)
+#define GLM_MAT2_ZERO     ((mat2)GLM_MAT2_ZERO_INIT)
+
+/*!
+ * @brief copy all four elements of [mat] to [dest]
+ *
+ * @param[in]  mat  source
+ * @param[out] dest destination
+ */
+CGLM_INLINE
+void
+glm_mat2_copy(mat2 mat, mat2 dest) {
+  dest[0][0] = mat[0][0];
+  dest[0][1] = mat[0][1];
+  dest[1][0] = mat[1][0];
+  dest[1][1] = mat[1][1];
+}
+
+/*!
+ * @brief make given matrix identity. It is equivalent to the snippets below,
+ *        but this func is easier to use when the matrix is a member,
+ *        e.g. glm_mat2_identity(aStruct->aMatrix);
+ *
+ * @code
+ * glm_mat2_copy(GLM_MAT2_IDENTITY, mat); // C only
+ *
+ * // or
+ * mat2 mat = GLM_MAT2_IDENTITY_INIT;
+ * @endcode
+ *
+ * @param[in, out]  mat  destination
+ */
+CGLM_INLINE
+void
+glm_mat2_identity(mat2 mat) {
+  CGLM_ALIGN_MAT mat2 t = GLM_MAT2_IDENTITY_INIT;
+  glm_mat2_copy(t, mat);
+}
+
+/*!
+ * @brief set every matrix in the given array to the identity matrix
+ *
+ * @param[in, out]  mat   matrix array (must be aligned (16)
+ *                        if alignment is not disabled)
+ *
+ * @param[in]       count count of matrices; nothing is written when it is 0
+ */
+CGLM_INLINE
+void
+glm_mat2_identity_array(mat2 * __restrict mat, size_t count) {
+  CGLM_ALIGN_MAT mat2 t = GLM_MAT2_IDENTITY_INIT;
+  size_t i;
+
+  for (i = 0; i < count; i++) {
+    glm_mat2_copy(t, mat[i]);
+  }
+}
+
+/*!
+ * @brief set all four elements of the given matrix to zero
+ *
+ * @param[in, out]  mat  matrix
+ */
+CGLM_INLINE
+void
+glm_mat2_zero(mat2 mat) {
+  CGLM_ALIGN_MAT mat2 t = GLM_MAT2_ZERO_INIT;
+  glm_mat2_copy(t, mat);
+}
+
+/*!
+ * @brief multiply m1 and m2 to dest
+ *
+ * m1, m2 and dest matrices can be same matrix, it is possible to write this:
+ *
+ * @code
+ * mat2 m = GLM_MAT2_IDENTITY_INIT;
+ * glm_mat2_mul(m, m, m);
+ * @endcode
+ *
+ * @param[in]  m1   left matrix
+ * @param[in]  m2   right matrix
+ * @param[out] dest destination matrix
+ */
+CGLM_INLINE
+void
+glm_mat2_mul(mat2 m1, mat2 m2, mat2 dest) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  glm_mat2_mul_sse2(m1, m2, dest);
+#else
+  float a00 = m1[0][0], a01 = m1[0][1],
+        a10 = m1[1][0], a11 = m1[1][1],
+        b00 = m2[0][0], b01 = m2[0][1],
+        b10 = m2[1][0], b11 = m2[1][1];
+
+  dest[0][0] = a00 * b00 + a10 * b01;
+  dest[0][1] = a01 * b00 + a11 * b01;
+  dest[1][0] = a00 * b10 + a10 * b11;
+  dest[1][1] = a01 * b10 + a11 * b11;
+#endif
+}
+
+/*!
+ * @brief transpose mat2 and store in dest
+ *
+ * m itself is left untouched unless dest is m; passing the same matrix works
+ *
+ * @param[in]  m     matrix
+ * @param[out] dest  result
+ */
+CGLM_INLINE
+void
+glm_mat2_transpose_to(mat2 m, mat2 dest) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  glm_mat2_transp_sse2(m, dest);
+#else
+  /* read the off-diagonal pair before writing, so dest may alias m */
+  float m01 = m[0][1], m10 = m[1][0];
+  dest[0][0] = m[0][0]; dest[0][1] = m10;
+  dest[1][0] = m01;     dest[1][1] = m[1][1];
+#endif
+}
+
+/*!
+ * @brief transpose mat2 and store result in same matrix
+ *
+ * @param[in, out] m source and dest
+ */
+CGLM_INLINE
+void
+glm_mat2_transpose(mat2 m) {
+  float tmp;
+  tmp     = m[0][1];
+  m[0][1] = m[1][0];
+  m[1][0] = tmp;
+}
+#endif /* cglm_mat2_h */
diff --git a/include/cglm/simd/sse2/mat2.h b/include/cglm/simd/sse2/mat2.h
new file mode 100644
index 0000000..b3b4d97
--- /dev/null
+++ b/include/cglm/simd/sse2/mat2.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c), Recep Aslantas.
+ *
+ * MIT License (MIT), http://opensource.org/licenses/MIT
+ * Full license can be found in the LICENSE file
+ */
+
+#ifndef cglm_mat2_sse_h
+#define cglm_mat2_sse_h
+#if defined( __SSE__ ) || defined( __SSE2__ )
+
+#include "../../common.h"
+#include "../intrin.h"
+
+CGLM_INLINE
+void
+glm_mat2_mul_sse2(mat2 m1, mat2 m2, mat2 dest) {
+  __m128 lhs, rhs, lo, hi;
+
+  lhs = glmm_load(m1[0]); /* whole m1 in one register: d c b a */
+  rhs = glmm_load(m2[0]); /* whole m2 in one register: h g f e */
+
+  /* target values, using the element letters above:
+     dest[0][0] = a * e + c * f;
+     dest[0][1] = b * e + d * f;
+     dest[1][0] = a * g + c * h;
+     dest[1][1] = b * g + d * h;
+   */
+  lo = _mm_mul_ps(_mm_movelh_ps(lhs, lhs), glmm_shuff1(rhs, 2, 2, 0, 0));
+  hi = _mm_mul_ps(_mm_movehl_ps(lhs, lhs), glmm_shuff1(rhs, 3, 3, 1, 1));
+
+  /* add the two partial products and write all four elements at once */
+  glmm_store(dest[0], _mm_add_ps(lo, hi));
+}
+
+CGLM_INLINE
+void
+glm_mat2_transp_sse2(mat2 m, mat2 dest) {
+  __m128 v = glmm_load(m[0]);     /* d c b a */
+  v = glmm_shuff1(v, 3, 1, 2, 0); /* d b c a: middle pair swapped */
+  glmm_store(dest[0], v);
+}
+
+#endif
+#endif /* cglm_mat2_sse_h */
diff --git a/include/cglm/types.h b/include/cglm/types.h
index 1190b73..13a315d 100644
--- a/include/cglm/types.h
+++ b/include/cglm/types.h
@@ -38,6 +38,8 @@ typedef int ivec3[3];
 typedef CGLM_ALIGN_IF(16) float vec4[4];
 typedef vec4 versor;
 typedef vec3 mat3[3];
+/* 2x2 matrix as two vec2; CGLM_ALIGN_IF(16) so the SSE code can load it whole */
+typedef CGLM_ALIGN_IF(16) vec2 mat2[2];
 
 #ifdef __AVX__
 typedef CGLM_ALIGN_IF(32) vec4 mat4[4];