From a85bf476837b3b7253b96b6aeb306e7f36a86f27 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Sat, 8 Oct 2016 14:15:31 +0300 Subject: [PATCH] mat3 multiplication --- include/cglm-intrin.h | 2 +- include/cglm-mat3-simd-sse2.h | 60 +++++++++++++++++++ include/cglm-mat3.h | 109 ++++++++++++++++++++++++++++++++++ 3 files changed, 170 insertions(+), 1 deletion(-) create mode 100644 include/cglm-mat3-simd-sse2.h create mode 100644 include/cglm-mat3.h diff --git a/include/cglm-intrin.h b/include/cglm-intrin.h index 959a57c..c3bc3a2 100644 --- a/include/cglm-intrin.h +++ b/include/cglm-intrin.h @@ -30,7 +30,7 @@ #define _mm_shuffle2_ps(a, b, z0, y0, x0, w0, z1, y1, x1, w1) \ _mm_shuffle1_ps(_mm_shuffle_ps(a, b, _MM_SHUFFLE(z0, y0, x0, w0)), \ - z1, y1, x1, w1); + z1, y1, x1, w1) #endif #endif /* cglm_intrin_h */ diff --git a/include/cglm-mat3-simd-sse2.h b/include/cglm-mat3-simd-sse2.h new file mode 100644 index 0000000..4c6e7f4 --- /dev/null +++ b/include/cglm-mat3-simd-sse2.h @@ -0,0 +1,60 @@ +/* + * Copyright (c), Recep Aslantas. 
+ *
+ * MIT License (MIT), http://opensource.org/licenses/MIT
+ * Full license can be found in the LICENSE file
+ */
+
+#ifndef cglm_mat3_sse_h
+#define cglm_mat3_sse_h
+#if defined( __SSE__ ) || defined( __SSE2__ )
+
+#include "cglm-intrin.h"
+
+/*!
+ * @brief multiply m1 and m2 to dest (SSE); dest may be the same as m1 or m2
+ *
+ * @param[in]  m1   left matrix
+ * @param[in]  m2   right matrix
+ * @param[out] dest destination matrix
+ */
+CGLM_INLINE
+void
+glm_mat3_mul_sse2(mat3 m1, mat3 m2, mat3 dest) {
+  __m128 l0, l1, l2, r0, r1, r2, x0, x1, x2;
+  float  d22;
+
+  l0 = _mm_loadu_ps(m1[0]);
+  l1 = _mm_loadu_ps(&m1[1][1]);
+  l2 = _mm_set1_ps(m1[2][2]);
+  r0 = _mm_loadu_ps(m2[0]);
+  r1 = _mm_loadu_ps(&m2[1][1]);
+  r2 = _mm_set1_ps(m2[2][2]);
+
+  /* scalar tail: read the inputs before any store, dest may alias m1/m2 */
+  d22 = m1[0][2] * m2[2][0] + m1[1][2] * m2[2][1] + m1[2][2] * m2[2][2];
+
+  x1 = _mm_shuffle2_ps(l0, l1, 1, 0, 3, 3, 0, 3, 2, 0);
+  x2 = _mm_shuffle2_ps(l1, l2, 0, 0, 3, 2, 0, 2, 1, 0);
+  x0 = _mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps(l0, 0, 2, 1, 0),
+                             _mm_shuffle1_ps(r0, 3, 0, 0, 0)),
+                  _mm_mul_ps(x1,
+                             _mm_shuffle2_ps(r0, r1, 0, 0, 1, 1, 2, 0, 0, 0)));
+  x0 = _mm_add_ps(x0,
+                  _mm_mul_ps(x2,
+                             _mm_shuffle2_ps(r0, r1, 1, 1, 2, 2, 2, 0, 0, 0)));
+  _mm_storeu_ps(dest[0], x0);
+
+  x0 = _mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps(l0, 1, 0, 2, 1),
+                             _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(2, 2, 3, 3))),
+                  _mm_mul_ps(_mm_shuffle1_ps(x1, 1, 0, 2, 1),
+                             _mm_shuffle1_ps(r1, 3, 3, 0, 0)));
+  x0 = _mm_add_ps(x0,
+                  _mm_mul_ps(_mm_shuffle1_ps(x2, 1, 0, 2, 1),
+                             _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(0, 0, 1, 1))));
+  _mm_storeu_ps(&dest[1][1], x0);
+  dest[2][2] = d22;
+}
+
+#endif
+#endif /* cglm_mat3_sse_h */
diff --git a/include/cglm-mat3.h b/include/cglm-mat3.h
new file mode 100644
index 0000000..f17f31a
--- /dev/null
+++ b/include/cglm-mat3.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c), Recep Aslantas.
+ *
+ * MIT License (MIT), http://opensource.org/licenses/MIT
+ * Full license can be found in the LICENSE file
+ */
+
+#ifndef cglm_mat3_h
+#define cglm_mat3_h
+
+#include "cglm.h"
+#include "cglm-mat3-simd-sse2.h"
+
+#define GLM_MAT3_IDENTITY_INIT {{1.0f, 0.0f, 0.0f}, \
+                                {0.0f, 1.0f, 0.0f}, \
+                                {0.0f, 0.0f, 1.0f}}
+
+/* for C only */
+#define GLM_MAT3_IDENTITY (mat3)GLM_MAT3_IDENTITY_INIT
+
+/*!
+ * @brief duplicate a matrix: copy every member of [mat] into [dest]
+ *
+ * @param[in]  mat  matrix to read from
+ * @param[out] dest matrix to write to
+ */
+CGLM_INLINE
+void
+glm_mat3_dup(mat3 mat, mat3 dest) {
+  glm__memcpy(float, dest, mat, sizeof(mat3));
+}
+
+/*!
+ * @brief matrix product, dest = m1 * m2
+ *
+ * Dispatches to glm_mat3_mul_sse2 when __SSE__ or __SSE2__ is defined.
+ * m1, m2 and dest are allowed to be the same matrix, for example:
+ *
+ * @code
+ * mat3 m = GLM_MAT3_IDENTITY_INIT;
+ * glm_mat3_mul(m, m, m);
+ * @endcode
+ *
+ * @param[in]  m1   left matrix
+ * @param[in]  m2   right matrix
+ * @param[out] dest destination matrix
+ */
+CGLM_INLINE
+void
+glm_mat3_mul(mat3 m1, mat3 m2, mat3 dest) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  glm_mat3_mul_sse2(m1, m2, dest);
+#else
+  float prod[3][3];
+  int   c, r;
+
+  /* fill a scratch matrix first: dest may be the same matrix as m1 or m2 */
+  for (c = 0; c < 3; c++) {
+    for (r = 0; r < 3; r++) {
+      prod[c][r] = m1[0][r] * m2[c][0]
+                 + m1[1][r] * m2[c][1]
+                 + m1[2][r] * m2[c][2];
+    }
+  }
+
+  for (c = 0; c < 3; c++) {
+    for (r = 0; r < 3; r++) {
+      dest[c][r] = prod[c][r];
+    }
+  }
+#endif
+}
+
+/*!
+ * @brief write [matrix] as text to [ostream]
+ *
+ * Prints a "Matrix (float3x3):" heading, then three lines of tab-separated
+ * "%0.4f" values between '|' characters, then an empty line. Printed line i
+ * holds matrix[0][i], matrix[1][i] and matrix[2][i].
+ *
+ * @param[in] matrix  matrix to print
+ * @param[in] ostream stream that receives the text
+ */
+CGLM_INLINE
+void
+glm_mat3_print(mat3 matrix,
+               FILE * __restrict ostream) {
+  enum { rows = 3, cols = 3 };
+  int row, col;
+
+  fprintf(ostream, "Matrix (float%dx%d):\n", rows, cols);
+
+  for (row = 0; row < rows; row++) {
+    fprintf(ostream, "\t|");
+
+    for (col = 0; col < cols; col++) {
+      fprintf(ostream, "%0.4f", matrix[col][row]);
+
+      if (col != cols - 1)
+        fprintf(ostream, "\t");
+    }
+
+    fprintf(ostream, "|\n");
+  }
+
+  fprintf(ostream, "\n");
+}
+
+#endif /* cglm_mat3_h */