fix mat4_mulv and implement sse2 version

This commit is contained in:
Recep Aslantas
2017-01-24 23:07:14 +03:00
parent b677a026a2
commit 99c8aeab77
2 changed files with 29 additions and 4 deletions

View File

@@ -81,6 +81,25 @@ glm_mat4_mul_sse2(mat4 m1, mat4 m2, mat4 dest) {
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
}
/*!
 * @brief SSE variant of mat4 * vec4
 *
 * Evaluates dest = (m[0] * v[0] + m[1] * v[1]) + (m[2] * v[2] + m[3] * v[3]),
 * treating every m[i] as one packed group of four floats.
 *
 * v is read completely before dest is stored, so dest may be the same
 * array as v.
 *
 * NOTE(review): _mm_load_ps / _mm_store_ps are the aligned variants, so
 * m, v and dest must be 16-byte aligned -- confirm the mat4 / vec4 typedefs
 * guarantee this.
 *
 * @param[in]  m    matrix (left operand)
 * @param[in]  v    vector (right operand)
 * @param[out] dest result vector
 */
CGLM_INLINE
void
glm_mat4_mulv_sse2(mat4 m, vec4 v, vec4 dest) {
  __m128 vec;
  __m128 term0, term1, term2, term3;
  __m128 lo, hi;

  vec = _mm_load_ps(v);

  /* _mm_shuffle1_ps1(vec, i) is presumably a broadcast of lane i to all
     four lanes (project macro, definition not visible here) */
  term0 = _mm_mul_ps(_mm_load_ps(m[0]), _mm_shuffle1_ps1(vec, 0));
  term1 = _mm_mul_ps(_mm_load_ps(m[1]), _mm_shuffle1_ps1(vec, 1));
  term2 = _mm_mul_ps(_mm_load_ps(m[2]), _mm_shuffle1_ps1(vec, 2));
  term3 = _mm_mul_ps(_mm_load_ps(m[3]), _mm_shuffle1_ps1(vec, 3));

  /* pairwise sums first, then the final add: keeps the exact
     floating-point association (t0 + t1) + (t2 + t3) */
  lo = _mm_add_ps(term0, term1);
  hi = _mm_add_ps(term2, term3);

  _mm_store_ps(dest, _mm_add_ps(lo, hi));
}
CGLM_INLINE
float
glm_mat4_det_sse2(mat4 mat) {

View File

@@ -234,10 +234,16 @@ glm_mat4_mulN(mat4 * __restrict matrices[], int len, mat4 dest) {
CGLM_INLINE
void
glm_mat4_mulv(mat4 m, vec4 v, vec4 dest) {
  /*
   * dest = m * v, i.e. dest[r] = sum over c of m[c][r] * v[c].
   *
   * dest must not be written before v has been fully read, otherwise a
   * call with dest == v would feed partially updated components back into
   * the remaining sums. The SSE helper loads v into a register before it
   * stores, and the scalar path accumulates into a temporary first.
   */
#if defined( __SSE__ ) || defined( __SSE2__ )
  glm_mat4_mulv_sse2(m, v, dest);
#else
  vec4 res;

  res[0] = m[0][0] * v[0] + m[1][0] * v[1] + m[2][0] * v[2] + m[3][0] * v[3];
  res[1] = m[0][1] * v[0] + m[1][1] * v[1] + m[2][1] * v[2] + m[3][1] * v[3];
  res[2] = m[0][2] * v[0] + m[1][2] * v[1] + m[2][2] * v[2] + m[3][2] * v[3];
  res[3] = m[0][3] * v[0] + m[1][3] * v[1] + m[2][3] * v[2] + m[3][3] * v[3];

  /* glm_vec4_dup presumably copies res into dest (helper defined elsewhere) */
  glm_vec4_dup(res, dest);
#endif
}
/*!