diff --git a/.vscode/settings.json b/.vscode/settings.json index 5b62438..c6af4c0 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,5 @@ { "C_Cpp.default.configurationProvider": "vector-of-bool.cmake-tools", - "restructuredtext.confPath": "${workspaceFolder}/docs/source" + "restructuredtext.confPath": "${workspaceFolder}/docs/source", + "workbench.colorTheme": "Visual Studio Dark - C++" } \ No newline at end of file diff --git a/include/cglm/simd/arm.h b/include/cglm/simd/arm.h index 50cec46..8ba5494 100644 --- a/include/cglm/simd/arm.h +++ b/include/cglm/simd/arm.h @@ -12,6 +12,8 @@ #if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || defined(__aarch64__) # define CGLM_ARM64 1 +#else +# define CGLM_ARM64 0 #endif #define glmm_load(p) vld1q_f32(p) @@ -38,6 +40,18 @@ #define glmm_combine_lh(x, y) vcombine_f32(vget_low_f32(x), vget_high_f32(y)) #define glmm_combine_hh(x, y) vcombine_f32(vget_high_f32(x), vget_high_f32(y)) +#if defined(_WIN32) && defined(_MSC_VER) +/* # define glmm_float32x4_init(x, y, z, w) { .n128_f32 = { x, y, z, w } } */ +CGLM_INLINE +float32x4_t +glmm_float32x4_init(float x, float y, float z, float w) { + CGLM_ALIGN(16) float v[4] = {x, y, z, w}; + return vld1q_f32(v); +} +#else +# define glmm_float32x4_init(x, y, z, w) { x, y, z, w } +#endif + static inline float32x4_t glmm_abs(float32x4_t v) { diff --git a/include/cglm/simd/intrin.h b/include/cglm/simd/intrin.h index a6ca5b0..bfdc94e 100644 --- a/include/cglm/simd/intrin.h +++ b/include/cglm/simd/intrin.h @@ -17,7 +17,7 @@ # ifndef __SSE__ # define __SSE__ # endif -# endif +#endif /* do not use alignment for older visual studio versions */ # if _MSC_VER < 1913 /* Visual Studio 2017 version 15.6 */ # define CGLM_ALL_UNALIGNED @@ -63,17 +63,43 @@ #endif /* ARM Neon */ -#if defined(__ARM_NEON) -# include -# if defined(__ARM_NEON_FP) -# define CGLM_NEON_FP 1 +#if defined(_WIN32) +/* TODO: non-ARM stuff already imported, would this be a better option? 
*/ +/* # include */ + +# if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) +# include +# include +# ifndef CGLM_NEON_FP +# define CGLM_NEON_FP 1 +# endif +# ifndef CGLM_SIMD_ARM +# define CGLM_SIMD_ARM +# endif +# elif defined(_M_ARM) +# include +# include +# ifndef CGLM_NEON_FP +# define CGLM_NEON_FP 1 +# endif +# ifndef CGLM_SIMD_ARM +# define CGLM_SIMD_ARM +# endif +# endif + +#else /* non-windows */ +# if defined(__ARM_NEON) || defined(__ARM_NEON__) +# include +# if defined(__ARM_NEON_FP) +# define CGLM_NEON_FP 1 +# endif # ifndef CGLM_SIMD_ARM # define CGLM_SIMD_ARM # endif # endif #endif -#if defined(CGLM_SIMD_x86) || defined(CGLM_NEON_FP) +#if defined(CGLM_SIMD_x86) || defined(CGLM_SIMD_ARM) # ifndef CGLM_SIMD # define CGLM_SIMD # endif diff --git a/include/cglm/simd/neon/affine.h b/include/cglm/simd/neon/affine.h index da0a350..b0a65a6 100644 --- a/include/cglm/simd/neon/affine.h +++ b/include/cglm/simd/neon/affine.h @@ -7,7 +7,7 @@ #ifndef cglm_affine_neon_h #define cglm_affine_neon_h -#if defined(__ARM_NEON_FP) +#if defined(CGLM_NEON_FP) #include "../../common.h" #include "../intrin.h" diff --git a/include/cglm/simd/neon/mat2.h b/include/cglm/simd/neon/mat2.h index 471ebea..7d0d9eb 100644 --- a/include/cglm/simd/neon/mat2.h +++ b/include/cglm/simd/neon/mat2.h @@ -7,7 +7,7 @@ #ifndef cglm_mat2_neon_h #define cglm_mat2_neon_h -#if defined(__ARM_NEON_FP) +#if defined(CGLM_NEON_FP) #include "../../common.h" #include "../intrin.h" diff --git a/include/cglm/simd/neon/mat4.h b/include/cglm/simd/neon/mat4.h index 5b9f014..2d1184e 100644 --- a/include/cglm/simd/neon/mat4.h +++ b/include/cglm/simd/neon/mat4.h @@ -7,7 +7,7 @@ #ifndef cglm_mat4_neon_h #define cglm_mat4_neon_h -#if defined(__ARM_NEON_FP) +#if defined(CGLM_NEON_FP) #include "../../common.h" #include "../intrin.h" @@ -108,7 +108,7 @@ glm_mat4_det_neon(mat4 mat) { float32x4_t r0, r1, r2, r3, x0, x1, x2; float32x2_t ij, op, mn, kl, nn, mm, jj, ii, gh, ef, t12, t34; float32x4x2_t 
a1; - float32x4_t x3 = { 0.f, -0.f, 0.f, -0.f }; + float32x4_t x3 = glmm_float32x4_init(0.f, -0.f, 0.f, -0.f); /* 127 <- 0, [square] det(A) = det(At) */ r0 = glmm_load(mat[0]); /* d c b a */ @@ -181,7 +181,7 @@ glm_mat4_inv_neon(mat4 mat, mat4 dest) { x0, x1, x2, x3, x4, x5, x6, x7, x8; float32x4x2_t a1; float32x2_t lp, ko, hg, jn, im, fe, ae, bf, cg, dh; - float32x4_t x9 = { -0.f, 0.f, -0.f, 0.f }; + float32x4_t x9 = glmm_float32x4_init(-0.f, 0.f, -0.f, 0.f); x8 = vrev64q_f32(x9); diff --git a/include/cglm/simd/neon/quat.h b/include/cglm/simd/neon/quat.h index f6b9e99..fbaf390 100644 --- a/include/cglm/simd/neon/quat.h +++ b/include/cglm/simd/neon/quat.h @@ -7,7 +7,7 @@ #ifndef cglm_quat_neon_h #define cglm_quat_neon_h -#if defined(__ARM_NEON_FP) +#if defined(CGLM_NEON_FP) #include "../../common.h" #include "../intrin.h" @@ -23,7 +23,7 @@ glm_quat_mul_neon(versor p, versor q, versor dest) { */ glmm_128 xp, xq, xqr, r, x, y, z, s2, s3; - glmm_128 s1 = {-0.f, 0.f, 0.f, -0.f}; + glmm_128 s1 = glmm_float32x4_init(-0.f, 0.f, 0.f, -0.f); float32x2_t qh, ql; xp = glmm_load(p); /* 3 2 1 0 */ diff --git a/win/cglm-test.vcxproj b/win/cglm-test.vcxproj index e38d77d..97a3eda 100644 --- a/win/cglm-test.vcxproj +++ b/win/cglm-test.vcxproj @@ -1,10 +1,34 @@ + + Debug + ARM + + + Debug + ARM64 + + + Debug + ARM64EC + Debug Win32 + + Release + ARM + + + Release + ARM64 + + + Release + ARM64EC + Release Win32 @@ -86,6 +110,24 @@ v142 Unicode + + Application + true + v142 + Unicode + + + Application + true + v142 + Unicode + + + Application + true + v142 + Unicode + Application false @@ -93,6 +135,27 @@ true Unicode + + Application + false + v142 + true + Unicode + + + Application + false + v142 + true + Unicode + + + Application + false + v142 + true + Unicode + @@ -107,19 +170,55 @@ + + + + + + + + + + + + + + + + + + false + + false + + + false + + + false + true true + + true + + + true + + + true + false @@ -144,6 +243,69 @@ %(AdditionalDependencies) + + + + + Level3 + 
MaxSpeed + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + ../include;%(AdditionalIncludeDirectories) + + + Console + true + true + true + %(AdditionalDependencies) + + + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + ../include;%(AdditionalIncludeDirectories) + + + Console + true + true + true + %(AdditionalDependencies) + + + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + ../include;%(AdditionalIncludeDirectories) + + + Console + true + true + true + %(AdditionalDependencies) + + @@ -180,6 +342,60 @@ %(AdditionalDependencies) + + + + + Level3 + Disabled + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + ../include;%(AdditionalIncludeDirectories) + Default + + + Console + true + %(AdditionalDependencies) + + + + + + + Level3 + Disabled + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + ../include;%(AdditionalIncludeDirectories) + Default + + + Console + true + %(AdditionalDependencies) + + + + + + + Level3 + Disabled + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + ../include;%(AdditionalIncludeDirectories) + Default + + + Console + true + %(AdditionalDependencies) + + diff --git a/win/cglm.sln b/win/cglm.sln index 04f08b4..22f929b 100644 --- a/win/cglm.sln +++ b/win/cglm.sln @@ -9,24 +9,54 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cglm-test", "cglm-test.vcxp EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|ARM = Debug|ARM + Debug|ARM64 = Debug|ARM64 + Debug|ARM64EC = Debug|ARM64EC Debug|x64 = Debug|x64 Debug|x86 = Debug|x86 + Release|ARM = Release|ARM + Release|ARM64 = Release|ARM64 + Release|ARM64EC = Release|ARM64EC Release|x64 = Release|x64 Release|x86 = Release|x86 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution + {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|ARM.ActiveCfg = Debug|ARM + 
{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|ARM.Build.0 = Debug|ARM + {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|ARM64.Build.0 = Debug|ARM64 + {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|ARM64EC.ActiveCfg = Debug|ARM64EC + {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|ARM64EC.Build.0 = Debug|ARM64EC {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|x64.ActiveCfg = Debug|x64 {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|x64.Build.0 = Debug|x64 {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|x86.ActiveCfg = Debug|Win32 {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|x86.Build.0 = Debug|Win32 + {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|ARM.ActiveCfg = Release|ARM + {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|ARM.Build.0 = Release|ARM + {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|ARM64.ActiveCfg = Release|ARM64 + {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|ARM64.Build.0 = Release|ARM64 + {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|ARM64EC.ActiveCfg = Release|ARM64EC + {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|ARM64EC.Build.0 = Release|ARM64EC {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|x64.ActiveCfg = Release|x64 {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|x64.Build.0 = Release|x64 {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|x86.ActiveCfg = Release|Win32 {CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|x86.Build.0 = Release|Win32 + {200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|ARM.ActiveCfg = Debug|ARM + {200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|ARM.Build.0 = Debug|ARM + {200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|ARM64.Build.0 = Debug|ARM64 + {200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|ARM64EC.ActiveCfg = Debug|ARM64EC + {200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|ARM64EC.Build.0 = Debug|ARM64EC {200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|x64.ActiveCfg = Debug|x64 
{200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|x64.Build.0 = Debug|x64 {200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|x86.ActiveCfg = Debug|Win32 {200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|x86.Build.0 = Debug|Win32 + {200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|ARM.ActiveCfg = Release|ARM + {200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|ARM.Build.0 = Release|ARM + {200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|ARM64.ActiveCfg = Release|ARM64 + {200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|ARM64.Build.0 = Release|ARM64 + {200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|ARM64EC.ActiveCfg = Release|ARM64EC + {200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|ARM64EC.Build.0 = Release|ARM64EC {200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|x64.ActiveCfg = Release|x64 {200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|x64.Build.0 = Release|x64 {200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|x86.ActiveCfg = Release|Win32 diff --git a/win/cglm.vcxproj b/win/cglm.vcxproj index 60c4af0..a0a9282 100644 --- a/win/cglm.vcxproj +++ b/win/cglm.vcxproj @@ -1,10 +1,34 @@ - + + + Debug + ARM + + + Debug + ARM64 + + + Debug + ARM64EC + Debug Win32 + + Release + ARM + + + Release + ARM64 + + + Release + ARM64EC + Release Win32 @@ -235,6 +259,24 @@ v141 Unicode + + DynamicLibrary + true + v141 + Unicode + + + DynamicLibrary + true + v141 + Unicode + + + DynamicLibrary + true + v141 + Unicode + DynamicLibrary false @@ -242,6 +284,27 @@ true Unicode + + DynamicLibrary + false + v141 + true + Unicode + + + DynamicLibrary + false + v141 + true + Unicode + + + DynamicLibrary + false + v141 + true + Unicode + @@ -256,9 +319,27 @@ + + + + + + + + + + + + + + + + + + true @@ -269,6 +350,18 @@ NativeRecommendedRules.ruleset true + + NativeRecommendedRules.ruleset + true + + + NativeRecommendedRules.ruleset + true + + + NativeRecommendedRules.ruleset + true + false NativeRecommendedRules.ruleset @@ -279,6 +372,21 @@ NativeRecommendedRules.ruleset true + + false + NativeRecommendedRules.ruleset + true + + 
+ false + NativeRecommendedRules.ruleset + true + + + false + NativeRecommendedRules.ruleset + true + NotUsing @@ -314,6 +422,63 @@ Windows + + + Level3 + MaxSpeed + _DEBUG;_WINDOWS;_USRDLL;CGLM_EXPORTS;%(PreprocessorDefinitions) + AnySuitable + true + true + + + CompileAsC + None + Default + true + + + Windows + + + + + Level3 + MaxSpeed + _DEBUG;_WINDOWS;_USRDLL;CGLM_EXPORTS;%(PreprocessorDefinitions) + AnySuitable + true + true + + + CompileAsC + None + Default + true + + + Windows + + + + + Level3 + MaxSpeed + _DEBUG;_WINDOWS;_USRDLL;CGLM_EXPORTS;%(PreprocessorDefinitions) + AnySuitable + true + true + + + CompileAsC + None + Default + true + + + Windows + + Level3 @@ -354,7 +519,70 @@ true + + + Level3 + NotUsing + MaxSpeed + true + true + NDEBUG;_WINDOWS;_USRDLL;CGLM_EXPORTS;%(PreprocessorDefinitions) + None + + + CompileAsC + true + Default + + + Windows + true + true + + + + + Level3 + NotUsing + MaxSpeed + true + true + NDEBUG;_WINDOWS;_USRDLL;CGLM_EXPORTS;%(PreprocessorDefinitions) + None + + + CompileAsC + true + Default + + + Windows + true + true + + + + + Level3 + NotUsing + MaxSpeed + true + true + NDEBUG;_WINDOWS;_USRDLL;CGLM_EXPORTS;%(PreprocessorDefinitions) + None + + + CompileAsC + true + Default + + + Windows + true + true + + - \ No newline at end of file +