From 8a117017ea8094ba84188e321245ba295ecac559 Mon Sep 17 00:00:00 2001
From: Recep Aslantas <info@recp.me>
Date: Tue, 7 Mar 2023 13:11:08 +0300
Subject: [PATCH 1/8] fix building ARM NEON on windows & msvc

---
 .vscode/settings.json         |   3 +-
 include/cglm/simd/arm.h       |   6 +
 include/cglm/simd/intrin.h    |   8 +-
 include/cglm/simd/neon/mat4.h |   4 +-
 include/cglm/simd/neon/quat.h |   2 +-
 win/cglm-test.vcxproj         | 224 +++++++++++++++++++++++++++++++-
 win/cglm.sln                  |  34 ++++-
 win/cglm.vcxproj              | 234 +++++++++++++++++++++++++++++++++-
 8 files changed, 501 insertions(+), 14 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 5b62438..c6af4c0 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,4 +1,5 @@
 {
   "C_Cpp.default.configurationProvider": "vector-of-bool.cmake-tools",
-  "restructuredtext.confPath": "${workspaceFolder}/docs/source"
+  "restructuredtext.confPath": "${workspaceFolder}/docs/source",
+  "workbench.colorTheme": "Visual Studio Dark - C++"
 }
\ No newline at end of file
diff --git a/include/cglm/simd/arm.h b/include/cglm/simd/arm.h
index 50cec46..fdb13f0 100644
--- a/include/cglm/simd/arm.h
+++ b/include/cglm/simd/arm.h
@@ -38,6 +38,12 @@
 #define glmm_combine_lh(x, y) vcombine_f32(vget_low_f32(x),  vget_high_f32(y))
 #define glmm_combine_hh(x, y) vcombine_f32(vget_high_f32(x), vget_high_f32(y))
 
+#if defined(_WIN32) && defined(_MSC_VER) /* glmm_float32x4_init: brace initializer for a 4-float vector; the MSVC build goes through the .n128_f32 member */
+#  define glmm_float32x4_init(x, y, z, w) { .n128_f32 = { x, y, z, w } }
+#else /* every other toolchain: plain four-element brace list */
+#  define glmm_float32x4_init(x, y, z, w) { x, y, z, w }
+#endif /* _WIN32 && _MSC_VER */
+
 static inline
 float32x4_t
 glmm_abs(float32x4_t v) {
diff --git a/include/cglm/simd/intrin.h b/include/cglm/simd/intrin.h
index a6ca5b0..8f2fd55 100644
--- a/include/cglm/simd/intrin.h
+++ b/include/cglm/simd/intrin.h
@@ -17,7 +17,13 @@
 #    ifndef __SSE__
 #      define __SSE__
 #    endif
+#  elif defined(_M_ARM64)
+#    ifndef __ARM_NEON
+/* TODO: is this valid? ACLE gives __ARM_NEON the value 1; __ARM_NEON_FP is only tested with defined(), so its value is left unset. */
+#      define __ARM_NEON 1
+#      define __ARM_NEON_FP
+#    endif
 #  endif
 /* do not use alignment for older visual studio versions */
 #  if _MSC_VER < 1913     /* Visual Studio 2017 version 15.6 */
 #    define CGLM_ALL_UNALIGNED
@@ -63,7 +69,7 @@
 #endif
 
 /* ARM Neon */
-#if defined(__ARM_NEON)
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
 #  include <arm_neon.h>
 #  if defined(__ARM_NEON_FP)
 #    define CGLM_NEON_FP 1
diff --git a/include/cglm/simd/neon/mat4.h b/include/cglm/simd/neon/mat4.h
index 5b9f014..6691c4f 100644
--- a/include/cglm/simd/neon/mat4.h
+++ b/include/cglm/simd/neon/mat4.h
@@ -108,7 +108,7 @@ glm_mat4_det_neon(mat4 mat) {
   float32x4_t   r0, r1, r2, r3, x0, x1, x2;
   float32x2_t   ij, op, mn, kl, nn, mm, jj, ii, gh, ef, t12, t34;
   float32x4x2_t a1;
-  float32x4_t   x3 = { 0.f, -0.f, 0.f, -0.f };
+  float32x4_t   x3 = glmm_float32x4_init(0.f, -0.f, 0.f, -0.f);
 
   /* 127 <- 0, [square] det(A) = det(At) */
   r0 = glmm_load(mat[0]);              /* d c b a */
@@ -181,7 +181,7 @@ glm_mat4_inv_neon(mat4 mat, mat4 dest) {
                 x0, x1, x2, x3, x4, x5, x6, x7, x8;
   float32x4x2_t a1;
   float32x2_t   lp, ko, hg, jn, im, fe, ae, bf, cg, dh;
-  float32x4_t   x9 = { -0.f, 0.f, -0.f, 0.f };
+  float32x4_t   x9 = glmm_float32x4_init(-0.f, 0.f, -0.f, 0.f);
 
   x8 = vrev64q_f32(x9);
 
diff --git a/include/cglm/simd/neon/quat.h b/include/cglm/simd/neon/quat.h
index f6b9e99..e5adf61 100644
--- a/include/cglm/simd/neon/quat.h
+++ b/include/cglm/simd/neon/quat.h
@@ -23,7 +23,7 @@ glm_quat_mul_neon(versor p, versor q, versor dest) {
    */
 
   glmm_128 xp, xq, xqr, r, x, y, z, s2, s3;
-  glmm_128 s1 = {-0.f, 0.f, 0.f, -0.f};
+  glmm_128 s1 = glmm_float32x4_init(-0.f, 0.f, 0.f, -0.f);
   float32x2_t   qh, ql;
   
   xp  = glmm_load(p); /* 3 2 1 0 */
diff --git a/win/cglm-test.vcxproj b/win/cglm-test.vcxproj
index e38d77d..bdcdf8a 100644
--- a/win/cglm-test.vcxproj
+++ b/win/cglm-test.vcxproj
@@ -1,10 +1,34 @@
 <?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|ARM">
+      <Configuration>Debug</Configuration>
+      <Platform>ARM</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|ARM64">
+      <Configuration>Debug</Configuration>
+      <Platform>ARM64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|ARM64EC">
+      <Configuration>Debug</Configuration>
+      <Platform>ARM64EC</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Debug|Win32">
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|ARM">
+      <Configuration>Release</Configuration>
+      <Platform>ARM</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|ARM64">
+      <Configuration>Release</Configuration>
+      <Platform>ARM64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|ARM64EC">
+      <Configuration>Release</Configuration>
+      <Platform>ARM64EC</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
@@ -70,26 +94,65 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>v143</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>v143</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64EC'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v142</PlatformToolset>
+    <PlatformToolset>v143</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64EC'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
@@ -107,19 +170,55 @@
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64EC'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64EC'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>false</LinkIncremental>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64EC'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <LinkIncremental>true</LinkIncremental>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>true</LinkIncremental>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64EC'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>false</LinkIncremental>
   </PropertyGroup>
@@ -144,6 +243,69 @@
       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64EC'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <PrecompiledHeader>
@@ -180,6 +342,60 @@
       <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64EC'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <PrecompiledHeader>
diff --git a/win/cglm.sln b/win/cglm.sln
index 04f08b4..b34e272 100644
--- a/win/cglm.sln
+++ b/win/cglm.sln
@@ -1,7 +1,7 @@
 ﻿
 Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 16
-VisualStudioVersion = 16.0.29123.88
+# Visual Studio Version 17
+VisualStudioVersion = 17.6.33417.168
 MinimumVisualStudioVersion = 10.0.40219.1
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cglm", "cglm.vcxproj", "{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}"
 EndProject
@@ -9,24 +9,54 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cglm-test", "cglm-test.vcxp
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|ARM = Debug|ARM
+		Debug|ARM64 = Debug|ARM64
+		Debug|ARM64EC = Debug|ARM64EC
 		Debug|x64 = Debug|x64
 		Debug|x86 = Debug|x86
+		Release|ARM = Release|ARM
+		Release|ARM64 = Release|ARM64
+		Release|ARM64EC = Release|ARM64EC
 		Release|x64 = Release|x64
 		Release|x86 = Release|x86
 	EndGlobalSection
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|ARM.ActiveCfg = Debug|ARM
+		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|ARM.Build.0 = Debug|ARM
+		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|ARM64.Build.0 = Debug|ARM64
+		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|ARM64EC.ActiveCfg = Debug|ARM64EC
+		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|ARM64EC.Build.0 = Debug|ARM64EC
 		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|x64.ActiveCfg = Debug|x64
 		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|x64.Build.0 = Debug|x64
 		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|x86.ActiveCfg = Debug|Win32
 		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Debug|x86.Build.0 = Debug|Win32
+		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|ARM.ActiveCfg = Release|ARM
+		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|ARM.Build.0 = Release|ARM
+		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|ARM64.ActiveCfg = Release|ARM64
+		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|ARM64.Build.0 = Release|ARM64
+		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|ARM64EC.ActiveCfg = Release|ARM64EC
+		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|ARM64EC.Build.0 = Release|ARM64EC
 		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|x64.ActiveCfg = Release|x64
 		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|x64.Build.0 = Release|x64
 		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|x86.ActiveCfg = Release|Win32
 		{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}.Release|x86.Build.0 = Release|Win32
+		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|ARM.ActiveCfg = Debug|ARM
+		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|ARM.Build.0 = Debug|ARM
+		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|ARM64.Build.0 = Debug|ARM64
+		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|ARM64EC.ActiveCfg = Debug|ARM64EC
+		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|ARM64EC.Build.0 = Debug|ARM64EC
 		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|x64.ActiveCfg = Debug|x64
 		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|x64.Build.0 = Debug|x64
 		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|x86.ActiveCfg = Debug|Win32
 		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Debug|x86.Build.0 = Debug|Win32
+		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|ARM.ActiveCfg = Release|ARM
+		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|ARM.Build.0 = Release|ARM
+		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|ARM64.ActiveCfg = Release|ARM64
+		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|ARM64.Build.0 = Release|ARM64
+		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|ARM64EC.ActiveCfg = Release|ARM64EC
+		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|ARM64EC.Build.0 = Release|ARM64EC
 		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|x64.ActiveCfg = Release|x64
 		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|x64.Build.0 = Release|x64
 		{200E0DF1-7532-44E6-8273-84FB92C5557E}.Release|x86.ActiveCfg = Release|Win32
diff --git a/win/cglm.vcxproj b/win/cglm.vcxproj
index 60c4af0..fea6218 100644
--- a/win/cglm.vcxproj
+++ b/win/cglm.vcxproj
@@ -1,10 +1,34 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|ARM">
+      <Configuration>Debug</Configuration>
+      <Platform>ARM</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|ARM64">
+      <Configuration>Debug</Configuration>
+      <Platform>ARM64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|ARM64EC">
+      <Configuration>Debug</Configuration>
+      <Platform>ARM64EC</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Debug|Win32">
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|ARM">
+      <Configuration>Release</Configuration>
+      <Platform>ARM</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|ARM64">
+      <Configuration>Release</Configuration>
+      <Platform>ARM64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|ARM64EC">
+      <Configuration>Release</Configuration>
+      <Platform>ARM64EC</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
@@ -213,7 +237,7 @@
     <ProjectGuid>{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>cglm</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
@@ -235,6 +259,24 @@
     <PlatformToolset>v141</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64EC'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset> <!-- ARM64EC is not available in the v141 toolset; matches cglm-test.vcxproj -->
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
@@ -242,6 +284,27 @@
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64EC'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset> <!-- ARM64EC is not available in the v141 toolset; matches cglm-test.vcxproj -->
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
   </ImportGroup>
@@ -256,9 +319,27 @@
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64EC'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64EC'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <LinkIncremental>true</LinkIncremental>
@@ -269,6 +350,18 @@
     <CodeAnalysisRuleSet>NativeRecommendedRules.ruleset</CodeAnalysisRuleSet>
     <RunCodeAnalysis>true</RunCodeAnalysis>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">
+    <CodeAnalysisRuleSet>NativeRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64EC'">
+    <CodeAnalysisRuleSet>NativeRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
+    <CodeAnalysisRuleSet>NativeRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>false</LinkIncremental>
     <CodeAnalysisRuleSet>NativeRecommendedRules.ruleset</CodeAnalysisRuleSet>
@@ -279,6 +372,21 @@
     <CodeAnalysisRuleSet>NativeRecommendedRules.ruleset</CodeAnalysisRuleSet>
     <RunCodeAnalysis>true</RunCodeAnalysis>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">
+    <LinkIncremental>false</LinkIncremental>
+    <CodeAnalysisRuleSet>NativeRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64EC'">
+    <LinkIncremental>false</LinkIncremental>
+    <CodeAnalysisRuleSet>NativeRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
+    <LinkIncremental>false</LinkIncremental>
+    <CodeAnalysisRuleSet>NativeRecommendedRules.ruleset</CodeAnalysisRuleSet>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
@@ -314,6 +422,63 @@
       <SubSystem>Windows</SubSystem>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <PreprocessorDefinitions>_DEBUG;_WINDOWS;_USRDLL;CGLM_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <CompileAs>CompileAsC</CompileAs>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <EnablePREfast>true</EnablePREfast>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64EC'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <PreprocessorDefinitions>_DEBUG;_WINDOWS;_USRDLL;CGLM_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <CompileAs>CompileAsC</CompileAs>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <EnablePREfast>true</EnablePREfast>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <PreprocessorDefinitions>_DEBUG;_WINDOWS;_USRDLL;CGLM_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <CompileAs>CompileAsC</CompileAs>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <EnablePREfast>true</EnablePREfast>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
@@ -354,7 +519,70 @@
       <OptimizeReferences>true</OptimizeReferences>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;_WINDOWS;_USRDLL;CGLM_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <CompileAs>CompileAsC</CompileAs>
+      <EnablePREfast>true</EnablePREfast>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64EC'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;_WINDOWS;_USRDLL;CGLM_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <CompileAs>CompileAsC</CompileAs>
+      <EnablePREfast>true</EnablePREfast>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;_WINDOWS;_USRDLL;CGLM_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <DebugInformationFormat>None</DebugInformationFormat>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <CompileAs>CompileAsC</CompileAs>
+      <EnablePREfast>true</EnablePREfast>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>

From a30baffafa0b5f0536098bedbe3896f434706512 Mon Sep 17 00:00:00 2001
From: Recep Aslantas <info@recp.me>
Date: Thu, 9 Mar 2023 21:56:25 +0300
Subject: [PATCH 2/8] arm: update ARM/NEON macros

---
 include/cglm/simd/arm.h    |  4 ----
 include/cglm/simd/intrin.h | 20 ++++++++++++++------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/include/cglm/simd/arm.h b/include/cglm/simd/arm.h
index fdb13f0..676270c 100644
--- a/include/cglm/simd/arm.h
+++ b/include/cglm/simd/arm.h
@@ -10,10 +10,6 @@
 #include "intrin.h"
 #ifdef CGLM_SIMD_ARM
 
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || defined(__aarch64__)
-# define CGLM_ARM64 1
-#endif
-
 #define glmm_load(p)      vld1q_f32(p)
 #define glmm_store(p, a)  vst1q_f32(p, a)
 
diff --git a/include/cglm/simd/intrin.h b/include/cglm/simd/intrin.h
index 8f2fd55..1d955d0 100644
--- a/include/cglm/simd/intrin.h
+++ b/include/cglm/simd/intrin.h
@@ -69,13 +69,21 @@
 #endif
 
 /* ARM Neon */
-#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+/* TODO: check _M_ARM and compiling should work if there is no ARM64 and NEON */
+#if defined(__ARM_NEON) || defined(__ARM_NEON__) \
+    || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || defined(__aarch64__)
 #  include <arm_neon.h>
-#  if defined(__ARM_NEON_FP)
-#    define CGLM_NEON_FP 1
-#    ifndef CGLM_SIMD_ARM
-#      define CGLM_SIMD_ARM
-#    endif
+#  ifndef __ARM_NEON
+#    define __ARM_NEON
+#  endif
+#  ifndef __ARM_NEON_FP
+#    define __ARM_NEON_FP 1
+#  endif
+#  ifndef CGLM_ARM64
+#    define CGLM_ARM64 1
+#  endif
+#  ifndef CGLM_SIMD_ARM
+#    define CGLM_SIMD_ARM
 #  endif
 #endif
 

From 9ed325018d51775b337caaf91c29fd74106e0a28 Mon Sep 17 00:00:00 2001
From: Recep Aslantas <info@recp.me>
Date: Thu, 9 Mar 2023 22:06:20 +0300
Subject: [PATCH 3/8] simd: remove redundant _M_ARM64 NEON defines from intrin.h

---
 include/cglm/simd/intrin.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/include/cglm/simd/intrin.h b/include/cglm/simd/intrin.h
index 1d955d0..f4c0fe9 100644
--- a/include/cglm/simd/intrin.h
+++ b/include/cglm/simd/intrin.h
@@ -17,12 +17,6 @@
 #    ifndef __SSE__
 #      define __SSE__
 #    endif
-#  elif defined(_M_ARM64)
-#  ifndef __ARM_NEON
-/* TODO: is this valid */
-#    define __ARM_NEON
-#    define __ARM_NEON_FP
-#  endif
 #endif
 /* do not use alignment for older visual studio versions */
 #  if _MSC_VER < 1913     /* Visual Studio 2017 version 15.6 */

From 4a6fc485fd458e7f43d8f4868767501ccf42fb5c Mon Sep 17 00:00:00 2001
From: Recep Aslantas <info@recp.me>
Date: Thu, 9 Mar 2023 22:16:39 +0300
Subject: [PATCH 4/8] use CGLM_SIMD_NEON instead of CGLM_NEON_FP

---
 include/cglm/affine-mat.h       |  8 +++----
 include/cglm/mat2.h             |  6 +++---
 include/cglm/mat4.h             | 20 ++++++++---------
 include/cglm/quat.h             |  4 ++--
 include/cglm/simd/intrin.h      |  6 +++++-
 include/cglm/simd/neon/affine.h |  2 +-
 include/cglm/simd/neon/mat2.h   |  2 +-
 include/cglm/simd/neon/mat4.h   |  2 +-
 include/cglm/simd/neon/quat.h   |  2 +-
 include/cglm/vec4-ext.h         |  2 +-
 include/cglm/vec4.h             | 38 ++++++++++++++++-----------------
 11 files changed, 48 insertions(+), 44 deletions(-)

diff --git a/include/cglm/affine-mat.h b/include/cglm/affine-mat.h
index 75607e7..51b5742 100644
--- a/include/cglm/affine-mat.h
+++ b/include/cglm/affine-mat.h
@@ -26,7 +26,7 @@
 #  include "simd/avx/affine.h"
 #endif
 
-#ifdef CGLM_NEON_FP
+#ifdef CGLM_SIMD_NEON
 #  include "simd/neon/affine.h"
 #endif
 
@@ -53,7 +53,7 @@ glm_mul(mat4 m1, mat4 m2, mat4 dest) {
   glm_mul_avx(m1, m2, dest);
 #elif defined( __SSE__ ) || defined( __SSE2__ )
   glm_mul_sse2(m1, m2, dest);
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   glm_mul_neon(m1, m2, dest);
 #else
   float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2], a03 = m1[0][3],
@@ -109,7 +109,7 @@ void
 glm_mul_rot(mat4 m1, mat4 m2, mat4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_mul_rot_sse2(m1, m2, dest);
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   glm_mul_rot_neon(m1, m2, dest);
 #else
   float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2], a03 = m1[0][3],
@@ -158,7 +158,7 @@ void
 glm_inv_tr(mat4 mat) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_inv_tr_sse2(mat);
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   glm_inv_tr_neon(mat);
 #else
   CGLM_ALIGN_MAT mat3 r;
diff --git a/include/cglm/mat2.h b/include/cglm/mat2.h
index 871d6bd..f76382b 100644
--- a/include/cglm/mat2.h
+++ b/include/cglm/mat2.h
@@ -40,7 +40,7 @@
 #  include "simd/sse2/mat2.h"
 #endif
 
-#ifdef CGLM_NEON_FP
+#ifdef CGLM_SIMD_NEON
 #  include "simd/neon/mat2.h"
 #endif
 
@@ -134,7 +134,7 @@ void
 glm_mat2_mul(mat2 m1, mat2 m2, mat2 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat2_mul_sse2(m1, m2, dest);
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   glm_mat2_mul_neon(m1, m2, dest);
 #else
   float a00 = m1[0][0], a01 = m1[0][1],
@@ -224,7 +224,7 @@ void
 glm_mat2_scale(mat2 m, float s) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(m[0], _mm_mul_ps(_mm_loadu_ps(m[0]), _mm_set1_ps(s)));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(m[0], vmulq_f32(vld1q_f32(m[0]), vdupq_n_f32(s)));
 #else
   m[0][0] = m[0][0] * s;
diff --git a/include/cglm/mat4.h b/include/cglm/mat4.h
index c7c8abd..b73c888 100644
--- a/include/cglm/mat4.h
+++ b/include/cglm/mat4.h
@@ -60,7 +60,7 @@
 #  include "simd/avx/mat4.h"
 #endif
 
-#ifdef CGLM_NEON_FP
+#ifdef CGLM_SIMD_NEON
 #  include "simd/neon/mat4.h"
 #endif
 
@@ -129,7 +129,7 @@ glm_mat4_copy(mat4 mat, mat4 dest) {
   glmm_store(dest[1], glmm_load(mat[1]));
   glmm_store(dest[2], glmm_load(mat[2]));
   glmm_store(dest[3], glmm_load(mat[3]));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest[0], vld1q_f32(mat[0]));
   vst1q_f32(dest[1], vld1q_f32(mat[1]));
   vst1q_f32(dest[2], vld1q_f32(mat[2]));
@@ -199,7 +199,7 @@ glm_mat4_zero(mat4 mat) {
   glmm_store(mat[1], x0);
   glmm_store(mat[2], x0);
   glmm_store(mat[3], x0);
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   glmm_128 x0;
   x0 = vdupq_n_f32(0.0f);
   vst1q_f32(mat[0], x0);
@@ -301,7 +301,7 @@ glm_mat4_mul(mat4 m1, mat4 m2, mat4 dest) {
   glm_mat4_mul_avx(m1, m2, dest);
 #elif defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat4_mul_sse2(m1, m2, dest);
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   glm_mat4_mul_neon(m1, m2, dest);
 #else
   float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2], a03 = m1[0][3],
@@ -379,7 +379,7 @@ void
 glm_mat4_mulv(mat4 m, vec4 v, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat4_mulv_sse2(m, v, dest);
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   glm_mat4_mulv_neon(m, v, dest);
 #else
   vec4 res;
@@ -499,7 +499,7 @@ void
 glm_mat4_transpose_to(mat4 m, mat4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat4_transp_sse2(m, dest);
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   glm_mat4_transp_neon(m, dest);
 #else
   dest[0][0] = m[0][0]; dest[1][0] = m[0][1];
@@ -523,7 +523,7 @@ void
 glm_mat4_transpose(mat4 m) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat4_transp_sse2(m, m);
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   glm_mat4_transp_neon(m, m);
 #else
   mat4 d;
@@ -564,7 +564,7 @@ glm_mat4_scale(mat4 m, float s) {
   glm_mat4_scale_avx(m, s);
 #elif defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat4_scale_sse2(m, s);
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   glm_mat4_scale_neon(m, s);
 #else
   glm_mat4_scale_p(m, s);
@@ -583,7 +583,7 @@ float
 glm_mat4_det(mat4 mat) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   return glm_mat4_det_sse2(mat);
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   return glm_mat4_det_neon(mat);
 #else
   /* [square] det(A) = det(At) */
@@ -618,7 +618,7 @@ void
 glm_mat4_inv(mat4 mat, mat4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat4_inv_sse2(mat, dest);
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   glm_mat4_inv_neon(mat, dest);
 #else
   float t[6];
diff --git a/include/cglm/quat.h b/include/cglm/quat.h
index c76fa03..9488e23 100644
--- a/include/cglm/quat.h
+++ b/include/cglm/quat.h
@@ -66,7 +66,7 @@
 #  include "simd/sse2/quat.h"
 #endif
 
-#ifdef CGLM_NEON_FP
+#ifdef CGLM_SIMD_NEON
 #  include "simd/neon/quat.h"
 #endif
 
@@ -440,7 +440,7 @@ glm_quat_mul(versor p, versor q, versor dest) {
    */
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_quat_mul_sse2(p, q, dest);
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   glm_quat_mul_neon(p, q, dest);
 #else
   dest[0] = p[3] * q[0] + p[0] * q[3] + p[1] * q[2] - p[2] * q[1];
diff --git a/include/cglm/simd/intrin.h b/include/cglm/simd/intrin.h
index f4c0fe9..73fb675 100644
--- a/include/cglm/simd/intrin.h
+++ b/include/cglm/simd/intrin.h
@@ -72,6 +72,7 @@
 #  endif
 #  ifndef __ARM_NEON_FP
 #    define __ARM_NEON_FP 1
+#    define CGLM_NEON_FP 1
 #  endif
 #  ifndef CGLM_ARM64
 #    define CGLM_ARM64 1
@@ -79,9 +80,12 @@
 #  ifndef CGLM_SIMD_ARM
 #    define CGLM_SIMD_ARM
 #  endif
+#  ifndef CGLM_SIMD_NEON
+#    define CGLM_SIMD_NEON 1
+#  endif
 #endif
 
-#if defined(CGLM_SIMD_x86) || defined(CGLM_NEON_FP)
+#if defined(CGLM_SIMD_x86) || defined(CGLM_SIMD_NEON)
 #  ifndef CGLM_SIMD
 #    define CGLM_SIMD
 #  endif
diff --git a/include/cglm/simd/neon/affine.h b/include/cglm/simd/neon/affine.h
index da0a350..e55ea6f 100644
--- a/include/cglm/simd/neon/affine.h
+++ b/include/cglm/simd/neon/affine.h
@@ -7,7 +7,7 @@
 
 #ifndef cglm_affine_neon_h
 #define cglm_affine_neon_h
-#if defined(__ARM_NEON_FP)
+#if defined(CGLM_SIMD_NEON)
 
 #include "../../common.h"
 #include "../intrin.h"
diff --git a/include/cglm/simd/neon/mat2.h b/include/cglm/simd/neon/mat2.h
index 471ebea..d73e411 100644
--- a/include/cglm/simd/neon/mat2.h
+++ b/include/cglm/simd/neon/mat2.h
@@ -7,7 +7,7 @@
 
 #ifndef cglm_mat2_neon_h
 #define cglm_mat2_neon_h
-#if defined(__ARM_NEON_FP)
+#if defined(CGLM_SIMD_NEON)
 
 #include "../../common.h"
 #include "../intrin.h"
diff --git a/include/cglm/simd/neon/mat4.h b/include/cglm/simd/neon/mat4.h
index 6691c4f..e9f3f8a 100644
--- a/include/cglm/simd/neon/mat4.h
+++ b/include/cglm/simd/neon/mat4.h
@@ -7,7 +7,7 @@
 
 #ifndef cglm_mat4_neon_h
 #define cglm_mat4_neon_h
-#if defined(__ARM_NEON_FP)
+#if defined(CGLM_SIMD_NEON)
 
 #include "../../common.h"
 #include "../intrin.h"
diff --git a/include/cglm/simd/neon/quat.h b/include/cglm/simd/neon/quat.h
index e5adf61..f73988d 100644
--- a/include/cglm/simd/neon/quat.h
+++ b/include/cglm/simd/neon/quat.h
@@ -7,7 +7,7 @@
 
 #ifndef cglm_quat_neon_h
 #define cglm_quat_neon_h
-#if defined(__ARM_NEON_FP)
+#if defined(CGLM_SIMD_NEON)
 
 #include "../../common.h"
 #include "../intrin.h"
diff --git a/include/cglm/vec4-ext.h b/include/cglm/vec4-ext.h
index e4e20cb..43d7214 100644
--- a/include/cglm/vec4-ext.h
+++ b/include/cglm/vec4-ext.h
@@ -249,7 +249,7 @@ void
 glm_vec4_abs(vec4 v, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, glmm_abs(glmm_load(v)));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest, vabsq_f32(vld1q_f32(v)));
 #else
   dest[0] = fabsf(v[0]);
diff --git a/include/cglm/vec4.h b/include/cglm/vec4.h
index 8e95ec5..2272556 100644
--- a/include/cglm/vec4.h
+++ b/include/cglm/vec4.h
@@ -139,7 +139,7 @@ void
 glm_vec4_copy(vec4 v, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, glmm_load(v));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest, vld1q_f32(v));
 #else
   dest[0] = v[0];
@@ -176,7 +176,7 @@ void
 glm_vec4_zero(vec4 v) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(v, _mm_setzero_ps());
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(v, vdupq_n_f32(0.0f));
 #else
   v[0] = 0.0f;
@@ -196,7 +196,7 @@ void
 glm_vec4_one(vec4 v) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(v, _mm_set1_ps(1.0f));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(v, vdupq_n_f32(1.0f));
 #else
   v[0] = 1.0f;
@@ -322,7 +322,7 @@ void
 glm_vec4_add(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_add_ps(glmm_load(a), glmm_load(b)));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest, vaddq_f32(vld1q_f32(a), vld1q_f32(b)));
 #else
   dest[0] = a[0] + b[0];
@@ -344,7 +344,7 @@ void
 glm_vec4_adds(vec4 v, float s, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_add_ps(glmm_load(v), _mm_set1_ps(s)));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest, vaddq_f32(vld1q_f32(v), vdupq_n_f32(s)));
 #else
   dest[0] = v[0] + s;
@@ -366,7 +366,7 @@ void
 glm_vec4_sub(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_sub_ps(glmm_load(a), glmm_load(b)));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest, vsubq_f32(vld1q_f32(a), vld1q_f32(b)));
 #else
   dest[0] = a[0] - b[0];
@@ -388,7 +388,7 @@ void
 glm_vec4_subs(vec4 v, float s, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_sub_ps(glmm_load(v), _mm_set1_ps(s)));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest, vsubq_f32(vld1q_f32(v), vdupq_n_f32(s)));
 #else
   dest[0] = v[0] - s;
@@ -410,7 +410,7 @@ void
 glm_vec4_mul(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_mul_ps(glmm_load(a), glmm_load(b)));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest, vmulq_f32(vld1q_f32(a), vld1q_f32(b)));
 #else
   dest[0] = a[0] * b[0];
@@ -432,7 +432,7 @@ void
 glm_vec4_scale(vec4 v, float s, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_mul_ps(glmm_load(v), _mm_set1_ps(s)));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest, vmulq_f32(vld1q_f32(v), vdupq_n_f32(s)));
 #else
   dest[0] = v[0] * s;
@@ -516,7 +516,7 @@ glm_vec4_addadd(vec4 a, vec4 b, vec4 dest) {
   glmm_store(dest, _mm_add_ps(glmm_load(dest),
                               _mm_add_ps(glmm_load(a),
                                          glmm_load(b))));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
                             vaddq_f32(vld1q_f32(a),
                                       vld1q_f32(b))));
@@ -544,7 +544,7 @@ glm_vec4_subadd(vec4 a, vec4 b, vec4 dest) {
   glmm_store(dest, _mm_add_ps(glmm_load(dest),
                               _mm_sub_ps(glmm_load(a),
                                          glmm_load(b))));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
                             vsubq_f32(vld1q_f32(a),
                                       vld1q_f32(b))));
@@ -616,7 +616,7 @@ glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) {
   glmm_store(dest, _mm_add_ps(glmm_load(dest),
                               _mm_max_ps(glmm_load(a),
                                          glmm_load(b))));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
                             vmaxq_f32(vld1q_f32(a),
                                       vld1q_f32(b))));
@@ -644,7 +644,7 @@ glm_vec4_minadd(vec4 a, vec4 b, vec4 dest) {
   glmm_store(dest, _mm_add_ps(glmm_load(dest),
                               _mm_min_ps(glmm_load(a),
                                          glmm_load(b))));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
                             vminq_f32(vld1q_f32(a),
                                       vld1q_f32(b))));
@@ -667,7 +667,7 @@ void
 glm_vec4_negate_to(vec4 v, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_xor_ps(glmm_load(v), _mm_set1_ps(-0.0f)));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest, vnegq_f32(vld1q_f32(v)));
 #else
   dest[0] = -v[0];
@@ -748,7 +748,7 @@ float
 glm_vec4_distance(vec4 a, vec4 b) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   return glmm_norm(_mm_sub_ps(glmm_load(a), glmm_load(b)));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   return glmm_norm(vsubq_f32(glmm_load(a), glmm_load(b)));
 #else
   return sqrtf(glm_pow2(a[0] - b[0])
@@ -770,7 +770,7 @@ float
 glm_vec4_distance2(vec4 a, vec4 b) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   return glmm_norm2(_mm_sub_ps(glmm_load(a), glmm_load(b)));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   return glmm_norm2(vsubq_f32(glmm_load(a), glmm_load(b)));
 #else
   return glm_pow2(a[0] - b[0])
@@ -792,7 +792,7 @@ void
 glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_max_ps(glmm_load(a), glmm_load(b)));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest, vmaxq_f32(vld1q_f32(a), vld1q_f32(b)));
 #else
   dest[0] = glm_max(a[0], b[0]);
@@ -814,7 +814,7 @@ void
 glm_vec4_minv(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_min_ps(glmm_load(a), glmm_load(b)));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(dest, vminq_f32(vld1q_f32(a), vld1q_f32(b)));
 #else
   dest[0] = glm_min(a[0], b[0]);
@@ -837,7 +837,7 @@ glm_vec4_clamp(vec4 v, float minVal, float maxVal) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(v, _mm_min_ps(_mm_max_ps(glmm_load(v), _mm_set1_ps(minVal)),
                            _mm_set1_ps(maxVal)));
-#elif defined(CGLM_NEON_FP)
+#elif defined(CGLM_SIMD_NEON)
   vst1q_f32(v, vminq_f32(vmaxq_f32(vld1q_f32(v), vdupq_n_f32(minVal)),
                          vdupq_n_f32(maxVal)));
 #else

From ba993b3ea9937c4d5e6cefd193f29e7c2e85ba56 Mon Sep 17 00:00:00 2001
From: Recep Aslantas <info@recp.me>
Date: Sat, 11 Mar 2023 14:15:30 +0300
Subject: [PATCH 5/8] arm: use intrin to set/init vec4 as @gottfriedleibniz
 suggests

---
 include/cglm/simd/arm.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/cglm/simd/arm.h b/include/cglm/simd/arm.h
index 676270c..01525d3 100644
--- a/include/cglm/simd/arm.h
+++ b/include/cglm/simd/arm.h
@@ -35,7 +35,13 @@
 #define glmm_combine_hh(x, y) vcombine_f32(vget_high_f32(x), vget_high_f32(y))
 
 #if defined(_WIN32) && defined(_MSC_VER)
-#  define glmm_float32x4_init(x, y, z, w) { .n128_f32 = { x, y, z, w } }
+/* #  define glmm_float32x4_init(x, y, z, w) { .n128_f32 = { x, y, z, w } } */
+CGLM_INLINE
+float32x4_t
+glmm_float32x4_init(float x, float y, float z, float w) {
+  CGLM_ALIGN(16) float v[4] = {x, y, z, w};
+  return vld1q_f32(v);
+}
 #else
 #  define glmm_float32x4_init(x, y, z, w) { x, y, z, w }
 #endif

From 13ed79a61a5d79831babeecf2570a43f407bb027 Mon Sep 17 00:00:00 2001
From: Recep Aslantas <info@recp.me>
Date: Sun, 12 Mar 2023 16:43:47 +0300
Subject: [PATCH 6/8] arm: fix checking arm64

---
 include/cglm/affine-mat.h       |  8 ++---
 include/cglm/mat2.h             |  6 ++--
 include/cglm/mat4.h             | 20 ++++++-------
 include/cglm/quat.h             |  4 +--
 include/cglm/simd/arm.h         |  6 ++++
 include/cglm/simd/intrin.h      | 52 +++++++++++++++++++++------------
 include/cglm/simd/neon/affine.h |  2 +-
 include/cglm/simd/neon/mat2.h   |  2 +-
 include/cglm/simd/neon/mat4.h   |  2 +-
 include/cglm/simd/neon/quat.h   |  2 +-
 include/cglm/vec4-ext.h         |  2 +-
 include/cglm/vec4.h             | 38 ++++++++++++------------
 12 files changed, 82 insertions(+), 62 deletions(-)

diff --git a/include/cglm/affine-mat.h b/include/cglm/affine-mat.h
index 51b5742..75607e7 100644
--- a/include/cglm/affine-mat.h
+++ b/include/cglm/affine-mat.h
@@ -26,7 +26,7 @@
 #  include "simd/avx/affine.h"
 #endif
 
-#ifdef CGLM_SIMD_NEON
+#ifdef CGLM_NEON_FP
 #  include "simd/neon/affine.h"
 #endif
 
@@ -53,7 +53,7 @@ glm_mul(mat4 m1, mat4 m2, mat4 dest) {
   glm_mul_avx(m1, m2, dest);
 #elif defined( __SSE__ ) || defined( __SSE2__ )
   glm_mul_sse2(m1, m2, dest);
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   glm_mul_neon(m1, m2, dest);
 #else
   float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2], a03 = m1[0][3],
@@ -109,7 +109,7 @@ void
 glm_mul_rot(mat4 m1, mat4 m2, mat4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_mul_rot_sse2(m1, m2, dest);
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   glm_mul_rot_neon(m1, m2, dest);
 #else
   float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2], a03 = m1[0][3],
@@ -158,7 +158,7 @@ void
 glm_inv_tr(mat4 mat) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_inv_tr_sse2(mat);
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   glm_inv_tr_neon(mat);
 #else
   CGLM_ALIGN_MAT mat3 r;
diff --git a/include/cglm/mat2.h b/include/cglm/mat2.h
index f76382b..871d6bd 100644
--- a/include/cglm/mat2.h
+++ b/include/cglm/mat2.h
@@ -40,7 +40,7 @@
 #  include "simd/sse2/mat2.h"
 #endif
 
-#ifdef CGLM_SIMD_NEON
+#ifdef CGLM_NEON_FP
 #  include "simd/neon/mat2.h"
 #endif
 
@@ -134,7 +134,7 @@ void
 glm_mat2_mul(mat2 m1, mat2 m2, mat2 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat2_mul_sse2(m1, m2, dest);
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   glm_mat2_mul_neon(m1, m2, dest);
 #else
   float a00 = m1[0][0], a01 = m1[0][1],
@@ -224,7 +224,7 @@ void
 glm_mat2_scale(mat2 m, float s) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(m[0], _mm_mul_ps(_mm_loadu_ps(m[0]), _mm_set1_ps(s)));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(m[0], vmulq_f32(vld1q_f32(m[0]), vdupq_n_f32(s)));
 #else
   m[0][0] = m[0][0] * s;
diff --git a/include/cglm/mat4.h b/include/cglm/mat4.h
index b73c888..c7c8abd 100644
--- a/include/cglm/mat4.h
+++ b/include/cglm/mat4.h
@@ -60,7 +60,7 @@
 #  include "simd/avx/mat4.h"
 #endif
 
-#ifdef CGLM_SIMD_NEON
+#ifdef CGLM_NEON_FP
 #  include "simd/neon/mat4.h"
 #endif
 
@@ -129,7 +129,7 @@ glm_mat4_copy(mat4 mat, mat4 dest) {
   glmm_store(dest[1], glmm_load(mat[1]));
   glmm_store(dest[2], glmm_load(mat[2]));
   glmm_store(dest[3], glmm_load(mat[3]));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest[0], vld1q_f32(mat[0]));
   vst1q_f32(dest[1], vld1q_f32(mat[1]));
   vst1q_f32(dest[2], vld1q_f32(mat[2]));
@@ -199,7 +199,7 @@ glm_mat4_zero(mat4 mat) {
   glmm_store(mat[1], x0);
   glmm_store(mat[2], x0);
   glmm_store(mat[3], x0);
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   glmm_128 x0;
   x0 = vdupq_n_f32(0.0f);
   vst1q_f32(mat[0], x0);
@@ -301,7 +301,7 @@ glm_mat4_mul(mat4 m1, mat4 m2, mat4 dest) {
   glm_mat4_mul_avx(m1, m2, dest);
 #elif defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat4_mul_sse2(m1, m2, dest);
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   glm_mat4_mul_neon(m1, m2, dest);
 #else
   float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2], a03 = m1[0][3],
@@ -379,7 +379,7 @@ void
 glm_mat4_mulv(mat4 m, vec4 v, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat4_mulv_sse2(m, v, dest);
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   glm_mat4_mulv_neon(m, v, dest);
 #else
   vec4 res;
@@ -499,7 +499,7 @@ void
 glm_mat4_transpose_to(mat4 m, mat4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat4_transp_sse2(m, dest);
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   glm_mat4_transp_neon(m, dest);
 #else
   dest[0][0] = m[0][0]; dest[1][0] = m[0][1];
@@ -523,7 +523,7 @@ void
 glm_mat4_transpose(mat4 m) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat4_transp_sse2(m, m);
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   glm_mat4_transp_neon(m, m);
 #else
   mat4 d;
@@ -564,7 +564,7 @@ glm_mat4_scale(mat4 m, float s) {
   glm_mat4_scale_avx(m, s);
 #elif defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat4_scale_sse2(m, s);
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   glm_mat4_scale_neon(m, s);
 #else
   glm_mat4_scale_p(m, s);
@@ -583,7 +583,7 @@ float
 glm_mat4_det(mat4 mat) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   return glm_mat4_det_sse2(mat);
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   return glm_mat4_det_neon(mat);
 #else
   /* [square] det(A) = det(At) */
@@ -618,7 +618,7 @@ void
 glm_mat4_inv(mat4 mat, mat4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat4_inv_sse2(mat, dest);
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   glm_mat4_inv_neon(mat, dest);
 #else
   float t[6];
diff --git a/include/cglm/quat.h b/include/cglm/quat.h
index 9488e23..c76fa03 100644
--- a/include/cglm/quat.h
+++ b/include/cglm/quat.h
@@ -66,7 +66,7 @@
 #  include "simd/sse2/quat.h"
 #endif
 
-#ifdef CGLM_SIMD_NEON
+#ifdef CGLM_NEON_FP
 #  include "simd/neon/quat.h"
 #endif
 
@@ -440,7 +440,7 @@ glm_quat_mul(versor p, versor q, versor dest) {
    */
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_quat_mul_sse2(p, q, dest);
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   glm_quat_mul_neon(p, q, dest);
 #else
   dest[0] = p[3] * q[0] + p[0] * q[3] + p[1] * q[2] - p[2] * q[1];
diff --git a/include/cglm/simd/arm.h b/include/cglm/simd/arm.h
index 01525d3..8ba5494 100644
--- a/include/cglm/simd/arm.h
+++ b/include/cglm/simd/arm.h
@@ -10,6 +10,12 @@
 #include "intrin.h"
 #ifdef CGLM_SIMD_ARM
 
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || defined(__aarch64__)
+# define CGLM_ARM64 1
+#else
+# define CGLM_ARM64 0
+#endif
+
 #define glmm_load(p)      vld1q_f32(p)
 #define glmm_store(p, a)  vst1q_f32(p, a)
 
diff --git a/include/cglm/simd/intrin.h b/include/cglm/simd/intrin.h
index 73fb675..80ef95e 100644
--- a/include/cglm/simd/intrin.h
+++ b/include/cglm/simd/intrin.h
@@ -63,29 +63,43 @@
 #endif
 
 /* ARM Neon */
-/* TODO: check _M_ARM and compiling should work if there is no ARM64 and NEON */
-#if defined(__ARM_NEON) || defined(__ARM_NEON__) \
-    || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || defined(__aarch64__)
-#  include <arm_neon.h>
-#  ifndef __ARM_NEON
-#    define __ARM_NEON
+#if defined(_WIN32)
+/* TODO: non-ARM stuff is already imported; would the include below be a better option? */
+/* #  include <intrin.h> */
+
+#  if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)
+#    include <arm64intr.h>
+#    include <arm64_neon.h>
+#    ifndef CGLM_NEON_FP
+#      define CGLM_NEON_FP  1
+#    endif
+#    ifndef CGLM_SIMD_ARM
+#      define CGLM_SIMD_ARM
+#    endif
+#  elif defined(_M_ARM)
+#    include <armintr.h>
+#    include <arm_neon.h>
+#    if defined(CGLM_NEON_FP) && (defined(__ARM_NEON_FP) || defined(vaddq_f32)) /* vaddq_f32 is defined as macro, we pick it */
+#      define CGLM_NEON_FP 1
+#    endif
+#    ifndef CGLM_SIMD_ARM
+#      define CGLM_SIMD_ARM
+#    endif
 #  endif
-#  ifndef __ARM_NEON_FP
-#    define __ARM_NEON_FP 1
-#    define CGLM_NEON_FP 1
-#  endif
-#  ifndef CGLM_ARM64
-#    define CGLM_ARM64 1
-#  endif
-#  ifndef CGLM_SIMD_ARM
-#    define CGLM_SIMD_ARM
-#  endif
-#  ifndef CGLM_SIMD_NEON
-#    define CGLM_SIMD_NEON 1
+
+#else /* non-windows */
+#  if defined(__ARM_NEON) || defined(__ARM_NEON__)
+#    include <arm_neon.h>
+#    if defined(__ARM_NEON_FP)
+#      define CGLM_NEON_FP 1
+#    endif
+#    ifndef CGLM_SIMD_ARM
+#      define CGLM_SIMD_ARM
+#    endif
 #  endif
 #endif
 
-#if defined(CGLM_SIMD_x86) || defined(CGLM_SIMD_NEON)
+#if defined(CGLM_SIMD_x86) || defined(CGLM_NEON_FP)
 #  ifndef CGLM_SIMD
 #    define CGLM_SIMD
 #  endif
diff --git a/include/cglm/simd/neon/affine.h b/include/cglm/simd/neon/affine.h
index e55ea6f..b0a65a6 100644
--- a/include/cglm/simd/neon/affine.h
+++ b/include/cglm/simd/neon/affine.h
@@ -7,7 +7,7 @@
 
 #ifndef cglm_affine_neon_h
 #define cglm_affine_neon_h
-#if defined(CGLM_SIMD_NEON)
+#if defined(CGLM_NEON_FP)
 
 #include "../../common.h"
 #include "../intrin.h"
diff --git a/include/cglm/simd/neon/mat2.h b/include/cglm/simd/neon/mat2.h
index d73e411..7d0d9eb 100644
--- a/include/cglm/simd/neon/mat2.h
+++ b/include/cglm/simd/neon/mat2.h
@@ -7,7 +7,7 @@
 
 #ifndef cglm_mat2_neon_h
 #define cglm_mat2_neon_h
-#if defined(CGLM_SIMD_NEON)
+#if defined(CGLM_NEON_FP)
 
 #include "../../common.h"
 #include "../intrin.h"
diff --git a/include/cglm/simd/neon/mat4.h b/include/cglm/simd/neon/mat4.h
index e9f3f8a..2d1184e 100644
--- a/include/cglm/simd/neon/mat4.h
+++ b/include/cglm/simd/neon/mat4.h
@@ -7,7 +7,7 @@
 
 #ifndef cglm_mat4_neon_h
 #define cglm_mat4_neon_h
-#if defined(CGLM_SIMD_NEON)
+#if defined(CGLM_NEON_FP)
 
 #include "../../common.h"
 #include "../intrin.h"
diff --git a/include/cglm/simd/neon/quat.h b/include/cglm/simd/neon/quat.h
index f73988d..fbaf390 100644
--- a/include/cglm/simd/neon/quat.h
+++ b/include/cglm/simd/neon/quat.h
@@ -7,7 +7,7 @@
 
 #ifndef cglm_quat_neon_h
 #define cglm_quat_neon_h
-#if defined(CGLM_SIMD_NEON)
+#if defined(CGLM_NEON_FP)
 
 #include "../../common.h"
 #include "../intrin.h"
diff --git a/include/cglm/vec4-ext.h b/include/cglm/vec4-ext.h
index 43d7214..e4e20cb 100644
--- a/include/cglm/vec4-ext.h
+++ b/include/cglm/vec4-ext.h
@@ -249,7 +249,7 @@ void
 glm_vec4_abs(vec4 v, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, glmm_abs(glmm_load(v)));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest, vabsq_f32(vld1q_f32(v)));
 #else
   dest[0] = fabsf(v[0]);
diff --git a/include/cglm/vec4.h b/include/cglm/vec4.h
index 2272556..8e95ec5 100644
--- a/include/cglm/vec4.h
+++ b/include/cglm/vec4.h
@@ -139,7 +139,7 @@ void
 glm_vec4_copy(vec4 v, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, glmm_load(v));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest, vld1q_f32(v));
 #else
   dest[0] = v[0];
@@ -176,7 +176,7 @@ void
 glm_vec4_zero(vec4 v) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(v, _mm_setzero_ps());
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(v, vdupq_n_f32(0.0f));
 #else
   v[0] = 0.0f;
@@ -196,7 +196,7 @@ void
 glm_vec4_one(vec4 v) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(v, _mm_set1_ps(1.0f));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(v, vdupq_n_f32(1.0f));
 #else
   v[0] = 1.0f;
@@ -322,7 +322,7 @@ void
 glm_vec4_add(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_add_ps(glmm_load(a), glmm_load(b)));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest, vaddq_f32(vld1q_f32(a), vld1q_f32(b)));
 #else
   dest[0] = a[0] + b[0];
@@ -344,7 +344,7 @@ void
 glm_vec4_adds(vec4 v, float s, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_add_ps(glmm_load(v), _mm_set1_ps(s)));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest, vaddq_f32(vld1q_f32(v), vdupq_n_f32(s)));
 #else
   dest[0] = v[0] + s;
@@ -366,7 +366,7 @@ void
 glm_vec4_sub(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_sub_ps(glmm_load(a), glmm_load(b)));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest, vsubq_f32(vld1q_f32(a), vld1q_f32(b)));
 #else
   dest[0] = a[0] - b[0];
@@ -388,7 +388,7 @@ void
 glm_vec4_subs(vec4 v, float s, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_sub_ps(glmm_load(v), _mm_set1_ps(s)));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest, vsubq_f32(vld1q_f32(v), vdupq_n_f32(s)));
 #else
   dest[0] = v[0] - s;
@@ -410,7 +410,7 @@ void
 glm_vec4_mul(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_mul_ps(glmm_load(a), glmm_load(b)));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest, vmulq_f32(vld1q_f32(a), vld1q_f32(b)));
 #else
   dest[0] = a[0] * b[0];
@@ -432,7 +432,7 @@ void
 glm_vec4_scale(vec4 v, float s, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_mul_ps(glmm_load(v), _mm_set1_ps(s)));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest, vmulq_f32(vld1q_f32(v), vdupq_n_f32(s)));
 #else
   dest[0] = v[0] * s;
@@ -516,7 +516,7 @@ glm_vec4_addadd(vec4 a, vec4 b, vec4 dest) {
   glmm_store(dest, _mm_add_ps(glmm_load(dest),
                               _mm_add_ps(glmm_load(a),
                                          glmm_load(b))));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
                             vaddq_f32(vld1q_f32(a),
                                       vld1q_f32(b))));
@@ -544,7 +544,7 @@ glm_vec4_subadd(vec4 a, vec4 b, vec4 dest) {
   glmm_store(dest, _mm_add_ps(glmm_load(dest),
                               _mm_sub_ps(glmm_load(a),
                                          glmm_load(b))));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
                             vsubq_f32(vld1q_f32(a),
                                       vld1q_f32(b))));
@@ -616,7 +616,7 @@ glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) {
   glmm_store(dest, _mm_add_ps(glmm_load(dest),
                               _mm_max_ps(glmm_load(a),
                                          glmm_load(b))));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
                             vmaxq_f32(vld1q_f32(a),
                                       vld1q_f32(b))));
@@ -644,7 +644,7 @@ glm_vec4_minadd(vec4 a, vec4 b, vec4 dest) {
   glmm_store(dest, _mm_add_ps(glmm_load(dest),
                               _mm_min_ps(glmm_load(a),
                                          glmm_load(b))));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
                             vminq_f32(vld1q_f32(a),
                                       vld1q_f32(b))));
@@ -667,7 +667,7 @@ void
 glm_vec4_negate_to(vec4 v, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_xor_ps(glmm_load(v), _mm_set1_ps(-0.0f)));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest, vnegq_f32(vld1q_f32(v)));
 #else
   dest[0] = -v[0];
@@ -748,7 +748,7 @@ float
 glm_vec4_distance(vec4 a, vec4 b) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   return glmm_norm(_mm_sub_ps(glmm_load(a), glmm_load(b)));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   return glmm_norm(vsubq_f32(glmm_load(a), glmm_load(b)));
 #else
   return sqrtf(glm_pow2(a[0] - b[0])
@@ -770,7 +770,7 @@ float
 glm_vec4_distance2(vec4 a, vec4 b) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   return glmm_norm2(_mm_sub_ps(glmm_load(a), glmm_load(b)));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   return glmm_norm2(vsubq_f32(glmm_load(a), glmm_load(b)));
 #else
   return glm_pow2(a[0] - b[0])
@@ -792,7 +792,7 @@ void
 glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_max_ps(glmm_load(a), glmm_load(b)));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest, vmaxq_f32(vld1q_f32(a), vld1q_f32(b)));
 #else
   dest[0] = glm_max(a[0], b[0]);
@@ -814,7 +814,7 @@ void
 glm_vec4_minv(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(dest, _mm_min_ps(glmm_load(a), glmm_load(b)));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(dest, vminq_f32(vld1q_f32(a), vld1q_f32(b)));
 #else
   dest[0] = glm_min(a[0], b[0]);
@@ -837,7 +837,7 @@ glm_vec4_clamp(vec4 v, float minVal, float maxVal) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glmm_store(v, _mm_min_ps(_mm_max_ps(glmm_load(v), _mm_set1_ps(minVal)),
                            _mm_set1_ps(maxVal)));
-#elif defined(CGLM_SIMD_NEON)
+#elif defined(CGLM_NEON_FP)
   vst1q_f32(v, vminq_f32(vmaxq_f32(vld1q_f32(v), vdupq_n_f32(minVal)),
                          vdupq_n_f32(maxVal)));
 #else

From e276b5b4059266fea9911a4694c64eee8fc31515 Mon Sep 17 00:00:00 2001
From: Recep Aslantas <info@recp.me>
Date: Tue, 14 Mar 2023 09:54:32 +0300
Subject: [PATCH 7/8] Update intrin.h

---
 include/cglm/simd/intrin.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/cglm/simd/intrin.h b/include/cglm/simd/intrin.h
index 80ef95e..bfdc94e 100644
--- a/include/cglm/simd/intrin.h
+++ b/include/cglm/simd/intrin.h
@@ -79,7 +79,7 @@
 #  elif defined(_M_ARM)
 #    include <armintr.h>
 #    include <arm_neon.h>
-#    if defined(CGLM_NEON_FP) && (defined(__ARM_NEON_FP) || defined(vaddq_f32)) /* vaddq_f32 is defined as macro, we pick it */
+#    ifndef CGLM_NEON_FP
 #      define CGLM_NEON_FP 1
 #    endif
 #    ifndef CGLM_SIMD_ARM
@@ -99,7 +99,7 @@
 #  endif
 #endif
 
-#if defined(CGLM_SIMD_x86) || defined(CGLM_NEON_FP)
+#if defined(CGLM_SIMD_x86) || defined(CGLM_SIMD_ARM)
 #  ifndef CGLM_SIMD
 #    define CGLM_SIMD
 #  endif

From 77b4c5cffbe6db7c52ec35b46d726af72d65ff88 Mon Sep 17 00:00:00 2001
From: Recep Aslantas <info@recp.me>
Date: Thu, 16 Mar 2023 13:16:24 +0300
Subject: [PATCH 8/8] reset visual studio tool versions

---
 win/cglm-test.vcxproj | 20 ++++++++++----------
 win/cglm.sln          |  4 ++--
 win/cglm.vcxproj      |  2 +-
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/win/cglm-test.vcxproj b/win/cglm-test.vcxproj
index bdcdf8a..97a3eda 100644
--- a/win/cglm-test.vcxproj
+++ b/win/cglm-test.vcxproj
@@ -94,65 +94,65 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v143</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v143</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v143</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v143</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64EC'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v143</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v143</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v143</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v143</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64EC'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v143</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v143</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
diff --git a/win/cglm.sln b/win/cglm.sln
index b34e272..22f929b 100644
--- a/win/cglm.sln
+++ b/win/cglm.sln
@@ -1,7 +1,7 @@
 ﻿
 Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 17
-VisualStudioVersion = 17.6.33417.168
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.29123.88
 MinimumVisualStudioVersion = 10.0.40219.1
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cglm", "cglm.vcxproj", "{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}"
 EndProject
diff --git a/win/cglm.vcxproj b/win/cglm.vcxproj
index fea6218..a0a9282 100644
--- a/win/cglm.vcxproj
+++ b/win/cglm.vcxproj
@@ -237,7 +237,7 @@
     <ProjectGuid>{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>cglm</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">