diff --git a/GPatch/CopyBinaries.bat b/GPatch/CopyBinaries.bat
deleted file mode 100644
index 9b290f9..0000000
--- a/GPatch/CopyBinaries.bat
+++ /dev/null
@@ -1,19 +0,0 @@
-xcopy "%GIANTS_PATH%\gg_dx7r.dll" "Files\" /Y
-xcopy "%GIANTS_PATH%\gg_dx9r.dll" "Files\" /Y
-xcopy "%GIANTS_PATH%\gg_null.dll" "Files\" /Y
-xcopy "%GIANTS_PATH%\dedicated.exe" "Files\" /Y
-xcopy "%GIANTS_PATH%\Giants.exe" "Files\" /Y
-xcopy "%GIANTS_PATH%\GiantsMain.exe" "Files\" /Y
-xcopy "%GIANTS_PATH%\GiantsDedicated.exe" "Files\" /Y
-xcopy "%GIANTS_PATH%\gs_ds.dll" "Files\" /Y
-xcopy "%GIANTS_PATH%\Giants.WebApi.Clients.dll" "Files\" /Y
-xcopy "%GIANTS_PATH%\fmt.dll" "Files\" /Y
-xcopy "%GIANTS_PATH%\crashrpt_lang.ini" "Files\" /Y
-xcopy "%GIANTS_PATH%\CrashRpt1403.dll" "Files\" /Y
-xcopy "%GIANTS_PATH%\CrashSender1403.exe" "Files\" /Y
-xcopy "%GIANTS_PATH%\dbghelp.dll" "Files\" /Y
-xcopy "%GIANTS_PATH%\cpprest_2_10.dll" "Files\" /Y
-xcopy "%GIANTS_PATH%\Newtonsoft.Json.dll" "Files\" /Y
-xcopy "%GIANTS_PATH%\zlib1.dll" "Files\" /Y
-
-pause
\ No newline at end of file
diff --git a/GPatch/DotNetChecker.nsh b/GPatch/DotNetChecker.nsh
deleted file mode 100644
index abc84e5..0000000
--- a/GPatch/DotNetChecker.nsh
+++ /dev/null
@@ -1,127 +0,0 @@
-!macro CheckNetFramework FrameworkVersion
- Var /GLOBAL dotNetUrl${FrameworkVersion}
- Var /GLOBAL dotNetReadableVersion${FrameworkVersion}
-
- !ifndef DOTNET472_URL
- !define DOTNET472_URL "https://go.microsoft.com/fwlink/?LinkId=863265"
- !define DOTNET471_URL "https://go.microsoft.com/fwlink/?LinkId=852104"
- !define DOTNET47_URL "https://go.microsoft.com/fwlink/?LinkId=825302"
- !define DOTNET462_URL "https://go.microsoft.com/fwlink/?LinkId=780600"
- !define DOTNET461_URL "https://go.microsoft.com/fwlink/?LinkId=671743"
- !define DOTNET46_URL "https://go.microsoft.com/fwlink/?LinkId=528232"
- !define DOTNET452_URL "https://go.microsoft.com/fwlink/?LinkId=397708"
- !define DOTNET451_URL "https://go.microsoft.com/fwlink/?LinkId=322116"
- !define DOTNET45_URL "https://go.microsoft.com/fwlink/?LinkId=225702"
- !define DOTNET40Full_URL "https://www.microsoft.com/downloads/info.aspx?na=41&srcfamilyid=0a391abd-25c1-4fc0-919f-b21f31ab88b7&srcdisplaylang=en&u=http%3a%2f%2fdownload.microsoft.com%2fdownload%2f9%2f5%2fA%2f95A9616B-7A37-4AF6-BC36-D6EA96C8DAAE%2fdotNetFx40_Full_x86_x64.exe"
- !define DOTNET40Client_URL "https://www.microsoft.com/downloads/info.aspx?na=41&srcfamilyid=e5ad0459-cbcc-4b4f-97b6-fb17111cf544&srcdisplaylang=en&u=http%3a%2f%2fdownload.microsoft.com%2fdownload%2f5%2f6%2f2%2f562A10F9-C9F4-4313-A044-9C94E0A8FAC8%2fdotNetFx40_Client_x86_x64.exe"
- !define DOTNET35_URL "https://download.microsoft.com/download/2/0/e/20e90413-712f-438c-988e-fdaa79a8ac3d/dotnetfx35.exe"
- !define DOTNET30_URL "https://download.microsoft.com/download/2/0/e/20e90413-712f-438c-988e-fdaa79a8ac3d/dotnetfx35.exe"
- !define DOTNET20_URL "https://www.microsoft.com/downloads/info.aspx?na=41&srcfamilyid=0856eacb-4362-4b0d-8edd-aab15c5e04f5&srcdisplaylang=en&u=http%3a%2f%2fdownload.microsoft.com%2fdownload%2f5%2f6%2f7%2f567758a3-759e-473e-bf8f-52154438565a%2fdotnetfx.exe"
- !define DOTNET11_URL "https://www.microsoft.com/downloads/info.aspx?na=41&srcfamilyid=262d25e3-f589-4842-8157-034d1e7cf3a3&srcdisplaylang=en&u=http%3a%2f%2fdownload.microsoft.com%2fdownload%2fa%2fa%2fc%2faac39226-8825-44ce-90e3-bf8203e74006%2fdotnetfx.exe"
- !define DOTNET10_URL "https://www.microsoft.com/downloads/info.aspx?na=41&srcfamilyid=262d25e3-f589-4842-8157-034d1e7cf3a3&srcdisplaylang=en&u=http%3a%2f%2fdownload.microsoft.com%2fdownload%2fa%2fa%2fc%2faac39226-8825-44ce-90e3-bf8203e74006%2fdotnetfx.exe"
- !endif
-
- ${If} ${FrameworkVersion} == "472"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET472_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "4.7.2"
- ${ElseIf} ${FrameworkVersion} == "471"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET471_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "4.7.1"
- ${ElseIf} ${FrameworkVersion} == "47"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET47_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "4.7"
- ${ElseIf} ${FrameworkVersion} == "462"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET462_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "4.6.2"
- ${ElseIf} ${FrameworkVersion} == "461"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET461_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "4.6.1"
- ${ElseIf} ${FrameworkVersion} == "46"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET46_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "4.6"
- ${ElseIf} ${FrameworkVersion} == "452"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET452_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "4.52"
- ${ElseIf} ${FrameworkVersion} == "451"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET451_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "4.51"
- ${ElseIf} ${FrameworkVersion} == "45"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET45_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "4.5"
- ${ElseIf} ${FrameworkVersion} == "40Full"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET40Full_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "4.0 Full"
- ${ElseIf} ${FrameworkVersion} == "40Client"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET40Client_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "4.0 Client"
- ${ElseIf} ${FrameworkVersion} == "35"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET35_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "3.5"
- ${ElseIf} ${FrameworkVersion} == "30"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET30_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "3.0"
- ${ElseIf} ${FrameworkVersion} == "20"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET20_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "2.0"
- ${ElseIf} ${FrameworkVersion} == "11"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET11_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "1.1"
- ${ElseIf} ${FrameworkVersion} == "10"
- StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET10_URL}
- StrCpy $dotNetReadableVersion${FrameworkVersion} "1.0"
- ${EndIf}
-
- DetailPrint "Checking .NET Framework version..."
-
- Push $0
- Push $1
- Push $2
- Push $3
- Push $4
- Push $5
- Push $6
- Push $7
-
- DotNetChecker::IsDotNet${FrameworkVersion}Installed
- Pop $0
-
- ${If} $0 == "false"
- ${OrIf} $0 == "f" ; if script is compiled in ANSI mode then we get only an "f" https://github.com/ReVolly/NsisDotNetChecker/issues/4
- DetailPrint ".NET Framework $dotNetReadableVersion${FrameworkVersion} not found, download is required for program to run."
- Goto NoDotNET${FrameworkVersion}
- ${Else}
- DetailPrint ".NET Framework $dotNetReadableVersion${FrameworkVersion} found, no need to install."
- Goto NewDotNET${FrameworkVersion}
- ${EndIf}
-
-NoDotNET${FrameworkVersion}:
- MessageBox MB_YESNOCANCEL|MB_ICONEXCLAMATION \
- ".NET Framework not installed. Required version: $dotNetReadableVersion${FrameworkVersion}.$\nInstall now?" \
- /SD IDYES IDYES InstallDotNET${FrameworkVersion} IDNO NewDotNET${FrameworkVersion}
- goto GiveUpDotNET${FrameworkVersion} ;IDCANCEL
-
-InstallDotNET${FrameworkVersion}:
- DetailPrint "Starting Microsoft .NET Framework v${NETVersion} Setup..."
- ExecWait "$TEMP\${NETInstallerFileName}"
-
- DetailPrint "Completed .NET Framework install/update. Removing .NET Framework installer."
- Delete "$TEMP\${NETInstallerFileName}"
-
- DetailPrint ".NET Framework installer removed."
- goto NewDotNet${FrameworkVersion}
-
-GiveUpDotNET${FrameworkVersion}:
- Abort "Installation canceled by user."
-
-NewDotNET${FrameworkVersion}:
- DetailPrint "Proceeding with remainder of installation."
- Pop $7
- Pop $6
- Pop $5
- Pop $4
- Pop $3
- Pop $2
- Pop $1
- Pop $0
-
-!macroend
diff --git a/GPatch/GPatch.ico b/GPatch/GPatch.ico
deleted file mode 100644
index 012e6a8..0000000
Binary files a/GPatch/GPatch.ico and /dev/null differ
diff --git a/GPatch/GPatch.nsi b/GPatch/GPatch.nsi
deleted file mode 100644
index d5d9199..0000000
--- a/GPatch/GPatch.nsi
+++ /dev/null
@@ -1,128 +0,0 @@
-Unicode True
-SetCompressor /SOLID zlib ; LZMA compresses about 20% better but is more likely to trigger AV false positives
-
-!define PRODUCT_NAME "Giants: Citizen Kabuto"
-!define PRODUCT_VERSION "1.499"
-
-; MUI 1.67 compatible ------
-!include "MUI2.nsh"
-!include "DotNetChecker.nsh"
-
-; MUI Settings
-!define MUI_ABORTWARNING
-!define MUI_ICON "GPatch.ico"
-
-; Welcome page
-; Directory page
-!insertmacro MUI_PAGE_DIRECTORY
-; Instfiles page
-!insertmacro MUI_PAGE_INSTFILES
-; Finish page
-;!define MUI_FINISHPAGE_SHOWREADME $INSTDIR\readme.txt
-
-!define MUI_FINISHPAGE_SHOWREADME_NOTCHECKED
-!insertmacro MUI_PAGE_FINISH
-
-!define MUI_LANGDLL_REGISTRY_ROOT "HKCU"
-!define MUI_LANGDLL_REGISTRY_KEY "Software\PlanetMoon\Giants"
-!define MUI_LANGDLL_REGISTRY_VALUENAME "SetupLanguage"
-
-; Language files
-!insertmacro MUI_LANGUAGE "English"
-!insertmacro MUI_LANGUAGE "French"
-!insertmacro MUI_LANGUAGE "German"
-!insertmacro MUI_LANGUAGE "Italian"
-!insertmacro MUI_LANGUAGE "Spanish"
-
-; Language selection settings
-!define MUI_LANGDLL_WINDOWTITLE "Setup Language"
-
-!include LogicLib.nsh
-
-; MUI end ------
-
-Name "${PRODUCT_NAME} ${PRODUCT_VERSION}"
-OutFile "Output\GPatch1_499_0_0.exe"
-InstallDir "$PROGRAMFILES\Giants\"
-InstallDirRegKey HKCU "SOFTWARE\PlanetMoon\Giants" "DestDir"
-ShowInstDetails hide
-
-;Request application privileges for Windows Vista+
-RequestExecutionLevel admin
-
-Section
- SetDetailsView hide
- SectionIn RO
- SetOverwrite on
-
- nsExec::Exec "taskkill /F /IM Giants.exe"
- nsExec::Exec "taskkill /F /IM GiantsMain.exe"
-
- ; Install DX redist for DX9 renderer
- SetOutPath "$INSTDIR\Redist"
- File /r "Files\Redist\*.*"
- ExecWait "$INSTDIR\Redist\dxsetup.exe /silent" $0
-
- ${If} $0 != 0
- MessageBox MB_OK "Setup failed to update DirectX ($0). Please visit www.microsoft.com and download the latest version of the DirectX end user redistributable."
- ${EndIf}
-
- ExecWait "$INSTDIR\Redist\VC_redist.x86.exe /install /quiet /norestart /log $\"$Temp\GPatch_VCRedist.txt$\"" $0
- ${If} $0 != 0
- ${AndIf} $0 != 1638 ;0x666 - Newer version installed
- MessageBox MB_OK "Setup failed to install the Visual C++ Runtime. Please visit www.microsoft.com and download the latest version of the Visual C++ 2019 redistributable."
- ${EndIf}
-
- RMDir /r "$INSTDIR\Redist" ; Delete temporary files
-
- ; Delete old files
- Delete $INSTDIR\bin\Shaders\*.*
- Delete $INSTDIR\gg_dx7r.dll
- Delete $INSTDIR\gg_dx8r.dll
- Delete $INSTDIR\gg_dx9r.dll
- Delete $INSTDIR\gg_null.dll
- Delete $INSTDIR\Giants.exe
- Delete $INSTDIR\BugTrap.dll
- Delete $INSTDIR\GiantsMain.exe
- Delete $INSTDIR\*.vso
- Delete $INSTDIR\*.pso
-
- SetOutPath "$INSTDIR"
- File /r "Files\*.*"
-
- ; remove old mods (may have compatibility issues)
- Delete $INSTDIR\bin\worldlist2.bin
- Delete $INSTDIR\bin\worldlist3.bin
- Delete $INSTDIR\bin\worldlist4.bin
- Delete $INSTDIR\bin\worldlist5.bin
- Delete $INSTDIR\bin\mappack1.gzp
- Delete $INSTDIR\bin\A-GRM1.gzp
-
-SectionEnd
-
-!define NETVersion "4.7.2"
-!define NETInstallerFileName "NDP472-KB4054531-Web.exe"
-!define NETInstallerPath "Files\Redist\NDP472-KB4054531-Web.exe"
-
-Section "MS .NET Framework v${NETVersion}" SecFramework
- IfFileExists "$WINDIR\Microsoft.NET\Framework\v${NETVersion}" NETFrameworkInstalled 0
- File /oname=$TEMP\${NETInstallerFileName} "${NETInstallerPath}"
-
- !insertmacro CheckNetFramework 472
- Return
-
- NETFrameworkInstalled:
- DetailPrint "Microsoft .NET Framework is already installed!"
-SectionEnd
-
-
-;--------------------------------
-;Installer Functions
-
-Function .onInit
-
- !insertmacro MUI_LANGDLL_DISPLAY
-
-FunctionEnd
-
-;--------------------------------
\ No newline at end of file
diff --git a/GPatch/Launcher/GPatch.ico b/GPatch/Launcher/GPatch.ico
deleted file mode 100644
index 012e6a8..0000000
Binary files a/GPatch/Launcher/GPatch.ico and /dev/null differ
diff --git a/GPatch/Launcher/Launcher.nsi b/GPatch/Launcher/Launcher.nsi
deleted file mode 100644
index 5c0e3e2..0000000
--- a/GPatch/Launcher/Launcher.nsi
+++ /dev/null
@@ -1,75 +0,0 @@
-SetCompressor /SOLID lzma
-
-!define PRODUCT_NAME "Giants Launcher"
-!define PRODUCT_VERSION "1.0.0.2"
-
-; MUI 1.67 compatible ------
-!include "MUI.nsh"
-
-; MUI Settings
-!define MUI_ABORTWARNING
-!define MUI_ICON "GPatch.ico"
-
-; Welcome page
-;!insertmacro MUI_PAGE_WELCOME
-; Directory page
-!insertmacro MUI_PAGE_DIRECTORY
-; Instfiles page
-!insertmacro MUI_PAGE_INSTFILES
-
-!define MUI_LANGDLL_REGISTRY_ROOT "HKCU"
-!define MUI_LANGDLL_REGISTRY_KEY "Software\PlanetMoon\Giants"
-!define MUI_LANGDLL_REGISTRY_VALUENAME "SetupLanguage"
-
-; Language files
-!insertmacro MUI_LANGUAGE "English"
-
-; MUI end ------
-
-Name "Giants Launcher Update"
-OutFile "LauncherUpdate_1002.exe"
-InstallDir "C:\Program Files\Giants"
-InstallDirRegKey HKCU "SOFTWARE\PlanetMoon\Giants" "DestDir"
-ShowInstDetails hide
-
-;Request application privileges for Windows Vista
-RequestExecutionLevel admin
-
-Section
- SetDetailsView hide
- SectionIn RO
- SetOverwrite on
-
-
- SetOutPath "$INSTDIR"
- File /r "Giants.exe"
-
-
-SectionEnd
-
-Function .onInit
- Processes::KillProcess "Giants.exe"
- Processes::FindProcess "Giants.exe"
- ${If} $R0 == 1
- MessageBox MB_OK "Please close the Giants launcher before installing this update."
- Abort
- ${EndIf}
-
- ClearErrors
- FileOpen $R0 "$INSTDIR\Giants.exe" w
- ${If} ${Errors}
- MessageBox MB_OK "Could not write to Giants.exe. Please ensure the Giants launcher is closed."
- Abort
- ${Else}
- FileClose $R0
- ${EndIf}
-FunctionEnd
-
-Function .onInstFailed
- MessageBox MB_OK "Update failed. Please visit www.giantswd.org and download the latest version manually."
-FunctionEnd
-
-Function .onInstSuccess
- MessageBox MB_OK "Update complete!"
- Exec "$INSTDIR\Giants.exe"
-FunctionEnd
\ No newline at end of file
diff --git a/Sdk/External/DirectXMath/.nuget/directxmath.nuspec b/Sdk/External/DirectXMath/.nuget/directxmath.nuspec
new file mode 100644
index 0000000..c7848ee
--- /dev/null
+++ b/Sdk/External/DirectXMath/.nuget/directxmath.nuspec
@@ -0,0 +1,31 @@
+
+
+
+    <id>directxmath</id>
+    <version>0.0.0-SpecifyVersionOnCommandline</version>
+    <title>DirectXMath</title>
+    <authors>Microsoft</authors>
+    <owners>microsoft,directxtk</owners>
+    <summary>DirectXMath is an all inline SIMD C++ linear algebra library for use in games and graphics apps.</summary>
+    <description>The DirectXMath API provides SIMD-friendly C++ types and functions for common linear algebra and graphics math operations common to DirectX applications. The library provides optimized versions for Windows 32-bit (x86), Windows 64-bit (x64), and Windows on ARM through SSE2 and ARM-NEON intrinsics support in the Visual Studio compiler.</description>
+    <releaseNotes>Matches the August 2020 release.</releaseNotes>
+    <projectUrl>http://go.microsoft.com/fwlink/?LinkID=615560</projectUrl>
+    <icon>images\icon.jpg</icon>
+    <license type="expression">MIT</license>
+    <requireLicenseAcceptance>false</requireLicenseAcceptance>
+    <copyright>© Microsoft Corporation. All rights reserved.</copyright>
+    <tags>C++ native DirectX math nativepackage</tags>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Sdk/External/DirectXMath/.nuget/directxmath.targets b/Sdk/External/DirectXMath/.nuget/directxmath.targets
new file mode 100644
index 0000000..0a31f57
--- /dev/null
+++ b/Sdk/External/DirectXMath/.nuget/directxmath.targets
@@ -0,0 +1,11 @@
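+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+
+  <ItemDefinitionGroup>
+    <ClCompile>
+      <PreprocessorDefinitions>HAS_DIRECTXMATH;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(MSBuildThisFileDirectory)..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+  </ItemDefinitionGroup>
+
+</Project>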
diff --git a/Sdk/External/DirectXMath/.nuget/icon.jpg b/Sdk/External/DirectXMath/.nuget/icon.jpg
new file mode 100644
index 0000000..08fe1fa
Binary files /dev/null and b/Sdk/External/DirectXMath/.nuget/icon.jpg differ
diff --git a/Sdk/External/DirectXMath/.nuget/signconfig.xml b/Sdk/External/DirectXMath/.nuget/signconfig.xml
new file mode 100644
index 0000000..f32a6a4
--- /dev/null
+++ b/Sdk/External/DirectXMath/.nuget/signconfig.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathAVX.h b/Sdk/External/DirectXMath/Extensions/DirectXMathAVX.h
new file mode 100644
index 0000000..bdcaec0
--- /dev/null
+++ b/Sdk/External/DirectXMath/Extensions/DirectXMathAVX.h
@@ -0,0 +1,275 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathAVX.h -- AVX (version 1) extensions for SIMD C++ Math library
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
+#error AVX not supported on ARM platform
+#endif
+
+#include <DirectXMath.h>
+
+namespace DirectX
+{
+
+namespace AVX
+{
+
+inline bool XMVerifyAVXSupport()
+{
+ // Returns true when CPUID reports AVX, OSXSAVE, SSE4.1 and SSE3: AMD Bulldozer, Intel "Sandy Bridge", and Intel "Ivy Bridge" or later processors
+ // with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
+
+ // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
+ int CPUInfo[4] = {-1};
+#if defined(__clang__) || defined(__GNUC__)
+ __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+ __cpuid( CPUInfo, 0 );
+#endif
+ // CPUInfo[0] (EAX of leaf 0) is the highest supported standard leaf; leaf 1 is needed for the feature flags below.
+ if ( CPUInfo[0] < 1 )
+ return false;
+
+#if defined(__clang__) || defined(__GNUC__)
+ __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+ __cpuid(CPUInfo, 1 );
+#endif
+
+ // We check leaf-1 ECX for AVX (bit 28), OSXSAVE (bit 27), SSE4.1 (bit 19), and SSE3 (bit 0); all four must be set
+ return ( (CPUInfo[2] & 0x18080001) == 0x18080001 );
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVectorReplicatePtr( _In_ const float *pValue )
+{
+ return _mm_broadcast_ss( pValue ); // load the single float at pValue and replicate it into all four lanes
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSplatX( FXMVECTOR V )
+{
+ return _mm_permute_ps( V, _MM_SHUFFLE(0, 0, 0, 0) ); // copy lane 0 (x) of V into all four lanes
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSplatY( FXMVECTOR V )
+{
+ return _mm_permute_ps( V, _MM_SHUFFLE(1, 1, 1, 1) ); // copy lane 1 (y) of V into all four lanes
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSplatZ( FXMVECTOR V )
+{
+ return _mm_permute_ps( V, _MM_SHUFFLE(2, 2, 2, 2) ); // copy lane 2 (z) of V into all four lanes
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSplatW( FXMVECTOR V )
+{
+ return _mm_permute_ps( V, _MM_SHUFFLE(3, 3, 3, 3) ); // copy lane 3 (w) of V into all four lanes
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSwizzle( FXMVECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3 )
+{
+ assert( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) );
+ _Analysis_assume_( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) );
+ // Runtime swizzle: result lane i = V[Ei]. The four indices are loaded (unaligned) as the per-lane control for _mm_permutevar_ps.
+ unsigned int elem[4] = { E0, E1, E2, E3 };
+ __m128i vControl = _mm_loadu_si128( reinterpret_cast<const __m128i *>(&elem[0]) );
+ return _mm_permutevar_ps( V, vControl );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorPermute( FXMVECTOR V1, FXMVECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW )
+{
+ assert( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 );
+ _Analysis_assume_( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 );
+ // Runtime permute: an index of 0-3 selects that lane of V1, 4-7 selects lane (index - 4) of V2.
+ static const XMVECTORU32 three = { { { 3, 3, 3, 3 } } };
+
+ XM_ALIGNED_DATA(16) unsigned int elem[4] = { PermuteX, PermuteY, PermuteZ, PermuteW };
+ __m128i vControl = _mm_load_si128( reinterpret_cast<const __m128i *>(&elem[0]) );
+ // vSelect is all-ones in lanes whose index is > 3 (source is V2); the index is then masked down to 0-3.
+ __m128i vSelect = _mm_cmpgt_epi32( vControl, three );
+ vControl = _mm_castps_si128( _mm_and_ps( _mm_castsi128_ps( vControl ), three ) );
+ // Shuffle both sources with the same lane indices.
+ __m128 shuffled1 = _mm_permutevar_ps( V1, vControl );
+ __m128 shuffled2 = _mm_permutevar_ps( V2, vControl );
+ // Keep the V1 result where vSelect is clear and the V2 result where it is set, then merge.
+ __m128 masked1 = _mm_andnot_ps( _mm_castsi128_ps( vSelect ), shuffled1 );
+ __m128 masked2 = _mm_and_ps( _mm_castsi128_ps( vSelect ), shuffled2 );
+
+ return _mm_or_ps( masked1, masked2 );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2, uint32_t Elements)
+{
+ assert( Elements < 4 );
+ _Analysis_assume_( Elements < 4 );
+ return AVX::XMVectorPermute(V1, V2, Elements, ((Elements) + 1), ((Elements) + 2), ((Elements) + 3)); // four consecutive lanes starting at index Elements, where 0-3 index V1 and 4-7 index V2
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V, uint32_t Elements)
+{
+ assert( Elements < 4 );
+ _Analysis_assume_( Elements < 4 );
+ return AVX::XMVectorSwizzle( V, Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3 ); // result lane i = V[(i + Elements) mod 4]
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V, uint32_t Elements)
+{
+ assert( Elements < 4 );
+ _Analysis_assume_( Elements < 4 );
+ return AVX::XMVectorSwizzle( V, (4 - (Elements)) & 3, (5 - (Elements)) & 3, (6 - (Elements)) & 3, (7 - (Elements)) & 3 ); // result lane i = V[(i - Elements) mod 4]
+}
+
+
+//-------------------------------------------------------------------------------------
+// Permute Templates
+//-------------------------------------------------------------------------------------
+
+namespace Internal
+{
+ // Slow path fallback for permutes that do not map to a single SSE opcode. Shuffle is the lane-select immediate; WhichN == true means output lane N comes from v2.
+ template<uint32_t Shuffle, bool WhichX, bool WhichY, bool WhichZ, bool WhichW> struct PermuteHelper
+ {
+ static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2)
+ {
+ static const XMVECTORU32 selectMask =
+ {
+ WhichX ? 0xFFFFFFFF : 0,
+ WhichY ? 0xFFFFFFFF : 0,
+ WhichZ ? 0xFFFFFFFF : 0,
+ WhichW ? 0xFFFFFFFF : 0,
+ };
+ // Apply the same lane shuffle to both inputs.
+ XMVECTOR shuffled1 = _mm_permute_ps(v1, Shuffle);
+ XMVECTOR shuffled2 = _mm_permute_ps(v2, Shuffle);
+ // Keep v1's lanes where selectMask is clear and v2's lanes where it is set.
+ XMVECTOR masked1 = _mm_andnot_ps(selectMask, shuffled1);
+ XMVECTOR masked2 = _mm_and_ps(selectMask, shuffled2);
+
+ return _mm_or_ps(masked1, masked2);
+ }
+ };
+
+ // Fast path for permutes that only read from the first vector.
+ template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, false, false>
+ {
+ static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { (v2); return _mm_permute_ps(v1, Shuffle); }
+ };
+
+ // Fast path for permutes that only read from the second vector.
+ template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, true, true>
+ {
+ static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2){ (v1); return _mm_permute_ps(v2, Shuffle); }
+ };
+
+ // Fast path for permutes that read XY from the first vector, ZW from the second.
+ template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, true, true>
+ {
+ static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v1, v2, Shuffle); }
+ };
+
+ // Fast path for permutes that read XY from the second vector, ZW from the first.
+ template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, false, false>
+ {
+ static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v2, v1, Shuffle); }
+ };
+} // namespace Internal
+
+// General permute template: indices 0-3 select a lane of V1, 4-7 select lane (index - 4) of V2
+template<uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW>
+ inline XMVECTOR XM_CALLCONV XMVectorPermute(FXMVECTOR V1, FXMVECTOR V2)
+{
+ static_assert(PermuteX <= 7, "PermuteX template parameter out of range");
+ static_assert(PermuteY <= 7, "PermuteY template parameter out of range");
+ static_assert(PermuteZ <= 7, "PermuteZ template parameter out of range");
+ static_assert(PermuteW <= 7, "PermuteW template parameter out of range");
+ // The low two bits of each index pick the lane within whichever source vector is used.
+ const uint32_t Shuffle = _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3);
+ // WhichN is true when output lane N is taken from V2.
+ const bool WhichX = PermuteX > 3;
+ const bool WhichY = PermuteY > 3;
+ const bool WhichZ = PermuteZ > 3;
+ const bool WhichW = PermuteW > 3;
+ // PermuteHelper is specialized on the Which* pattern so common cases compile to a single shuffle.
+ return AVX::Internal::PermuteHelper<Shuffle, WhichX, WhichY, WhichZ, WhichW>::Permute(V1, V2);
+}
+
+// Special-case permute templates: the two identity cases return an input directly; the rest keep every lane in place and use _mm_blend_ps, where mask bit i set takes lane i from V2
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,3>(FXMVECTOR V1, FXMVECTOR) { return V1; }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,7>(FXMVECTOR, FXMVECTOR V2) { return V2; }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x1); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x2); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x3); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x4); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x5); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x6); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x7); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x8); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x9); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xA); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xB); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xC); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xD); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xE); }
+
+
+//-------------------------------------------------------------------------------------
+// Swizzle Templates
+//-------------------------------------------------------------------------------------
+
+// General swizzle template: result lane N = V[SwizzleN], resolved at compile time into one _mm_permute_ps immediate
+template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t SwizzleW>
+ inline XMVECTOR XM_CALLCONV XMVectorSwizzle(FXMVECTOR V)
+{
+ static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
+ static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
+ static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
+ static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");
+
+ return _mm_permute_ps( V, _MM_SHUFFLE( SwizzleW, SwizzleZ, SwizzleY, SwizzleX ) );
+}
+
+// Specialized swizzles: the identity returns V unchanged; <0,0,2,2> and <1,1,3,3> use the dedicated duplicate-even / duplicate-odd lane intrinsics
+template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,1,2,3>(FXMVECTOR V) { return V; }
+template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,0,2,2>(FXMVECTOR V) { return _mm_moveldup_ps(V); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,1,3,3>(FXMVECTOR V) { return _mm_movehdup_ps(V); }
+
+
+//-------------------------------------------------------------------------------------
+// Other Templates
+//-------------------------------------------------------------------------------------
+
+template<uint32_t Elements>
+ inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2)
+{
+ static_assert( Elements < 4, "Elements template parameter out of range" );
+ return AVX::XMVectorPermute<Elements, (Elements + 1), (Elements + 2), (Elements + 3)>(V1, V2); // four consecutive lanes starting at index Elements, where 0-3 index V1 and 4-7 index V2
+}
+
+template<uint32_t Elements>
+ inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V)
+{
+ static_assert( Elements < 4, "Elements template parameter out of range" );
+ return AVX::XMVectorSwizzle<Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3>(V); // result lane i = V[(i + Elements) mod 4]
+}
+
+template<uint32_t Elements>
+ inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V)
+{
+ static_assert( Elements < 4, "Elements template parameter out of range" );
+ return AVX::XMVectorSwizzle<(4 - Elements) & 3, (5 - Elements) & 3, (6 - Elements) & 3, (7 - Elements) & 3>(V); // result lane i = V[(i - Elements) mod 4]
+}
+
+} // namespace AVX
+
+} // namespace DirectX;
diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathAVX2.h b/Sdk/External/DirectXMath/Extensions/DirectXMathAVX2.h
new file mode 100644
index 0000000..329849b
--- /dev/null
+++ b/Sdk/External/DirectXMath/Extensions/DirectXMathAVX2.h
@@ -0,0 +1,1037 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathAVX2.h -- AVX2 extensions for SIMD C++ Math library
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
+#error AVX2 not supported on ARM platform
+#endif
+
+#include <DirectXMath.h>
+#include <DirectXPackedVector.h>
+
+namespace DirectX
+{
+
+namespace AVX2
+{
+
+inline bool XMVerifyAVX2Support() // Runtime CPUID probe; returns true only if every feature bit tested below is set.
+{
+ // Should return true for AMD "Excavator", Intel "Haswell" or later processors
+ // with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
+
+ // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
+ int CPUInfo[4] = {-1}; // receives EAX, EBX, ECX, EDX in that order
+#if defined(__clang__) || defined(__GNUC__)
+ __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+ __cpuid(CPUInfo, 0);
+#endif
+
+ if ( CPUInfo[0] < 7 ) // leaf 0 EAX is the highest supported standard leaf; leaf 7 is queried below
+ return false;
+
+#if defined(__clang__) || defined(__GNUC__)
+ __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+ __cpuid(CPUInfo, 1);
+#endif
+
+ // We check for F16C, FMA3, AVX, OSXSAVE, SSE4.1, and SSE3
+ if ( (CPUInfo[2] & 0x38081001) != 0x38081001 ) // leaf 1 ECX bits 29, 28, 27, 19, 12 and 0 must all be set
+ return false;
+
+#if defined(__clang__) || defined(__GNUC__)
+ __cpuid_count(7, 0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+ __cpuidex(CPUInfo, 7, 0);
+#endif
+
+ return ( (CPUInfo[1] & 0x20 ) == 0x20 ); // leaf 7 (sub-leaf 0) EBX bit 5 is the AVX2 flag
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVectorReplicatePtr( _In_ const float *pValue ) // Returns a vector with *pValue in all four lanes.
+{
+ return _mm_broadcast_ss( pValue ); // broadcast-load of a single float from memory
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSplatX( FXMVECTOR V ) // Returns (V.x, V.x, V.x, V.x).
+{
+ return _mm_broadcastss_ps( V ); // register-to-register broadcast of lane 0
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSplatY( FXMVECTOR V ) // Returns (V.y, V.y, V.y, V.y).
+{
+ return _mm_permute_ps( V, _MM_SHUFFLE(1, 1, 1, 1) ); // every result lane reads source lane 1
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSplatZ( FXMVECTOR V ) // Returns (V.z, V.z, V.z, V.z).
+{
+ return _mm_permute_ps( V, _MM_SHUFFLE(2, 2, 2, 2) ); // every result lane reads source lane 2
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSplatW( FXMVECTOR V ) // Returns (V.w, V.w, V.w, V.w).
+{
+ return _mm_permute_ps( V, _MM_SHUFFLE(3, 3, 3, 3) ); // every result lane reads source lane 3
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd // Returns V1 * V2 + V3 per lane.
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2,
+ FXMVECTOR V3
+)
+{
+ return _mm_fmadd_ps( V1, V2, V3 ); // fused multiply-add: the product is not rounded before the add
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract // Returns V3 - V1 * V2 per lane.
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2,
+ FXMVECTOR V3
+)
+{
+ return _mm_fnmadd_ps( V1, V2, V3 ); // fused -(V1 * V2) + V3
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSwizzle( FXMVECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3 ) // Runtime swizzle: result lane i = V[Ei]; each Ei must be 0..3.
+{
+ assert( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) );
+ _Analysis_assume_( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) );
+
+ unsigned int elem[4] = { E0, E1, E2, E3 };
+ __m128i vControl = _mm_loadu_si128( reinterpret_cast<const __m128i *>(&elem[0]) ); // unaligned load: elem has no alignment specifier
+ return _mm_permutevar_ps( V, vControl ); // each control dword selects the source lane for that result lane
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorPermute( FXMVECTOR V1, FXMVECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW ) // Runtime permute: index 0..3 picks a lane of V1, 4..7 a lane of V2.
+{
+ assert( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 );
+ _Analysis_assume_( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 );
+
+ static const XMVECTORU32 three = { { { 3, 3, 3, 3 } } };
+
+ XM_ALIGNED_DATA(16) unsigned int elem[4] = { PermuteX, PermuteY, PermuteZ, PermuteW };
+ __m128i vControl = _mm_load_si128( reinterpret_cast<const __m128i *>(&elem[0]) ); // aligned load is valid: elem is declared 16-byte aligned
+
+ __m128i vSelect = _mm_cmpgt_epi32( vControl, three ); // all-ones in lanes whose index is above 3 (taken from V2)
+ vControl = _mm_castps_si128( _mm_and_ps( _mm_castsi128_ps( vControl ), three ) ); // reduce each index to 0..3
+
+ __m128 shuffled1 = _mm_permutevar_ps( V1, vControl );
+ __m128 shuffled2 = _mm_permutevar_ps( V2, vControl );
+
+ __m128 masked1 = _mm_andnot_ps( _mm_castsi128_ps( vSelect ), shuffled1 ); // keep V1 lanes where vSelect is clear
+ __m128 masked2 = _mm_and_ps( _mm_castsi128_ps( vSelect ), shuffled2 ); // keep V2 lanes where vSelect is set
+
+ return _mm_or_ps( masked1, masked2 );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2, uint32_t Elements) // Selects four consecutive lanes, starting at Elements (0..3), from the pair (V1, V2).
+{
+ assert( Elements < 4 );
+ _Analysis_assume_( Elements < 4 );
+ return AVX2::XMVectorPermute(V1, V2, Elements, ((Elements) + 1), ((Elements) + 2), ((Elements) + 3)); // indices above 3 address V2
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V, uint32_t Elements) // Result lane i = V[(i + Elements) mod 4]; Elements must be 0..3.
+{
+ assert( Elements < 4 );
+ _Analysis_assume_( Elements < 4 );
+ return AVX2::XMVectorSwizzle( V, Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3 ); // "& 3" wraps the index back into 0..3
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V, uint32_t Elements) // Result lane i = V[(i + 4 - Elements) mod 4]; Elements must be 0..3.
+{
+ assert( Elements < 4 );
+ _Analysis_assume_( Elements < 4 );
+ return AVX2::XMVectorSwizzle( V, (4 - (Elements)) & 3, (5 - (Elements)) & 3, (6 - (Elements)) & 3, (7 - (Elements)) & 3 ); // "& 3" wraps the index back into 0..3
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector2
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVector2Transform // Returns V.x * M.r[0] + V.y * M.r[1] + M.r[3]; V.z and V.w are not read.
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{
+ XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
+ vResult = _mm_fmadd_ps( vResult, M.r[1], M.r[3] ); // Y * row1 + row3
+ XMVECTOR vTemp = _mm_broadcastss_ps(V); // X
+ vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); // + X * row0
+ return vResult;
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2TransformCoord // Same sum as XMVector2Transform, then every lane is divided by the result's W.
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{
+ XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
+ vResult = _mm_fmadd_ps( vResult, M.r[1], M.r[3] ); // Y * row1 + row3
+ XMVECTOR vTemp = _mm_broadcastss_ps(V); // X
+ vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); // + X * row0
+ XMVECTOR W = _mm_permute_ps(vResult,_MM_SHUFFLE(3,3,3,3)); // splat the transformed W
+ vResult = _mm_div_ps( vResult, W ); // no guard here against W == 0
+ return vResult;
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2TransformNormal // Returns V.x * M.r[0] + V.y * M.r[1]; row 3 of M is not added.
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{
+ XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
+ vResult = _mm_mul_ps( vResult, M.r[1] ); // Y * row1
+ XMVECTOR vTemp = _mm_broadcastss_ps(V); // X
+ vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); // + X * row0
+ return vResult;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector3
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVector3Transform // Returns V.x * M.r[0] + V.y * M.r[1] + V.z * M.r[2] + M.r[3]; V.w is not read.
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{
+ XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
+ vResult = _mm_fmadd_ps( vResult, M.r[2], M.r[3] ); // Z * row2 + row3
+ XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
+ vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult ); // + Y * row1
+ vTemp = _mm_broadcastss_ps(V); // X
+ vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); // + X * row0
+ return vResult;
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3TransformCoord // Same sum as XMVector3Transform, then every lane is divided by the result's W.
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{
+ XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
+ vResult = _mm_fmadd_ps( vResult, M.r[2], M.r[3] ); // Z * row2 + row3
+ XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
+ vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult ); // + Y * row1
+ vTemp = _mm_broadcastss_ps(V); // X
+ vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); // + X * row0
+ XMVECTOR W = _mm_permute_ps(vResult,_MM_SHUFFLE(3,3,3,3)); // splat the transformed W
+ vResult = _mm_div_ps( vResult, W ); // no guard here against W == 0
+ return vResult;
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3TransformNormal // Returns V.x * M.r[0] + V.y * M.r[1] + V.z * M.r[2]; row 3 of M is not added.
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{
+ XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
+ vResult = _mm_mul_ps( vResult, M.r[2] ); // Z * row2
+ XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
+ vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult ); // + Y * row1
+ vTemp = _mm_broadcastss_ps(V); // X
+ vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); // + X * row0
+ return vResult;
+}
+
+XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2); // forward declaration; used by XMVector3Project/Unproject before its definition in the Matrix section
+
+inline XMVECTOR XM_CALLCONV XMVector3Project // Transforms V by World * View * Projection (with W divide), then maps the result into the given viewport.
+(
+ FXMVECTOR V,
+ float ViewportX,
+ float ViewportY,
+ float ViewportWidth,
+ float ViewportHeight,
+ float ViewportMinZ,
+ float ViewportMaxZ,
+ CXMMATRIX Projection,
+ CXMMATRIX View,
+ CXMMATRIX World
+)
+{
+ const float HalfViewportWidth = ViewportWidth * 0.5f;
+ const float HalfViewportHeight = ViewportHeight * 0.5f;
+
+ XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, ViewportMaxZ - ViewportMinZ, 0.0f); // Y scale is negated
+ XMVECTOR Offset = XMVectorSet(ViewportX + HalfViewportWidth, ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f);
+
+ XMMATRIX Transform = AVX2::XMMatrixMultiply(World, View);
+ Transform = AVX2::XMMatrixMultiply(Transform, Projection);
+
+ XMVECTOR Result = AVX2::XMVector3TransformCoord(V, Transform);
+
+ Result = AVX2::XMVectorMultiplyAdd(Result, Scale, Offset); // Result * Scale + Offset
+
+ return Result;
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3Unproject // Applies the inverse viewport scale/offset to V, then transforms by the inverse of World * View * Projection (with W divide).
+(
+ FXMVECTOR V,
+ float ViewportX,
+ float ViewportY,
+ float ViewportWidth,
+ float ViewportHeight,
+ float ViewportMinZ,
+ float ViewportMaxZ,
+ CXMMATRIX Projection,
+ CXMMATRIX View,
+ CXMMATRIX World
+)
+{
+ static const XMVECTORF32 D = { { { -1.0f, 1.0f, 0.0f, 0.0f } } };
+
+ XMVECTOR Scale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f);
+ Scale = XMVectorReciprocal(Scale); // reciprocal of the viewport scale
+
+ XMVECTOR Offset = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f);
+ Offset = AVX2::XMVectorMultiplyAdd(Scale, Offset, D.v); // Scale * Offset + D
+
+ XMMATRIX Transform = AVX2::XMMatrixMultiply(World, View);
+ Transform = AVX2::XMMatrixMultiply(Transform, Projection);
+ Transform = XMMatrixInverse(nullptr, Transform); // nullptr: the determinant output is not requested
+
+ XMVECTOR Result = AVX2::XMVectorMultiplyAdd(V, Scale, Offset); // V * Scale + Offset
+
+ return AVX2::XMVector3TransformCoord(Result, Transform);
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector4
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVector4Transform // Returns V.x * M.r[0] + V.y * M.r[1] + V.z * M.r[2] + V.w * M.r[3].
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{
+ XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(3,3,3,3)); // W
+ vResult = _mm_mul_ps( vResult, M.r[3] ); // W * row3
+ XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z
+ vResult = _mm_fmadd_ps( vTemp, M.r[2], vResult ); // + Z * row2
+ vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y
+ vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult ); // + Y * row1
+ vTemp = _mm_broadcastss_ps(V); // X
+ vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); // + X * row0
+ return vResult;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Matrix
+//-------------------------------------------------------------------------------------
+
+inline XMMATRIX XM_CALLCONV XMMatrixMultiply // Returns M1 * M2: result row i = sum over k of M1.r[i][k] * M2.r[k].
+(
+ CXMMATRIX M1,
+ CXMMATRIX M2
+)
+{
+ XMMATRIX mResult;
+ // Use vW to hold the original row
+ XMVECTOR vW = M1.r[0];
+ // Splat the component X,Y,Z then W
+ XMVECTOR vX = _mm_broadcastss_ps(vW);
+ XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); // vW is overwritten last because the splats above read it
+ // Perform the operation on the first row
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_fmadd_ps(vY,M2.r[1],vX);
+ vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
+ vX = _mm_fmadd_ps(vW,M2.r[3],vX);
+ mResult.r[0] = vX;
+ // Repeat for the other 3 rows
+ vW = M1.r[1];
+ vX = _mm_broadcastss_ps(vW);
+ vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_fmadd_ps(vY,M2.r[1],vX);
+ vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
+ vX = _mm_fmadd_ps(vW,M2.r[3],vX);
+ mResult.r[1] = vX;
+ vW = M1.r[2];
+ vX = _mm_broadcastss_ps(vW);
+ vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_fmadd_ps(vY,M2.r[1],vX);
+ vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
+ vX = _mm_fmadd_ps(vW,M2.r[3],vX);
+ mResult.r[2] = vX;
+ vW = M1.r[3];
+ vX = _mm_broadcastss_ps(vW);
+ vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_fmadd_ps(vY,M2.r[1],vX);
+ vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
+ vX = _mm_fmadd_ps(vW,M2.r[3],vX);
+ mResult.r[3] = vX;
+ return mResult;
+}
+
+inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose // Returns the transpose of M1 * M2: rows r0..r3 of the product are computed, then transposed with shuffles.
+(
+ FXMMATRIX M1,
+ CXMMATRIX M2
+)
+{
+ // Use vW to hold the original row
+ XMVECTOR vW = M1.r[0];
+ // Splat the component X,Y,Z then W
+ XMVECTOR vX = _mm_broadcastss_ps(vW);
+ XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); // vW is overwritten last because the splats above read it
+ // Perform the operation on the first row
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_fmadd_ps(vY,M2.r[1],vX);
+ vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
+ vX = _mm_fmadd_ps(vW,M2.r[3],vX);
+ __m128 r0 = vX;
+ // Repeat for the other 3 rows
+ vW = M1.r[1];
+ vX = _mm_broadcastss_ps(vW);
+ vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_fmadd_ps(vY,M2.r[1],vX);
+ vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
+ vX = _mm_fmadd_ps(vW,M2.r[3],vX);
+ __m128 r1 = vX;
+ vW = M1.r[2];
+ vX = _mm_broadcastss_ps(vW);
+ vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_fmadd_ps(vY,M2.r[1],vX);
+ vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
+ vX = _mm_fmadd_ps(vW,M2.r[3],vX);
+ __m128 r2 = vX;
+ vW = M1.r[3];
+ vX = _mm_broadcastss_ps(vW);
+ vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_fmadd_ps(vY,M2.r[1],vX);
+ vX = _mm_fmadd_ps(vZ,M2.r[2],vX);
+ vX = _mm_fmadd_ps(vW,M2.r[3],vX);
+ __m128 r3 = vX;
+
+ // x.x,x.y,y.x,y.y
+ XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0));
+ // x.z,x.w,y.z,y.w
+ XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2));
+ // z.x,z.y,w.x,w.y
+ XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0));
+ // z.z,z.w,w.z,w.w
+ XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2));
+
+ XMMATRIX mResult;
+ // x.x,y.x,z.x,w.x
+ mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
+ // x.y,y.y,z.y,w.y
+ mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
+ // x.z,y.z,z.z,w.z
+ mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
+ // x.w,y.w,z.w,w.w
+ mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
+ return mResult;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Permute Templates
+//-------------------------------------------------------------------------------------
+
+namespace Internal
+{
+ // Slow path fallback for permutes that do not map to a single SSE opcode.
+ template<uint32_t Shuffle, bool WhichX, bool WhichY, bool WhichZ, bool WhichW> struct PermuteHelper
+ {
+ static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2)
+ {
+ static const XMVECTORU32 selectMask =
+ {
+ WhichX ? 0xFFFFFFFF : 0,
+ WhichY ? 0xFFFFFFFF : 0,
+ WhichZ ? 0xFFFFFFFF : 0,
+ WhichW ? 0xFFFFFFFF : 0,
+ };
+
+ XMVECTOR shuffled1 = _mm_permute_ps(v1, Shuffle);
+ XMVECTOR shuffled2 = _mm_permute_ps(v2, Shuffle);
+
+ XMVECTOR masked1 = _mm_andnot_ps(selectMask, shuffled1); // lanes whose Which flag is false come from v1
+ XMVECTOR masked2 = _mm_and_ps(selectMask, shuffled2); // lanes whose Which flag is true come from v2
+
+ return _mm_or_ps(masked1, masked2);
+ }
+ };
+
+ // Fast path for permutes that only read from the first vector.
+ template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, false, false>
+ {
+ static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { (v2); return _mm_permute_ps(v1, Shuffle); }
+ };
+
+ // Fast path for permutes that only read from the second vector.
+ template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, true, true>
+ {
+ static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2){ (v1); return _mm_permute_ps(v2, Shuffle); }
+ };
+
+ // Fast path for permutes that read XY from the first vector, ZW from the second.
+ template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, true, true>
+ {
+ static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v1, v2, Shuffle); }
+ };
+
+ // Fast path for permutes that read XY from the second vector, ZW from the first.
+ template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, false, false>
+ {
+ static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v2, v1, Shuffle); }
+ };
+} // namespace Internal
+
+// General permute template: each argument is 0..7, where 0..3 names a lane of V1 and 4..7 a lane of V2
+template<uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW>
+ inline XMVECTOR XM_CALLCONV XMVectorPermute(FXMVECTOR V1, FXMVECTOR V2)
+{
+ static_assert(PermuteX <= 7, "PermuteX template parameter out of range");
+ static_assert(PermuteY <= 7, "PermuteY template parameter out of range");
+ static_assert(PermuteZ <= 7, "PermuteZ template parameter out of range");
+ static_assert(PermuteW <= 7, "PermuteW template parameter out of range");
+
+ const uint32_t Shuffle = _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3); // lane index within whichever source vector is chosen
+
+ const bool WhichX = PermuteX > 3; // true means the lane is read from V2
+ const bool WhichY = PermuteY > 3;
+ const bool WhichZ = PermuteZ > 3;
+ const bool WhichW = PermuteW > 3;
+
+ return AVX2::Internal::PermuteHelper<Shuffle, WhichX, WhichY, WhichZ, WhichW>::Permute(V1, V2); // dispatches to a fast-path specialization when one matches
+}
+
+// Special-case permute templates: every lane keeps its position, so a blend suffices (mask bit i set = lane i from V2)
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,3>(FXMVECTOR V1, FXMVECTOR) { return V1; }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,7>(FXMVECTOR, FXMVECTOR V2) { return V2; }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x1); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x2); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x3); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x4); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x5); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x6); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x7); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x8); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x9); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xA); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xB); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xC); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xD); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xE); }
+
+
+//-------------------------------------------------------------------------------------
+// Swizzle Templates
+//-------------------------------------------------------------------------------------
+
+// General swizzle template: result lane i is the lane of V named by the i-th template argument (each 0..3)
+template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t SwizzleW>
+ inline XMVECTOR XM_CALLCONV XMVectorSwizzle(FXMVECTOR V)
+{
+ static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
+ static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
+ static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
+ static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");
+
+ return _mm_permute_ps( V, _MM_SHUFFLE( SwizzleW, SwizzleZ, SwizzleY, SwizzleX ) ); // _MM_SHUFFLE lists lanes from high (W) to low (X)
+}
+
+// Specialized swizzles: patterns that need no shuffle or map to a dedicated broadcast/duplicate instruction
+template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,1,2,3>(FXMVECTOR V) { return V; } // identity
+template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,0,0,0>(FXMVECTOR V) { return _mm_broadcastss_ps(V); } // splat lane 0
+template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,0,2,2>(FXMVECTOR V) { return _mm_moveldup_ps(V); } // duplicate even lanes: (x,x,z,z)
+template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,1,3,3>(FXMVECTOR V) { return _mm_movehdup_ps(V); } // duplicate odd lanes: (y,y,w,w)
+
+
+//-------------------------------------------------------------------------------------
+// Other Templates
+//-------------------------------------------------------------------------------------
+
+template<uint32_t Elements> // Elements: number of lanes to shift by (0..3)
+ inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2) // Selects four consecutive lanes, starting at Elements, from the pair (V1, V2).
+{
+ static_assert( Elements < 4, "Elements template parameter out of range" );
+ return AVX2::XMVectorPermute<Elements, (Elements + 1), (Elements + 2), (Elements + 3)>(V1, V2); // indices above 3 address V2
+}
+
+template<uint32_t Elements> // Elements: number of lanes to rotate by (0..3)
+ inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V) // Result lane i = V[(i + Elements) mod 4].
+{
+ static_assert( Elements < 4, "Elements template parameter out of range" );
+ return AVX2::XMVectorSwizzle<Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3>(V); // "& 3" wraps the index back into 0..3
+}
+
+template<uint32_t Elements> // Elements: number of lanes to rotate by (0..3)
+ inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V) // Result lane i = V[(i + 4 - Elements) mod 4].
+{
+ static_assert( Elements < 4, "Elements template parameter out of range" );
+ return AVX2::XMVectorSwizzle<(4 - Elements) & 3, (5 - Elements) & 3, (6 - Elements) & 3, (7 - Elements) & 3>(V); // "& 3" wraps the index back into 0..3
+}
+
+//-------------------------------------------------------------------------------------
+// Data conversion
+//-------------------------------------------------------------------------------------
+
+inline float XMConvertHalfToFloat( PackedVector::HALF Value ) // Converts one half-precision value to float using the F16C conversion intrinsic.
+{
+ __m128i V1 = _mm_cvtsi32_si128( static_cast<int>(Value) ); // place the 16-bit pattern in the low dword
+ __m128 V2 = _mm_cvtph_ps( V1 ); // converts the four low halves; only lane 0 is used
+ return _mm_cvtss_f32( V2 );
+}
+
+inline PackedVector::HALF XMConvertFloatToHalf( float Value ) // Converts one float to half precision using the F16C conversion intrinsic.
+{
+ __m128 V1 = _mm_set_ss( Value ); // Value in lane 0, other lanes zero
+ __m128i V2 = _mm_cvtps_ph( V1, 0 ); // immediate 0 selects round-to-nearest-even
+ return static_cast<PackedVector::HALF>( _mm_cvtsi128_si32(V2) ); // keep the low 16 bits (lane 0's half)
+}
+
+inline float* XMConvertHalfToFloatStream // Converts HalfCount strided halves to strided floats; strides are in bytes. Returns pOutputStream.
+(
+ _Out_writes_bytes_(sizeof(float)+OutputStride*(HalfCount-1)) float* pOutputStream,
+ _In_ size_t OutputStride,
+ _In_reads_bytes_(2+InputStride*(HalfCount-1)) const PackedVector::HALF* pInputStream,
+ _In_ size_t InputStride,
+ _In_ size_t HalfCount
+)
+{
+ using namespace PackedVector;
+
+ assert(pOutputStream);
+ assert(pInputStream);
+
+ assert(InputStride >= sizeof(HALF));
+ assert(OutputStride >= sizeof(float));
+
+ auto pHalf = reinterpret_cast<const uint8_t*>(pInputStream); // byte cursors, because the strides are byte counts
+ auto pFloat = reinterpret_cast<uint8_t*>(pOutputStream);
+
+ size_t i = 0;
+ size_t four = HalfCount >> 2; // number of complete groups of four
+ if (four > 0)
+ {
+ if (InputStride == sizeof(HALF))
+ {
+ if (OutputStride == sizeof(float))
+ {
+ if ((reinterpret_cast<uintptr_t>(pFloat) & 0xF) == 0) // output is 16-byte aligned
+ {
+ // Packed input, aligned & packed output
+ for (size_t j = 0; j < four; ++j)
+ {
+ __m128i HV = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pHalf)); // 64-bit load = four halves
+ pHalf += InputStride * 4;
+
+ __m128 FV = _mm_cvtph_ps(HV);
+
+ _mm_stream_ps(reinterpret_cast<float*>(pFloat), FV); // non-temporal store; requires the alignment checked above
+ pFloat += OutputStride * 4;
+ i += 4;
+ }
+ }
+ else
+ {
+ // Packed input, packed output
+ for (size_t j = 0; j < four; ++j)
+ {
+ __m128i HV = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pHalf));
+ pHalf += InputStride * 4;
+
+ __m128 FV = _mm_cvtph_ps(HV);
+
+ _mm_storeu_ps(reinterpret_cast<float*>(pFloat), FV);
+ pFloat += OutputStride * 4;
+ i += 4;
+ }
+ }
+ }
+ else
+ {
+ // Packed input, scattered output
+ for (size_t j = 0; j < four; ++j)
+ {
+ __m128i HV = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pHalf));
+ pHalf += InputStride * 4;
+
+ __m128 FV = _mm_cvtph_ps(HV);
+
+ _mm_store_ss(reinterpret_cast<float*>(pFloat), FV);
+ pFloat += OutputStride;
+ *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 1); // _mm_extract_ps yields the float's bit pattern as int
+ pFloat += OutputStride;
+ *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 2);
+ pFloat += OutputStride;
+ *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 3);
+ pFloat += OutputStride;
+ i += 4;
+ }
+ }
+ }
+ else if (OutputStride == sizeof(float))
+ {
+ if ((reinterpret_cast<uintptr_t>(pFloat) & 0xF) == 0) // output is 16-byte aligned
+ {
+ // Scattered input, aligned & packed output
+ for (size_t j = 0; j < four; ++j)
+ {
+ uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
+ pHalf += InputStride;
+ uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
+ pHalf += InputStride;
+ uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
+ pHalf += InputStride;
+ uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
+ pHalf += InputStride;
+
+ __m128i HV = _mm_setzero_si128();
+ HV = _mm_insert_epi16(HV, H1, 0);
+ HV = _mm_insert_epi16(HV, H2, 1);
+ HV = _mm_insert_epi16(HV, H3, 2);
+ HV = _mm_insert_epi16(HV, H4, 3);
+ __m128 FV = _mm_cvtph_ps(HV);
+
+ _mm_stream_ps(reinterpret_cast<float*>(pFloat), FV); // non-temporal store; requires the alignment checked above
+ pFloat += OutputStride * 4;
+ i += 4;
+ }
+ }
+ else
+ {
+ // Scattered input, packed output
+ for (size_t j = 0; j < four; ++j)
+ {
+ uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
+ pHalf += InputStride;
+ uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
+ pHalf += InputStride;
+ uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
+ pHalf += InputStride;
+ uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
+ pHalf += InputStride;
+
+ __m128i HV = _mm_setzero_si128();
+ HV = _mm_insert_epi16(HV, H1, 0);
+ HV = _mm_insert_epi16(HV, H2, 1);
+ HV = _mm_insert_epi16(HV, H3, 2);
+ HV = _mm_insert_epi16(HV, H4, 3);
+ __m128 FV = _mm_cvtph_ps(HV);
+
+ _mm_storeu_ps(reinterpret_cast<float*>(pFloat), FV);
+ pFloat += OutputStride * 4;
+ i += 4;
+ }
+
+ }
+ }
+ else
+ {
+ // Scattered input, scattered output
+ for (size_t j = 0; j < four; ++j)
+ {
+ uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
+ pHalf += InputStride;
+ uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
+ pHalf += InputStride;
+ uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
+ pHalf += InputStride;
+ uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
+ pHalf += InputStride;
+
+ __m128i HV = _mm_setzero_si128();
+ HV = _mm_insert_epi16(HV, H1, 0);
+ HV = _mm_insert_epi16(HV, H2, 1);
+ HV = _mm_insert_epi16(HV, H3, 2);
+ HV = _mm_insert_epi16(HV, H4, 3);
+ __m128 FV = _mm_cvtph_ps(HV);
+
+ _mm_store_ss(reinterpret_cast<float*>(pFloat), FV);
+ pFloat += OutputStride;
+ *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 1);
+ pFloat += OutputStride;
+ *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 2);
+ pFloat += OutputStride;
+ *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 3);
+ pFloat += OutputStride;
+ i += 4;
+ }
+ }
+ }
+
+ for (; i < HalfCount; ++i) // scalar tail: the 0..3 elements left after the groups of four
+ {
+ *reinterpret_cast<float*>(pFloat) = XMConvertHalfToFloat(reinterpret_cast<const HALF*>(pHalf)[0]);
+ pHalf += InputStride;
+ pFloat += OutputStride;
+ }
+
+ return pOutputStream;
+}
+
+
+inline PackedVector::HALF* XMConvertFloatToHalfStream // Converts FloatCount strided floats to strided halves; strides are in bytes. Returns pOutputStream.
+(
+ _Out_writes_bytes_(2+OutputStride*(FloatCount-1)) PackedVector::HALF* pOutputStream,
+ _In_ size_t OutputStride,
+ _In_reads_bytes_(sizeof(float)+InputStride*(FloatCount-1)) const float* pInputStream,
+ _In_ size_t InputStride,
+ _In_ size_t FloatCount
+)
+{
+ using namespace PackedVector;
+
+ assert(pOutputStream);
+ assert(pInputStream);
+
+ assert(InputStride >= sizeof(float));
+ assert(OutputStride >= sizeof(HALF));
+
+ auto pFloat = reinterpret_cast<const uint8_t*>(pInputStream); // byte cursors, because the strides are byte counts
+ auto pHalf = reinterpret_cast<uint8_t*>(pOutputStream);
+
+ size_t i = 0;
+ size_t four = FloatCount >> 2; // number of complete groups of four
+ if (four > 0)
+ {
+ if (InputStride == sizeof(float))
+ {
+ if (OutputStride == sizeof(HALF))
+ {
+ if ((reinterpret_cast<uintptr_t>(pFloat) & 0xF) == 0) // input is 16-byte aligned
+ {
+ // Aligned and packed input, packed output
+ for (size_t j = 0; j < four; ++j)
+ {
+ __m128 FV = _mm_load_ps(reinterpret_cast<const float*>(pFloat));
+ pFloat += InputStride * 4;
+
+ __m128i HV = _mm_cvtps_ph(FV, 0); // immediate 0 selects round-to-nearest-even
+
+ _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV); // 64-bit store = four halves
+ pHalf += OutputStride * 4;
+ i += 4;
+ }
+ }
+ else
+ {
+ // Packed input, packed output
+ for (size_t j = 0; j < four; ++j)
+ {
+ __m128 FV = _mm_loadu_ps(reinterpret_cast<const float*>(pFloat));
+ pFloat += InputStride * 4;
+
+ __m128i HV = _mm_cvtps_ph(FV, 0);
+
+ _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV);
+ pHalf += OutputStride * 4;
+ i += 4;
+ }
+ }
+ }
+ else
+ {
+ if ((reinterpret_cast<uintptr_t>(pFloat) & 0xF) == 0) // input is 16-byte aligned
+ {
+ // Aligned & packed input, scattered output
+ for (size_t j = 0; j < four; ++j)
+ {
+ __m128 FV = _mm_load_ps(reinterpret_cast<const float*>(pFloat));
+ pFloat += InputStride * 4;
+
+ __m128i HV = _mm_cvtps_ph(FV, 0);
+
+ *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 0));
+ pHalf += OutputStride;
+ *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 1));
+ pHalf += OutputStride;
+ *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 2));
+ pHalf += OutputStride;
+ *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 3));
+ pHalf += OutputStride;
+ i += 4;
+ }
+ }
+ else
+ {
+ // Packed input, scattered output
+ for (size_t j = 0; j < four; ++j)
+ {
+ __m128 FV = _mm_loadu_ps(reinterpret_cast<const float*>(pFloat));
+ pFloat += InputStride * 4;
+
+ __m128i HV = _mm_cvtps_ph(FV, 0);
+
+ *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 0));
+ pHalf += OutputStride;
+ *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 1));
+ pHalf += OutputStride;
+ *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 2));
+ pHalf += OutputStride;
+ *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 3));
+ pHalf += OutputStride;
+ i += 4;
+ }
+ }
+ }
+ }
+ else if (OutputStride == sizeof(HALF))
+ {
+ // Scattered input, packed output
+ for (size_t j = 0; j < four; ++j)
+ {
+ __m128 FV1 = _mm_load_ss(reinterpret_cast<const float*>(pFloat));
+ pFloat += InputStride;
+
+ __m128 FV2 = _mm_broadcast_ss(reinterpret_cast<const float*>(pFloat));
+ pFloat += InputStride;
+
+ __m128 FV3 = _mm_broadcast_ss(reinterpret_cast<const float*>(pFloat));
+ pFloat += InputStride;
+
+ __m128 FV4 = _mm_broadcast_ss(reinterpret_cast<const float*>(pFloat));
+ pFloat += InputStride;
+
+ __m128 FV = _mm_blend_ps(FV1, FV2, 0x2); // lane 1 from FV2 -> (f1, f2, 0, 0)
+ __m128 FT = _mm_blend_ps(FV3, FV4, 0x8); // lane 3 from FV4 -> (f3, f3, f3, f4)
+ FV = _mm_blend_ps(FV, FT, 0xC); // lanes 2 and 3 from FT -> (f1, f2, f3, f4)
+
+ __m128i HV = _mm_cvtps_ph(FV, 0);
+
+ _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV);
+ pHalf += OutputStride * 4;
+ i += 4;
+ }
+ }
+ else
+ {
+ // Scattered input, scattered output
+ for (size_t j = 0; j < four; ++j)
+ {
+ __m128 FV1 = _mm_load_ss(reinterpret_cast<const float*>(pFloat));
+ pFloat += InputStride;
+
+ __m128 FV2 = _mm_broadcast_ss(reinterpret_cast<const float*>(pFloat));
+ pFloat += InputStride;
+
+ __m128 FV3 = _mm_broadcast_ss(reinterpret_cast<const float*>(pFloat));
+ pFloat += InputStride;
+
+ __m128 FV4 = _mm_broadcast_ss(reinterpret_cast<const float*>(pFloat));
+ pFloat += InputStride;
+
+ __m128 FV = _mm_blend_ps(FV1, FV2, 0x2); // lane 1 from FV2 -> (f1, f2, 0, 0)
+ __m128 FT = _mm_blend_ps(FV3, FV4, 0x8); // lane 3 from FV4 -> (f3, f3, f3, f4)
+ FV = _mm_blend_ps(FV, FT, 0xC); // lanes 2 and 3 from FT -> (f1, f2, f3, f4)
+
+ __m128i HV = _mm_cvtps_ph(FV, 0);
+
+ *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 0));
+ pHalf += OutputStride;
+ *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 1));
+ pHalf += OutputStride;
+ *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 2));
+ pHalf += OutputStride;
+ *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 3));
+ pHalf += OutputStride;
+ i += 4;
+ }
+ }
+ }
+
+ for (; i < FloatCount; ++i) // scalar tail: the 0..3 elements left after the groups of four
+ {
+ *reinterpret_cast<HALF*>(pHalf) = XMConvertFloatToHalf(reinterpret_cast<const float*>(pFloat)[0]);
+ pFloat += InputStride;
+ pHalf += OutputStride;
+ }
+
+ return pOutputStream;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Half2
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMLoadHalf2( _In_ const PackedVector::XMHALF2* pSource ) // Loads 32 bits from pSource and converts the halves to floats.
+{
+ assert(pSource);
+ __m128 V = _mm_load_ss( reinterpret_cast<const float*>(pSource) ); // 32-bit load; the upper lanes are zeroed
+ return _mm_cvtph_ps( _mm_castps_si128( V ) ); // converts the four low halves of the register
+}
+
+inline void XM_CALLCONV XMStoreHalf2( _Out_ PackedVector::XMHALF2* pDestination, _In_ FXMVECTOR V ) // Converts V to halves and stores the first two (32 bits) at pDestination.
+{
+ assert(pDestination);
+ __m128i V1 = _mm_cvtps_ph( V, 0 ); // immediate 0 selects round-to-nearest-even
+ _mm_store_ss( reinterpret_cast<float*>(pDestination), _mm_castsi128_ps(V1) ); // 32-bit store of the low dword
+}
+
+
+//-------------------------------------------------------------------------------------
+// Half4
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMLoadHalf4( _In_ const PackedVector::XMHALF4* pSource ) // Loads 64 bits (four halves) from pSource and converts them to four floats.
+{
+ assert(pSource);
+ __m128i V = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(pSource) ); // 64-bit load into the low half of the register
+ return _mm_cvtph_ps( V );
+}
+
+inline void XM_CALLCONV XMStoreHalf4( _Out_ PackedVector::XMHALF4* pDestination, _In_ FXMVECTOR V ) // Converts V to four halves and stores them (64 bits) at pDestination.
+{
+ assert(pDestination);
+ __m128i V1 = _mm_cvtps_ph( V, 0 ); // immediate 0 selects round-to-nearest-even
+ _mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), V1 ); // 64-bit store of the low half of the register
+}
+
+} // namespace AVX2
+
+} // namespace DirectX;
diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathBE.h b/Sdk/External/DirectXMath/Extensions/DirectXMathBE.h
new file mode 100644
index 0000000..dca2705
--- /dev/null
+++ b/Sdk/External/DirectXMath/Extensions/DirectXMathBE.h
@@ -0,0 +1,95 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathBE.h -- Big-endian swap extensions for SIMD C++ Math library
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if (defined(_M_IX86) || defined(_M_X64) || __i386__ || __x86_64__) && !defined(_M_HYBRID_X86_ARM64)
+#include
+#endif
+
+#include
+
+namespace DirectX
+{
+
+inline XMVECTOR XM_CALLCONV XMVectorEndian
+(
+    FXMVECTOR V
+)
+{
+#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+    // Table-lookup indices that reverse the byte order inside each 32-bit lane
+    static const XMVECTORU32 byteOrder = { { { 0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu } } };
+
+    uint8x8x2_t source;
+    source.val[0] = vreinterpret_u8_f32(vget_low_f32(V));
+    source.val[1] = vreinterpret_u8_f32(vget_high_f32(V));
+
+    const uint8x8_t lowBytes = vtbl2_u8(source, vget_low_u32(byteOrder));
+    const uint8x8_t highBytes = vtbl2_u8(source, vget_high_u32(byteOrder));
+    return vcombine_f32(vreinterpret_f32_u8(lowBytes), vreinterpret_f32_u8(highBytes));
+#else
+    // Portable path: swap the byte order of each of the four 32-bit lanes in turn
+    XMVECTORU32 swapped;
+    swapped.v = V;
+    for (auto& lane : swapped.u)
+    {
+        const uint32_t value = lane;
+        // byte 0 -> byte 3, byte 1 -> byte 2, byte 2 -> byte 1, byte 3 -> byte 0
+        lane = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) );
+    }
+    return swapped.v;
+#endif
+}
+
+
+#if (defined(_M_IX86) || defined(_M_X64) || __i386__ || __x86_64__) && !defined(_M_HYBRID_X86_ARM64)
+namespace SSSE3
+{
+
+inline bool XMVerifySSSE3Support()
+{
+    // Expected to report true on AMD Bulldozer, Intel Core i7/i5/i3, Intel Atom, or later processors
+
+    // __cpuid reference: http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
+    int regs[4] = { -1 };   // receives EAX, EBX, ECX, EDX
+#if defined(__clang__) || defined(__GNUC__)
+    __cpuid(0, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 0);
+#endif
+
+    const bool leaf1Available = ( regs[0] >= 1 );   // leaf 0 returns the highest standard leaf in EAX
+    if ( !leaf1Available ) return false;
+
+#if defined(__clang__) || defined(__GNUC__)
+    __cpuid(1, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 1);
+#endif
+
+    // Leaf 1, ECX bit 9 (0x200) is the SSSE3 feature flag.
+    return ( (regs[2] & 0x200) == 0x200 );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorEndian
+(
+    FXMVECTOR V
+)
+{
+    // Shuffle control that reverses the byte order inside each 32-bit lane
+    static const XMVECTORU32 byteOrder = { { { 0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu } } };
+    const __m128i swapped = _mm_shuffle_epi8( _mm_castps_si128(V), byteOrder );
+    return _mm_castsi128_ps( swapped );
+}
+
+} // namespace SSSE3
+#endif // X86 || X64
+
+} // namespace DirectX
diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathF16C.h b/Sdk/External/DirectXMath/Extensions/DirectXMathF16C.h
new file mode 100644
index 0000000..6305eca
--- /dev/null
+++ b/Sdk/External/DirectXMath/Extensions/DirectXMathF16C.h
@@ -0,0 +1,471 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathF16C.h -- F16C/CVT16 extensions for SIMD C++ Math library
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
+#error F16C not supported on ARM platform
+#endif
+
+#include
+#include
+
+namespace DirectX
+{
+
+namespace F16C
+{
+
+inline bool XMVerifyF16CSupport()
+{
+    // Expected to report true for AMD "Piledriver" and Intel "Ivy Bridge" processors
+    // when the OS supports AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
+
+    // __cpuid reference: http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
+    int regs[4] = { -1 };   // receives EAX, EBX, ECX, EDX
+#if defined(__clang__) || defined(__GNUC__)
+    __cpuid(0, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 0);
+#endif
+
+    const bool leaf1Available = ( regs[0] >= 1 );   // leaf 0 returns the highest standard leaf in EAX
+    if ( !leaf1Available ) return false;
+
+#if defined(__clang__) || defined(__GNUC__)
+    __cpuid(1, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 1);
+#endif
+
+    const int required = 0x38080000;   // leaf 1 ECX: F16C (bit 29), AVX (bit 28), OSXSAVE (bit 27), SSE4.1 (bit 19)
+    return ( (regs[2] & required) == required );
+}
+
+
+//-------------------------------------------------------------------------------------
+// Data conversion
+//-------------------------------------------------------------------------------------
+
+inline float XMConvertHalfToFloat( PackedVector::HALF Value )
+{   // Converts one half-precision value to float with the F16C conversion instruction.
+    __m128i V1 = _mm_cvtsi32_si128( static_cast<int>(Value) ); // _mm_cvtsi32_si128 takes an int; Value lands in the low lane
+    __m128 V2 = _mm_cvtph_ps( V1 );
+    return _mm_cvtss_f32( V2 );
+}
+
+inline PackedVector::HALF XMConvertFloatToHalf( float Value )
+{   // Converts one float to half precision with the F16C conversion instruction.
+    __m128 V1 = _mm_set_ss( Value );
+    __m128i V2 = _mm_cvtps_ph( V1, 0 ); // rounding immediate 0 = round to nearest even
+    return static_cast<PackedVector::HALF>( _mm_cvtsi128_si32(V2) ); // keep only the low 16 bits holding the converted value
+}
+
+inline float* XMConvertHalfToFloatStream
+(
+    _Out_writes_bytes_(sizeof(float) + OutputStride * (HalfCount - 1)) float* pOutputStream,
+    _In_ size_t OutputStride,
+    _In_reads_bytes_(2 + InputStride * (HalfCount - 1)) const PackedVector::HALF* pInputStream,
+    _In_ size_t InputStride,
+    _In_ size_t HalfCount
+)
+{   // Converts HalfCount halves read InputStride bytes apart into floats written OutputStride bytes apart; returns pOutputStream.
+    using namespace PackedVector;
+
+    assert(pOutputStream);
+    assert(pInputStream);
+
+    assert(InputStride >= sizeof(HALF));
+    assert(OutputStride >= sizeof(float));
+
+    auto pHalf = reinterpret_cast<const uint8_t*>(pInputStream);   // byte cursors: strides are expressed in bytes
+    auto pFloat = reinterpret_cast<uint8_t*>(pOutputStream);
+
+    size_t i = 0;
+    size_t four = HalfCount >> 2;   // number of complete groups of four elements
+    if (four > 0)
+    {
+        if (InputStride == sizeof(HALF))
+        {
+            if (OutputStride == sizeof(float))
+            {
+                if ((reinterpret_cast<uintptr_t>(pFloat) & 0xF) == 0)
+                {
+                    // Packed input, aligned & packed output
+                    for (size_t j = 0; j < four; ++j)
+                    {
+                        __m128i HV = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pHalf));
+                        pHalf += InputStride * 4;
+
+                        __m128 FV = _mm_cvtph_ps(HV);
+
+                        _mm_stream_ps(reinterpret_cast<float*>(pFloat), FV);
+                        pFloat += OutputStride * 4;
+                        i += 4;
+                    }
+                }
+                else
+                {
+                    // Packed input, packed output
+                    for (size_t j = 0; j < four; ++j)
+                    {
+                        __m128i HV = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pHalf));
+                        pHalf += InputStride * 4;
+
+                        __m128 FV = _mm_cvtph_ps(HV);
+
+                        _mm_storeu_ps(reinterpret_cast<float*>(pFloat), FV);
+                        pFloat += OutputStride * 4;
+                        i += 4;
+                    }
+                }
+            }
+            else
+            {
+                // Packed input, scattered output
+                for (size_t j = 0; j < four; ++j)
+                {
+                    __m128i HV = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(pHalf));
+                    pHalf += InputStride * 4;
+
+                    __m128 FV = _mm_cvtph_ps(HV);
+
+                    _mm_store_ss(reinterpret_cast<float*>(pFloat), FV);
+                    pFloat += OutputStride;
+                    *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 1);   // _mm_extract_ps yields the lane's bit pattern as int
+                    pFloat += OutputStride;
+                    *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 2);
+                    pFloat += OutputStride;
+                    *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 3);
+                    pFloat += OutputStride;
+                    i += 4;
+                }
+            }
+        }
+        else if (OutputStride == sizeof(float))
+        {
+            if ((reinterpret_cast<uintptr_t>(pFloat) & 0xF) == 0)
+            {
+                // Scattered input, aligned & packed output
+                for (size_t j = 0; j < four; ++j)
+                {
+                    uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+                    uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+                    uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+                    uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+
+                    __m128i HV = _mm_setzero_si128();
+                    HV = _mm_insert_epi16(HV, H1, 0);
+                    HV = _mm_insert_epi16(HV, H2, 1);
+                    HV = _mm_insert_epi16(HV, H3, 2);
+                    HV = _mm_insert_epi16(HV, H4, 3);
+                    __m128 FV = _mm_cvtph_ps(HV);
+
+                    _mm_stream_ps(reinterpret_cast<float*>(pFloat), FV);
+                    pFloat += OutputStride * 4;
+                    i += 4;
+                }
+            }
+            else
+            {
+                // Scattered input, packed output
+                for (size_t j = 0; j < four; ++j)
+                {
+                    uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+                    uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+                    uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+                    uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
+                    pHalf += InputStride;
+
+                    __m128i HV = _mm_setzero_si128();
+                    HV = _mm_insert_epi16(HV, H1, 0);
+                    HV = _mm_insert_epi16(HV, H2, 1);
+                    HV = _mm_insert_epi16(HV, H3, 2);
+                    HV = _mm_insert_epi16(HV, H4, 3);
+                    __m128 FV = _mm_cvtph_ps(HV);
+
+                    _mm_storeu_ps(reinterpret_cast<float*>(pFloat), FV);
+                    pFloat += OutputStride * 4;
+                    i += 4;
+                }
+
+            }
+        }
+        else
+        {
+            // Scattered input, scattered output
+            for (size_t j = 0; j < four; ++j)
+            {
+                uint16_t H1 = *reinterpret_cast<const HALF*>(pHalf);
+                pHalf += InputStride;
+                uint16_t H2 = *reinterpret_cast<const HALF*>(pHalf);
+                pHalf += InputStride;
+                uint16_t H3 = *reinterpret_cast<const HALF*>(pHalf);
+                pHalf += InputStride;
+                uint16_t H4 = *reinterpret_cast<const HALF*>(pHalf);
+                pHalf += InputStride;
+
+                __m128i HV = _mm_setzero_si128();
+                HV = _mm_insert_epi16(HV, H1, 0);
+                HV = _mm_insert_epi16(HV, H2, 1);
+                HV = _mm_insert_epi16(HV, H3, 2);
+                HV = _mm_insert_epi16(HV, H4, 3);
+                __m128 FV = _mm_cvtph_ps(HV);
+
+                _mm_store_ss(reinterpret_cast<float*>(pFloat), FV);
+                pFloat += OutputStride;
+                *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 1);
+                pFloat += OutputStride;
+                *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 2);
+                pFloat += OutputStride;
+                *reinterpret_cast<int*>(pFloat) = _mm_extract_ps(FV, 3);
+                pFloat += OutputStride;
+                i += 4;
+            }
+        }
+    }
+
+    for (; i < HalfCount; ++i)   // remaining 0-3 elements, converted one at a time
+    {
+        *reinterpret_cast<float*>(pFloat) = XMConvertHalfToFloat(reinterpret_cast<const HALF*>(pHalf)[0]);
+        pHalf += InputStride;
+        pFloat += OutputStride;
+    }
+
+    return pOutputStream;
+}
+
+
+inline PackedVector::HALF* XMConvertFloatToHalfStream
+(
+    _Out_writes_bytes_(2 + OutputStride * (FloatCount - 1)) PackedVector::HALF* pOutputStream,
+    _In_ size_t OutputStride,
+    _In_reads_bytes_(sizeof(float) + InputStride * (FloatCount - 1)) const float* pInputStream,
+    _In_ size_t InputStride,
+    _In_ size_t FloatCount
+)
+{   // Converts FloatCount floats read InputStride bytes apart into halves written OutputStride bytes apart; returns pOutputStream.
+    using namespace PackedVector;
+
+    assert(pOutputStream);
+    assert(pInputStream);
+
+    assert(InputStride >= sizeof(float));
+    assert(OutputStride >= sizeof(HALF));
+
+    auto pFloat = reinterpret_cast<const uint8_t*>(pInputStream);   // byte cursors: strides are expressed in bytes
+    auto pHalf = reinterpret_cast<uint8_t*>(pOutputStream);
+
+    size_t i = 0;
+    size_t four = FloatCount >> 2;   // number of complete groups of four elements
+    if (four > 0)
+    {
+        if (InputStride == sizeof(float))
+        {
+            if (OutputStride == sizeof(HALF))
+            {
+                if ((reinterpret_cast<uintptr_t>(pFloat) & 0xF) == 0)
+                {
+                    // Aligned and packed input, packed output
+                    for (size_t j = 0; j < four; ++j)
+                    {
+                        __m128 FV = _mm_load_ps(reinterpret_cast<const float*>(pFloat));
+                        pFloat += InputStride * 4;
+
+                        __m128i HV = _mm_cvtps_ph(FV, 0);
+
+                        _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV);
+                        pHalf += OutputStride * 4;
+                        i += 4;
+                    }
+                }
+                else
+                {
+                    // Packed input, packed output
+                    for (size_t j = 0; j < four; ++j)
+                    {
+                        __m128 FV = _mm_loadu_ps(reinterpret_cast<const float*>(pFloat));
+                        pFloat += InputStride * 4;
+
+                        __m128i HV = _mm_cvtps_ph(FV, 0);
+
+                        _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV);
+                        pHalf += OutputStride * 4;
+                        i += 4;
+                    }
+                }
+            }
+            else
+            {
+                if ((reinterpret_cast<uintptr_t>(pFloat) & 0xF) == 0)
+                {
+                    // Aligned & packed input, scattered output
+                    for (size_t j = 0; j < four; ++j)
+                    {
+                        __m128 FV = _mm_load_ps(reinterpret_cast<const float*>(pFloat));
+                        pFloat += InputStride * 4;
+
+                        __m128i HV = _mm_cvtps_ph(FV, 0);
+
+                        *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 0));
+                        pHalf += OutputStride;
+                        *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 1));
+                        pHalf += OutputStride;
+                        *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 2));
+                        pHalf += OutputStride;
+                        *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 3));
+                        pHalf += OutputStride;
+                        i += 4;
+                    }
+                }
+                else
+                {
+                    // Packed input, scattered output
+                    for (size_t j = 0; j < four; ++j)
+                    {
+                        __m128 FV = _mm_loadu_ps(reinterpret_cast<const float*>(pFloat));
+                        pFloat += InputStride * 4;
+
+                        __m128i HV = _mm_cvtps_ph(FV, 0);
+
+                        *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 0));
+                        pHalf += OutputStride;
+                        *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 1));
+                        pHalf += OutputStride;
+                        *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 2));
+                        pHalf += OutputStride;
+                        *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 3));
+                        pHalf += OutputStride;
+                        i += 4;
+                    }
+                }
+            }
+        }
+        else if (OutputStride == sizeof(HALF))
+        {
+            // Scattered input, packed output
+            for (size_t j = 0; j < four; ++j)
+            {
+                __m128 FV1 = _mm_load_ss(reinterpret_cast<const float*>(pFloat));
+                pFloat += InputStride;
+
+                __m128 FV2 = _mm_broadcast_ss(reinterpret_cast<const float*>(pFloat));
+                pFloat += InputStride;
+
+                __m128 FV3 = _mm_broadcast_ss(reinterpret_cast<const float*>(pFloat));
+                pFloat += InputStride;
+
+                __m128 FV4 = _mm_broadcast_ss(reinterpret_cast<const float*>(pFloat));
+                pFloat += InputStride;
+
+                __m128 FV = _mm_blend_ps(FV1, FV2, 0x2);   // lane 1 from FV2
+                __m128 FT = _mm_blend_ps(FV3, FV4, 0x8);   // lane 3 from FV4
+                FV = _mm_blend_ps(FV, FT, 0xC);            // lanes 2-3 from FT -> (f1, f2, f3, f4)
+
+                __m128i HV = _mm_cvtps_ph(FV, 0);
+
+                _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV);
+                pHalf += OutputStride * 4;
+                i += 4;
+            }
+        }
+        else
+        {
+            // Scattered input, scattered output
+            for (size_t j = 0; j < four; ++j)
+            {
+                __m128 FV1 = _mm_load_ss(reinterpret_cast<const float*>(pFloat));
+                pFloat += InputStride;
+
+                __m128 FV2 = _mm_broadcast_ss(reinterpret_cast<const float*>(pFloat));
+                pFloat += InputStride;
+
+                __m128 FV3 = _mm_broadcast_ss(reinterpret_cast<const float*>(pFloat));
+                pFloat += InputStride;
+
+                __m128 FV4 = _mm_broadcast_ss(reinterpret_cast<const float*>(pFloat));
+                pFloat += InputStride;
+
+                __m128 FV = _mm_blend_ps(FV1, FV2, 0x2);   // lane 1 from FV2
+                __m128 FT = _mm_blend_ps(FV3, FV4, 0x8);   // lane 3 from FV4
+                FV = _mm_blend_ps(FV, FT, 0xC);            // lanes 2-3 from FT -> (f1, f2, f3, f4)
+
+                __m128i HV = _mm_cvtps_ph(FV, 0);
+
+                *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 0));
+                pHalf += OutputStride;
+                *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 1));
+                pHalf += OutputStride;
+                *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 2));
+                pHalf += OutputStride;
+                *reinterpret_cast<HALF*>(pHalf) = static_cast<HALF>(_mm_extract_epi16(HV, 3));
+                pHalf += OutputStride;
+                i += 4;
+            }
+        }
+    }
+
+    for (; i < FloatCount; ++i)   // remaining 0-3 elements, converted one at a time
+    {
+        *reinterpret_cast<HALF*>(pHalf) = XMConvertFloatToHalf(reinterpret_cast<const float*>(pFloat)[0]);
+        pFloat += InputStride;
+        pHalf += OutputStride;
+    }
+
+    return pOutputStream;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Half2
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMLoadHalf2( _In_ const PackedVector::XMHALF2* pSource )
+{   // Reads 32 bits from pSource and widens the packed half-precision values to floats.
+    assert(pSource);
+    __m128 V = _mm_load_ss( reinterpret_cast<const float*>(pSource) ); // _mm_load_ss takes a const float*; upper lanes are zeroed
+    return _mm_cvtph_ps( _mm_castps_si128( V ) );
+}
+
+inline void XM_CALLCONV XMStoreHalf2( _Out_ PackedVector::XMHALF2* pDestination, _In_ FXMVECTOR V )
+{   // Narrows V to half precision and writes the low 32 bits of the result to pDestination.
+    assert(pDestination);
+    __m128i V1 = _mm_cvtps_ph( V, 0 ); // rounding immediate 0 = round to nearest even
+    _mm_store_ss( reinterpret_cast<float*>(pDestination), _mm_castsi128_ps(V1) ); // _mm_store_ss takes a float*
+}
+
+
+//-------------------------------------------------------------------------------------
+// Half4
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMLoadHalf4( _In_ const PackedVector::XMHALF4* pSource )
+{   // Reads 64 bits from pSource and widens the four packed half-precision values to floats.
+    assert(pSource);
+    __m128i V = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(pSource) ); // 64-bit load into the low half of the register
+    return _mm_cvtph_ps( V );
+}
+
+inline void XM_CALLCONV XMStoreHalf4( _Out_ PackedVector::XMHALF4* pDestination, _In_ FXMVECTOR V )
+{   // Narrows all four lanes of V to half precision and stores the resulting 64 bits.
+    assert(pDestination);
+    const __m128i halves = _mm_cvtps_ph( V, 0 );
+    _mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), halves );
+}
+
+} // namespace F16C
+
+} // namespace DirectX
diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathFMA3.h b/Sdk/External/DirectXMath/Extensions/DirectXMathFMA3.h
new file mode 100644
index 0000000..20c6a09
--- /dev/null
+++ b/Sdk/External/DirectXMath/Extensions/DirectXMathFMA3.h
@@ -0,0 +1,391 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathFMA3.h -- FMA3 extensions for SIMD C++ Math library
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
+#error FMA3 not supported on ARM platform
+#endif
+
+#include
+
+namespace DirectX
+{
+
+namespace FMA3
+{
+
+inline bool XMVerifyFMA3Support()
+{
+    // Expected to report true for AMD "Piledriver" and Intel "Haswell" processors
+    // when the OS supports AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
+
+    // __cpuid reference: http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
+    int regs[4] = {-1};   // receives EAX, EBX, ECX, EDX
+#if defined(__clang__) || defined(__GNUC__)
+    __cpuid(0, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 0);
+#endif
+
+    const bool leaf1Available = ( regs[0] >= 1 );   // leaf 0 returns the highest standard leaf in EAX
+    if ( !leaf1Available ) return false;
+
+#if defined(__clang__) || defined(__GNUC__)
+    __cpuid(1, regs[0], regs[1], regs[2], regs[3]);
+#else
+    __cpuid(regs, 1);
+#endif
+
+    const int required = 0x18001000;   // leaf 1 ECX: AVX (bit 28), OSXSAVE (bit 27), FMA3 (bit 12)
+    return ( (regs[2] & required) == required );
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector
+//-------------------------------------------------------------------------------------
+
+// Fused multiply-add.
+// Returns V1 * V2 + V3 for each component, evaluated with the FMA3
+// _mm_fmadd_ps intrinsic.
+inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd( FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR V3 )
+{
+    // Product and sum are produced by a single instruction
+    const XMVECTOR fused = _mm_fmadd_ps( V1, V2, V3 );
+    return fused;
+}
+
+// Fused negated multiply-add.
+// Returns V3 - V1 * V2 for each component, evaluated with the FMA3
+// _mm_fnmadd_ps intrinsic.
+inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract( FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR V3 )
+{
+    // -(V1 * V2) + V3 is produced by a single instruction
+    const XMVECTOR fused = _mm_fnmadd_ps( V1, V2, V3 );
+    return fused;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector2
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVector2Transform
+(
+    FXMVECTOR V,
+    CXMMATRIX M
+)
+{
+    // result = V.x * M.r[0] + (V.y * M.r[1] + M.r[3])
+    const XMVECTOR splatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1));
+    const XMVECTOR splatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0));
+    const XMVECTOR sumY = _mm_fmadd_ps( splatY, M.r[1], M.r[3] );
+    return _mm_fmadd_ps( splatX, M.r[0], sumY );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2TransformCoord
+(
+    FXMVECTOR V,
+    CXMMATRIX M
+)
+{
+    // (V.x * M.r[0] + (V.y * M.r[1] + M.r[3])), then every lane divided by the resulting W
+    const XMVECTOR splatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1));
+    const XMVECTOR splatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0));
+    const XMVECTOR sumY = _mm_fmadd_ps( splatY, M.r[1], M.r[3] );
+    const XMVECTOR sumXY = _mm_fmadd_ps( splatX, M.r[0], sumY );
+    const XMVECTOR splatW = _mm_permute_ps(sumXY,_MM_SHUFFLE(3,3,3,3));
+    return _mm_div_ps( sumXY, splatW );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2TransformNormal
+(
+    FXMVECTOR V,
+    CXMMATRIX M
+)
+{
+    // result = V.x * M.r[0] + V.y * M.r[1]; M.r[3] is not added
+    const XMVECTOR splatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1));
+    const XMVECTOR splatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0));
+    const XMVECTOR termY = _mm_mul_ps( splatY, M.r[1] );
+    return _mm_fmadd_ps( splatX, M.r[0], termY );
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector3
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVector3Transform
+(
+    FXMVECTOR V,
+    CXMMATRIX M
+)
+{
+    // result = V.x * M.r[0] + (V.y * M.r[1] + (V.z * M.r[2] + M.r[3]))
+    const XMVECTOR splatZ = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2));
+    const XMVECTOR splatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1));
+    const XMVECTOR splatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0));
+    const XMVECTOR sumZ = _mm_fmadd_ps( splatZ, M.r[2], M.r[3] );
+    const XMVECTOR sumYZ = _mm_fmadd_ps( splatY, M.r[1], sumZ );
+    return _mm_fmadd_ps( splatX, M.r[0], sumYZ );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3TransformCoord
+(
+    FXMVECTOR V,
+    CXMMATRIX M
+)
+{
+    // Same accumulation as XMVector3Transform, then every lane is divided by the resulting W
+    const XMVECTOR splatZ = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2));
+    const XMVECTOR splatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1));
+    const XMVECTOR splatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0));
+    const XMVECTOR sumZ = _mm_fmadd_ps( splatZ, M.r[2], M.r[3] );
+    const XMVECTOR sumYZ = _mm_fmadd_ps( splatY, M.r[1], sumZ );
+    const XMVECTOR sumXYZ = _mm_fmadd_ps( splatX, M.r[0], sumYZ );
+    const XMVECTOR splatW = _mm_permute_ps(sumXYZ,_MM_SHUFFLE(3,3,3,3));
+    return _mm_div_ps( sumXYZ, splatW );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3TransformNormal
+(
+    FXMVECTOR V,
+    CXMMATRIX M
+)
+{
+    // result = V.x * M.r[0] + (V.y * M.r[1] + V.z * M.r[2]); M.r[3] is not added
+    const XMVECTOR splatZ = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2));
+    const XMVECTOR splatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1));
+    const XMVECTOR splatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0));
+    const XMVECTOR termZ = _mm_mul_ps( splatZ, M.r[2] );
+    const XMVECTOR sumYZ = _mm_fmadd_ps( splatY, M.r[1], termZ );
+    return _mm_fmadd_ps( splatX, M.r[0], sumYZ );
+}
+
+XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2); // forward declaration: XMVector3Project/XMVector3Unproject call it before its definition in the Matrix section
+
+inline XMVECTOR XM_CALLCONV XMVector3Project
+(
+    FXMVECTOR V,
+    float ViewportX,
+    float ViewportY,
+    float ViewportWidth,
+    float ViewportHeight,
+    float ViewportMinZ,
+    float ViewportMaxZ,
+    CXMMATRIX Projection,
+    CXMMATRIX View,
+    CXMMATRIX World
+)
+{
+    // Viewport mapping: scale by the half extents (Y negated) and the depth range, then offset
+    const float halfW = ViewportWidth * 0.5f;
+    const float halfH = ViewportHeight * 0.5f;
+    const XMVECTOR viewportScale = XMVectorSet(halfW, -halfH, ViewportMaxZ - ViewportMinZ, 0.0f);
+    const XMVECTOR viewportOffset = XMVectorSet(ViewportX + halfW, ViewportY + halfH, ViewportMinZ, 0.0f);
+
+    // Combined transform: World * View * Projection
+    const XMMATRIX worldView = FMA3::XMMatrixMultiply(World, View);
+    const XMMATRIX worldViewProj = FMA3::XMMatrixMultiply(worldView, Projection);
+
+    // Transform (including the divide by W), then map into the viewport
+    const XMVECTOR projected = FMA3::XMVector3TransformCoord(V, worldViewProj);
+
+    return FMA3::XMVectorMultiplyAdd(projected, viewportScale, viewportOffset);
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3Unproject
+(
+    FXMVECTOR V,
+    float ViewportX,
+    float ViewportY,
+    float ViewportWidth,
+    float ViewportHeight,
+    float ViewportMinZ,
+    float ViewportMaxZ,
+    CXMMATRIX Projection,
+    CXMMATRIX View,
+    CXMMATRIX World
+)
+{
+    static const XMVECTORF32 bias = { { { -1.0f, 1.0f, 0.0f, 0.0f } } };
+
+    // Reciprocal of the viewport scale (half extents with Y negated, depth range, W lane = 1)
+    const XMVECTOR invScale = XMVectorReciprocal(
+        XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f));
+    const XMVECTOR negOrigin = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f);
+    const XMVECTOR offset = FMA3::XMVectorMultiplyAdd(invScale, negOrigin, bias.v);
+
+    // Inverse of the combined World * View * Projection transform
+    const XMMATRIX worldView = FMA3::XMMatrixMultiply(World, View);
+    const XMMATRIX worldViewProj = FMA3::XMMatrixMultiply(worldView, Projection);
+    const XMMATRIX inverse = XMMatrixInverse(nullptr, worldViewProj);
+
+    const XMVECTOR unmapped = FMA3::XMVectorMultiplyAdd(V, invScale, offset);
+    return FMA3::XMVector3TransformCoord(unmapped, inverse);
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector4
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVector4Transform
+(
+    FXMVECTOR V,
+    CXMMATRIX M
+)
+{
+    // result = V.x * M.r[0] + (V.y * M.r[1] + (V.z * M.r[2] + V.w * M.r[3]))
+    const XMVECTOR splatW = _mm_permute_ps(V,_MM_SHUFFLE(3,3,3,3));
+    const XMVECTOR splatZ = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2));
+    const XMVECTOR splatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1));
+    const XMVECTOR splatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0));
+    const XMVECTOR termW = _mm_mul_ps( splatW, M.r[3] );
+    const XMVECTOR sumZW = _mm_fmadd_ps( splatZ, M.r[2], termW );
+    const XMVECTOR sumYZW = _mm_fmadd_ps( splatY, M.r[1], sumZW );
+    return _mm_fmadd_ps( splatX, M.r[0], sumYZW );
+}
+
+
+//-------------------------------------------------------------------------------------
+// Matrix
+//-------------------------------------------------------------------------------------
+
+inline XMMATRIX XM_CALLCONV XMMatrixMultiply
+(
+    CXMMATRIX M1,
+    CXMMATRIX M2
+)
+{
+    XMMATRIX product;
+    // Row 0 of the product is built from row 0 of M1
+    XMVECTOR row = M1.r[0];
+    // Broadcast each component of that row across a full vector
+    XMVECTOR cx = _mm_permute_ps(row,_MM_SHUFFLE(0,0,0,0));
+    XMVECTOR cy = _mm_permute_ps(row,_MM_SHUFFLE(1,1,1,1));
+    XMVECTOR cz = _mm_permute_ps(row,_MM_SHUFFLE(2,2,2,2));
+    XMVECTOR cw = _mm_permute_ps(row,_MM_SHUFFLE(3,3,3,3));
+    // Weighted sum of the rows of M2, accumulated with fused multiply-adds
+    XMVECTOR acc = _mm_mul_ps(cx,M2.r[0]);
+    acc = _mm_fmadd_ps(cy,M2.r[1],acc);
+    acc = _mm_fmadd_ps(cz,M2.r[2],acc);
+    acc = _mm_fmadd_ps(cw,M2.r[3],acc);
+    product.r[0] = acc;
+    // Rows 1 to 3 follow the same pattern
+    row = M1.r[1];
+    cx = _mm_permute_ps(row,_MM_SHUFFLE(0,0,0,0));
+    cy = _mm_permute_ps(row,_MM_SHUFFLE(1,1,1,1));
+    cz = _mm_permute_ps(row,_MM_SHUFFLE(2,2,2,2));
+    cw = _mm_permute_ps(row,_MM_SHUFFLE(3,3,3,3));
+    acc = _mm_mul_ps(cx,M2.r[0]);
+    acc = _mm_fmadd_ps(cy,M2.r[1],acc);
+    acc = _mm_fmadd_ps(cz,M2.r[2],acc);
+    acc = _mm_fmadd_ps(cw,M2.r[3],acc);
+    product.r[1] = acc;
+    row = M1.r[2];
+    cx = _mm_permute_ps(row,_MM_SHUFFLE(0,0,0,0));
+    cy = _mm_permute_ps(row,_MM_SHUFFLE(1,1,1,1));
+    cz = _mm_permute_ps(row,_MM_SHUFFLE(2,2,2,2));
+    cw = _mm_permute_ps(row,_MM_SHUFFLE(3,3,3,3));
+    acc = _mm_mul_ps(cx,M2.r[0]);
+    acc = _mm_fmadd_ps(cy,M2.r[1],acc);
+    acc = _mm_fmadd_ps(cz,M2.r[2],acc);
+    acc = _mm_fmadd_ps(cw,M2.r[3],acc);
+    product.r[2] = acc;
+    row = M1.r[3];
+    cx = _mm_permute_ps(row,_MM_SHUFFLE(0,0,0,0));
+    cy = _mm_permute_ps(row,_MM_SHUFFLE(1,1,1,1));
+    cz = _mm_permute_ps(row,_MM_SHUFFLE(2,2,2,2));
+    cw = _mm_permute_ps(row,_MM_SHUFFLE(3,3,3,3));
+    acc = _mm_mul_ps(cx,M2.r[0]);
+    acc = _mm_fmadd_ps(cy,M2.r[1],acc);
+    acc = _mm_fmadd_ps(cz,M2.r[2],acc);
+    acc = _mm_fmadd_ps(cw,M2.r[3],acc);
+    product.r[3] = acc;
+    return product;
+}
+
+inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
+(
+    FXMMATRIX M1,
+    CXMMATRIX M2
+)
+{
+    // Row 0 of the (untransposed) product is built from row 0 of M1
+    XMVECTOR row = M1.r[0];
+    // Broadcast each component of that row across a full vector
+    XMVECTOR cx = _mm_permute_ps(row,_MM_SHUFFLE(0,0,0,0));
+    XMVECTOR cy = _mm_permute_ps(row,_MM_SHUFFLE(1,1,1,1));
+    XMVECTOR cz = _mm_permute_ps(row,_MM_SHUFFLE(2,2,2,2));
+    XMVECTOR cw = _mm_permute_ps(row,_MM_SHUFFLE(3,3,3,3));
+    // Weighted sum of the rows of M2, accumulated with fused multiply-adds
+    XMVECTOR acc = _mm_mul_ps(cx,M2.r[0]);
+    acc = _mm_fmadd_ps(cy,M2.r[1],acc);
+    acc = _mm_fmadd_ps(cz,M2.r[2],acc);
+    acc = _mm_fmadd_ps(cw,M2.r[3],acc);
+    __m128 prod0 = acc;
+    // Rows 1 to 3 follow the same pattern
+    row = M1.r[1];
+    cx = _mm_permute_ps(row,_MM_SHUFFLE(0,0,0,0));
+    cy = _mm_permute_ps(row,_MM_SHUFFLE(1,1,1,1));
+    cz = _mm_permute_ps(row,_MM_SHUFFLE(2,2,2,2));
+    cw = _mm_permute_ps(row,_MM_SHUFFLE(3,3,3,3));
+    acc = _mm_mul_ps(cx,M2.r[0]);
+    acc = _mm_fmadd_ps(cy,M2.r[1],acc);
+    acc = _mm_fmadd_ps(cz,M2.r[2],acc);
+    acc = _mm_fmadd_ps(cw,M2.r[3],acc);
+    __m128 prod1 = acc;
+    row = M1.r[2];
+    cx = _mm_permute_ps(row,_MM_SHUFFLE(0,0,0,0));
+    cy = _mm_permute_ps(row,_MM_SHUFFLE(1,1,1,1));
+    cz = _mm_permute_ps(row,_MM_SHUFFLE(2,2,2,2));
+    cw = _mm_permute_ps(row,_MM_SHUFFLE(3,3,3,3));
+    acc = _mm_mul_ps(cx,M2.r[0]);
+    acc = _mm_fmadd_ps(cy,M2.r[1],acc);
+    acc = _mm_fmadd_ps(cz,M2.r[2],acc);
+    acc = _mm_fmadd_ps(cw,M2.r[3],acc);
+    __m128 prod2 = acc;
+    row = M1.r[3];
+    cx = _mm_permute_ps(row,_MM_SHUFFLE(0,0,0,0));
+    cy = _mm_permute_ps(row,_MM_SHUFFLE(1,1,1,1));
+    cz = _mm_permute_ps(row,_MM_SHUFFLE(2,2,2,2));
+    cw = _mm_permute_ps(row,_MM_SHUFFLE(3,3,3,3));
+    acc = _mm_mul_ps(cx,M2.r[0]);
+    acc = _mm_fmadd_ps(cy,M2.r[1],acc);
+    acc = _mm_fmadd_ps(cz,M2.r[2],acc);
+    acc = _mm_fmadd_ps(cw,M2.r[3],acc);
+    __m128 prod3 = acc;
+
+    // prod0.x, prod0.y, prod1.x, prod1.y
+    XMVECTOR lo01 = _mm_shuffle_ps(prod0,prod1,_MM_SHUFFLE(1,0,1,0));
+    // prod0.z, prod0.w, prod1.z, prod1.w
+    XMVECTOR hi01 = _mm_shuffle_ps(prod0,prod1,_MM_SHUFFLE(3,2,3,2));
+    // prod2.x, prod2.y, prod3.x, prod3.y
+    XMVECTOR lo23 = _mm_shuffle_ps(prod2,prod3,_MM_SHUFFLE(1,0,1,0));
+    // prod2.z, prod2.w, prod3.z, prod3.w
+    XMVECTOR hi23 = _mm_shuffle_ps(prod2,prod3,_MM_SHUFFLE(3,2,3,2));
+
+    XMMATRIX transposed;
+    // Column x of the product: prod0.x, prod1.x, prod2.x, prod3.x
+    transposed.r[0] = _mm_shuffle_ps(lo01, lo23,_MM_SHUFFLE(2,0,2,0));
+    // Column y of the product
+    transposed.r[1] = _mm_shuffle_ps(lo01, lo23,_MM_SHUFFLE(3,1,3,1));
+    // Column z of the product
+    transposed.r[2] = _mm_shuffle_ps(hi01, hi23,_MM_SHUFFLE(2,0,2,0));
+    // Column w of the product
+    transposed.r[3] = _mm_shuffle_ps(hi01, hi23,_MM_SHUFFLE(3,1,3,1));
+    return transposed;
+}
+
+} // namespace FMA3
+
+} // namespace DirectX;
diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathFMA4.h b/Sdk/External/DirectXMath/Extensions/DirectXMathFMA4.h
new file mode 100644
index 0000000..38783d1
--- /dev/null
+++ b/Sdk/External/DirectXMath/Extensions/DirectXMathFMA4.h
@@ -0,0 +1,415 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathFMA4.h -- FMA4 extensions for SIMD C++ Math library
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
+#error FMA4 not supported on ARM platform
+#endif
+
+#include <DirectXMath.h>
+#include <ammintrin.h>
+
+#ifdef __GNUC__
+#include <x86intrin.h>
+#endif
+
+namespace DirectX
+{
+
+namespace FMA4
+{
+
+inline bool XMVerifyFMA4Support()
+{ // Runtime CPUID probe: true only when AVX, OSXSAVE and FMA4 are all reported.
+ // Should return true for AMD Bulldozer processors
+ // with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
+
+ // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
+ int CPUInfo[4] = {-1};
+#if defined(__clang__) || defined(__GNUC__)
+ __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+ __cpuid(CPUInfo, 0);
+#endif
+
+ if ( CPUInfo[0] < 1 ) // leaf 1 must exist before it is queried
+ return false;
+
+#if defined(__clang__) || defined(__GNUC__)
+ __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+ __cpuid(CPUInfo, 1);
+#endif
+
+ // We check for AVX, OSXSAVE (required to access FMA4)
+ if ( (CPUInfo[2] & 0x18000000) != 0x18000000 ) // both bits must be set
+ return false;
+
+#if defined(__clang__) || defined(__GNUC__)
+ __cpuid(0x80000000, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+ __cpuid(CPUInfo, 0x80000000);
+#endif
+
+ if ( uint32_t(CPUInfo[0]) < 0x80000001u ) // extended leaf 0x80000001 must exist
+ return false;
+
+ // We check for FMA4
+#if defined(__clang__) || defined(__GNUC__)
+ __cpuid(0x80000001, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+ __cpuid(CPUInfo, 0x80000001);
+#endif
+
+ return ( (CPUInfo[2] & 0x10000) != 0 ); // explicit bool test, matching the SSE3/SSE4 verifiers
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2,
+ FXMVECTOR V3
+)
+{ // Per-lane V1 * V2 + V3 using the FMA4 multiply-accumulate intrinsic.
+ return _mm_macc_ps( V1, V2, V3 );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2,
+ FXMVECTOR V3
+)
+{ // Per-lane V3 - (V1 * V2) using the FMA4 negated multiply-accumulate intrinsic.
+ return _mm_nmacc_ps( V1, V2, V3 );
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector2
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVector2Transform
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{ // Returns V.x*M.r[0] + V.y*M.r[1] + M.r[3].
+ const XMVECTOR vSplatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // broadcast V.y
+ XMVECTOR vAccum = _mm_macc_ps( vSplatY, M.r[1], M.r[3] ); // y*row1 + row3
+ const XMVECTOR vSplatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // broadcast V.x
+ vAccum = _mm_macc_ps( vSplatX, M.r[0], vAccum ); // + x*row0
+ return vAccum;
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2TransformCoord
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{ // Same sum as XMVector2Transform, then every lane is divided by the resulting w.
+ const XMVECTOR vSplatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // broadcast V.y
+ XMVECTOR vAccum = _mm_macc_ps( vSplatY, M.r[1], M.r[3] ); // y*row1 + row3
+ const XMVECTOR vSplatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // broadcast V.x
+ vAccum = _mm_macc_ps( vSplatX, M.r[0], vAccum ); // + x*row0
+ const XMVECTOR vSplatW = _mm_permute_ps(vAccum,_MM_SHUFFLE(3,3,3,3)); // broadcast result.w
+ vAccum = _mm_div_ps( vAccum, vSplatW );
+ return vAccum;
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2TransformNormal
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{ // Returns V.x*M.r[0] + V.y*M.r[1]; M.r[3] is not added.
+ const XMVECTOR vSplatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // broadcast V.y
+ XMVECTOR vAccum = _mm_mul_ps( vSplatY, M.r[1] ); // y*row1
+ const XMVECTOR vSplatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // broadcast V.x
+ vAccum = _mm_macc_ps( vSplatX, M.r[0], vAccum ); // + x*row0
+ return vAccum;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector3
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVector3Transform
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{ // Returns V.x*M.r[0] + V.y*M.r[1] + V.z*M.r[2] + M.r[3].
+ const XMVECTOR vSplatZ = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // broadcast V.z
+ XMVECTOR vAccum = _mm_macc_ps( vSplatZ, M.r[2], M.r[3] ); // z*row2 + row3
+ const XMVECTOR vSplatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // broadcast V.y
+ vAccum = _mm_macc_ps( vSplatY, M.r[1], vAccum ); // + y*row1
+ const XMVECTOR vSplatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // broadcast V.x
+ vAccum = _mm_macc_ps( vSplatX, M.r[0], vAccum ); // + x*row0
+ return vAccum;
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3TransformCoord
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{ // Same sum as XMVector3Transform, then every lane is divided by the resulting w.
+ const XMVECTOR vSplatZ = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // broadcast V.z
+ XMVECTOR vAccum = _mm_macc_ps( vSplatZ, M.r[2], M.r[3] ); // z*row2 + row3
+ const XMVECTOR vSplatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // broadcast V.y
+ vAccum = _mm_macc_ps( vSplatY, M.r[1], vAccum ); // + y*row1
+ const XMVECTOR vSplatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // broadcast V.x
+ vAccum = _mm_macc_ps( vSplatX, M.r[0], vAccum ); // + x*row0
+ const XMVECTOR vSplatW = _mm_permute_ps(vAccum,_MM_SHUFFLE(3,3,3,3)); // broadcast result.w
+ vAccum = _mm_div_ps( vAccum, vSplatW );
+ return vAccum;
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3TransformNormal
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{ // Returns V.x*M.r[0] + V.y*M.r[1] + V.z*M.r[2]; M.r[3] is not added.
+ const XMVECTOR vSplatZ = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // broadcast V.z
+ XMVECTOR vAccum = _mm_mul_ps( vSplatZ, M.r[2] ); // z*row2
+ const XMVECTOR vSplatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // broadcast V.y
+ vAccum = _mm_macc_ps( vSplatY, M.r[1], vAccum ); // + y*row1
+ const XMVECTOR vSplatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // broadcast V.x
+ vAccum = _mm_macc_ps( vSplatX, M.r[0], vAccum ); // + x*row0
+ return vAccum;
+}
+
+XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2); // forward declaration; defined in the Matrix section of this header
+
+inline XMVECTOR XM_CALLCONV XMVector3Project
+(
+ FXMVECTOR V,
+ float ViewportX,
+ float ViewportY,
+ float ViewportWidth,
+ float ViewportHeight,
+ float ViewportMinZ,
+ float ViewportMaxZ,
+ CXMMATRIX Projection,
+ CXMMATRIX View,
+ CXMMATRIX World
+)
+{ // Transforms V by World*View*Projection (with divide by w), then applies the viewport scale and offset.
+ const float HalfWidth = ViewportWidth * 0.5f;
+ const float HalfHeight = ViewportHeight * 0.5f;
+
+ const XMVECTOR vScale = XMVectorSet(HalfWidth, -HalfHeight, ViewportMaxZ - ViewportMinZ, 0.0f);
+ const XMVECTOR vOffset = XMVectorSet(ViewportX + HalfWidth, ViewportY + HalfHeight, ViewportMinZ, 0.0f);
+
+ XMMATRIX mWorldViewProj = FMA4::XMMatrixMultiply(World, View);
+ mWorldViewProj = FMA4::XMMatrixMultiply(mWorldViewProj, Projection);
+
+ const XMVECTOR vProjected = FMA4::XMVector3TransformCoord(V, mWorldViewProj);
+
+ const XMVECTOR vScreen = FMA4::XMVectorMultiplyAdd(vProjected, vScale, vOffset); // projected*scale + offset
+
+ return vScreen;
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3Unproject
+(
+ FXMVECTOR V,
+ float ViewportX,
+ float ViewportY,
+ float ViewportWidth,
+ float ViewportHeight,
+ float ViewportMinZ,
+ float ViewportMaxZ,
+ CXMMATRIX Projection,
+ CXMMATRIX View,
+ CXMMATRIX World
+)
+{ // Undoes the viewport scale/offset on V, then transforms by the inverse of World*View*Projection.
+ static const XMVECTORF32 D = { { { -1.0f, 1.0f, 0.0f, 0.0f } } };
+
+ XMVECTOR vInvScale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f);
+ vInvScale = XMVectorReciprocal(vInvScale);
+
+ XMVECTOR vBias = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f);
+ vBias = FMA4::XMVectorMultiplyAdd(vInvScale, vBias, D.v); // invScale*bias + D
+
+ XMMATRIX mInvWorldViewProj = FMA4::XMMatrixMultiply(World, View);
+ mInvWorldViewProj = FMA4::XMMatrixMultiply(mInvWorldViewProj, Projection);
+ mInvWorldViewProj = XMMatrixInverse(nullptr, mInvWorldViewProj);
+
+ const XMVECTOR vNormalized = FMA4::XMVectorMultiplyAdd(V, vInvScale, vBias); // V*invScale + bias
+
+ return FMA4::XMVector3TransformCoord(vNormalized, mInvWorldViewProj);
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector4
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVector4Transform
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{ // Returns V.x*M.r[0] + V.y*M.r[1] + V.z*M.r[2] + V.w*M.r[3].
+ const XMVECTOR vSplatW = _mm_permute_ps(V,_MM_SHUFFLE(3,3,3,3)); // broadcast V.w
+ XMVECTOR vAccum = _mm_mul_ps( vSplatW, M.r[3] ); // w*row3
+ const XMVECTOR vSplatZ = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // broadcast V.z
+ vAccum = _mm_macc_ps( vSplatZ, M.r[2], vAccum ); // + z*row2
+ const XMVECTOR vSplatY = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // broadcast V.y
+ vAccum = _mm_macc_ps( vSplatY, M.r[1], vAccum ); // + y*row1
+ const XMVECTOR vSplatX = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // broadcast V.x
+ vAccum = _mm_macc_ps( vSplatX, M.r[0], vAccum ); // + x*row0
+ return vAccum;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Matrix
+//-------------------------------------------------------------------------------------
+
+inline XMMATRIX XM_CALLCONV XMMatrixMultiply
+(
+ CXMMATRIX M1,
+ CXMMATRIX M2
+)
+{ // Each result row i is M1.r[i].x*M2.r[0] + .y*M2.r[1] + .z*M2.r[2] + .w*M2.r[3].
+ XMMATRIX mResult;
+ // Use vW to hold the original row
+ XMVECTOR vW = M1.r[0];
+ // Splat the component X,Y,Z then W
+ XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+ XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); // vW is overwritten last because the splats above read it
+ // Perform the operation on the first row
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_macc_ps(vY,M2.r[1],vX);
+ vX = _mm_macc_ps(vZ,M2.r[2],vX);
+ vX = _mm_macc_ps(vW,M2.r[3],vX);
+ mResult.r[0] = vX;
+ // Repeat for the other 3 rows
+ vW = M1.r[1];
+ vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+ vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_macc_ps(vY,M2.r[1],vX);
+ vX = _mm_macc_ps(vZ,M2.r[2],vX);
+ vX = _mm_macc_ps(vW,M2.r[3],vX);
+ mResult.r[1] = vX;
+ vW = M1.r[2];
+ vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+ vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_macc_ps(vY,M2.r[1],vX);
+ vX = _mm_macc_ps(vZ,M2.r[2],vX);
+ vX = _mm_macc_ps(vW,M2.r[3],vX);
+ mResult.r[2] = vX;
+ vW = M1.r[3];
+ vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+ vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_macc_ps(vY,M2.r[1],vX);
+ vX = _mm_macc_ps(vZ,M2.r[2],vX);
+ vX = _mm_macc_ps(vW,M2.r[3],vX);
+ mResult.r[3] = vX;
+ return mResult;
+}
+
+inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
+(
+ FXMMATRIX M1,
+ CXMMATRIX M2
+)
+{ // Computes the four rows of M1*M2 into r0..r3, then transposes them with two shuffle passes.
+ // Use vW to hold the original row
+ XMVECTOR vW = M1.r[0];
+ // Splat the component X,Y,Z then W
+ XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+ XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); // vW is overwritten last because the splats above read it
+ // Perform the operation on the first row
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_macc_ps(vY,M2.r[1],vX);
+ vX = _mm_macc_ps(vZ,M2.r[2],vX);
+ vX = _mm_macc_ps(vW,M2.r[3],vX);
+ __m128 r0 = vX; // row 0 of the untransposed product
+ // Repeat for the other 3 rows
+ vW = M1.r[1];
+ vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+ vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_macc_ps(vY,M2.r[1],vX);
+ vX = _mm_macc_ps(vZ,M2.r[2],vX);
+ vX = _mm_macc_ps(vW,M2.r[3],vX);
+ __m128 r1 = vX; // row 1 of the untransposed product
+ vW = M1.r[2];
+ vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+ vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_macc_ps(vY,M2.r[1],vX);
+ vX = _mm_macc_ps(vZ,M2.r[2],vX);
+ vX = _mm_macc_ps(vW,M2.r[3],vX);
+ __m128 r2 = vX; // row 2 of the untransposed product
+ vW = M1.r[3];
+ vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0));
+ vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vX = _mm_macc_ps(vY,M2.r[1],vX);
+ vX = _mm_macc_ps(vZ,M2.r[2],vX);
+ vX = _mm_macc_ps(vW,M2.r[3],vX);
+ __m128 r3 = vX; // row 3 of the untransposed product
+
+ // x.x,x.y,y.x,y.y
+ XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0));
+ // x.z,x.w,y.z,y.w
+ XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2));
+ // z.x,z.y,w.x,w.y
+ XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0));
+ // z.z,z.w,w.z,w.w
+ XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2));
+
+ XMMATRIX mResult;
+ // x.x,y.x,z.x,w.x
+ mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
+ // x.y,y.y,z.y,w.y
+ mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
+ // x.z,y.z,z.z,w.z
+ mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
+ // x.w,y.w,z.w,w.w
+ mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
+ return mResult;
+}
+
+} // namespace FMA4
+
+} // namespace DirectX;
diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathSSE3.h b/Sdk/External/DirectXMath/Extensions/DirectXMathSSE3.h
new file mode 100644
index 0000000..72a3dbb
--- /dev/null
+++ b/Sdk/External/DirectXMath/Extensions/DirectXMathSSE3.h
@@ -0,0 +1,111 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathSSE3.h -- SSE3 extensions for SIMD C++ Math library
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
+#error SSE3 not supported on ARM platform
+#endif
+
+#include <pmmintrin.h>
+
+#include <DirectXMath.h>
+
+namespace DirectX
+{
+
+namespace SSE3
+{
+
+inline bool XMVerifySSE3Support()
+{ // Runtime CPUID probe: true when leaf 1 reports the SSE3 feature bit.
+ // Should return true on AMD Athlon 64, AMD Phenom, and Intel Pentium 4 or later processors
+
+ // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
+ int CPUInfo[4] = { -1 };
+#if defined(__clang__) || defined(__GNUC__)
+ __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+ __cpuid(CPUInfo, 0);
+#endif
+ if ( CPUInfo[0] < 1 ) // leaf 1 must exist before it is queried
+ return false;
+
+#if defined(__clang__) || defined(__GNUC__)
+ __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+ __cpuid(CPUInfo, 1);
+#endif
+
+ // We only check for SSE3 instruction set. SSSE3 instructions are not used.
+ return ( (CPUInfo[2] & 0x1) != 0 ); // bit 0 of the third CPUID output register
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2Dot
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{ // Returns V1.x*V2.x + V1.y*V2.y replicated into all four lanes.
+ const XMVECTOR vProduct = _mm_mul_ps(V1,V2);
+ const XMVECTOR vPairSums = _mm_hadd_ps(vProduct,vProduct); // lane 0 = x*x' + y*y'
+ return _mm_shuffle_ps(vPairSums,vPairSums,_MM_SHUFFLE(0,0,0,0)); // broadcast lane 0
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2LengthSq( FXMVECTOR V )
+{ // Squared length, computed as the 2D dot product of V with itself.
+ return SSE3::XMVector2Dot(V, V);
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3Dot
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{ // Masked per-lane product reduced with two horizontal adds; every lane receives the sum.
+ const XMVECTOR vProduct = _mm_mul_ps(V1,V2);
+ const XMVECTOR vMasked = _mm_and_ps( vProduct, g_XMMask3 ); // presumably clears the w lane - confirm g_XMMask3
+ const XMVECTOR vPairSums = _mm_hadd_ps(vMasked,vMasked);
+ return _mm_hadd_ps(vPairSums,vPairSums);
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3LengthSq( FXMVECTOR V )
+{ // Squared length, computed as the 3D dot product of V with itself.
+ return SSE3::XMVector3Dot(V, V);
+}
+
+inline XMVECTOR XM_CALLCONV XMVector4Dot
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{ // Per-lane product reduced with two horizontal adds; every lane receives the four-term sum.
+ const XMVECTOR vProduct = _mm_mul_ps(V1,V2);
+ const XMVECTOR vPairSums = _mm_hadd_ps( vProduct, vProduct );
+ return _mm_hadd_ps( vPairSums, vPairSums );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector4LengthSq( FXMVECTOR V )
+{ // Squared length, computed as the 4D dot product of V with itself.
+ return SSE3::XMVector4Dot(V, V);
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSwizzle_0022( FXMVECTOR V )
+{ // Duplicates the even lanes: result is (V[0], V[0], V[2], V[2]).
+ return _mm_moveldup_ps(V);
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSwizzle_1133( FXMVECTOR V )
+{ // Duplicates the odd lanes: result is (V[1], V[1], V[3], V[3]).
+ return _mm_movehdup_ps(V);
+}
+
+} // namespace SSE3
+
+} // namespace DirectX
diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathSSE4.h b/Sdk/External/DirectXMath/Extensions/DirectXMathSSE4.h
new file mode 100644
index 0000000..16378cb
--- /dev/null
+++ b/Sdk/External/DirectXMath/Extensions/DirectXMathSSE4.h
@@ -0,0 +1,417 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathSSE4.h -- SSE4.1 extensions for SIMD C++ Math library
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
+#error SSE4 not supported on ARM platform
+#endif
+
+#include <smmintrin.h>
+
+#include <DirectXMath.h>
+
+namespace DirectX
+{
+
+namespace SSE4
+{
+
+inline bool XMVerifySSE4Support()
+{ // Runtime CPUID probe: true when leaf 1 reports the SSE4.1 feature bit.
+ // Should return true on AMD Bulldozer, Intel Core 2 ("Penryn"), and Intel Core i7 ("Nehalem") or later processors
+
+ // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
+ int CPUInfo[4] = { -1 };
+#if defined(__clang__) || defined(__GNUC__)
+ __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+ __cpuid(CPUInfo, 0);
+#endif
+ if ( CPUInfo[0] < 1 ) // leaf 1 must exist before it is queried
+ return false;
+
+#if defined(__clang__) || defined(__GNUC__)
+ __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
+#else
+ __cpuid(CPUInfo, 1);
+#endif
+
+ // We only check for SSE4.1 instruction set. SSE4.2 instructions are not used.
+ return ( (CPUInfo[2] & 0x80000) == 0x80000 ); // bit 19 of the third CPUID output register
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector
+//-------------------------------------------------------------------------------------
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wundefined-reinterpret-cast"
+#endif
+
+inline void XM_CALLCONV XMVectorGetYPtr(_Out_ float *y, _In_ FXMVECTOR V)
+{ // Stores lane 1 of V to *y; y must be non-null.
+ assert( y != nullptr );
+ *reinterpret_cast<int*>(y) = _mm_extract_ps( V, 1 ); // _mm_extract_ps yields the float's raw bits as int
+}
+
+inline void XM_CALLCONV XMVectorGetZPtr(_Out_ float *z, _In_ FXMVECTOR V)
+{ // Stores lane 2 of V to *z; z must be non-null.
+ assert( z != nullptr );
+ *reinterpret_cast<int*>(z) = _mm_extract_ps( V, 2 ); // _mm_extract_ps yields the float's raw bits as int
+}
+
+inline void XM_CALLCONV XMVectorGetWPtr(_Out_ float *w, _In_ FXMVECTOR V)
+{ // Stores lane 3 of V to *w; w must be non-null.
+ assert( w != nullptr );
+ *reinterpret_cast<int*>(w) = _mm_extract_ps( V, 3 ); // _mm_extract_ps yields the float's raw bits as int
+}
+
+inline uint32_t XM_CALLCONV XMVectorGetIntY(FXMVECTOR V)
+{ // Returns the raw 32-bit contents of lane 1 of V.
+ __m128i V1 = _mm_castps_si128( V ); // reinterpret bits, no conversion
+ return static_cast<uint32_t>( _mm_extract_epi32( V1, 1 ) );
+}
+
+inline uint32_t XM_CALLCONV XMVectorGetIntZ(FXMVECTOR V)
+{ // Returns the raw 32-bit contents of lane 2 of V.
+ __m128i V1 = _mm_castps_si128( V ); // reinterpret bits, no conversion
+ return static_cast<uint32_t>( _mm_extract_epi32( V1, 2 ) );
+}
+
+inline uint32_t XM_CALLCONV XMVectorGetIntW(FXMVECTOR V)
+{ // Returns the raw 32-bit contents of lane 3 of V.
+ __m128i V1 = _mm_castps_si128( V ); // reinterpret bits, no conversion
+ return static_cast<uint32_t>( _mm_extract_epi32( V1, 3 ) );
+}
+
+inline void XM_CALLCONV XMVectorGetIntYPtr(_Out_ uint32_t *y, _In_ FXMVECTOR V)
+{ // Stores the raw 32-bit contents of lane 1 of V to *y; y must be non-null.
+ assert( y != nullptr );
+ __m128i V1 = _mm_castps_si128( V ); // reinterpret bits, no conversion
+ *y = static_cast<uint32_t>( _mm_extract_epi32( V1, 1 ) );
+}
+
+inline void XM_CALLCONV XMVectorGetIntZPtr(_Out_ uint32_t *z, _In_ FXMVECTOR V)
+{ // Stores the raw 32-bit contents of lane 2 of V to *z; z must be non-null.
+ assert( z != nullptr );
+ __m128i V1 = _mm_castps_si128( V ); // reinterpret bits, no conversion
+ *z = static_cast<uint32_t>( _mm_extract_epi32( V1, 2 ) );
+}
+
+inline void XM_CALLCONV XMVectorGetIntWPtr(_Out_ uint32_t *w, _In_ FXMVECTOR V)
+{ // Stores the raw 32-bit contents of lane 3 of V to *w; w must be non-null.
+ assert( w != nullptr );
+ __m128i V1 = _mm_castps_si128( V ); // reinterpret bits, no conversion
+ *w = static_cast<uint32_t>( _mm_extract_epi32( V1, 3 ) );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSetY(FXMVECTOR V, float y)
+{ // Returns a copy of V with lane 1 replaced by y.
+ const XMVECTOR vScalar = _mm_set_ss(y); // y in lane 0, zeros elsewhere
+ const XMVECTOR vMerged = _mm_insert_ps( V, vScalar, 0x10 ); // lane 0 of vScalar -> lane 1 of V
+ return vMerged;
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSetZ(FXMVECTOR V, float z)
+{ // Returns a copy of V with lane 2 replaced by z.
+ const XMVECTOR vScalar = _mm_set_ss(z); // z in lane 0, zeros elsewhere
+ const XMVECTOR vMerged = _mm_insert_ps( V, vScalar, 0x20 ); // lane 0 of vScalar -> lane 2 of V
+ return vMerged;
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSetW(FXMVECTOR V, float w)
+{ // Returns a copy of V with lane 3 replaced by w.
+ const XMVECTOR vScalar = _mm_set_ss(w); // w in lane 0, zeros elsewhere
+ const XMVECTOR vMerged = _mm_insert_ps( V, vScalar, 0x30 ); // lane 0 of vScalar -> lane 3 of V
+ return vMerged;
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSetIntY(FXMVECTOR V, uint32_t y)
+{ // Returns a copy of V with the raw 32 bits of lane 1 replaced by y.
+ __m128i vResult = _mm_castps_si128( V ); // reinterpret bits, no conversion
+ vResult = _mm_insert_epi32( vResult, static_cast<int>(y), 1 );
+ return _mm_castsi128_ps( vResult );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSetIntZ(FXMVECTOR V, uint32_t z)
+{ // Returns a copy of V with the raw 32 bits of lane 2 replaced by z.
+ __m128i vResult = _mm_castps_si128( V ); // reinterpret bits, no conversion
+ vResult = _mm_insert_epi32( vResult, static_cast<int>(z), 2 );
+ return _mm_castsi128_ps( vResult );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorSetIntW(FXMVECTOR V, uint32_t w)
+{ // Returns a copy of V with the raw 32 bits of lane 3 replaced by w.
+ __m128i vResult = _mm_castps_si128( V ); // reinterpret bits, no conversion
+ vResult = _mm_insert_epi32( vResult, static_cast<int>(w), 3 );
+ return _mm_castsi128_ps( vResult );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorRound( FXMVECTOR V )
+{ // Rounds each lane to the nearest integer value, with rounding exceptions suppressed.
+ return _mm_round_ps( V, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorTruncate( FXMVECTOR V )
+{ // Rounds each lane toward zero, with rounding exceptions suppressed.
+ return _mm_round_ps( V, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorFloor( FXMVECTOR V )
+{ // Per-lane floor via the SSE4.1 intrinsic.
+ return _mm_floor_ps( V );
+}
+
+inline XMVECTOR XM_CALLCONV XMVectorCeiling( FXMVECTOR V )
+{ // Per-lane ceiling via the SSE4.1 intrinsic.
+ return _mm_ceil_ps( V );
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector2
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVector2Dot( FXMVECTOR V1, FXMVECTOR V2 )
+{ // 2D dot product replicated into all four lanes.
+ return _mm_dp_ps( V1, V2, 0x3f ); // high nibble 0x3: multiply lanes 0-1; low nibble 0xf: write sum to every lane
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2LengthSq( FXMVECTOR V )
+{ // Squared length, computed as the 2D dot product of V with itself.
+ return SSE4::XMVector2Dot(V, V);
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2ReciprocalLengthEst( FXMVECTOR V )
+{ // Approximate 1/length of the xy part of V, in every lane.
+ const XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x3f );
+ return _mm_rsqrt_ps( vLengthSq ); // approximate reciprocal square root
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2ReciprocalLength( FXMVECTOR V )
+{ // 1/length of the xy part of V, in every lane (full-precision sqrt and divide).
+ const XMVECTOR vDot = _mm_dp_ps( V, V, 0x3f );
+ const XMVECTOR vLength = _mm_sqrt_ps( vDot );
+ return _mm_div_ps( g_XMOne, vLength );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2LengthEst( FXMVECTOR V )
+{ // Length of the xy part of V in every lane; uses the same sqrt as XMVector2Length.
+ const XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x3f );
+ return _mm_sqrt_ps( vLengthSq );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2Length( FXMVECTOR V )
+{ // Length of the xy part of V, in every lane.
+ const XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x3f );
+ return _mm_sqrt_ps( vLengthSq );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2NormalizeEst( FXMVECTOR V )
+{ // Scales V by an approximate 1/length of its xy part; no zero or infinity handling.
+ const XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x3f );
+ const XMVECTOR vInvLength = _mm_rsqrt_ps( vLengthSq );
+ return _mm_mul_ps(vInvLength, V);
+}
+
+inline XMVECTOR XM_CALLCONV XMVector2Normalize( FXMVECTOR V )
+{ // V / length(xy); yields zero for zero length and g_XMQNaN when the squared length equals g_XMInfinity.
+ XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x3f );
+ // Prepare for the division
+ XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
+ // Create zero with a single instruction
+ XMVECTOR vZeroMask = _mm_setzero_ps();
+ // Test for a divide by zero (Must be FP to detect -0.0)
+ vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
+ // Build a mask of lanes whose squared length is not infinity
+ // (vLengthSq is reused to hold that mask from here on)
+ vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
+ // Divide to perform the normalization
+ vResult = _mm_div_ps(V,vResult);
+ // Zero the result where the length was zero
+ vResult = _mm_and_ps(vResult,vZeroMask);
+ // Select qnan or result based on infinite length
+ XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
+ XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
+ vResult = _mm_or_ps(vTemp1,vTemp2);
+ return vResult;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector3
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVector3Dot( FXMVECTOR V1, FXMVECTOR V2 )
+{ // 3D dot product replicated into all four lanes.
+ return _mm_dp_ps( V1, V2, 0x7f ); // high nibble 0x7: multiply lanes 0-2; low nibble 0xf: write sum to every lane
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3LengthSq( FXMVECTOR V )
+{ // Squared length, computed as the 3D dot product of V with itself.
+ return SSE4::XMVector3Dot(V, V);
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLengthEst( FXMVECTOR V )
+{ // Approximate 1/length of the xyz part of V, in every lane.
+ const XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x7f );
+ return _mm_rsqrt_ps( vLengthSq ); // approximate reciprocal square root
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLength( FXMVECTOR V )
+{ // 1/length of the xyz part of V, in every lane (full-precision sqrt and divide).
+ const XMVECTOR vDot = _mm_dp_ps( V, V, 0x7f );
+ const XMVECTOR vLength = _mm_sqrt_ps( vDot );
+ return _mm_div_ps( g_XMOne, vLength );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3LengthEst( FXMVECTOR V )
+{ // Length of the xyz part of V in every lane; uses the same sqrt as XMVector3Length.
+ const XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x7f );
+ return _mm_sqrt_ps( vLengthSq );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3Length( FXMVECTOR V )
+{ // Length of the xyz part of V, in every lane.
+ const XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x7f );
+ return _mm_sqrt_ps( vLengthSq );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3NormalizeEst( FXMVECTOR V )
+{ // Scales V by an approximate 1/length of its xyz part; no zero or infinity handling.
+ const XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x7f );
+ const XMVECTOR vInvLength = _mm_rsqrt_ps( vLengthSq );
+ return _mm_mul_ps(vInvLength, V);
+}
+
+inline XMVECTOR XM_CALLCONV XMVector3Normalize( FXMVECTOR V )
+{ // V / length(xyz); yields zero for zero length and g_XMQNaN when the squared length equals g_XMInfinity.
+ XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x7f );
+ // Prepare for the division
+ XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
+ // Create zero with a single instruction
+ XMVECTOR vZeroMask = _mm_setzero_ps();
+ // Test for a divide by zero (Must be FP to detect -0.0)
+ vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
+ // Build a mask of lanes whose squared length is not infinity
+ // (vLengthSq is reused to hold that mask from here on)
+ vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
+ // Divide to perform the normalization
+ vResult = _mm_div_ps(V,vResult);
+ // Zero the result where the length was zero
+ vResult = _mm_and_ps(vResult,vZeroMask);
+ // Select qnan or result based on infinite length
+ XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
+ XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
+ vResult = _mm_or_ps(vTemp1,vTemp2);
+ return vResult;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Vector4
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMVector4Dot( FXMVECTOR V1, FXMVECTOR V2 )
+{ // 4D dot product replicated into all four lanes.
+ return _mm_dp_ps( V1, V2, 0xff ); // high nibble 0xf: multiply all lanes; low nibble 0xf: write sum to every lane
+}
+
+inline XMVECTOR XM_CALLCONV XMVector4LengthSq( FXMVECTOR V )
+{ // Squared length, computed as the 4D dot product of V with itself.
+ return SSE4::XMVector4Dot(V, V);
+}
+
+inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLengthEst( FXMVECTOR V )
+{ // Approximate 1/length of V (all four components), in every lane.
+ const XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0xff );
+ return _mm_rsqrt_ps( vLengthSq ); // approximate reciprocal square root
+}
+
+inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLength( FXMVECTOR V )
+{ // 1/length of V (all four components), in every lane (full-precision sqrt and divide).
+ const XMVECTOR vDot = _mm_dp_ps( V, V, 0xff );
+ const XMVECTOR vLength = _mm_sqrt_ps( vDot );
+ return _mm_div_ps( g_XMOne, vLength );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector4LengthEst( FXMVECTOR V )
+{ // Length of V (all four components) in every lane; uses the same sqrt as XMVector4Length.
+ const XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0xff );
+ return _mm_sqrt_ps( vLengthSq );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector4Length( FXMVECTOR V )
+{ // Length of V (all four components), in every lane.
+ const XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0xff );
+ return _mm_sqrt_ps( vLengthSq );
+}
+
+inline XMVECTOR XM_CALLCONV XMVector4NormalizeEst( FXMVECTOR V )
+{ // Scales V by an approximate 1/length; no zero or infinity handling.
+ const XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0xff );
+ const XMVECTOR vInvLength = _mm_rsqrt_ps( vLengthSq );
+ return _mm_mul_ps(vInvLength, V);
+}
+
+inline XMVECTOR XM_CALLCONV XMVector4Normalize( FXMVECTOR V )
+{ // V / length; yields zero for zero length and g_XMQNaN when the squared length equals g_XMInfinity.
+ XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0xff );
+ // Prepare for the division
+ XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
+ // Create zero with a single instruction
+ XMVECTOR vZeroMask = _mm_setzero_ps();
+ // Test for a divide by zero (Must be FP to detect -0.0)
+ vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
+ // Build a mask of lanes whose squared length is not infinity
+ // (vLengthSq is reused to hold that mask from here on)
+ vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
+ // Divide to perform the normalization
+ vResult = _mm_div_ps(V,vResult);
+ // Zero the result where the length was zero
+ vResult = _mm_and_ps(vResult,vZeroMask);
+ // Select qnan or result based on infinite length
+ XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
+ XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
+ vResult = _mm_or_ps(vTemp1,vTemp2);
+ return vResult;
+}
+
+
+//-------------------------------------------------------------------------------------
+// Plane
+//-------------------------------------------------------------------------------------
+
+inline XMVECTOR XM_CALLCONV XMPlaneNormalizeEst( FXMVECTOR P )
+{ // Scales all four lanes of P by an approximate 1/length of its xyz part.
+ const XMVECTOR vNormalLengthSq = _mm_dp_ps( P, P, 0x7f );
+ const XMVECTOR vInvLength = _mm_rsqrt_ps( vNormalLengthSq );
+ return _mm_mul_ps(vInvLength, P);
+}
+
+inline XMVECTOR XM_CALLCONV XMPlaneNormalize( FXMVECTOR P )
+{ // Divides all four lanes of P by the length of its xyz part; result is zero when the squared length equals g_XMInfinity.
+ XMVECTOR vLengthSq = _mm_dp_ps( P, P, 0x7f );
+ // Prepare for the division
+ XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
+ // Failsafe on zero (Or epsilon) length planes
+ // If the length is infinity, set the elements to zero
+ vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
+ // Divide to perform the normalization
+ vResult = _mm_div_ps(P,vResult);
+ // Any that are infinity, set to zero
+ vResult = _mm_and_ps(vResult,vLengthSq);
+ return vResult;
+}
+
+} // namespace SSE4
+
+} // namespace DirectX
diff --git a/Sdk/External/DirectXMath/HISTORY.md b/Sdk/External/DirectXMath/HISTORY.md
new file mode 100644
index 0000000..1d6f5fe
--- /dev/null
+++ b/Sdk/External/DirectXMath/HISTORY.md
@@ -0,0 +1,172 @@
+# DirectXMath
+
+https://github.com/Microsoft/DirectXMath
+
+Release available for download on [GitHub](https://github.com/microsoft/DirectXMath/releases)
+
+## Release History
+
+### August 2020 (3.16)
+* Added ``XMVectorLog10`` / ``XMVectorExp10``
+* Added ``XMColorRGBToYUV_UHD`` / ``XMColorYUVToRGB_UHD`` for Rec. 2020 YUV
+* Added optional ``rhcoords`` parameter for BoundingFrustum ``CreateFromMatrix``
+* Added use of Intel® Short Vector Math Library (SVML) supported by VS 2019
+ * Opt-in with ``_XM_SVML_INTRINSICS_``; opt-out with ``_XM_DISABLE_INTEL_SVML_``
+* Fixed denorm handling for ``XMConvertFloatToHalf``
+* Fixed flush (too small for denorm) handling for ``XMStoreFloat3PK``
+* Fixed clamping bug in ``XMStoreByteN4``
+* Cleaned up ARM-NEON intrinsics type issues for improved portability on GNUC
+* Fixed ``GXMVECTOR`` for x86 ``__vectorcall``
+* Code review
+
+### April 2020 (3.15)
+* Added ``XMMatrixVectorTensorProduct`` for creating a matrix from two vectors
+* Use of m256 registers and FMA3 with ``/arch:AVX2`` for stream and some matrix functions
+* Optimized load/stores for SSE2 float2 & float3 functions
+* Optimized some instruction choices for better AMD CPU support
+* Improved conformance for clang/LLVM, GCC, and MinGW compilers
+* Code review (``constexpr`` / ``noexcept`` usage)
+* Retired VS 2015 support
+
+### August 2019 (3.14)
+* Added float control around IsNan functions to resolve issue with VS 2019 with ``/fp:fast``
+* XMVerifyCPUSupport updated for clang/LLVM cpuid implementation on x86/x64
+* Added support for clang/LLVM built-in platform defines as well as the MSVC ones
+* Cleaned up ARM-NEON intrinsics type issues for improved portability
+* Removed unneeded malloc.h include in DirectXMath.h
+* Whitespace cleanup
+
+### July 2018 (3.13)
+* ``XMFLOAT3X4``, ``XMFLOAT3X4A``, and associated Load/Store functions
+* Move/copy constructors and assignment operators for C++ types
+* Minor fix for XMVectorClamp behavior with NaN
+* Fixed compilation warnings with VS 2017 (15.7 update), Intel C++ 18.0 compiler, and clang 6
+* Retired VS 2013 support
+* Minor code cleanup
+
+### February 2018 (3.12)
+* ARM64 use of fused multiply-accumulate intrinsics
+* Conformance fix for XMConvertFloatToHalf
+* Minor code cleanup
+
+### June 2017 (3.11)
+* AVX optimization of XMMatrixMultiply and XMMatrixMultiplyTranspose
+* AVX2 optimization for XMVectorSplatX
+* FMA3 optimization of XMVectorMultiplyAdd and XMVectorNegativeMultiplySubtract (implied by /arch:AVX2)
+* Conformance fixes to support compilation with Clang 3.7
+
+### January 2017 (3.10)
+* Added XMVectorSum for horizontal adds
+* ARMv8 intrinsics use for ARM64 platform (division, rounding, half-precision conversion)
+* Added SSE3 codepaths using opt-in ``_XM_SSE3_INTRINSICS_``
+* XMVectorRound fix for no-intrinsics to match round to nearest (even)
+* XMStoreFloat3SE fix when max channel isn't a perfect power of 2
+* constexpr conformance fix and workaround for compiler bug in VS 2015 RTM
+* Remove support for VS 2012 compilers
+* Remove ``__vector4i`` deprecated type
+
+### June 2016 (3.09)
+* Includes support for additional optimizations when built with /arch:AVX or /arch:AVX2
+* Added use of constexpr for type constructors, XMConvertToRadians, and XMConvertToDegrees
+* Marked ``__vector4i``, ``XMXDEC4``, ``XMDECN4``, ``XMDEC4``, and associated Load & Store functions as deprecated.
+ + These are vestiges of Xbox 360 support and will be removed in a future release
+* Renamed parameter in XMMatrixPerspectiveFov* to reduce user confusion when relying on IntelliSense
+* XMU565, XMUNIBBLE4 constructors take uint8_t instead of int8_t
+
+### May 2016
+* DirectXMath 3.08 released under the MIT license
+
+### November 2015 (3.08)
+* Added use of ``_mm_sfence`` for Stream methods
+* Fixed bug with non-uniform scaling transforms for BoundingOrientedBox
+* Added asserts for Near/FarZ in XMMatrix* methods
+* Added use of ``=default`` for PODs with VS 2013/2015
+* Additional SSE and ARM-NEON optimizations for PackedVector functions
+
+### April 2015 (3.07)
+* Fix customer reported bugs in BoundingBox methods
+* Fix customer reported bug in XMStoreFloat3SE
+* Fix customer reported bug in XMVectorATan2, XMVectorATan2Est
+* Fix customer reported bug in XMVectorRound
+
+### October 2013 (3.06)
+* Fixed load/store of XMFLOAT3SE to properly match the ``DXGI_FORMAT_R9G9B9E5_SHAREDEXP``
+* Added ``XMLoadUDecN4_XR`` and ``XMStoreUDecN4_XR`` to match ``DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM``
+* Added ``XMColorRGBToSRGB`` and ``XMColorSRGBToRGB`` to convert linear RGB <-> sRGB
+
+### July 2013 (3.05)
+* Use x86/x64 ``__vectorcall`` calling-convention when available (``XM_CALLCONV``, ``HXMVECTOR``, ``FXMMATRIX`` introduced)
+* Fixed bug with XMVectorFloor and XMVectorCeiling when given whole odd numbers (i.e. 105.0)
+* Improved XMVectorRound algorithm
+* ARM-NEON optimizations for XMVectorExp2, XMVectorLog2, XMVectorExpE, and XMVectorLogE
+* ARM-NEON code paths use multiply-by-scalar intrinsics when supported
+* Additional optimizations for ARM-NEON Stream functions
+* Fixed potential warning C4723 using ``operator/`` or ``operator/=``
+
+### March 2013 (3.04)
+* ``XMVectorExp2``, ``XMVectorLog2``, ``XMVectorExpE``, and ``XMVectorLogE`` functions added to provide base-e support in addition to the existing base-2 support
+* ``XMVectorExp`` and ``XMVectorLog`` are now aliases for XMVectorExp2 and XMVectorLog2
+* Additional optimizations for Stream functions
+* XMVector3Cross now ensures w component is zero on ARM
+* XMConvertHalfToFloat and XMConvertFloatToHalf now use IEEE 754 standard float16 behavior for INF/QNAN
+* Updated matrix version Transform for BoundingOrientedBox and BoundingFrustum to handle scaling
+
+### March 2012 (3.03)
+* *breaking change* Removed union members from XMMATRIX type to make it a fully 'opaque' type
+* Marked single-parameter C++ constructors for XMFLOAT2, XMFLOAT2A, XMFLOAT3, XMFLOAT3A, XMFLOAT4, and XMFLOAT4A explicit
+
+### February 2012 (3.02)
+* ARM-NEON intrinsics (selected by default for the ARM platform)
+* Reworked XMVectorPermute, change of ``XM_PERMUTE_`` defines, removal of XMVectorPermuteControl
+* Addition of ``XM_SWIZZLE_`` defines
+* Optimizations for transcendental functions
+* Template forms for permute, swizzle, shift-left, rotate-left, rotate-right, and insert
+* Removal of deprecated types and functions
+ + ``XM_CACHE_LINE_SIZE`` define, XMVectorExpEst, XMVectorLogEst, XMVectorPowEst, XMVectorSinHEst, XMVectorCosHEst, XMVectorTanHEst, XMVector2InBoundsR, XMVector3InBoundsR, XMVector4InBoundsR
+* Removed ``XM_STRICT_VECTOR4``; XMVECTOR in NO-INTRINSICS always defined without .x, .y, .z, .w, .v, or .u
+* Additional bounding types
+* SAL fixes and improvements
+
+### September 2011 (3.00)
+* Renamed and reorganized the headers
+* Introduced C++ namespaces
+* Removed the Xbox 360-specific GPU types
+ + XMHENDN3, XMHEND3, XMUHENDN3, XMUHEND3, XMDHENN3, XMDHEN3, XMUDHENN3, XMUDHEN3, XMXICON4, XMXICO4, XMICON4, XMICO4, XMUICON4, XMUICO4
+
+### July 2012 (XNAMath 2.05)
+* Template forms have been added for `XMVectorPermute`, `XMVectorSwizzle`, `XMVectorShiftLeft`, `XMVectorRotateLeft`, `XMVectorRotateRight`, and `XMVectorInsert`
+* The `XM_STRICT_XMMATRIX` compilation define has been added for opaque `XMMATRIX`.
+* Stream stride and count arguments have been changed to `size_t`
+* The ``pDeterminant`` parameter of `XMMatrixInverse` is now optional
+* Additional operator= overloads for `XMBYTEN4`, `XMBYTE4`, `XMUBYTEN4`, and `XMUBYTE4` types are now available
+
+### February 2011 (XNAMath 2.04)
+* Addition of new data types and associated load-store functions:
+ + `XMBYTEN2, XMBYTE2, XMUBYTEN2, XMUBYTE2`
+ + `XMLoadByteN2, XMLoadByte2, XMLoadUByteN2, XMLoadUByte2`
+ + `XMStoreByteN2, XMStoreByte2, XMStoreUByteN2, XMStoreUByte2`
+ + `XMINT2, XMUINT2, XMINT3, XMUINT3, XMINT4, XMUINT4`
+ + `XMLoadSInt2, XMLoadUInt2, XMLoadSInt3, XMLoadUInt3, XMLoadSInt4, XMLoadUInt4`
+ + `XMStoreSInt2, XMStoreUInt2, XMStoreSInt3, XMStoreUInt3, XMStoreSInt4, XMStoreUInt4`
+* Marked most single-parameter C++ constructors with `explicit` keyword
+* Corrected range issues with SSE implementations of `XMVectorFloor` and `XMVectorCeiling`
+
+
+### June 2010 (XNAMath 2.03)
+* Addition of ``XMVectorDivide`` to optimize SSE2 vector division operations
+* Unified handling of floating-point specials between the Windows SSE2 and no-intrinsics implementations
+* Use of Visual Studio style SAL annotations
+* Modifications to the C++ declarations for `XMFLOAT2A/3A/4A/4X3A/4X4A` to better support these types in C++ templates
+
+### February 2010 (XNAMath 2.02)
+* Fixes to `XMStoreColor`, `XMQuaternionRotationMatrix`, `XMVectorATan2`, and `XMVectorATan2Est`
+
+### August 2009 (XNAMath 2.01)
+* Adds ``XM_STRICT_VECTOR4``. This opt-in directive disallows the usage of XboxMath-like member accessors such as .x, .y, and .z. This makes it easier to write portable XNA Math code.
+* Added conversion support for the following Windows graphics formats:
+ + 16-bit color formats (565, 555X, 5551)
+ + 4-bits per channel color formats (4444)
+ + Unique Direct3D 10/11 formats (``DXGI_FORMAT_R9G9B9E5_SHAREDEXP`` and ``DXGI_FORMAT_R11G11B10_FLOAT``)
+
+### March 2009 (XNAMath 2.00)
+* Initial release (based on the Xbox 360 Xbox math library)
diff --git a/Sdk/External/DirectXMath/Inc/DirectXCollision.h b/Sdk/External/DirectXMath/Inc/DirectXCollision.h
new file mode 100644
index 0000000..94777bd
--- /dev/null
+++ b/Sdk/External/DirectXMath/Inc/DirectXCollision.h
@@ -0,0 +1,353 @@
+//-------------------------------------------------------------------------------------
+// DirectXCollision.h -- C++ Collision Math library
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "DirectXMath.h"
+
+namespace DirectX
+{
+
+ enum ContainmentType // Return type of the Contains and ContainedBy tests declared in this header.
+ {
+ DISJOINT = 0,
+ INTERSECTS = 1,
+ CONTAINS = 2
+ };
+
+ enum PlaneIntersectionType // Return type of the Intersects overloads that take a plane.
+ {
+ FRONT = 0,
+ INTERSECTING = 1,
+ BACK = 2
+ };
+
+ struct BoundingBox;
+ struct BoundingOrientedBox;
+ struct BoundingFrustum;
+
+#pragma warning(push)
+#pragma warning(disable:4324 4820)
+ // C4324: alignment padding warnings
+ // C4820: Off by default noise
+
+ //-------------------------------------------------------------------------------------
+ // Bounding sphere
+ //-------------------------------------------------------------------------------------
+ struct BoundingSphere
+ {
+ XMFLOAT3 Center; // Center of the sphere.
+ float Radius; // Radius of the sphere.
+
+ // Creators (the default constructor yields a radius-1 sphere centered at the origin)
+ BoundingSphere() noexcept : Center(0, 0, 0), Radius(1.f) {}
+
+ BoundingSphere(const BoundingSphere&) = default;
+ BoundingSphere& operator=(const BoundingSphere&) = default;
+
+ BoundingSphere(BoundingSphere&&) = default;
+ BoundingSphere& operator=(BoundingSphere&&) = default;
+
+ constexpr BoundingSphere(_In_ const XMFLOAT3& center, _In_ float radius) noexcept
+ : Center(center), Radius(radius) {}
+
+ // Methods
+ void XM_CALLCONV Transform(_Out_ BoundingSphere& Out, _In_ FXMMATRIX M) const noexcept;
+ void XM_CALLCONV Transform(_Out_ BoundingSphere& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept;
+ // Transform the sphere by a matrix, or by scale/rotation/translation; the result is written to Out
+
+ ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept;
+ ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+ ContainmentType Contains(_In_ const BoundingSphere& sh) const noexcept;
+ ContainmentType Contains(_In_ const BoundingBox& box) const noexcept;
+ ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept;
+ ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept;
+
+ bool Intersects(_In_ const BoundingSphere& sh) const noexcept;
+ bool Intersects(_In_ const BoundingBox& box) const noexcept;
+ bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept;
+ bool Intersects(_In_ const BoundingFrustum& fr) const noexcept;
+
+ bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+ // Triangle-sphere test
+
+ PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const noexcept;
+ // Plane-sphere test
+
+ bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept;
+ // Ray-sphere test (Dist is an output parameter)
+
+ ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
+ _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept;
+ // Test sphere against six planes (see BoundingFrustum::GetPlanes)
+
+ // Static methods (each writes its result to Out)
+ static void CreateMerged(_Out_ BoundingSphere& Out, _In_ const BoundingSphere& S1, _In_ const BoundingSphere& S2) noexcept;
+
+ static void CreateFromBoundingBox(_Out_ BoundingSphere& Out, _In_ const BoundingBox& box) noexcept;
+ static void CreateFromBoundingBox(_Out_ BoundingSphere& Out, _In_ const BoundingOrientedBox& box) noexcept;
+
+ static void CreateFromPoints(_Out_ BoundingSphere& Out, _In_ size_t Count,
+ _In_reads_bytes_(sizeof(XMFLOAT3) + Stride * (Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride) noexcept;
+
+ static void CreateFromFrustum(_Out_ BoundingSphere& Out, _In_ const BoundingFrustum& fr) noexcept;
+ };
+
+ //-------------------------------------------------------------------------------------
+ // Axis-aligned bounding box
+ //-------------------------------------------------------------------------------------
+ struct BoundingBox
+ {
+ static const size_t CORNER_COUNT = 8; // Matches the 8 corners written by GetCorners
+
+ XMFLOAT3 Center; // Center of the box.
+ XMFLOAT3 Extents; // Distance from the center to each side.
+
+ // Creators (the default constructor yields a box centered at the origin with extents of 1 on every axis)
+ BoundingBox() noexcept : Center(0, 0, 0), Extents(1.f, 1.f, 1.f) {}
+
+ BoundingBox(const BoundingBox&) = default;
+ BoundingBox& operator=(const BoundingBox&) = default;
+
+ BoundingBox(BoundingBox&&) = default;
+ BoundingBox& operator=(BoundingBox&&) = default;
+
+ constexpr BoundingBox(_In_ const XMFLOAT3& center, _In_ const XMFLOAT3& extents) noexcept
+ : Center(center), Extents(extents) {}
+
+ // Methods (both Transform overloads write their result to Out)
+ void XM_CALLCONV Transform(_Out_ BoundingBox& Out, _In_ FXMMATRIX M) const noexcept;
+ void XM_CALLCONV Transform(_Out_ BoundingBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept;
+
+ void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const noexcept;
+ // Gets the 8 corners of the box (Corners must have room for 8 XMFLOAT3 values)
+
+ ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept;
+ ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+ ContainmentType Contains(_In_ const BoundingSphere& sh) const noexcept;
+ ContainmentType Contains(_In_ const BoundingBox& box) const noexcept;
+ ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept;
+ ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept;
+
+ bool Intersects(_In_ const BoundingSphere& sh) const noexcept;
+ bool Intersects(_In_ const BoundingBox& box) const noexcept;
+ bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept;
+ bool Intersects(_In_ const BoundingFrustum& fr) const noexcept;
+
+ bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+ // Triangle-Box test
+
+ PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const noexcept;
+ // Plane-box test
+
+ bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept;
+ // Ray-Box test (Dist is an output parameter)
+
+ ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
+ _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept;
+ // Test box against six planes (see BoundingFrustum::GetPlanes)
+
+ // Static methods (each writes its result to Out)
+ static void CreateMerged(_Out_ BoundingBox& Out, _In_ const BoundingBox& b1, _In_ const BoundingBox& b2) noexcept;
+
+ static void CreateFromSphere(_Out_ BoundingBox& Out, _In_ const BoundingSphere& sh) noexcept;
+
+ static void XM_CALLCONV CreateFromPoints(_Out_ BoundingBox& Out, _In_ FXMVECTOR pt1, _In_ FXMVECTOR pt2) noexcept;
+ static void CreateFromPoints(_Out_ BoundingBox& Out, _In_ size_t Count,
+ _In_reads_bytes_(sizeof(XMFLOAT3) + Stride * (Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride) noexcept;
+ };
+
+ //-------------------------------------------------------------------------------------
+ // Oriented bounding box
+ //-------------------------------------------------------------------------------------
+ struct BoundingOrientedBox
+ {
+ static const size_t CORNER_COUNT = 8; // Matches the 8 corners written by GetCorners
+
+ XMFLOAT3 Center; // Center of the box.
+ XMFLOAT3 Extents; // Distance from the center to each side.
+ XMFLOAT4 Orientation; // Unit quaternion representing rotation (box -> world).
+
+ // Creators (the default constructor yields extents of 1 at the origin with orientation (0, 0, 0, 1))
+ BoundingOrientedBox() noexcept : Center(0, 0, 0), Extents(1.f, 1.f, 1.f), Orientation(0, 0, 0, 1.f) {}
+
+ BoundingOrientedBox(const BoundingOrientedBox&) = default;
+ BoundingOrientedBox& operator=(const BoundingOrientedBox&) = default;
+
+ BoundingOrientedBox(BoundingOrientedBox&&) = default;
+ BoundingOrientedBox& operator=(BoundingOrientedBox&&) = default;
+
+ constexpr BoundingOrientedBox(_In_ const XMFLOAT3& _Center, _In_ const XMFLOAT3& _Extents, _In_ const XMFLOAT4& _Orientation) noexcept
+ : Center(_Center), Extents(_Extents), Orientation(_Orientation) {}
+
+ // Methods (both Transform overloads write their result to Out)
+ void XM_CALLCONV Transform(_Out_ BoundingOrientedBox& Out, _In_ FXMMATRIX M) const noexcept;
+ void XM_CALLCONV Transform(_Out_ BoundingOrientedBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept;
+
+ void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const noexcept;
+ // Gets the 8 corners of the box (Corners must have room for 8 XMFLOAT3 values)
+
+ ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept;
+ ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+ ContainmentType Contains(_In_ const BoundingSphere& sh) const noexcept;
+ ContainmentType Contains(_In_ const BoundingBox& box) const noexcept;
+ ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept;
+ ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept;
+
+ bool Intersects(_In_ const BoundingSphere& sh) const noexcept;
+ bool Intersects(_In_ const BoundingBox& box) const noexcept;
+ bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept;
+ bool Intersects(_In_ const BoundingFrustum& fr) const noexcept;
+
+ bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+ // Triangle-OrientedBox test
+
+ PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const noexcept;
+ // Plane-OrientedBox test
+
+ bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept;
+ // Ray-OrientedBox test (Dist is an output parameter)
+
+ ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
+ _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept;
+ // Test OrientedBox against six planes (see BoundingFrustum::GetPlanes)
+
+ // Static methods (each writes its result to Out)
+ static void CreateFromBoundingBox(_Out_ BoundingOrientedBox& Out, _In_ const BoundingBox& box) noexcept;
+
+ static void CreateFromPoints(_Out_ BoundingOrientedBox& Out, _In_ size_t Count,
+ _In_reads_bytes_(sizeof(XMFLOAT3) + Stride * (Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride) noexcept;
+ };
+
+ //-------------------------------------------------------------------------------------
+ // Bounding frustum
+ //-------------------------------------------------------------------------------------
+ struct BoundingFrustum
+ {
+ static const size_t CORNER_COUNT = 8; // Matches the 8 corners written by GetCorners
+
+ XMFLOAT3 Origin; // Origin of the frustum (and projection).
+ XMFLOAT4 Orientation; // Quaternion representing rotation.
+
+ float RightSlope; // Positive X (X/Z)
+ float LeftSlope; // Negative X
+ float TopSlope; // Positive Y (Y/Z)
+ float BottomSlope; // Negative Y
+ float Near, Far; // Z of the near plane and far plane.
+
+ // Creators (default: origin at zero, orientation (0, 0, 0, 1), slopes of +/-1, Near 0, Far 1)
+ BoundingFrustum() noexcept :
+ Origin(0, 0, 0), Orientation(0, 0, 0, 1.f), RightSlope(1.f), LeftSlope(-1.f),
+ TopSlope(1.f), BottomSlope(-1.f), Near(0), Far(1.f) {}
+
+ BoundingFrustum(const BoundingFrustum&) = default;
+ BoundingFrustum& operator=(const BoundingFrustum&) = default;
+
+ BoundingFrustum(BoundingFrustum&&) = default;
+ BoundingFrustum& operator=(BoundingFrustum&&) = default;
+
+ constexpr BoundingFrustum(_In_ const XMFLOAT3& _Origin, _In_ const XMFLOAT4& _Orientation,
+ _In_ float _RightSlope, _In_ float _LeftSlope, _In_ float _TopSlope, _In_ float _BottomSlope,
+ _In_ float _Near, _In_ float _Far) noexcept
+ : Origin(_Origin), Orientation(_Orientation),
+ RightSlope(_RightSlope), LeftSlope(_LeftSlope), TopSlope(_TopSlope), BottomSlope(_BottomSlope),
+ Near(_Near), Far(_Far) {}
+ BoundingFrustum(_In_ CXMMATRIX Projection, bool rhcoords = false) noexcept; // Construct from a projection matrix; rhcoords defaults to false
+
+ // Methods (both Transform overloads write their result to Out)
+ void XM_CALLCONV Transform(_Out_ BoundingFrustum& Out, _In_ FXMMATRIX M) const noexcept;
+ void XM_CALLCONV Transform(_Out_ BoundingFrustum& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept;
+
+ void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const noexcept;
+ // Gets the 8 corners of the frustum (Corners must have room for 8 XMFLOAT3 values)
+
+ ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept;
+ ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+ ContainmentType Contains(_In_ const BoundingSphere& sp) const noexcept;
+ ContainmentType Contains(_In_ const BoundingBox& box) const noexcept;
+ ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept;
+ ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept;
+ // Frustum-Frustum test
+
+ bool Intersects(_In_ const BoundingSphere& sh) const noexcept;
+ bool Intersects(_In_ const BoundingBox& box) const noexcept;
+ bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept;
+ bool Intersects(_In_ const BoundingFrustum& fr) const noexcept;
+
+ bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept;
+ // Triangle-Frustum test
+
+ PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const noexcept;
+ // Plane-Frustum test
+
+ bool XM_CALLCONV Intersects(_In_ FXMVECTOR rayOrigin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept;
+ // Ray-Frustum test (Dist is an output parameter)
+
+ ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
+ _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept;
+ // Test frustum against six planes (see BoundingFrustum::GetPlanes)
+
+ void GetPlanes(_Out_opt_ XMVECTOR* NearPlane, _Out_opt_ XMVECTOR* FarPlane, _Out_opt_ XMVECTOR* RightPlane,
+ _Out_opt_ XMVECTOR* LeftPlane, _Out_opt_ XMVECTOR* TopPlane, _Out_opt_ XMVECTOR* BottomPlane) const noexcept;
+ // Create 6 Planes representation of Frustum (every output pointer is annotated as optional)
+
+ // Static methods (the result is written to Out; rhcoords defaults to false)
+ static void XM_CALLCONV CreateFromMatrix(_Out_ BoundingFrustum& Out, _In_ FXMMATRIX Projection, bool rhcoords = false) noexcept;
+ };
+
+ //-----------------------------------------------------------------------------
+ // Triangle intersection testing routines.
+ //-----------------------------------------------------------------------------
+ namespace TriangleTests
+ {
+ bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _In_ FXMVECTOR V0, _In_ GXMVECTOR V1, _In_ HXMVECTOR V2, _Out_ float& Dist) noexcept;
+ // Ray-Triangle (triangle V0, V1, V2; Dist is an output parameter)
+
+ bool XM_CALLCONV Intersects(_In_ FXMVECTOR A0, _In_ FXMVECTOR A1, _In_ FXMVECTOR A2, _In_ GXMVECTOR B0, _In_ HXMVECTOR B1, _In_ HXMVECTOR B2) noexcept;
+ // Triangle-Triangle (triangle A0, A1, A2 against triangle B0, B1, B2)
+
+ PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2, _In_ GXMVECTOR Plane) noexcept;
+ // Plane-Triangle (triangle V0, V1, V2 against Plane)
+
+ ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2,
+ _In_ GXMVECTOR Plane0, _In_ HXMVECTOR Plane1, _In_ HXMVECTOR Plane2,
+ _In_ CXMVECTOR Plane3, _In_ CXMVECTOR Plane4, _In_ CXMVECTOR Plane5) noexcept;
+ // Test a triangle against six planes at once (see BoundingFrustum::GetPlanes)
+ }
+
+#pragma warning(pop)
+
+ /****************************************************************************
+ *
+ * Implementation
+ *
+ ****************************************************************************/
+
+#pragma warning(push)
+#pragma warning(disable : 4068 4365 4616 6001)
+ // C4068/4616: ignore unknown pragmas
+ // C4365: Off by default noise
+ // C6001: False positives
+
+#ifdef _PREFAST_
+#pragma prefast(push)
+#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes")
+#pragma prefast(disable : 26495, "Union initialization confuses /analyze")
+#endif
+
+#include "DirectXCollision.inl"
+
+#ifdef _PREFAST_
+#pragma prefast(pop)
+#endif
+
+#pragma warning(pop)
+
+} // namespace DirectX
+
diff --git a/Sdk/External/DirectXMath/Inc/DirectXCollision.inl b/Sdk/External/DirectXMath/Inc/DirectXCollision.inl
new file mode 100644
index 0000000..c65ef54
--- /dev/null
+++ b/Sdk/External/DirectXMath/Inc/DirectXCollision.inl
@@ -0,0 +1,4816 @@
+//-------------------------------------------------------------------------------------
+// DirectXCollision.inl -- C++ Collision Math library
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkID=615560
+//-------------------------------------------------------------------------------------
+
+#pragma once
+
+XMGLOBALCONST XMVECTORF32 g_BoxOffset[8] = // one (x, y, z) sign pattern per box corner; w is always 0
+{
+ { { { -1.0f, -1.0f, 1.0f, 0.0f } } },
+ { { { 1.0f, -1.0f, 1.0f, 0.0f } } },
+ { { { 1.0f, 1.0f, 1.0f, 0.0f } } },
+ { { { -1.0f, 1.0f, 1.0f, 0.0f } } },
+ { { { -1.0f, -1.0f, -1.0f, 0.0f } } },
+ { { { 1.0f, -1.0f, -1.0f, 0.0f } } },
+ { { { 1.0f, 1.0f, -1.0f, 0.0f } } },
+ { { { -1.0f, 1.0f, -1.0f, 0.0f } } },
+};
+
+XMGLOBALCONST XMVECTORF32 g_RayEpsilon = { { { 1e-20f, 1e-20f, 1e-20f, 1e-20f } } }; // tiny positive value in all four lanes
+XMGLOBALCONST XMVECTORF32 g_RayNegEpsilon = { { { -1e-20f, -1e-20f, -1e-20f, -1e-20f } } }; // negated counterpart of g_RayEpsilon
+XMGLOBALCONST XMVECTORF32 g_FltMin = { { { -FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX } } }; // NOTE: holds -FLT_MAX, not the FLT_MIN macro
+XMGLOBALCONST XMVECTORF32 g_FltMax = { { { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX } } }; // FLT_MAX in all four lanes
+
+namespace Internal
+{
+
+ //-----------------------------------------------------------------------------
+ // Return true if any of the elements of a 3 vector are equal to 0xffffffff.
+ // Slightly more efficient than using XMVector3EqualInt.
+ //-----------------------------------------------------------------------------
+ inline bool XMVector3AnyTrue(_In_ FXMVECTOR V) noexcept
+ {
+     // Copy x into w so the 4-wide compare below only reflects the x, y and z lanes.
+     XMVECTOR C = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X>(V);
+
+     return XMComparisonAnyTrue(XMVector4EqualIntR(C, XMVectorTrueInt()));
+ }
+
+
+ //-----------------------------------------------------------------------------
+ // Return true if all of the elements of a 3 vector are equal to 0xffffffff.
+ // Slightly more efficient than using XMVector3EqualInt.
+ //-----------------------------------------------------------------------------
+ inline bool XMVector3AllTrue(_In_ FXMVECTOR V) noexcept
+ {
+     // Copy x into w so the 4-wide compare below only reflects the x, y and z lanes.
+     XMVECTOR C = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X>(V);
+
+     return XMComparisonAllTrue(XMVector4EqualIntR(C, XMVectorTrueInt()));
+ }
+
+#if defined(_PREFAST_) || !defined(NDEBUG)
+
+ XMGLOBALCONST XMVECTORF32 g_UnitVectorEpsilon = { { { 1.0e-4f, 1.0e-4f, 1.0e-4f, 1.0e-4f } } }; // tolerance on |length - 1| in XMVector3IsUnit
+ XMGLOBALCONST XMVECTORF32 g_UnitQuaternionEpsilon = { { { 1.0e-4f, 1.0e-4f, 1.0e-4f, 1.0e-4f } } }; // tolerance on |length - 1| in XMQuaternionIsUnit
+ XMGLOBALCONST XMVECTORF32 g_UnitPlaneEpsilon = { { { 1.0e-4f, 1.0e-4f, 1.0e-4f, 1.0e-4f } } }; // tolerance on |normal length - 1| in XMPlaneIsUnit
+
+ //-----------------------------------------------------------------------------
+ // Return true if the vector is a unit vector (length == 1).
+ //-----------------------------------------------------------------------------
+ inline bool XMVector3IsUnit(_In_ FXMVECTOR V) noexcept
+ {
+     const XMVECTOR LengthError = XMVectorAbs(XMVectorSubtract(XMVector3Length(V), XMVectorSplatOne()));
+     return XMVector4Less(LengthError, g_UnitVectorEpsilon);
+ }
+
+ //-----------------------------------------------------------------------------
+ // Return true if the quaternion is a unit quaternion.
+ //-----------------------------------------------------------------------------
+ inline bool XMQuaternionIsUnit(_In_ FXMVECTOR Q) noexcept
+ {
+     const XMVECTOR LengthError = XMVectorAbs(XMVectorSubtract(XMVector4Length(Q), XMVectorSplatOne()));
+     return XMVector4Less(LengthError, g_UnitQuaternionEpsilon);
+ }
+
+ //-----------------------------------------------------------------------------
+ // Return true if the plane is a unit plane.
+ //-----------------------------------------------------------------------------
+ inline bool XMPlaneIsUnit(_In_ FXMVECTOR Plane) noexcept
+ {
+     const XMVECTOR NormalLengthError = XMVectorAbs(XMVectorSubtract(XMVector3Length(Plane), XMVectorSplatOne()));
+     return XMVector4Less(NormalLengthError, g_UnitPlaneEpsilon);
+ }
+
+#endif // _PREFAST_ || !NDEBUG
+
+ //-----------------------------------------------------------------------------
+ inline XMVECTOR XMPlaneTransform(_In_ FXMVECTOR Plane, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) noexcept
+ {
+     // Rotate the normal (xyz), then rebuild w as: w - dot(rotated normal, Translation).
+     const XMVECTOR RotatedNormal = XMVector3Rotate(Plane, Rotation);
+     const XMVECTOR NewD = XMVectorSubtract(XMVectorSplatW(Plane), XMVector3Dot(RotatedNormal, Translation));
+     return XMVectorInsert<0, 0, 0, 0, 1>(RotatedNormal, NewD);
+ }
+
+ //-----------------------------------------------------------------------------
+ // Return the point on the line segment (S1, S2) nearest the point P.
+ //-----------------------------------------------------------------------------
+ inline XMVECTOR PointOnLineSegmentNearestPoint(_In_ FXMVECTOR S1, _In_ FXMVECTOR S2, _In_ FXMVECTOR P) noexcept
+ {
+     // Segment vector, the unnormalised projection of P onto it, and its squared length.
+     const XMVECTOR Segment = XMVectorSubtract(S2, S1);
+     const XMVECTOR Proj = XMVectorSubtract(XMVector3Dot(P, Segment), XMVector3Dot(S1, Segment));
+     const XMVECTOR SegLenSq = XMVector3Dot(Segment, Segment);
+
+     // Unclamped closest point on the infinite line: S1 + (Proj * 1/SegLenSq) * Segment.
+     const XMVECTOR Param = XMVectorMultiply(Proj, XMVectorReciprocal(SegLenSq));
+     const XMVECTOR OnLine = XMVectorMultiplyAdd(Param, Segment, S1);
+
+     // Parameter below 0 (Proj < 0): clamp to S1.
+     const XMVECTOR BeforeStart = XMVectorLess(Proj, XMVectorZero());
+     const XMVECTOR Clamped = XMVectorSelect(OnLine, S1, BeforeStart);
+
+     // Parameter above 1 (Proj > SegLenSq): clamp to S2.
+     const XMVECTOR PastEnd = XMVectorGreater(Proj, SegLenSq);
+     return XMVectorSelect(Clamped, S2, PastEnd);
+ }
+
+ //-----------------------------------------------------------------------------
+ // Test if the point (P) on the plane of the triangle is inside the triangle
+ // (V0, V1, V2).
+ //-----------------------------------------------------------------------------
+ inline XMVECTOR XM_CALLCONV PointOnPlaneInsideTriangle(_In_ FXMVECTOR P, _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ GXMVECTOR V2) noexcept
+ {
+     // Normal of the triangle's plane, built from the two edges leaving V0.
+     const XMVECTOR Normal = XMVector3Cross(XMVectorSubtract(V2, V0), XMVectorSubtract(V1, V0));
+
+     // For every edge, cross the vector from the edge's start vertex to P
+     // with the edge vector itself.
+     const XMVECTOR Cross01 = XMVector3Cross(XMVectorSubtract(P, V0), XMVectorSubtract(V1, V0));
+     const XMVECTOR Cross12 = XMVector3Cross(XMVectorSubtract(P, V1), XMVectorSubtract(V2, V1));
+     const XMVECTOR Cross20 = XMVector3Cross(XMVectorSubtract(P, V2), XMVectorSubtract(V0, V2));
+
+     // A cross product pointing the same way as the normal (dot >= 0) puts P on
+     // the inner side of that edge; a dot of exactly zero means P is on the edge.
+     const XMVECTOR Threshold = XMVectorZero();
+     const XMVECTOR In01 = XMVectorGreaterOrEqual(XMVector3Dot(Cross01, Normal), Threshold);
+     const XMVECTOR In12 = XMVectorGreaterOrEqual(XMVector3Dot(Cross12, Normal), Threshold);
+     const XMVECTOR In20 = XMVectorGreaterOrEqual(XMVector3Dot(Cross20, Normal), Threshold);
+
+     // P is inside the triangle only when it is inside all three edges.
+     return XMVectorAndInt(XMVectorAndInt(In01, In12), In20);
+ }
+
+ //-----------------------------------------------------------------------------
+ inline bool SolveCubic(_In_ float e, _In_ float f, _In_ float g, _Out_ float* t, _Out_ float* u, _Out_ float* v) noexcept
+ {
+     // Solves x^3 + e*x^2 + f*x + g = 0 for three real roots, written to *t, *u, *v.
+     // Returns false (all outputs zeroed) when the cubic has only one real root.
+
+     // Depressed-cubic coefficients (x = y - e/3) and the discriminant term h.
+     const float p = f - e * e / 3.0f;
+     const float q = g - e * f / 3.0f + e * e * e * 2.0f / 27.0f;
+     const float h = q * q / 4.0f + p * p * p / 27.0f;
+
+     if (h > 0)
+     {
+         *t = *u = *v = 0.f;
+         return false; // only one real root
+     }
+
+     if ((h == 0) && (q == 0)) // all the same root
+     {
+         *t = -e / 3;
+         *u = -e / 3;
+         *v = -e / 3;
+         return true;
+     }
+
+     // sqrtf never yields a negative value, so the cube root below needs no
+     // sign handling.
+     const float d = sqrtf(q * q / 4.0f - h);
+     const float rc = powf(d, 1.0f / 3.0f);
+
+     const float theta = XMScalarACos(-q / (2.0f * d));
+     const float costh3 = XMScalarCos(theta / 3.0f);
+     const float sinth3 = sqrtf(3.0f) * XMScalarSin(theta / 3.0f);
+     *t = 2.0f * rc * costh3 - e / 3.0f;
+     *u = -rc * (costh3 + sinth3) - e / 3.0f;
+     *v = -rc * (costh3 - sinth3) - e / 3.0f;
+
+     return true;
+ }
+
+ //-----------------------------------------------------------------------------
+ inline XMVECTOR CalculateEigenVector(_In_ float m11, _In_ float m12, _In_ float m13,
+     _In_ float m22, _In_ float m23, _In_ float m33, _In_ float e) noexcept
+ {
+     // Returns a normalized vector for the value e and the matrix whose upper
+     // triangle is m11 m12 m13 / m22 m23 / m33.
+
+     // Cross product of the rows (m11 - e, m12, m13) and (m12, m22 - e, m23);
+     // the result is orthogonal to both of them.
+     float fTmp[3];
+     fTmp[0] = m12 * m23 - m13 * (m22 - e);
+     fTmp[1] = m13 * m12 - m23 * (m11 - e);
+     fTmp[2] = (m11 - e) * (m22 - e) - m12 * m12;
+
+     // XMLoadFloat3 takes a const XMFLOAT3*, so the cast needs that explicit target type.
+     XMVECTOR vTmp = XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(fTmp));
+
+     if (XMVector3Equal(vTmp, XMVectorZero())) // planar or linear
+     {
+         float f1, f2, f3;
+
+         // we only have one equation - find a valid one
+         if ((m11 - e != 0) || (m12 != 0) || (m13 != 0))
+         {
+             f1 = m11 - e; f2 = m12; f3 = m13;
+         }
+         else if ((m12 != 0) || (m22 - e != 0) || (m23 != 0))
+         {
+             f1 = m12; f2 = m22 - e; f3 = m23;
+         }
+         else if ((m13 != 0) || (m23 != 0) || (m33 - e != 0))
+         {
+             f1 = m13; f2 = m23; f3 = m33 - e;
+         }
+         else
+         {
+             // error, we'll just make something up - we have NO context
+             f1 = 1.0f; f2 = 0.0f; f3 = 0.0f;
+         }
+
+         // x and y start as 1 wherever the matching coefficient is non-zero, else 0.
+         vTmp = XMVectorSetX(vTmp, (f1 == 0) ? 0.0f : 1.0f);
+         vTmp = XMVectorSetY(vTmp, (f2 == 0) ? 0.0f : 1.0f);
+
+         if (f3 == 0)
+         {
+             vTmp = XMVectorSetZ(vTmp, 0.0f);
+             // recalculate y to make equation work
+             if (m12 != 0)
+                 vTmp = XMVectorSetY(vTmp, -f1 / f2);
+         }
+         else
+         {
+             vTmp = XMVectorSetZ(vTmp, (f2 - f1) / f3);
+         }
+     }
+
+     if (XMVectorGetX(XMVector3LengthSq(vTmp)) > 1e-5f)
+     {
+         return XMVector3Normalize(vTmp);
+     }
+     else
+     {
+         // Multiply by a value large enough to make the vector non-zero.
+         vTmp = XMVectorScale(vTmp, 1e5f);
+         return XMVector3Normalize(vTmp);
+     }
+ }
+
+ //-----------------------------------------------------------------------------
+ inline bool CalculateEigenVectors(_In_ float m11, _In_ float m12, _In_ float m13,
+ _In_ float m22, _In_ float m23, _In_ float m33,
+ _In_ float e1, _In_ float e2, _In_ float e3,
+ _Out_ XMVECTOR* pV1, _Out_ XMVECTOR* pV2, _Out_ XMVECTOR* pV3) noexcept
+ {
+ *pV1 = DirectX::Internal::CalculateEigenVector(m11, m12, m13, m22, m23, m33, e1);
+ *pV2 = DirectX::Internal::CalculateEigenVector(m11, m12, m13, m22, m23, m33, e2);
+ *pV3 = DirectX::Internal::CalculateEigenVector(m11, m12, m13, m22, m23, m33, e3);
+ // The rest of the function repairs zero or non-orthogonal candidates; every path returns true.
+ bool v1z = false;
+ bool v2z = false;
+ bool v3z = false;
+
+ XMVECTOR Zero = XMVectorZero();
+
+ if (XMVector3Equal(*pV1, Zero))
+ v1z = true;
+
+ if (XMVector3Equal(*pV2, Zero))
+ v2z = true;
+
+ if (XMVector3Equal(*pV3, Zero))
+ v3z = true;
+
+ bool e12 = (fabsf(XMVectorGetX(XMVector3Dot(*pV1, *pV2))) > 0.1f); // check for non-orthogonal vectors
+ bool e13 = (fabsf(XMVectorGetX(XMVector3Dot(*pV1, *pV3))) > 0.1f);
+ bool e23 = (fabsf(XMVectorGetX(XMVector3Dot(*pV2, *pV3))) > 0.1f);
+
+ if ((v1z && v2z && v3z) || (e12 && e13 && e23) ||
+ (e12 && v3z) || (e13 && v2z) || (e23 && v1z)) // nothing usable - fall back to the identity basis
+ {
+ *pV1 = g_XMIdentityR0.v;
+ *pV2 = g_XMIdentityR1.v;
+ *pV3 = g_XMIdentityR2.v;
+ return true;
+ }
+ // Only pV3 is non-zero: derive pV1 and pV2 from it.
+ if (v1z && v2z)
+ {
+ XMVECTOR vTmp = XMVector3Cross(g_XMIdentityR1, *pV3);
+ if (XMVectorGetX(XMVector3LengthSq(vTmp)) < 1e-5f)
+ {
+ vTmp = XMVector3Cross(g_XMIdentityR0, *pV3);
+ }
+ *pV1 = XMVector3Normalize(vTmp);
+ *pV2 = XMVector3Cross(*pV3, *pV1);
+ return true;
+ }
+ // Only pV2 is non-zero: derive pV3 and pV1 from it.
+ if (v3z && v1z)
+ {
+ XMVECTOR vTmp = XMVector3Cross(g_XMIdentityR1, *pV2);
+ if (XMVectorGetX(XMVector3LengthSq(vTmp)) < 1e-5f)
+ {
+ vTmp = XMVector3Cross(g_XMIdentityR0, *pV2);
+ }
+ *pV3 = XMVector3Normalize(vTmp);
+ *pV1 = XMVector3Cross(*pV2, *pV3);
+ return true;
+ }
+ // Only pV1 is non-zero: derive pV2 and pV3 from it.
+ if (v2z && v3z)
+ {
+ XMVECTOR vTmp = XMVector3Cross(g_XMIdentityR1, *pV1);
+ if (XMVectorGetX(XMVector3LengthSq(vTmp)) < 1e-5f)
+ {
+ vTmp = XMVector3Cross(g_XMIdentityR0, *pV1);
+ }
+ *pV2 = XMVector3Normalize(vTmp);
+ *pV3 = XMVector3Cross(*pV1, *pV2);
+ return true;
+ }
+ // From here on a single vector is replaced by the cross product of the other two.
+ if ((v1z) || e12)
+ {
+ *pV1 = XMVector3Cross(*pV2, *pV3);
+ return true;
+ }
+
+ if ((v2z) || e23)
+ {
+ *pV2 = XMVector3Cross(*pV3, *pV1);
+ return true;
+ }
+
+ if ((v3z) || e13)
+ {
+ *pV3 = XMVector3Cross(*pV1, *pV2);
+ return true;
+ }
+ // No repair was needed: the three candidates are returned as computed.
+ return true;
+ }
+
+ //-----------------------------------------------------------------------------
+ inline bool CalculateEigenVectorsFromCovarianceMatrix(_In_ float Cxx, _In_ float Cyy, _In_ float Czz,
+     _In_ float Cxy, _In_ float Cxz, _In_ float Cyz,
+     _Out_ XMVECTOR* pV1, _Out_ XMVECTOR* pV2, _Out_ XMVECTOR* pV3) noexcept
+ {
+     // Characteristic polynomial x^3 + c2*x^2 + c1*x + c0; its roots are the eigenvalues.
+     const float c2 = -(Cxx + Cyy + Czz);
+     const float c1 = Cxx * Cyy + Cyy * Czz + Czz * Cxx - Cxy * Cxy - Cxz * Cxz - Cyz * Cyz;
+     const float c0 = Cxy * Cxy * Czz + Cxz * Cxz * Cyy + Cyz * Cyz * Cxx - Cxy * Cyz * Cxz * 2.0f - Cxx * Cyy * Czz;
+
+     float lambda1, lambda2, lambda3;
+     if (DirectX::Internal::SolveCubic(c2, c1, c0, &lambda1, &lambda2, &lambda3))
+     {
+         return DirectX::Internal::CalculateEigenVectors(Cxx, Cxy, Cxz, Cyy, Cyz, Czz, lambda1, lambda2, lambda3, pV1, pV2, pV3);
+     }
+
+     // The cubic solver failed: hand back an arbitrary orthonormal basis set.
+     *pV1 = g_XMIdentityR0.v;
+     *pV2 = g_XMIdentityR1.v;
+     *pV3 = g_XMIdentityR2.v;
+     return false;
+ }
+
+ //-----------------------------------------------------------------------------
+ inline void XM_CALLCONV FastIntersectTrianglePlane(
+     FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2,
+     GXMVECTOR Plane,
+     XMVECTOR& Outside, XMVECTOR& Inside) noexcept
+ {
+     // Classifies triangle (V0, V1, V2) against Plane; both results are comparison masks.
+     // Four-component dot product of every vertex with the plane.
+     // NOTE(review): presumably callers set each vertex's w to 1 first - confirm.
+     const XMVECTOR d0 = XMVector4Dot(V0, Plane);
+     const XMVECTOR d1 = XMVector4Dot(V1, Plane);
+     const XMVECTOR d2 = XMVector4Dot(V2, Plane);
+
+     // Smallest and largest of the three values.
+     const XMVECTOR Nearest = XMVectorMin(XMVectorMin(d0, d1), d2);
+     const XMVECTOR Farthest = XMVectorMax(XMVectorMax(d0, d1), d2);
+
+     const XMVECTOR Zero = XMVectorZero();
+
+     // Even the smallest value is above zero: outside the plane.
+     Outside = XMVectorGreater(Nearest, Zero);
+
+     // Even the largest value is below zero: fully inside the plane.
+     Inside = XMVectorLess(Farthest, Zero);
+ }
+
+ //-----------------------------------------------------------------------------
+ inline void FastIntersectSpherePlane(_In_ FXMVECTOR Center, _In_ FXMVECTOR Radius, _In_ FXMVECTOR Plane,
+     _Out_ XMVECTOR& Outside, _Out_ XMVECTOR& Inside) noexcept
+ {
+     // Four-component dot of the sphere centre with the plane.
+     const XMVECTOR CenterDist = XMVector4Dot(Center, Plane);
+
+     // More than one radius above the plane: outside.
+     Outside = XMVectorGreater(CenterDist, Radius);
+     // More than one radius below the plane: fully inside.
+     Inside = XMVectorLess(CenterDist, XMVectorNegate(Radius));
+ }
+
+ //-----------------------------------------------------------------------------
+ inline void FastIntersectAxisAlignedBoxPlane(_In_ FXMVECTOR Center, _In_ FXMVECTOR Extents, _In_ FXMVECTOR Plane,
+     _Out_ XMVECTOR& Outside, _Out_ XMVECTOR& Inside) noexcept
+ {
+     // Four-component dot of the box centre with the plane.
+     const XMVECTOR CenterDist = XMVector4Dot(Center, Plane);
+
+     // "Radius" of the box along the plane normal n: half the length of the
+     // box's projection onto n, i.e.
+     //   h(u) * abs(n dot b(u)) + h(v) * abs(n dot b(v)) + h(w) * abs(n dot b(w))
+     // with h(i) the box extents and b(i) the box axes. The axes here are
+     // (1,0,0), (0,1,0), (0,0,1), so this reduces to dot(Extents, abs(n)).
+     const XMVECTOR Reach = XMVector3Dot(Extents, XMVectorAbs(Plane));
+
+     // Centre more than Reach above the plane: outside.
+     Outside = XMVectorGreater(CenterDist, Reach);
+
+     // Centre more than Reach below the plane: fully inside.
+     Inside = XMVectorLess(CenterDist, XMVectorNegate(Reach));
+ }
+
+ //-----------------------------------------------------------------------------
+ inline void XM_CALLCONV FastIntersectOrientedBoxPlane(
+     _In_ FXMVECTOR Center, _In_ FXMVECTOR Extents, _In_ FXMVECTOR Axis0,
+     _In_ GXMVECTOR Axis1,
+     _In_ HXMVECTOR Axis2, _In_ HXMVECTOR Plane,
+     _Out_ XMVECTOR& Outside, _Out_ XMVECTOR& Inside) noexcept
+ {
+     // Four-component dot of the box centre with the plane.
+     const XMVECTOR CenterDist = XMVector4Dot(Center, Plane);
+
+     // "Radius" of the box along the plane normal n: half the length of the
+     // box's projection onto n, i.e.
+     //   h(u) * abs(n dot b(u)) + h(v) * abs(n dot b(v)) + h(w) * abs(n dot b(w))
+     // with h(i) the box extents and b(i) the box axes. The three dot products
+     // are gathered into the x, y and z lanes of one vector before the final dot.
+     const XMVECTOR ProjX = XMVector3Dot(Plane, Axis0);
+     const XMVECTOR ProjXY = XMVectorInsert<0, 0, 1, 0, 0>(ProjX, XMVector3Dot(Plane, Axis1));
+     const XMVECTOR ProjXYZ = XMVectorInsert<0, 0, 0, 1, 0>(ProjXY, XMVector3Dot(Plane, Axis2));
+     const XMVECTOR Reach = XMVector3Dot(Extents, XMVectorAbs(ProjXYZ));
+
+     // Centre more than Reach above the plane: outside.
+     Outside = XMVectorGreater(CenterDist, Reach);
+
+     // Centre more than Reach below the plane: fully inside.
+     Inside = XMVectorLess(CenterDist, XMVectorNegate(Reach));
+ }
+
+ //-----------------------------------------------------------------------------
+ inline void XM_CALLCONV FastIntersectFrustumPlane(
+     _In_ FXMVECTOR Point0, _In_ FXMVECTOR Point1, _In_ FXMVECTOR Point2,
+     _In_ GXMVECTOR Point3,
+     _In_ HXMVECTOR Point4, _In_ HXMVECTOR Point5,
+     _In_ CXMVECTOR Point6, _In_ CXMVECTOR Point7, _In_ CXMVECTOR Plane,
+     _Out_ XMVECTOR& Outside, _Out_ XMVECTOR& Inside) noexcept
+ {
+     // Track the lowest and highest projection of the eight points onto the plane normal.
+     XMVECTOR Proj = XMVector3Dot(Plane, Point0);
+     XMVECTOR Lowest = Proj;
+     XMVECTOR Highest = Proj;
+
+     Proj = XMVector3Dot(Plane, Point1);
+     Lowest = XMVectorMin(Lowest, Proj);
+     Highest = XMVectorMax(Highest, Proj);
+
+     Proj = XMVector3Dot(Plane, Point2);
+     Lowest = XMVectorMin(Lowest, Proj);
+     Highest = XMVectorMax(Highest, Proj);
+
+     Proj = XMVector3Dot(Plane, Point3);
+     Lowest = XMVectorMin(Lowest, Proj);
+     Highest = XMVectorMax(Highest, Proj);
+
+     Proj = XMVector3Dot(Plane, Point4);
+     Lowest = XMVectorMin(Lowest, Proj);
+     Highest = XMVectorMax(Highest, Proj);
+
+     Proj = XMVector3Dot(Plane, Point5);
+     Lowest = XMVectorMin(Lowest, Proj);
+     Highest = XMVectorMax(Highest, Proj);
+
+     Proj = XMVector3Dot(Plane, Point6);
+     Lowest = XMVectorMin(Lowest, Proj);
+     Highest = XMVectorMax(Highest, Proj);
+
+     Proj = XMVector3Dot(Plane, Point7);
+     Lowest = XMVectorMin(Lowest, Proj);
+     Highest = XMVectorMax(Highest, Proj);
+
+     const XMVECTOR Threshold = XMVectorNegate(XMVectorSplatW(Plane));
+
+     // Every projection is above -Plane.w: outside the plane.
+     Outside = XMVectorGreater(Lowest, Threshold);
+
+     // Every projection is below -Plane.w: fully inside the plane.
+     Inside = XMVectorLess(Highest, Threshold);
+ }
+
+} // namespace Internal
+
+
+/****************************************************************************
+ *
+ * BoundingSphere
+ *
+ ****************************************************************************/
+
+ //-----------------------------------------------------------------------------
+ // Transform a sphere by an angle preserving transform.
+ //-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void XM_CALLCONV BoundingSphere::Transform(BoundingSphere& Out, FXMMATRIX M) const noexcept
+{
+    // Transform the centre of the sphere by M.
+    const XMVECTOR OldCenter = XMLoadFloat3(&Center);
+    const XMVECTOR NewCenter = XMVector3Transform(OldCenter, M);
+
+    // Squared lengths of the first three rows of M.
+    const XMVECTOR RowXSq = XMVector3Dot(M.r[0], M.r[0]);
+    const XMVECTOR RowYSq = XMVector3Dot(M.r[1], M.r[1]);
+    const XMVECTOR RowZSq = XMVector3Dot(M.r[2], M.r[2]);
+
+    // The largest of the three is the squared factor applied to the radius.
+    const XMVECTOR MaxRowSq = XMVectorMax(RowXSq, XMVectorMax(RowYSq, RowZSq));
+
+    // Store the transformed centre.
+    XMStoreFloat3(&Out.Center, NewCenter);
+
+    // Scale the radius of the sphere.
+    const float Scale = sqrtf(XMVectorGetX(MaxRowSq));
+    Out.Radius = Radius * Scale;
+}
+
+_Use_decl_annotations_
+inline void XM_CALLCONV BoundingSphere::Transform(BoundingSphere& Out, float Scale, FXMVECTOR Rotation, FXMVECTOR Translation) const noexcept
+{
+    // Transform the centre of the sphere: scale first, then rotate, then
+    // translate.
+    const XMVECTOR Scaled = XMVectorScale(XMLoadFloat3(&Center), Scale);
+    const XMVECTOR Rotated = XMVector3Rotate(Scaled, Rotation);
+    const XMVECTOR Moved = XMVectorAdd(Rotated, Translation);
+
+    // Store the transformed centre.
+    XMStoreFloat3(&Out.Center, Moved);
+
+    // Scale the radius of the sphere.
+    Out.Radius = Radius * Scale;
+}
+
+
+//-----------------------------------------------------------------------------
+// Point in sphere test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType XM_CALLCONV BoundingSphere::Contains(FXMVECTOR Point) const noexcept
+{
+    const XMVECTOR SphereCenter = XMLoadFloat3(&Center);
+    const XMVECTOR SphereRadius = XMVectorReplicatePtr(&Radius);
+    const XMVECTOR ToPoint = XMVectorSubtract(Point, SphereCenter);
+
+    // Compare squared lengths: the point is contained when it is no farther than one radius.
+    const bool Within = XMVector3LessOrEqual(XMVector3LengthSq(ToPoint), XMVectorMultiply(SphereRadius, SphereRadius));
+    return Within ? CONTAINS : DISJOINT;
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle in sphere test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType XM_CALLCONV BoundingSphere::Contains(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept
+{
+    // No overlap at all: the triangle is disjoint from the sphere.
+    if (!Intersects(V0, V1, V2))
+        return DISJOINT;
+
+    const XMVECTOR SphereCenter = XMLoadFloat3(&Center);
+    const XMVECTOR SphereRadius = XMVectorReplicatePtr(&Radius);
+    const XMVECTOR LimitSq = XMVectorMultiply(SphereRadius, SphereRadius);
+
+    // Per-vertex mask: squared distance to the centre within the squared radius.
+    const XMVECTOR In0 = XMVectorLessOrEqual(XMVector3LengthSq(XMVectorSubtract(V0, SphereCenter)), LimitSq);
+    const XMVECTOR In1 = XMVectorLessOrEqual(XMVector3LengthSq(XMVectorSubtract(V1, SphereCenter)), LimitSq);
+    const XMVECTOR In2 = XMVectorLessOrEqual(XMVector3LengthSq(XMVectorSubtract(V2, SphereCenter)), LimitSq);
+
+    const XMVECTOR AllIn = XMVectorAndInt(XMVectorAndInt(In0, In1), In2);
+
+    // All three vertices inside means containment; otherwise the triangle only intersects.
+    return (XMVector3EqualInt(AllIn, XMVectorTrueInt())) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere in sphere test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingSphere::Contains(const BoundingSphere& sh) const noexcept
+{
+    // Distance between the two centres.
+    const XMVECTOR ThisCenter = XMLoadFloat3(&Center);
+    const XMVECTOR OtherCenter = XMLoadFloat3(&sh.Center);
+    const float Gap = XMVectorGetX(XMVector3Length(XMVectorSubtract(OtherCenter, ThisCenter)));
+
+    // Unless the summed radii reach across the gap, the spheres do not
+    // touch at all.
+    if (!(Radius + sh.Radius >= Gap))
+        return DISJOINT;
+
+    // sh is contained when this radius still covers the gap after giving up
+    // sh's own radius; otherwise the two spheres merely overlap.
+    return (Radius - sh.Radius >= Gap) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Axis-aligned box in sphere test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingSphere::Contains(const BoundingBox& box) const noexcept
+{
+    if (!box.Intersects(*this))
+        return DISJOINT;
+
+    const XMVECTOR SphereCenter = XMLoadFloat3(&Center);
+    const XMVECTOR SphereRadius = XMVectorReplicatePtr(&Radius);
+    const XMVECTOR LimitSq = XMVectorMultiply(SphereRadius, SphereRadius);
+
+    // Box centre expressed relative to the sphere centre.
+    const XMVECTOR HalfSize = XMLoadFloat3(&box.Extents);
+    const XMVECTOR Relative = XMVectorSubtract(XMLoadFloat3(&box.Center), SphereCenter);
+
+    // AND together one "within the radius" mask per corner.
+    XMVECTOR EveryCornerIn = XMVectorTrueInt();
+    for (size_t corner = 0; corner < BoundingBox::CORNER_COUNT; ++corner)
+    {
+        const XMVECTOR ToCorner = XMVectorMultiplyAdd(HalfSize, g_BoxOffset[corner], Relative);
+        const XMVECTOR DistSq = XMVector3LengthSq(ToCorner);
+        EveryCornerIn = XMVectorAndInt(EveryCornerIn, XMVectorLessOrEqual(DistSq, LimitSq));
+    }
+
+    // Overlapping but with at least one corner outside means INTERSECTS.
+    return (XMVector3EqualInt(EveryCornerIn, XMVectorTrueInt())) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Oriented box in sphere test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingSphere::Contains(const BoundingOrientedBox& box) const noexcept
+{
+    if (!box.Intersects(*this))
+        return DISJOINT;
+
+    const XMVECTOR SphereCenter = XMLoadFloat3(&Center);
+    const XMVECTOR SphereRadius = XMVectorReplicatePtr(&Radius);
+    const XMVECTOR LimitSq = XMVectorMultiply(SphereRadius, SphereRadius);
+
+    const XMVECTOR BoxCenter = XMLoadFloat3(&box.Center);
+    const XMVECTOR HalfSize = XMLoadFloat3(&box.Extents);
+    const XMVECTOR BoxRotation = XMLoadFloat4(&box.Orientation);
+
+    assert(DirectX::Internal::XMQuaternionIsUnit(BoxRotation));
+
+    // AND together one "within the radius" mask per rotated, translated corner.
+    XMVECTOR EveryCornerIn = XMVectorTrueInt();
+    for (size_t corner = 0; corner < BoundingOrientedBox::CORNER_COUNT; ++corner)
+    {
+        const XMVECTOR Local = XMVectorMultiply(HalfSize, g_BoxOffset[corner]);
+        const XMVECTOR World = XMVectorAdd(XMVector3Rotate(Local, BoxRotation), BoxCenter);
+        const XMVECTOR DistSq = XMVector3LengthSq(XMVectorSubtract(SphereCenter, World));
+        EveryCornerIn = XMVectorAndInt(EveryCornerIn, XMVectorLessOrEqual(DistSq, LimitSq));
+    }
+
+    return (XMVector3EqualInt(EveryCornerIn, XMVectorTrueInt())) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Frustum in sphere test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingSphere::Contains(const BoundingFrustum& fr) const noexcept
+{
+    if (!fr.Intersects(*this))
+        return DISJOINT;
+
+    const XMVECTOR SphereCenter = XMLoadFloat3(&Center);
+    const XMVECTOR SphereRadius = XMVectorReplicatePtr(&Radius);
+    const XMVECTOR LimitSq = XMVectorMultiply(SphereRadius, SphereRadius);
+
+    const XMVECTOR FrustumOrigin = XMLoadFloat3(&fr.Origin);
+    const XMVECTOR FrustumRotation = XMLoadFloat4(&fr.Orientation);
+    assert(DirectX::Internal::XMQuaternionIsUnit(FrustumRotation));
+
+    // (x slope, y slope, 1, 0) for the four side edges: right-top, right-bottom, left-top, left-bottom.
+    const XMVECTOR EdgeDirs[4] =
+    {
+        XMVectorSet(fr.RightSlope, fr.TopSlope, 1.0f, 0.0f),
+        XMVectorSet(fr.RightSlope, fr.BottomSlope, 1.0f, 0.0f),
+        XMVectorSet(fr.LeftSlope, fr.TopSlope, 1.0f, 0.0f),
+        XMVectorSet(fr.LeftSlope, fr.BottomSlope, 1.0f, 0.0f),
+    };
+    const XMVECTOR NearZ = XMVectorReplicatePtr(&fr.Near);
+    const XMVECTOR FarZ = XMVectorReplicatePtr(&fr.Far);
+
+    // Corners 0-3 use the Near distance, corners 4-7 the Far distance.
+    XMVECTOR Corners[BoundingFrustum::CORNER_COUNT];
+    for (size_t edge = 0; edge < 4; ++edge)
+    {
+        Corners[edge] = XMVectorMultiply(EdgeDirs[edge], NearZ);
+        Corners[edge + 4] = XMVectorMultiply(EdgeDirs[edge], FarZ);
+    }
+
+    XMVECTOR EveryCornerIn = XMVectorTrueInt();
+    for (size_t corner = 0; corner < BoundingFrustum::CORNER_COUNT; ++corner)
+    {
+        const XMVECTOR World = XMVectorAdd(XMVector3Rotate(Corners[corner], FrustumRotation), FrustumOrigin);
+        const XMVECTOR DistSq = XMVector3LengthSq(XMVectorSubtract(SphereCenter, World));
+        EveryCornerIn = XMVectorAndInt(EveryCornerIn, XMVectorLessOrEqual(DistSq, LimitSq));
+    }
+
+    return (XMVector3EqualInt(EveryCornerIn, XMVectorTrueInt())) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere vs. sphere test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingSphere::Intersects(const BoundingSphere& sh) const noexcept
+{
+    // This sphere.
+    const XMVECTOR ThisCenter = XMLoadFloat3(&Center);
+    const XMVECTOR ThisRadius = XMVectorReplicatePtr(&Radius);
+
+    // The other sphere.
+    const XMVECTOR OtherCenter = XMLoadFloat3(&sh.Center);
+    const XMVECTOR OtherRadius = XMVectorReplicatePtr(&sh.Radius);
+
+    // Squared distance between the two centres.
+    const XMVECTOR GapSq = XMVector3LengthSq(XMVectorSubtract(OtherCenter, ThisCenter));
+
+    // Square of the summed radii.
+    const XMVECTOR ReachSum = XMVectorAdd(ThisRadius, OtherRadius);
+    const XMVECTOR ReachSq = XMVectorMultiply(ReachSum, ReachSum);
+
+    // A gap equal to the summed radii still counts as an intersection.
+    return XMVector3LessOrEqual(GapSq, ReachSq);
+}
+
+
+//-----------------------------------------------------------------------------
+// Box vs. sphere test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingSphere::Intersects(const BoundingBox& box) const noexcept
+{
+ return box.Intersects(*this); // forwards to the BoundingBox overload that takes a sphere
+}
+
+_Use_decl_annotations_
+inline bool BoundingSphere::Intersects(const BoundingOrientedBox& box) const noexcept
+{
+ return box.Intersects(*this); // forwards to the BoundingOrientedBox overload that takes a sphere
+}
+
+
+//-----------------------------------------------------------------------------
+// Frustum vs. sphere test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingSphere::Intersects(const BoundingFrustum& fr) const noexcept
+{
+ return fr.Intersects(*this); // forwards to the BoundingFrustum overload that takes a sphere
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle vs sphere test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool XM_CALLCONV BoundingSphere::Intersects(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept
+{
+    // Load the sphere and its squared radius.
+    const XMVECTOR SphereCenter = XMLoadFloat3(&Center);
+    const XMVECTOR SphereRadius = XMVectorReplicatePtr(&Radius);
+    const XMVECTOR LimitSq = XMVectorMultiply(SphereRadius, SphereRadius);
+
+    // Unit normal of the plane that holds the triangle.
+    const XMVECTOR EdgeA = XMVectorSubtract(V1, V0);
+    const XMVECTOR EdgeB = XMVectorSubtract(V2, V0);
+    const XMVECTOR Normal = XMVector3Normalize(XMVector3Cross(EdgeA, EdgeB));
+
+    // Assert that the triangle is not degenerate.
+    assert(!XMVector3Equal(Normal, XMVectorZero()));
+
+    // Signed distance from the sphere centre to that plane.
+    const XMVECTOR PlaneDist = XMVector3Dot(XMVectorSubtract(SphereCenter, V0), Normal);
+
+    // A centre more than one radius away from the plane, on either side,
+    // rules out any intersection.
+    const XMVECTOR TooFarBelow = XMVectorLess(PlaneDist, XMVectorNegate(SphereRadius));
+    const XMVECTOR TooFarAbove = XMVectorGreater(PlaneDist, SphereRadius);
+    const XMVECTOR Miss = XMVectorOrInt(TooFarBelow, TooFarAbove);
+
+    // Project the sphere centre onto the plane of the triangle.
+    const XMVECTOR OnPlane = XMVectorNegativeMultiplySubtract(Normal, PlaneDist, SphereCenter);
+
+    // A projection that falls inside all three edges is a hit (provided the
+    // plane-distance test above does not rule it out).
+    XMVECTOR Hit = DirectX::Internal::PointOnPlaneInsideTriangle(OnPlane, V0, V1, V2);
+
+    // Also test each edge: take the point on the edge nearest the sphere
+    // centre and accept when it lies within one radius of the centre.
+
+    // Edge 0,1
+    const XMVECTOR Near01 = DirectX::Internal::PointOnLineSegmentNearestPoint(V0, V1, SphereCenter);
+    const XMVECTOR DistSq01 = XMVector3LengthSq(XMVectorSubtract(SphereCenter, Near01));
+    Hit = XMVectorOrInt(Hit, XMVectorLessOrEqual(DistSq01, LimitSq));
+
+    // Edge 1,2
+    const XMVECTOR Near12 = DirectX::Internal::PointOnLineSegmentNearestPoint(V1, V2, SphereCenter);
+    const XMVECTOR DistSq12 = XMVector3LengthSq(XMVectorSubtract(SphereCenter, Near12));
+    Hit = XMVectorOrInt(Hit, XMVectorLessOrEqual(DistSq12, LimitSq));
+
+    // Edge 2,0
+    const XMVECTOR Near20 = DirectX::Internal::PointOnLineSegmentNearestPoint(V2, V0, SphereCenter);
+    const XMVECTOR DistSq20 = XMVector3LengthSq(XMVectorSubtract(SphereCenter, Near20));
+    Hit = XMVectorOrInt(Hit, XMVectorLessOrEqual(DistSq20, LimitSq));
+
+    // Intersecting means one of the tests accepted and the plane-distance
+    // test did not reject (Hit AND NOT Miss).
+    return XMVector4EqualInt(XMVectorAndCInt(Hit, Miss), XMVectorTrueInt());
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere-plane intersection
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PlaneIntersectionType XM_CALLCONV BoundingSphere::Intersects(FXMVECTOR Plane) const noexcept
+{
+    assert(DirectX::Internal::XMPlaneIsUnit(Plane));
+
+    // Load the sphere.
+    const XMVECTOR SphereRadius = XMVectorReplicatePtr(&Radius);
+    const XMVECTOR Loaded = XMLoadFloat3(&Center);
+
+    // Give the centre w = 1 so that it can be dotted with the four-component plane.
+    const XMVECTOR CenterW1 = XMVectorInsert<0, 0, 0, 0, 1>(Loaded, XMVectorSplatOne());
+
+    XMVECTOR InFront, Behind;
+    DirectX::Internal::FastIntersectSpherePlane(CenterW1, SphereRadius, Plane, InFront, Behind);
+
+    // The whole sphere is more than one radius above the plane.
+    if (XMVector4EqualInt(InFront, XMVectorTrueInt()))
+        return FRONT;
+
+    // The whole sphere is more than one radius below the plane.
+    if (XMVector4EqualInt(Behind, XMVectorTrueInt()))
+        return BACK;
+
+    // Neither of the above: the plane cuts through the sphere.
+    return INTERSECTING;
+}
+
+
+//-----------------------------------------------------------------------------
+// Compute the intersection of a ray (Origin, Direction) with a sphere.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool XM_CALLCONV BoundingSphere::Intersects(FXMVECTOR Origin, FXMVECTOR Direction, float& Dist) const noexcept
+{
+    assert(DirectX::Internal::XMVector3IsUnit(Direction));
+
+    const XMVECTOR SphereCenter = XMLoadFloat3(&Center);
+    const XMVECTOR SphereRadius = XMVectorReplicatePtr(&Radius);
+
+    // Vector from the ray origin to the centre of the sphere.
+    const XMVECTOR ToCenter = XMVectorSubtract(SphereCenter, Origin);
+
+    // Projection of that vector onto the ray direction.
+    const XMVECTOR Along = XMVector3Dot(ToCenter, Direction);
+
+    // Squared length of ToCenter and squared radius.
+    const XMVECTOR ToCenterSq = XMVector3Dot(ToCenter, ToCenter);
+    const XMVECTOR RadiusSq = XMVectorMultiply(SphereRadius, SphereRadius);
+
+    // Squared distance from the centre of the sphere to the projection.
+    const XMVECTOR PerpSq = XMVectorNegativeMultiplySubtract(Along, Along, ToCenterSq);
+
+    // The ray origin is outside the sphere and the sphere centre is behind
+    // the ray origin: no intersection.
+    const XMVECTOR Behind = XMVectorAndInt(XMVectorLess(Along, XMVectorZero()), XMVectorGreater(ToCenterSq, RadiusSq));
+
+    // The ray also misses when the centre is farther from the ray's line
+    // than one radius.
+    const XMVECTOR Miss = XMVectorOrInt(Behind, XMVectorGreater(PerpSq, RadiusSq));
+
+    // Distances along the ray to the two crossing points.
+    const XMVECTOR HalfChord = XMVectorSqrt(XMVectorSubtract(RadiusSq, PerpSq));
+    const XMVECTOR Entry = XMVectorSubtract(Along, HalfChord);
+    const XMVECTOR Exit = XMVectorAdd(Along, HalfChord);
+
+    // An origin inside the sphere uses the far crossing, otherwise the near one.
+    const XMVECTOR StartsInside = XMVectorLessOrEqual(ToCenterSq, RadiusSq);
+    const XMVECTOR Nearest = XMVectorSelect(Entry, Exit, StartsInside);
+
+    if (XMVector4NotEqualInt(Miss, XMVectorTrueInt()))
+    {
+        // Store the x-component in Dist.
+        XMStoreFloat(&Dist, Nearest);
+        return true;
+    }
+
+    // No hit: Dist is reset to zero.
+    Dist = 0.f;
+    return false;
+}
+
+
+//-----------------------------------------------------------------------------
+// Test this sphere against six planes (typically the faces of a frustum).
+// Returns DISJOINT when the sphere is outside at least one plane, CONTAINS when
+// it is inside all six, and INTERSECTS in every other case.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType XM_CALLCONV BoundingSphere::ContainedBy(
+    FXMVECTOR Plane0, FXMVECTOR Plane1, FXMVECTOR Plane2,
+    GXMVECTOR Plane3,
+    HXMVECTOR Plane4, HXMVECTOR Plane5) const noexcept
+{
+    // Replicated radius, and the center with w = 1 for the plane dot product.
+    const XMVECTOR SphereRadius = XMVectorReplicatePtr(&Radius);
+    const XMVECTOR SphereCenter = XMVectorInsert<0, 0, 0, 0, 1>(XMLoadFloat3(&Center), XMVectorSplatOne());
+
+    XMVECTOR PlaneOutside, PlaneInside;
+
+    // Plane 0 seeds the accumulators.
+    DirectX::Internal::FastIntersectSpherePlane(SphereCenter, SphereRadius, Plane0, PlaneOutside, PlaneInside);
+    XMVECTOR OutsideAny = PlaneOutside;
+    XMVECTOR InsideAll = PlaneInside;
+
+    // Plane 1.
+    DirectX::Internal::FastIntersectSpherePlane(SphereCenter, SphereRadius, Plane1, PlaneOutside, PlaneInside);
+    OutsideAny = XMVectorOrInt(OutsideAny, PlaneOutside);
+    InsideAll = XMVectorAndInt(InsideAll, PlaneInside);
+
+    // Plane 2.
+    DirectX::Internal::FastIntersectSpherePlane(SphereCenter, SphereRadius, Plane2, PlaneOutside, PlaneInside);
+    OutsideAny = XMVectorOrInt(OutsideAny, PlaneOutside);
+    InsideAll = XMVectorAndInt(InsideAll, PlaneInside);
+
+    // Plane 3.
+    DirectX::Internal::FastIntersectSpherePlane(SphereCenter, SphereRadius, Plane3, PlaneOutside, PlaneInside);
+    OutsideAny = XMVectorOrInt(OutsideAny, PlaneOutside);
+    InsideAll = XMVectorAndInt(InsideAll, PlaneInside);
+
+    // Plane 4.
+    DirectX::Internal::FastIntersectSpherePlane(SphereCenter, SphereRadius, Plane4, PlaneOutside, PlaneInside);
+    OutsideAny = XMVectorOrInt(OutsideAny, PlaneOutside);
+    InsideAll = XMVectorAndInt(InsideAll, PlaneInside);
+
+    // Plane 5.
+    DirectX::Internal::FastIntersectSpherePlane(SphereCenter, SphereRadius, Plane5, PlaneOutside, PlaneInside);
+    OutsideAny = XMVectorOrInt(OutsideAny, PlaneOutside);
+    InsideAll = XMVectorAndInt(InsideAll, PlaneInside);
+
+    // Outside any single plane means the sphere is outside the whole volume.
+    if (XMVector4EqualInt(OutsideAny, XMVectorTrueInt()))
+        return DISJOINT;
+
+    // Inside every plane means fully contained; otherwise it may intersect.
+    const bool FullyInside = XMVector4EqualInt(InsideAll, XMVectorTrueInt());
+    return FullyInside ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Build in Out a sphere enclosing both S1 and S2. When the containment checks
+// below show one input already covers the other, that input is copied to Out.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingSphere::CreateMerged(BoundingSphere& Out, const BoundingSphere& S1, const BoundingSphere& S2) noexcept
+{
+    const float RadiusA = S1.Radius;
+    const float RadiusB = S2.Radius;
+    const XMVECTOR CenterA = XMLoadFloat3(&S1.Center);
+    const XMVECTOR CenterB = XMLoadFloat3(&S2.Center);
+
+    // Offset from S1's center to S2's center, and its length.
+    const XMVECTOR Offset = XMVectorSubtract(CenterB, CenterA);
+    const XMVECTOR OffsetLen = XMVector3Length(Offset);
+    const float Gap = XMVectorGetX(OffsetLen);
+
+    // When the spheres touch or overlap, one may already contain the other.
+    const bool Touching = (RadiusA + RadiusB >= Gap);
+    if (Touching && (RadiusA - RadiusB >= Gap))
+    {
+        Out = S1;
+        return;
+    }
+    if (Touching && (RadiusB - RadiusA >= Gap))
+    {
+        Out = S2;
+        return;
+    }
+
+    // Unit direction from S1's center toward S2's center.
+    const XMVECTOR Dir = XMVectorDivide(Offset, OffsetLen);
+
+    // Extent of both spheres along Dir, measured from S1's center.
+    const float Lo = XMMin(-RadiusA, Gap - RadiusB);
+    const float Hi = XMMax(RadiusA, Gap + RadiusB);
+    const float HalfSpan = (Hi - Lo) * 0.5f;
+
+    const XMVECTOR Merged = XMVectorAdd(CenterA, XMVectorMultiply(Dir, XMVectorReplicate(HalfSpan + Lo)));
+
+    XMStoreFloat3(&Out.Center, Merged);
+    Out.Radius = HalfSpan;
+}
+
+
+//-----------------------------------------------------------------------------
+// Create the sphere circumscribing an axis-aligned box (radius = |Extents|).
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingSphere::CreateFromBoundingBox(BoundingSphere& Out, const BoundingBox& box) noexcept
+{
+    // The length of the extents vector is the center-to-corner distance.
+    XMStoreFloat(&Out.Radius, XMVector3Length(XMLoadFloat3(&box.Extents)));
+    Out.Center = box.Center;
+}
+
+_Use_decl_annotations_
+inline void BoundingSphere::CreateFromBoundingBox(BoundingSphere& Out, const BoundingOrientedBox& box) noexcept
+{
+    // The box orientation is ignored: a sphere is rotationally invariant, so
+    // only the center and the length of the extents vector matter.
+    XMStoreFloat(&Out.Radius, XMVector3Length(XMLoadFloat3(&box.Extents)));
+    Out.Center = box.Center;
+}
+
+
+//-----------------------------------------------------------------------------
+// Find an approximate smallest enclosing sphere for Count points (Count > 0),
+// where consecutive points are Stride bytes apart starting at pPoints. An
+// exact minimal sphere is possible but is slower and requires a more complex
+// algorithm. The algorithm is based on Jack Ritter, "An Efficient Bounding
+// Sphere", Graphics Gems. The result is written to Out.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingSphere::CreateFromPoints(BoundingSphere& Out, size_t Count, const XMFLOAT3* pPoints, size_t Stride) noexcept
+{
+    assert(Count > 0);
+    assert(pPoints);
+
+    // Find the points with minimum and maximum x, y, and z (point 0 seeds all six).
+    XMVECTOR MinX, MaxX, MinY, MaxY, MinZ, MaxZ;
+
+    MinX = MaxX = MinY = MaxY = MinZ = MaxZ = XMLoadFloat3(pPoints);
+    // Points are Stride bytes apart, so they are addressed through a byte pointer.
+    for (size_t i = 1; i < Count; ++i)
+    {
+        XMVECTOR Point = XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(reinterpret_cast<const unsigned char*>(pPoints) + i * Stride));
+
+        float px = XMVectorGetX(Point);
+        float py = XMVectorGetY(Point);
+        float pz = XMVectorGetZ(Point);
+
+        if (px < XMVectorGetX(MinX))
+            MinX = Point;
+
+        if (px > XMVectorGetX(MaxX))
+            MaxX = Point;
+
+        if (py < XMVectorGetY(MinY))
+            MinY = Point;
+
+        if (py > XMVectorGetY(MaxY))
+            MaxY = Point;
+
+        if (pz < XMVectorGetZ(MinZ))
+            MinZ = Point;
+
+        if (pz > XMVectorGetZ(MaxZ))
+            MaxZ = Point;
+    }
+
+    // Use the min/max pair that are farthest apart to form the initial sphere.
+    XMVECTOR DeltaX = XMVectorSubtract(MaxX, MinX);
+    XMVECTOR DistX = XMVector3Length(DeltaX);
+
+    XMVECTOR DeltaY = XMVectorSubtract(MaxY, MinY);
+    XMVECTOR DistY = XMVector3Length(DeltaY);
+
+    XMVECTOR DeltaZ = XMVectorSubtract(MaxZ, MinZ);
+    XMVECTOR DistZ = XMVector3Length(DeltaZ);
+
+    XMVECTOR vCenter;
+    XMVECTOR vRadius;
+
+    if (XMVector3Greater(DistX, DistY))
+    {
+        if (XMVector3Greater(DistX, DistZ))
+        {
+            // Use min/max x.
+            vCenter = XMVectorLerp(MaxX, MinX, 0.5f);
+            vRadius = XMVectorScale(DistX, 0.5f);
+        }
+        else
+        {
+            // Use min/max z.
+            vCenter = XMVectorLerp(MaxZ, MinZ, 0.5f);
+            vRadius = XMVectorScale(DistZ, 0.5f);
+        }
+    }
+    else // Y >= X
+    {
+        if (XMVector3Greater(DistY, DistZ))
+        {
+            // Use min/max y.
+            vCenter = XMVectorLerp(MaxY, MinY, 0.5f);
+            vRadius = XMVectorScale(DistY, 0.5f);
+        }
+        else
+        {
+            // Use min/max z.
+            vCenter = XMVectorLerp(MaxZ, MinZ, 0.5f);
+            vRadius = XMVectorScale(DistZ, 0.5f);
+        }
+    }
+
+    // Second pass over every point: add any points not inside the sphere.
+    for (size_t i = 0; i < Count; ++i)
+    {
+        XMVECTOR Point = XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(reinterpret_cast<const unsigned char*>(pPoints) + i * Stride));
+
+        XMVECTOR Delta = XMVectorSubtract(Point, vCenter);
+
+        XMVECTOR Dist = XMVector3Length(Delta);
+
+        if (XMVector3Greater(Dist, vRadius))
+        {
+            // Grow: the radius becomes the mean of radius and distance, and the center moves toward the point.
+            vRadius = XMVectorScale(XMVectorAdd(vRadius, Dist), 0.5f);
+            vCenter = XMVectorAdd(vCenter, XMVectorMultiply(XMVectorSubtract(XMVectorReplicate(1.0f), XMVectorDivide(vRadius, Dist)), Delta));
+        }
+    }
+
+    XMStoreFloat3(&Out.Center, vCenter);
+    XMStoreFloat(&Out.Radius, vRadius);
+}
+
+
+//-----------------------------------------------------------------------------
+// Create a sphere around a frustum by running CreateFromPoints on its corners.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingSphere::CreateFromFrustum(BoundingSphere& Out, const BoundingFrustum& fr) noexcept
+{
+    XMFLOAT3 FrustumCorners[BoundingFrustum::CORNER_COUNT];
+    fr.GetCorners(FrustumCorners);
+    CreateFromPoints(Out, BoundingFrustum::CORNER_COUNT, FrustumCorners, sizeof(FrustumCorners[0]));
+}
+
+
+/****************************************************************************
+ *
+ * BoundingBox
+ *
+ ****************************************************************************/
+
+ //-----------------------------------------------------------------------------
+ // Transform this axis-aligned box by matrix M and write to Out the
+ // axis-aligned box that bounds all of the transformed corners.
+ //-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void XM_CALLCONV BoundingBox::Transform(BoundingBox& Out, FXMMATRIX M) const noexcept
+{
+    const XMVECTOR BoxCenter = XMLoadFloat3(&Center);
+    const XMVECTOR BoxExtents = XMLoadFloat3(&Extents);
+
+    // Seed the running bounds with the first transformed corner.
+    const XMVECTOR First = XMVector3Transform(XMVectorMultiplyAdd(BoxExtents, g_BoxOffset[0], BoxCenter), M);
+    XMVECTOR Lo = First;
+    XMVECTOR Hi = First;
+
+    // Grow the bounds with each remaining corner.
+    for (size_t Index = 1; Index < CORNER_COUNT; ++Index)
+    {
+        const XMVECTOR Corner = XMVectorMultiplyAdd(BoxExtents, g_BoxOffset[Index], BoxCenter);
+        const XMVECTOR Moved = XMVector3Transform(Corner, M);
+        Lo = XMVectorMin(Lo, Moved);
+        Hi = XMVectorMax(Hi, Moved);
+    }
+
+    // Center is the midpoint of the bounds, extents are half their size.
+    const XMVECTOR Sum = XMVectorAdd(Lo, Hi);
+    const XMVECTOR Span = XMVectorSubtract(Hi, Lo);
+    XMStoreFloat3(&Out.Center, XMVectorScale(Sum, 0.5f));
+    XMStoreFloat3(&Out.Extents, XMVectorScale(Span, 0.5f));
+}
+
+_Use_decl_annotations_
+inline void XM_CALLCONV BoundingBox::Transform(BoundingBox& Out, float Scale, FXMVECTOR Rotation, FXMVECTOR Translation) const noexcept
+{
+    // Rotation must be a unit quaternion.
+    assert(DirectX::Internal::XMQuaternionIsUnit(Rotation));
+
+    const XMVECTOR BoxCenter = XMLoadFloat3(&Center);
+    const XMVECTOR BoxExtents = XMLoadFloat3(&Extents);
+    const XMVECTOR UniformScale = XMVectorReplicate(Scale);
+
+    // Each corner is scaled, then rotated, then translated. The first one
+    // seeds the running bounds.
+    XMVECTOR Moved = XMVectorMultiply(XMVectorMultiplyAdd(BoxExtents, g_BoxOffset[0], BoxCenter), UniformScale);
+    Moved = XMVectorAdd(XMVector3Rotate(Moved, Rotation), Translation);
+    XMVECTOR Lo = Moved;
+    XMVECTOR Hi = Moved;
+
+    // Fold in the remaining corners.
+    for (size_t Index = 1; Index < CORNER_COUNT; ++Index)
+    {
+        Moved = XMVectorMultiply(XMVectorMultiplyAdd(BoxExtents, g_BoxOffset[Index], BoxCenter), UniformScale);
+        Moved = XMVectorAdd(XMVector3Rotate(Moved, Rotation), Translation);
+
+        Lo = XMVectorMin(Lo, Moved);
+        Hi = XMVectorMax(Hi, Moved);
+    }
+
+    // Center is the midpoint of the bounds, extents are half their size.
+    XMStoreFloat3(&Out.Center, XMVectorScale(XMVectorAdd(Lo, Hi), 0.5f));
+    XMStoreFloat3(&Out.Extents, XMVectorScale(XMVectorSubtract(Hi, Lo), 0.5f));
+}
+
+
+//-----------------------------------------------------------------------------
+// Write the box's CORNER_COUNT corner points into Corners (must be non-null).
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingBox::GetCorners(XMFLOAT3* Corners) const noexcept
+{
+    assert(Corners != nullptr);
+
+    const XMVECTOR BoxCenter = XMLoadFloat3(&Center);
+    const XMVECTOR BoxExtents = XMLoadFloat3(&Extents);
+
+    // Corner k = Extents * g_BoxOffset[k] + Center.
+    XMFLOAT3* pDest = Corners;
+    for (size_t Index = 0; Index < CORNER_COUNT; ++Index, ++pDest)
+    {
+        XMStoreFloat3(pDest, XMVectorMultiplyAdd(BoxExtents, g_BoxOffset[Index], BoxCenter));
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+// Point in axis-aligned box test; the result is either CONTAINS or DISJOINT.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType XM_CALLCONV BoundingBox::Contains(FXMVECTOR Point) const noexcept
+{
+    // Work relative to the box center so the test is a bounds check on the extents.
+    const XMVECTOR Offset = XMVectorSubtract(Point, XMLoadFloat3(&Center));
+    const bool IsInside = XMVector3InBounds(Offset, XMLoadFloat3(&Extents));
+    return IsInside ? CONTAINS : DISJOINT;
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle in axis-aligned box test. DISJOINT when the triangle misses the
+// box, CONTAINS when all three vertices lie within it, INTERSECTS otherwise.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType XM_CALLCONV BoundingBox::Contains(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept
+{
+    if (!Intersects(V0, V1, V2))
+        return DISJOINT;
+
+    const XMVECTOR BoxCenter = XMLoadFloat3(&Center);
+    const XMVECTOR BoxExtents = XMLoadFloat3(&Extents);
+
+    // Per-axis "within extents" mask for each vertex, relative to the center.
+    const XMVECTOR In0 = XMVectorLessOrEqual(XMVectorAbs(XMVectorSubtract(V0, BoxCenter)), BoxExtents);
+    const XMVECTOR In1 = XMVectorLessOrEqual(XMVectorAbs(XMVectorSubtract(V1, BoxCenter)), BoxExtents);
+    const XMVECTOR In2 = XMVectorLessOrEqual(XMVectorAbs(XMVectorSubtract(V2, BoxCenter)), BoxExtents);
+
+    // Only x, y and z of the combined mask are examined below.
+    const XMVECTOR InAll = XMVectorAndInt(XMVectorAndInt(In0, In1), In2);
+
+    return XMVector3EqualInt(InAll, XMVectorTrueInt()) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere in axis-aligned box test. DISJOINT when the sphere center is farther
+// from the box than the radius, CONTAINS when the per-axis containment checks
+// below all pass, INTERSECTS otherwise.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingBox::Contains(const BoundingSphere& sh) const noexcept
+{
+    // Load the sphere.
+    const XMVECTOR BallCenter = XMLoadFloat3(&sh.Center);
+    const XMVECTOR BallRadius = XMVectorReplicatePtr(&sh.Radius);
+
+    const XMVECTOR Mid = XMLoadFloat3(&Center);
+    const XMVECTOR Half = XMLoadFloat3(&Extents);
+
+    const XMVECTOR Lo = XMVectorSubtract(Mid, Half);
+    const XMVECTOR Hi = XMVectorAdd(Mid, Half);
+
+    // Per-axis offset from the sphere center to the nearest point of the box:
+    //   center below Lo -> center - Lo
+    //   center above Hi -> center - Hi
+    //   otherwise       -> 0
+    const XMVECTOR BelowLo = XMVectorLess(BallCenter, Lo);
+    const XMVECTOR AboveHi = XMVectorGreater(BallCenter, Hi);
+
+    XMVECTOR Gap = XMVectorSelect(XMVectorZero(), XMVectorSubtract(BallCenter, Lo), BelowLo);
+    Gap = XMVectorSelect(Gap, XMVectorSubtract(BallCenter, Hi), AboveHi);
+
+    // Squared distance to the box (the dot product squares and sums the axes).
+    const XMVECTOR GapSq = XMVector3Dot(Gap, Gap);
+    const XMVECTOR RadiusSq = XMVectorMultiply(BallRadius, BallRadius);
+
+    if (XMVector3Greater(GapSq, RadiusSq))
+        return DISJOINT;
+
+    // Contained when, on x, y and z: Lo + r <= center, center <= Hi - r,
+    // and the box size (Hi - Lo) is greater than r.
+    const XMVECTOR AboveMin = XMVectorLessOrEqual(XMVectorAdd(Lo, BallRadius), BallCenter);
+    const XMVECTOR BelowMax = XMVectorLessOrEqual(BallCenter, XMVectorSubtract(Hi, BallRadius));
+    const XMVECTOR RoomEnough = XMVectorGreater(XMVectorSubtract(Hi, Lo), BallRadius);
+
+    const XMVECTOR Fits = XMVectorAndInt(XMVectorAndInt(AboveMin, BelowMax), RoomEnough);
+
+    return XMVector3EqualInt(Fits, XMVectorTrueInt()) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Axis-aligned box in axis-aligned box test. DISJOINT when the boxes are
+// separated on any axis, CONTAINS when 'box' lies within this box on every
+// axis, INTERSECTS otherwise.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingBox::Contains(const BoundingBox& box) const noexcept
+{
+    // This box (outer) as a min/max pair.
+    const XMVECTOR OuterMid = XMLoadFloat3(&Center);
+    const XMVECTOR OuterHalf = XMLoadFloat3(&Extents);
+    const XMVECTOR OuterLo = XMVectorSubtract(OuterMid, OuterHalf);
+    const XMVECTOR OuterHi = XMVectorAdd(OuterMid, OuterHalf);
+
+    // The candidate box (inner) as a min/max pair.
+    const XMVECTOR InnerMid = XMLoadFloat3(&box.Center);
+    const XMVECTOR InnerHalf = XMLoadFloat3(&box.Extents);
+    const XMVECTOR InnerLo = XMVectorSubtract(InnerMid, InnerHalf);
+    const XMVECTOR InnerHi = XMVectorAdd(InnerMid, InnerHalf);
+
+    // Separated on an axis when one box's min exceeds the other's max.
+    const XMVECTOR Apart = XMVectorOrInt(XMVectorGreater(OuterLo, InnerHi), XMVectorGreater(InnerLo, OuterHi));
+    if (DirectX::Internal::XMVector3AnyTrue(Apart))
+        return DISJOINT;
+
+    // Contained when OuterLo <= InnerLo and InnerHi <= OuterHi on x, y and z.
+    const XMVECTOR Within = XMVectorAndInt(XMVectorLessOrEqual(OuterLo, InnerLo), XMVectorLessOrEqual(InnerHi, OuterHi));
+    return DirectX::Internal::XMVector3AllTrue(Within) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Oriented box in axis-aligned box test. DISJOINT when the boxes do not
+// intersect, CONTAINS when every corner of 'box' lies within this box,
+// INTERSECTS otherwise.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingBox::Contains(const BoundingOrientedBox& box) const noexcept
+{
+    if (!box.Intersects(*this))
+        return DISJOINT;
+
+    const XMVECTOR AabbExtents = XMLoadFloat3(&Extents);
+
+    // Express the oriented box's center relative to this box's center so each
+    // corner can be compared against the extents directly.
+    const XMVECTOR ObbCenter = XMVectorSubtract(XMLoadFloat3(&box.Center), XMLoadFloat3(&Center));
+    const XMVECTOR ObbExtents = XMLoadFloat3(&box.Extents);
+    const XMVECTOR ObbRotation = XMLoadFloat4(&box.Orientation);
+
+    assert(DirectX::Internal::XMQuaternionIsUnit(ObbRotation));
+
+    XMVECTOR AllInside = XMVectorTrueInt();
+    for (size_t Index = 0; Index < BoundingOrientedBox::CORNER_COUNT; ++Index)
+    {
+        const XMVECTOR Rotated = XMVector3Rotate(XMVectorMultiply(ObbExtents, g_BoxOffset[Index]), ObbRotation);
+        const XMVECTOR Reach = XMVectorAbs(XMVectorAdd(Rotated, ObbCenter));
+        AllInside = XMVectorAndInt(AllInside, XMVectorLessOrEqual(Reach, AabbExtents));
+    }
+
+    return XMVector3EqualInt(AllInside, XMVectorTrueInt()) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Frustum in axis-aligned box test. DISJOINT when they do not intersect,
+// CONTAINS when every frustum corner lies within the box, else INTERSECTS.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingBox::Contains(const BoundingFrustum& fr) const noexcept
+{
+    if (!fr.Intersects(*this))
+        return DISJOINT;
+
+    XMFLOAT3 FrustumCorners[BoundingFrustum::CORNER_COUNT];
+    fr.GetCorners(FrustumCorners);
+
+    const XMVECTOR BoxCenter = XMLoadFloat3(&Center);
+    const XMVECTOR BoxExtents = XMLoadFloat3(&Extents);
+
+    // AND together the per-axis "within extents" mask of every corner.
+    XMVECTOR AllInside = XMVectorTrueInt();
+    for (const XMFLOAT3& Corner : FrustumCorners)
+    {
+        const XMVECTOR Offset = XMVectorAbs(XMVectorSubtract(XMLoadFloat3(&Corner), BoxCenter));
+        AllInside = XMVectorAndInt(AllInside, XMVectorLessOrEqual(Offset, BoxExtents));
+    }
+
+    return XMVector3EqualInt(AllInside, XMVectorTrueInt()) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere vs axis-aligned box test: true when the squared distance from the
+// sphere center to the box does not exceed the squared radius.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingBox::Intersects(const BoundingSphere& sh) const noexcept
+{
+    // Load the sphere.
+    const XMVECTOR BallCenter = XMLoadFloat3(&sh.Center);
+    const XMVECTOR BallRadius = XMVectorReplicatePtr(&sh.Radius);
+
+    // Load the box as a min/max pair.
+    const XMVECTOR Mid = XMLoadFloat3(&Center);
+    const XMVECTOR Half = XMLoadFloat3(&Extents);
+
+    const XMVECTOR Lo = XMVectorSubtract(Mid, Half);
+    const XMVECTOR Hi = XMVectorAdd(Mid, Half);
+
+    // Per-axis offset from the sphere center to the nearest point of the box:
+    //   center below Lo -> center - Lo
+    //   center above Hi -> center - Hi
+    //   otherwise       -> 0
+    const XMVECTOR BelowLo = XMVectorLess(BallCenter, Lo);
+    const XMVECTOR AboveHi = XMVectorGreater(BallCenter, Hi);
+
+    const XMVECTOR FromLo = XMVectorSubtract(BallCenter, Lo);
+    const XMVECTOR FromHi = XMVectorSubtract(BallCenter, Hi);
+
+    XMVECTOR Gap = XMVectorSelect(XMVectorZero(), FromLo, BelowLo);
+    Gap = XMVectorSelect(Gap, FromHi, AboveHi);
+
+    // Squared distance to the box (the dot product squares and sums the axes).
+    const XMVECTOR GapSq = XMVector3Dot(Gap, Gap);
+    const XMVECTOR RadiusSq = XMVectorMultiply(BallRadius, BallRadius);
+
+    // Equality counts as intersecting (<=).
+    return XMVector3LessOrEqual(GapSq, RadiusSq);
+}
+
+
+//-----------------------------------------------------------------------------
+// Axis-aligned box vs. axis-aligned box test: true unless the boxes are
+// separated along x, y or z.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingBox::Intersects(const BoundingBox& box) const noexcept
+{
+    // This box as a min/max pair.
+    const XMVECTOR Mid = XMLoadFloat3(&Center);
+    const XMVECTOR Half = XMLoadFloat3(&Extents);
+    const XMVECTOR Lo = XMVectorSubtract(Mid, Half);
+    const XMVECTOR Hi = XMVectorAdd(Mid, Half);
+
+    // The other box as a min/max pair.
+    const XMVECTOR OtherMid = XMLoadFloat3(&box.Center);
+    const XMVECTOR OtherHalf = XMLoadFloat3(&box.Extents);
+    const XMVECTOR OtherLo = XMVectorSubtract(OtherMid, OtherHalf);
+    const XMVECTOR OtherHi = XMVectorAdd(OtherMid, OtherHalf);
+
+    // Separated on an axis when one box's min exceeds the other's max.
+    const XMVECTOR Apart = XMVectorOrInt(XMVectorGreater(Lo, OtherHi), XMVectorGreater(OtherLo, Hi));
+    return !DirectX::Internal::XMVector3AnyTrue(Apart);
+}
+
+
+//-----------------------------------------------------------------------------
+// Oriented box vs. axis-aligned box test; forwards to the oriented box's own Intersects.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingBox::Intersects(const BoundingOrientedBox& box) const noexcept
+{
+ return box.Intersects(*this); // the result of the forwarded call is returned unchanged
+}
+
+
+//-----------------------------------------------------------------------------
+// Frustum vs. axis-aligned box test; forwards to the frustum's own Intersects.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingBox::Intersects(const BoundingFrustum& fr) const noexcept
+{
+ return fr.Intersects(*this); // the result of the forwarded call is returned unchanged
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle vs. axis-aligned box: separating-axis test (3 box axes, triangle normal, 9 edge cross products).
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool XM_CALLCONV BoundingBox::Intersects(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept
+{
+    XMVECTOR Zero = XMVectorZero();
+
+    // Load the box.
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vExtents = XMLoadFloat3(&Extents);
+
+    XMVECTOR BoxMin = XMVectorSubtract(vCenter, vExtents);
+    XMVECTOR BoxMax = XMVectorAdd(vCenter, vExtents);
+
+    // Test the axes of the box (in effect test the AAB against the minimal AAB
+    // around the triangle).
+    XMVECTOR TriMin = XMVectorMin(XMVectorMin(V0, V1), V2);
+    XMVECTOR TriMax = XMVectorMax(XMVectorMax(V0, V1), V2);
+
+    // for each i in (x, y, z) if a_min(i) > b_max(i) or b_min(i) > a_max(i) then disjoint
+    XMVECTOR Disjoint = XMVectorOrInt(XMVectorGreater(TriMin, BoxMax), XMVectorGreater(BoxMin, TriMax));
+    if (DirectX::Internal::XMVector3AnyTrue(Disjoint))
+        return false;
+
+    // Test the plane of the triangle (Dist is the normal dotted with a point on it).
+    XMVECTOR Normal = XMVector3Cross(XMVectorSubtract(V1, V0), XMVectorSubtract(V2, V0));
+    XMVECTOR Dist = XMVector3Dot(Normal, V0);
+
+    // Assert that the triangle is not degenerate.
+    assert(!XMVector3Equal(Normal, Zero));
+
+    // for each i in (x, y, z) if n(i) > 0 then v_min(i)=b_min(i), v_max(i)=b_max(i)
+    // else v_min(i)=b_max(i), v_max(i)=b_min(i)
+    XMVECTOR NormalSelect = XMVectorGreater(Normal, Zero);
+    XMVECTOR V_Min = XMVectorSelect(BoxMax, BoxMin, NormalSelect);
+    XMVECTOR V_Max = XMVectorSelect(BoxMin, BoxMax, NormalSelect);
+
+    // if n dot v_min > n dot V0 || n dot v_max < n dot V0 then disjoint
+    XMVECTOR MinDist = XMVector3Dot(V_Min, Normal);
+    XMVECTOR MaxDist = XMVector3Dot(V_Max, Normal);
+
+    XMVECTOR NoIntersection = XMVectorGreater(MinDist, Dist);
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(MaxDist, Dist));
+
+    // Move the box center to zero to simplify the following tests.
+    XMVECTOR TV0 = XMVectorSubtract(V0, vCenter);
+    XMVECTOR TV1 = XMVectorSubtract(V1, vCenter);
+    XMVECTOR TV2 = XMVectorSubtract(V2, vCenter);
+
+    // Test the edge/edge axes (3*3).
+    XMVECTOR e0 = XMVectorSubtract(TV1, TV0);
+    XMVECTOR e1 = XMVectorSubtract(TV2, TV1);
+    XMVECTOR e2 = XMVectorSubtract(TV0, TV2);
+
+    // Make w zero; the permutes below read this w to produce the 0 component of each axis.
+    e0 = XMVectorInsert<0, 0, 0, 0, 1>(e0, Zero);
+    e1 = XMVectorInsert<0, 0, 0, 0, 1>(e1, Zero);
+    e2 = XMVectorInsert<0, 0, 0, 0, 1>(e2, Zero);
+
+    XMVECTOR Axis;
+    XMVECTOR p0, p1, p2;
+    XMVECTOR Min, Max;
+    XMVECTOR Radius;
+
+    // Axis == (1,0,0) x e0 = (0, -e0.z, e0.y)
+    Axis = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_0X>(e0, XMVectorNegate(e0));
+    p0 = XMVector3Dot(TV0, Axis);
+    // p1 is skipped: e0 = TV1 - TV0 is perpendicular to Axis, so p1 == p0.
+    p2 = XMVector3Dot(TV2, Axis);
+    Min = XMVectorMin(p0, p2);
+    Max = XMVectorMax(p0, p2);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (1,0,0) x e1 = (0, -e1.z, e1.y)
+    Axis = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_0X>(e1, XMVectorNegate(e1));
+    p0 = XMVector3Dot(TV0, Axis);
+    p1 = XMVector3Dot(TV1, Axis);
+    // p2 is skipped: e1 = TV2 - TV1 is perpendicular to Axis, so p2 == p1.
+    Min = XMVectorMin(p0, p1);
+    Max = XMVectorMax(p0, p1);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (1,0,0) x e2 = (0, -e2.z, e2.y)
+    Axis = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_0X>(e2, XMVectorNegate(e2));
+    p0 = XMVector3Dot(TV0, Axis);
+    p1 = XMVector3Dot(TV1, Axis);
+    // p2 is skipped: e2 = TV0 - TV2 is perpendicular to Axis, so p2 == p0.
+    Min = XMVectorMin(p0, p1);
+    Max = XMVectorMax(p0, p1);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (0,1,0) x e0 = (e0.z, 0, -e0.x)
+    Axis = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_0Y>(e0, XMVectorNegate(e0));
+    p0 = XMVector3Dot(TV0, Axis);
+    // p1 is skipped: e0 = TV1 - TV0 is perpendicular to Axis, so p1 == p0.
+    p2 = XMVector3Dot(TV2, Axis);
+    Min = XMVectorMin(p0, p2);
+    Max = XMVectorMax(p0, p2);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (0,1,0) x e1 = (e1.z, 0, -e1.x)
+    Axis = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_0Y>(e1, XMVectorNegate(e1));
+    p0 = XMVector3Dot(TV0, Axis);
+    p1 = XMVector3Dot(TV1, Axis);
+    // p2 is skipped: e1 = TV2 - TV1 is perpendicular to Axis, so p2 == p1.
+    Min = XMVectorMin(p0, p1);
+    Max = XMVectorMax(p0, p1);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (0,1,0) x e2 = (e2.z, 0, -e2.x)
+    Axis = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_0Y>(e2, XMVectorNegate(e2));
+    p0 = XMVector3Dot(TV0, Axis);
+    p1 = XMVector3Dot(TV1, Axis);
+    // p2 is skipped: e2 = TV0 - TV2 is perpendicular to Axis, so p2 == p0.
+    Min = XMVectorMin(p0, p1);
+    Max = XMVectorMax(p0, p1);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (0,0,1) x e0 = (-e0.y, e0.x, 0)
+    Axis = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0Z>(e0, XMVectorNegate(e0));
+    p0 = XMVector3Dot(TV0, Axis);
+    // p1 is skipped: e0 = TV1 - TV0 is perpendicular to Axis, so p1 == p0.
+    p2 = XMVector3Dot(TV2, Axis);
+    Min = XMVectorMin(p0, p2);
+    Max = XMVectorMax(p0, p2);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (0,0,1) x e1 = (-e1.y, e1.x, 0)
+    Axis = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0Z>(e1, XMVectorNegate(e1));
+    p0 = XMVector3Dot(TV0, Axis);
+    p1 = XMVector3Dot(TV1, Axis);
+    // p2 is skipped: e1 = TV2 - TV1 is perpendicular to Axis, so p2 == p1.
+    Min = XMVectorMin(p0, p1);
+    Max = XMVectorMax(p0, p1);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (0,0,1) x e2 = (-e2.y, e2.x, 0)
+    Axis = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0Z>(e2, XMVectorNegate(e2));
+    p0 = XMVector3Dot(TV0, Axis);
+    p1 = XMVector3Dot(TV1, Axis);
+    // p2 is skipped: e2 = TV0 - TV2 is perpendicular to Axis, so p2 == p0.
+    Min = XMVectorMin(p0, p1);
+    Max = XMVectorMax(p0, p1);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    return XMVector4NotEqualInt(NoIntersection, XMVectorTrueInt());
+}
+
+
+//-----------------------------------------------------------------------------
+// Axis-aligned box vs. plane: FRONT, BACK or INTERSECTING. Plane must be unit.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PlaneIntersectionType XM_CALLCONV BoundingBox::Intersects(FXMVECTOR Plane) const noexcept
+{
+    assert(DirectX::Internal::XMPlaneIsUnit(Plane));
+
+    // Extents as loaded; the center gets w = 1 so it can be dot4'd with the plane.
+    const XMVECTOR BoxExtents = XMLoadFloat3(&Extents);
+    const XMVECTOR BoxCenter = XMVectorInsert<0, 0, 0, 0, 1>(XMLoadFloat3(&Center), XMVectorSplatOne());
+
+    XMVECTOR IsOutside, IsInside;
+    DirectX::Internal::FastIntersectAxisAlignedBoxPlane(BoxCenter, BoxExtents, Plane, IsOutside, IsInside);
+
+    const XMVECTOR AllTrue = XMVectorTrueInt();
+
+    // Entirely outside the plane.
+    if (XMVector4EqualInt(IsOutside, AllTrue))
+        return FRONT;
+
+    // Entirely inside the plane.
+    if (XMVector4EqualInt(IsInside, AllTrue))
+        return BACK;
+
+    return INTERSECTING;
+}
+
+
+//-----------------------------------------------------------------------------
+// Ray (Origin, Direction) versus axis-aligned box using the slabs method.
+// Direction must be unit length (asserted). On a hit, returns true and writes
+// the entry parameter to Dist; on a miss, returns false and sets Dist to 0.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool XM_CALLCONV BoundingBox::Intersects(FXMVECTOR Origin, FXMVECTOR Direction, float& Dist) const noexcept
+{
+    assert(DirectX::Internal::XMVector3IsUnit(Direction));
+
+    // Load the box.
+    const XMVECTOR BoxCenter = XMLoadFloat3(&Center);
+    const XMVECTOR BoxExtents = XMLoadFloat3(&Extents);
+
+    // Box center relative to the ray origin. The box axes are the coordinate
+    // axes, so the per-axis dot products are just the vector components.
+    const XMVECTOR ToCenter = XMVectorSubtract(BoxCenter, Origin);
+
+    // Lanes where the ray is (nearly) parallel to the corresponding slab.
+    const XMVECTOR Parallel = XMVectorLessOrEqual(XMVectorAbs(Direction), g_RayEpsilon);
+
+    // Ray parameters at both planes of each slab, all three axes at once.
+    const XMVECTOR InvDir = XMVectorReciprocal(Direction);
+    const XMVECTOR tA = XMVectorMultiply(XMVectorSubtract(ToCenter, BoxExtents), InvDir);
+    const XMVECTOR tB = XMVectorMultiply(XMVectorAdd(ToCenter, BoxExtents), InvDir);
+
+    // Per-axis entry/exit parameters; parallel lanes are replaced by
+    // g_FltMin / g_FltMax so their results are not used in the reductions.
+    XMVECTOR tEnter = XMVectorSelect(XMVectorMin(tA, tB), g_FltMin, Parallel);
+    XMVECTOR tExit = XMVectorSelect(XMVectorMax(tA, tB), g_FltMax, Parallel);
+
+    // Reduce into x: tEnter.x = max(x, y, z), tExit.x = min(x, y, z).
+    tEnter = XMVectorMax(tEnter, XMVectorSplatY(tEnter));
+    tEnter = XMVectorMax(tEnter, XMVectorSplatZ(tEnter));
+    tExit = XMVectorMin(tExit, XMVectorSplatY(tExit));
+    tExit = XMVectorMin(tExit, XMVectorSplatZ(tExit));
+
+    // Broadcast the reduced values for the lane-wise comparisons.
+    const XMVECTOR EnterX = XMVectorSplatX(tEnter);
+    const XMVECTOR ExitX = XMVectorSplatX(tExit);
+
+    // Miss: entry after exit.
+    XMVECTOR Miss = XMVectorGreater(EnterX, ExitX);
+
+    // Miss: exit parameter is negative.
+    Miss = XMVectorOrInt(Miss, XMVectorLess(ExitX, XMVectorZero()));
+
+    // Miss: parallel to a slab while the origin lies outside that slab.
+    const XMVECTOR OriginInSlab = XMVectorInBounds(ToCenter, BoxExtents);
+    Miss = XMVectorOrInt(Miss, XMVectorAndCInt(Parallel, OriginInSlab));
+
+    if (DirectX::Internal::XMVector3AnyTrue(Miss))
+    {
+        Dist = 0.f;
+        return false;
+    }
+
+    // XMStoreFloat writes only the x component.
+    XMStoreFloat(&Dist, tEnter);
+    return true;
+}
+
+
+//-----------------------------------------------------------------------------
+// Classify an axis aligned box against 6 planes (typically forming a frustum).
+//
+// Returns DISJOINT when the accumulated "outside" mask is fully set, CONTAINS
+// when the accumulated "inside" mask is fully set, and INTERSECTS otherwise.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType XM_CALLCONV BoundingBox::ContainedBy(
+    FXMVECTOR Plane0, FXMVECTOR Plane1, FXMVECTOR Plane2,
+    GXMVECTOR Plane3,
+    HXMVECTOR Plane4, HXMVECTOR Plane5) const noexcept
+{
+    // Box center (w forced to one so it can be dot4'd with a plane) and extents.
+    const XMVECTOR boxCenter = XMVectorInsert<0, 0, 0, 0, 1>(XMLoadFloat3(&Center), XMVectorSplatOne());
+    const XMVECTOR boxExtents = XMLoadFloat3(&Extents);
+
+    // The first plane seeds the running masks.
+    XMVECTOR outsideAny, insideAll;
+    DirectX::Internal::FastIntersectAxisAlignedBoxPlane(boxCenter, boxExtents, Plane0, outsideAny, insideAll);
+
+    // Fold in the remaining planes: OR the outside masks, AND the inside masks.
+    const XMVECTOR remainingPlanes[] = { Plane1, Plane2, Plane3, Plane4, Plane5 };
+    for (const XMVECTOR& plane : remainingPlanes)
+    {
+        XMVECTOR planeOutside, planeInside;
+        DirectX::Internal::FastIntersectAxisAlignedBoxPlane(boxCenter, boxExtents, plane, planeOutside, planeInside);
+
+        outsideAny = XMVectorOrInt(outsideAny, planeOutside);
+        insideAll = XMVectorAndInt(insideAll, planeInside);
+    }
+
+    // Outside any single plane means outside the whole volume.
+    if (XMVector4EqualInt(outsideAny, XMVectorTrueInt()))
+    {
+        return DISJOINT;
+    }
+
+    // Inside every plane means fully contained.
+    if (XMVector4EqualInt(insideAll, XMVectorTrueInt()))
+    {
+        return CONTAINS;
+    }
+
+    // Neither fully outside nor fully inside: the box may intersect.
+    return INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Build the axis-aligned box that encloses both b1 and b2 and store it in Out.
+// Both inputs are fully loaded before Out is written.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingBox::CreateMerged(BoundingBox& Out, const BoundingBox& b1, const BoundingBox& b2) noexcept
+{
+    const XMVECTOR firstCenter = XMLoadFloat3(&b1.Center);
+    const XMVECTOR firstExtents = XMLoadFloat3(&b1.Extents);
+    const XMVECTOR secondCenter = XMLoadFloat3(&b2.Center);
+    const XMVECTOR secondExtents = XMLoadFloat3(&b2.Extents);
+
+    // Per-component lower and upper corners over both boxes.
+    const XMVECTOR lower = XMVectorMin(
+        XMVectorSubtract(firstCenter, firstExtents),
+        XMVectorSubtract(secondCenter, secondExtents));
+    const XMVECTOR upper = XMVectorMax(
+        XMVectorAdd(firstCenter, firstExtents),
+        XMVectorAdd(secondCenter, secondExtents));
+
+    assert(XMVector3LessOrEqual(lower, upper));
+
+    // Convert the corner pair back to center / half-size form.
+    const XMVECTOR mergedCenter = XMVectorScale(XMVectorAdd(lower, upper), 0.5f);
+    const XMVECTOR mergedExtents = XMVectorScale(XMVectorSubtract(upper, lower), 0.5f);
+
+    XMStoreFloat3(&Out.Center, mergedCenter);
+    XMStoreFloat3(&Out.Extents, mergedExtents);
+}
+
+
+//-----------------------------------------------------------------------------
+// Build the axis-aligned box that encloses the bounding sphere sh and store
+// it in Out.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingBox::CreateFromSphere(BoundingBox& Out, const BoundingSphere& sh) noexcept
+{
+    const XMVECTOR sphereCenter = XMLoadFloat3(&sh.Center);
+    const XMVECTOR sphereRadius = XMVectorReplicatePtr(&sh.Radius);
+
+    // Corners of the box: center -/+ radius on every axis.
+    const XMVECTOR lower = XMVectorSubtract(sphereCenter, sphereRadius);
+    const XMVECTOR upper = XMVectorAdd(sphereCenter, sphereRadius);
+
+    assert(XMVector3LessOrEqual(lower, upper));
+
+    // Convert the corner pair back to center / half-size form.
+    const XMVECTOR boxCenter = XMVectorScale(XMVectorAdd(lower, upper), 0.5f);
+    const XMVECTOR boxExtents = XMVectorScale(XMVectorSubtract(upper, lower), 0.5f);
+
+    XMStoreFloat3(&Out.Center, boxCenter);
+    XMStoreFloat3(&Out.Extents, boxExtents);
+}
+
+
+//-----------------------------------------------------------------------------
+// Build an axis-aligned box from two points and store it in Out. The points
+// need not be ordered: the per-component minimum and maximum are taken first.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void XM_CALLCONV BoundingBox::CreateFromPoints(BoundingBox& Out, FXMVECTOR pt1, FXMVECTOR pt2) noexcept
+{
+    const XMVECTOR lower = XMVectorMin(pt1, pt2);
+    const XMVECTOR upper = XMVectorMax(pt1, pt2);
+
+    // Center is the midpoint of the corners, extents are half their difference.
+    const XMVECTOR boxCenter = XMVectorScale(XMVectorAdd(lower, upper), 0.5f);
+    const XMVECTOR boxExtents = XMVectorScale(XMVectorSubtract(upper, lower), 0.5f);
+
+    XMStoreFloat3(&Out.Center, boxCenter);
+    XMStoreFloat3(&Out.Extents, boxExtents);
+}
+
+
+//-----------------------------------------------------------------------------
+// Find the minimum axis aligned bounding box containing a set of points.
+//
+// Out     - receives the resulting box.
+// Count   - number of points; asserted to be greater than zero.
+// pPoints - address of the first point; asserted to be non-null.
+// Stride  - distance in bytes between the start of consecutive points.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingBox::CreateFromPoints(BoundingBox& Out, size_t Count, const XMFLOAT3* pPoints, size_t Stride) noexcept
+{
+    assert(Count > 0);
+    assert(pPoints);
+
+    // Find the minimum and maximum x, y, and z.
+    // The first point seeds both bounds, so the loop starts at index 1.
+    XMVECTOR vMin, vMax;
+
+    vMin = vMax = XMLoadFloat3(pPoints);
+
+    for (size_t i = 1; i < Count; ++i)
+    {
+        // Stride is a byte count, so the address is computed on a byte pointer
+        // and only then viewed as an XMFLOAT3 again.
+        XMVECTOR Point = XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(reinterpret_cast<const unsigned char*>(pPoints) + i * Stride));
+
+        vMin = XMVectorMin(vMin, Point);
+        vMax = XMVectorMax(vMax, Point);
+    }
+
+    // Store center and extents.
+    XMStoreFloat3(&Out.Center, XMVectorScale(XMVectorAdd(vMin, vMax), 0.5f));
+    XMStoreFloat3(&Out.Extents, XMVectorScale(XMVectorSubtract(vMax, vMin), 0.5f));
+}
+
+
+/****************************************************************************
+ *
+ * BoundingOrientedBox
+ *
+ ****************************************************************************/
+
+ //-----------------------------------------------------------------------------
+ // Transform an oriented box by an angle preserving transform.
+ //
+ // Out - receives the transformed box; it is written only after every member
+ //       of this box has been loaded.
+ // M   - transform matrix. Rows 0-2 are normalized to extract the rotation,
+ //       and their lengths are used as per-axis scale factors for the extents.
+ //-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void XM_CALLCONV BoundingOrientedBox::Transform(BoundingOrientedBox& Out, FXMMATRIX M) const noexcept
+{
+ // Load the box.
+ XMVECTOR vCenter = XMLoadFloat3(&Center);
+ XMVECTOR vExtents = XMLoadFloat3(&Extents);
+ XMVECTOR vOrientation = XMLoadFloat4(&Orientation);
+
+ assert(DirectX::Internal::XMQuaternionIsUnit(vOrientation));
+
+ // Composite the box rotation and the transform rotation.
+ // nM is M with rows 0-2 normalized and row 3 replaced by g_XMIdentityR3, so
+ // M's own fourth row is not passed on to XMQuaternionRotationMatrix.
+ XMMATRIX nM;
+ nM.r[0] = XMVector3Normalize(M.r[0]);
+ nM.r[1] = XMVector3Normalize(M.r[1]);
+ nM.r[2] = XMVector3Normalize(M.r[2]);
+ nM.r[3] = g_XMIdentityR3;
+ XMVECTOR Rotation = XMQuaternionRotationMatrix(nM);
+ vOrientation = XMQuaternionMultiply(vOrientation, Rotation);
+
+ // Transform the center.
+ vCenter = XMVector3Transform(vCenter, M);
+
+ // Scale the box extents.
+ XMVECTOR dX = XMVector3Length(M.r[0]);
+ XMVECTOR dY = XMVector3Length(M.r[1]);
+ XMVECTOR dZ = XMVector3Length(M.r[2]);
+
+ // Assemble one scale factor per axis from the three row lengths.
+ // NOTE(review): this yields (dX, dY, dZ) only if XMVectorSelect takes its
+ // second operand where the control mask is set - confirm against the docs.
+ XMVECTOR VectorScale = XMVectorSelect(dY, dX, g_XMSelect1000);
+ VectorScale = XMVectorSelect(dZ, VectorScale, g_XMSelect1100);
+ vExtents = XMVectorMultiply(vExtents, VectorScale);
+
+ // Store the box.
+ XMStoreFloat3(&Out.Center, vCenter);
+ XMStoreFloat3(&Out.Extents, vExtents);
+ XMStoreFloat4(&Out.Orientation, vOrientation);
+}
+
+//-----------------------------------------------------------------------------
+// Transform an oriented box by a uniform scale, a rotation quaternion and a
+// translation, writing the result to Out. Both the Rotation argument and the
+// box's own orientation are asserted to be unit quaternions.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void XM_CALLCONV BoundingOrientedBox::Transform(BoundingOrientedBox& Out, float Scale, FXMVECTOR Rotation, FXMVECTOR Translation) const noexcept
+{
+    assert(DirectX::Internal::XMQuaternionIsUnit(Rotation));
+
+    // Read every member up front so Out can be written safely afterwards.
+    const XMVECTOR boxCenter = XMLoadFloat3(&Center);
+    const XMVECTOR boxExtents = XMLoadFloat3(&Extents);
+    const XMVECTOR boxOrientation = XMLoadFloat4(&Orientation);
+
+    assert(DirectX::Internal::XMQuaternionIsUnit(boxOrientation));
+
+    const XMVECTOR uniformScale = XMVectorReplicate(Scale);
+
+    // Orientation: box rotation composed with the transform rotation.
+    const XMVECTOR newOrientation = XMQuaternionMultiply(boxOrientation, Rotation);
+
+    // Center: scale, then rotate, then translate.
+    const XMVECTOR scaledCenter = XMVectorMultiply(boxCenter, uniformScale);
+    const XMVECTOR rotatedCenter = XMVector3Rotate(scaledCenter, Rotation);
+    const XMVECTOR newCenter = XMVectorAdd(rotatedCenter, Translation);
+
+    // Extents: only the scale applies.
+    const XMVECTOR newExtents = XMVectorMultiply(boxExtents, uniformScale);
+
+    XMStoreFloat3(&Out.Center, newCenter);
+    XMStoreFloat3(&Out.Extents, newExtents);
+    XMStoreFloat4(&Out.Orientation, newOrientation);
+}
+
+
+//-----------------------------------------------------------------------------
+// Write the CORNER_COUNT corner points of the box to Corners.
+// Corners is asserted to be non-null.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingOrientedBox::GetCorners(XMFLOAT3* Corners) const noexcept
+{
+    assert(Corners != nullptr);
+
+    const XMVECTOR boxCenter = XMLoadFloat3(&Center);
+    const XMVECTOR boxExtents = XMLoadFloat3(&Extents);
+    const XMVECTOR boxOrientation = XMLoadFloat4(&Orientation);
+
+    assert(DirectX::Internal::XMQuaternionIsUnit(boxOrientation));
+
+    for (size_t corner = 0; corner < CORNER_COUNT; ++corner)
+    {
+        // Extents times the corner's offset entry, rotated by the box
+        // orientation, then moved to the box center.
+        const XMVECTOR localOffset = XMVectorMultiply(boxExtents, g_BoxOffset[corner]);
+        const XMVECTOR rotatedOffset = XMVector3Rotate(localOffset, boxOrientation);
+        const XMVECTOR cornerPos = XMVectorAdd(rotatedOffset, boxCenter);
+
+        XMStoreFloat3(&Corners[corner], cornerPos);
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+// Point in oriented box test: CONTAINS when the point, expressed in the box's
+// local frame, passes the bounds test against the extents; DISJOINT otherwise.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType XM_CALLCONV BoundingOrientedBox::Contains(FXMVECTOR Point) const noexcept
+{
+    const XMVECTOR boxCenter = XMLoadFloat3(&Center);
+    const XMVECTOR boxExtents = XMLoadFloat3(&Extents);
+    const XMVECTOR boxOrientation = XMLoadFloat4(&Orientation);
+
+    // Bring the point into the box's local frame: undo the translation first,
+    // then the rotation.
+    const XMVECTOR fromCenter = XMVectorSubtract(Point, boxCenter);
+    const XMVECTOR localPoint = XMVector3InverseRotate(fromCenter, boxOrientation);
+
+    if (XMVector3InBounds(localPoint, boxExtents))
+    {
+        return CONTAINS;
+    }
+
+    return DISJOINT;
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle in oriented bounding box. The triangle is moved into the box's
+// local frame and the test is delegated to the axis aligned box routine.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType XM_CALLCONV BoundingOrientedBox::Contains(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept
+{
+    // Axis aligned stand-in for this box: same extents, centered at the origin.
+    BoundingBox localBox;
+    localBox.Center = XMFLOAT3(0.0f, 0.0f, 0.0f);
+    localBox.Extents = Extents;
+
+    const XMVECTOR boxCenter = XMLoadFloat3(&Center);
+    const XMVECTOR boxOrientation = XMLoadFloat4(&Orientation);
+
+    // Express each vertex relative to the box center, then undo the rotation.
+    const XMVECTOR local0 = XMVector3InverseRotate(XMVectorSubtract(V0, boxCenter), boxOrientation);
+    const XMVECTOR local1 = XMVector3InverseRotate(XMVectorSubtract(V1, boxCenter), boxOrientation);
+    const XMVECTOR local2 = XMVector3InverseRotate(XMVectorSubtract(V2, boxCenter), boxOrientation);
+
+    return localBox.Contains(local0, local1, local2);
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere in oriented bounding box
+//
+// sh - sphere to classify against this box.
+//
+// Returns DISJOINT when the squared distance from the sphere center to the
+// box exceeds the squared radius, CONTAINS when both SMin and SMax pass the
+// bounds test against the extents, and INTERSECTS otherwise.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingOrientedBox::Contains(const BoundingSphere& sh) const noexcept
+{
+ XMVECTOR SphereCenter = XMLoadFloat3(&sh.Center);
+ XMVECTOR SphereRadius = XMVectorReplicatePtr(&sh.Radius);
+
+ XMVECTOR BoxCenter = XMLoadFloat3(&Center);
+ XMVECTOR BoxExtents = XMLoadFloat3(&Extents);
+ XMVECTOR BoxOrientation = XMLoadFloat4(&Orientation);
+
+ assert(DirectX::Internal::XMQuaternionIsUnit(BoxOrientation));
+
+ // Transform the center of the sphere to be local to the box.
+ // BoxMin = -BoxExtents
+ // BoxMax = +BoxExtents
+ SphereCenter = XMVector3InverseRotate(XMVectorSubtract(SphereCenter, BoxCenter), BoxOrientation);
+
+ // Find the distance to the nearest point on the box.
+ // for each i in (x, y, z)
+ // if (SphereCenter(i) < BoxMin(i)) d2 += (SphereCenter(i) - BoxMin(i)) ^ 2
+ // else if (SphereCenter(i) > BoxMax(i)) d2 += (SphereCenter(i) - BoxMax(i)) ^ 2
+
+ // d starts at zero so axes where the center lies between BoxMin and BoxMax
+ // add nothing to the squared distance.
+ XMVECTOR d = XMVectorZero();
+
+ // Compute d for each dimension.
+ XMVECTOR LessThanMin = XMVectorLess(SphereCenter, XMVectorNegate(BoxExtents));
+ XMVECTOR GreaterThanMax = XMVectorGreater(SphereCenter, BoxExtents);
+
+ // SphereCenter - BoxMin and SphereCenter - BoxMax respectively.
+ XMVECTOR MinDelta = XMVectorAdd(SphereCenter, BoxExtents);
+ XMVECTOR MaxDelta = XMVectorSubtract(SphereCenter, BoxExtents);
+
+ // Choose value for each dimension based on the comparison.
+ d = XMVectorSelect(d, MinDelta, LessThanMin);
+ d = XMVectorSelect(d, MaxDelta, GreaterThanMax);
+
+ // Use a dot-product to square them and sum them together.
+ XMVECTOR d2 = XMVector3Dot(d, d);
+ XMVECTOR SphereRadiusSq = XMVectorMultiply(SphereRadius, SphereRadius);
+
+ if (XMVector4Greater(d2, SphereRadiusSq))
+ return DISJOINT;
+
+ // See if we are completely inside the box
+ // SMin / SMax are the sphere center moved by -radius / +radius on every
+ // axis, still in the box's local frame.
+ XMVECTOR SMin = XMVectorSubtract(SphereCenter, SphereRadius);
+ XMVECTOR SMax = XMVectorAdd(SphereCenter, SphereRadius);
+
+ return (XMVector3InBounds(SMin, BoxExtents) && XMVector3InBounds(SMax, BoxExtents)) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Axis aligned box in oriented box. The axis aligned box is wrapped in an
+// oriented box with orientation (0, 0, 0, 1) and passed to the oriented box
+// overload.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingOrientedBox::Contains(const BoundingBox& box) const noexcept
+{
+    const XMFLOAT4 noRotation(0.f, 0.f, 0.f, 1.f);
+    const BoundingOrientedBox orientedCopy(box.Center, box.Extents, noRotation);
+
+    return Contains(orientedCopy);
+}
+
+
+//-----------------------------------------------------------------------------
+// Oriented bounding box in oriented bounding box.
+//
+// Returns DISJOINT when the boxes do not intersect, INTERSECTS as soon as one
+// corner of `box` fails the bounds test in this box's local frame, and
+// CONTAINS when all CORNER_COUNT corners pass.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingOrientedBox::Contains(const BoundingOrientedBox& box) const noexcept
+{
+    if (!Intersects(box))
+    {
+        return DISJOINT;
+    }
+
+    // This (outer) box.
+    const XMVECTOR outerCenter = XMLoadFloat3(&Center);
+    const XMVECTOR outerExtents = XMLoadFloat3(&Extents);
+    const XMVECTOR outerOrientation = XMLoadFloat4(&Orientation);
+
+    assert(DirectX::Internal::XMQuaternionIsUnit(outerOrientation));
+
+    // The candidate (inner) box.
+    const XMVECTOR innerCenter = XMLoadFloat3(&box.Center);
+    const XMVECTOR innerExtents = XMLoadFloat3(&box.Extents);
+    const XMVECTOR innerOrientation = XMLoadFloat4(&box.Orientation);
+
+    assert(DirectX::Internal::XMQuaternionIsUnit(innerOrientation));
+
+    const XMVECTOR centerDelta = XMVectorSubtract(innerCenter, outerCenter);
+
+    for (size_t corner = 0; corner < CORNER_COUNT; ++corner)
+    {
+        // Corner of the inner box relative to the outer center ...
+        const XMVECTOR scaledOffset = XMVectorMultiply(innerExtents, g_BoxOffset[corner]);
+        const XMVECTOR relativeCorner = XMVectorAdd(XMVector3Rotate(scaledOffset, innerOrientation), centerDelta);
+
+        // ... expressed in the outer box's local frame.
+        const XMVECTOR localCorner = XMVector3InverseRotate(relativeCorner, outerOrientation);
+
+        if (!XMVector3InBounds(localCorner, outerExtents))
+        {
+            return INTERSECTS;
+        }
+    }
+
+    return CONTAINS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Frustum in oriented bounding box.
+//
+// Returns DISJOINT when the frustum does not intersect this box, INTERSECTS
+// as soon as one frustum corner fails the bounds test in the box's local
+// frame, and CONTAINS when every corner passes.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingOrientedBox::Contains(const BoundingFrustum& fr) const noexcept
+{
+    if (!fr.Intersects(*this))
+    {
+        return DISJOINT;
+    }
+
+    XMFLOAT3 frustumCorners[BoundingFrustum::CORNER_COUNT];
+    fr.GetCorners(frustumCorners);
+
+    const XMVECTOR boxCenter = XMLoadFloat3(&Center);
+    const XMVECTOR boxExtents = XMLoadFloat3(&Extents);
+    const XMVECTOR boxOrientation = XMLoadFloat4(&Orientation);
+
+    assert(DirectX::Internal::XMQuaternionIsUnit(boxOrientation));
+
+    for (size_t corner = 0; corner < BoundingFrustum::CORNER_COUNT; ++corner)
+    {
+        // Move the corner into the box's local frame before the bounds test.
+        const XMVECTOR fromCenter = XMVectorSubtract(XMLoadFloat3(&frustumCorners[corner]), boxCenter);
+        const XMVECTOR localCorner = XMVector3InverseRotate(fromCenter, boxOrientation);
+
+        if (!XMVector3InBounds(localCorner, boxExtents))
+        {
+            return INTERSECTS;
+        }
+    }
+
+    return CONTAINS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere vs. oriented box test
+//
+// sh - sphere to test against this box.
+//
+// Returns true when the squared distance from the sphere center to the box
+// is less than or equal to the squared radius.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingOrientedBox::Intersects(const BoundingSphere& sh) const noexcept
+{
+ XMVECTOR SphereCenter = XMLoadFloat3(&sh.Center);
+ XMVECTOR SphereRadius = XMVectorReplicatePtr(&sh.Radius);
+
+ XMVECTOR BoxCenter = XMLoadFloat3(&Center);
+ XMVECTOR BoxExtents = XMLoadFloat3(&Extents);
+ XMVECTOR BoxOrientation = XMLoadFloat4(&Orientation);
+
+ assert(DirectX::Internal::XMQuaternionIsUnit(BoxOrientation));
+
+ // Transform the center of the sphere to be local to the box.
+ // BoxMin = -BoxExtents
+ // BoxMax = +BoxExtents
+ SphereCenter = XMVector3InverseRotate(XMVectorSubtract(SphereCenter, BoxCenter), BoxOrientation);
+
+ // Find the distance to the nearest point on the box.
+ // for each i in (x, y, z)
+ // if (SphereCenter(i) < BoxMin(i)) d2 += (SphereCenter(i) - BoxMin(i)) ^ 2
+ // else if (SphereCenter(i) > BoxMax(i)) d2 += (SphereCenter(i) - BoxMax(i)) ^ 2
+
+ // d starts at zero so axes where the center lies between BoxMin and BoxMax
+ // add nothing to the squared distance.
+ XMVECTOR d = XMVectorZero();
+
+ // Compute d for each dimension.
+ XMVECTOR LessThanMin = XMVectorLess(SphereCenter, XMVectorNegate(BoxExtents));
+ XMVECTOR GreaterThanMax = XMVectorGreater(SphereCenter, BoxExtents);
+
+ // SphereCenter - BoxMin and SphereCenter - BoxMax respectively.
+ XMVECTOR MinDelta = XMVectorAdd(SphereCenter, BoxExtents);
+ XMVECTOR MaxDelta = XMVectorSubtract(SphereCenter, BoxExtents);
+
+ // Choose value for each dimension based on the comparison.
+ d = XMVectorSelect(d, MinDelta, LessThanMin);
+ d = XMVectorSelect(d, MaxDelta, GreaterThanMax);
+
+ // Use a dot-product to square them and sum them together.
+ XMVECTOR d2 = XMVector3Dot(d, d);
+
+ // Squared distance is compared with squared radius, so no square root is
+ // taken; the ternary turns the comparison result into a plain bool.
+ return XMVector4LessOrEqual(d2, XMVectorMultiply(SphereRadius, SphereRadius)) ? true : false;
+}
+
+
+//-----------------------------------------------------------------------------
+// Axis aligned box vs. oriented box. The axis aligned box is wrapped in an
+// oriented box with orientation (0, 0, 0, 1) and passed to the oriented box
+// vs. oriented box test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingOrientedBox::Intersects(const BoundingBox& box) const noexcept
+{
+    const XMFLOAT4 noRotation(0.f, 0.f, 0.f, 1.f);
+    const BoundingOrientedBox orientedCopy(box.Center, box.Extents, noRotation);
+
+    return Intersects(orientedCopy);
+}
+
+
+//-----------------------------------------------------------------------------
+// Fast oriented box / oriented box intersection test using the separating axis
+// theorem.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingOrientedBox::Intersects(const BoundingOrientedBox& box) const noexcept
+{
+ // Build the 3x3 rotation matrix that defines the orientation of B relative to A.
+ XMVECTOR A_quat = XMLoadFloat4(&Orientation);
+ XMVECTOR B_quat = XMLoadFloat4(&box.Orientation);
+
+ assert(DirectX::Internal::XMQuaternionIsUnit(A_quat));
+ assert(DirectX::Internal::XMQuaternionIsUnit(B_quat));
+
+ XMVECTOR Q = XMQuaternionMultiply(A_quat, XMQuaternionConjugate(B_quat));
+ XMMATRIX R = XMMatrixRotationQuaternion(Q);
+
+ // Compute the translation of B relative to A.
+ XMVECTOR A_cent = XMLoadFloat3(&Center);
+ XMVECTOR B_cent = XMLoadFloat3(&box.Center);
+ XMVECTOR t = XMVector3InverseRotate(XMVectorSubtract(B_cent, A_cent), A_quat);
+
+ //
+ // h(A) = extents of A.
+ // h(B) = extents of B.
+ //
+ // a(u) = axes of A = (1,0,0), (0,1,0), (0,0,1)
+ // b(u) = axes of B relative to A = (r00,r10,r20), (r01,r11,r21), (r02,r12,r22)
+ //
+ // For each possible separating axis l:
+ // d(A) = sum (for i = u,v,w) h(A)(i) * abs( a(i) dot l )
+ // d(B) = sum (for i = u,v,w) h(B)(i) * abs( b(i) dot l )
+ // if abs( t dot l ) > d(A) + d(B) then disjoint
+ //
+
+ // Load extents of A and B.
+ XMVECTOR h_A = XMLoadFloat3(&Extents);
+ XMVECTOR h_B = XMLoadFloat3(&box.Extents);
+
+ // Rows. Note R[0,1,2]X.w = 0.
+ XMVECTOR R0X = R.r[0];
+ XMVECTOR R1X = R.r[1];
+ XMVECTOR R2X = R.r[2];
+
+ R = XMMatrixTranspose(R);
+
+ // Columns. Note RX[0,1,2].w = 0.
+ XMVECTOR RX0 = R.r[0];
+ XMVECTOR RX1 = R.r[1];
+ XMVECTOR RX2 = R.r[2];
+
+ // Absolute value of rows.
+ XMVECTOR AR0X = XMVectorAbs(R0X);
+ XMVECTOR AR1X = XMVectorAbs(R1X);
+ XMVECTOR AR2X = XMVectorAbs(R2X);
+
+ // Absolute value of columns.
+ XMVECTOR ARX0 = XMVectorAbs(RX0);
+ XMVECTOR ARX1 = XMVectorAbs(RX1);
+ XMVECTOR ARX2 = XMVectorAbs(RX2);
+
+ // Test each of the 15 possible seperating axii.
+ XMVECTOR d, d_A, d_B;
+
+ // l = a(u) = (1, 0, 0)
+ // t dot l = t.x
+ // d(A) = h(A).x
+ // d(B) = h(B) dot abs(r00, r01, r02)
+ d = XMVectorSplatX(t);
+ d_A = XMVectorSplatX(h_A);
+ d_B = XMVector3Dot(h_B, AR0X);
+ XMVECTOR NoIntersection = XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B));
+
+ // l = a(v) = (0, 1, 0)
+ // t dot l = t.y
+ // d(A) = h(A).y
+ // d(B) = h(B) dot abs(r10, r11, r12)
+ d = XMVectorSplatY(t);
+ d_A = XMVectorSplatY(h_A);
+ d_B = XMVector3Dot(h_B, AR1X);
+ NoIntersection = XMVectorOrInt(NoIntersection,
+ XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B)));
+
+ // l = a(w) = (0, 0, 1)
+ // t dot l = t.z
+ // d(A) = h(A).z
+ // d(B) = h(B) dot abs(r20, r21, r22)
+ d = XMVectorSplatZ(t);
+ d_A = XMVectorSplatZ(h_A);
+ d_B = XMVector3Dot(h_B, AR2X);
+ NoIntersection = XMVectorOrInt(NoIntersection,
+ XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B)));
+
+ // l = b(u) = (r00, r10, r20)
+ // d(A) = h(A) dot abs(r00, r10, r20)
+ // d(B) = h(B).x
+ d = XMVector3Dot(t, RX0);
+ d_A = XMVector3Dot(h_A, ARX0);
+ d_B = XMVectorSplatX(h_B);
+ NoIntersection = XMVectorOrInt(NoIntersection,
+ XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B)));
+
+ // l = b(v) = (r01, r11, r21)
+ // d(A) = h(A) dot abs(r01, r11, r21)
+ // d(B) = h(B).y
+ d = XMVector3Dot(t, RX1);
+ d_A = XMVector3Dot(h_A, ARX1);
+ d_B = XMVectorSplatY(h_B);
+ NoIntersection = XMVectorOrInt(NoIntersection,
+ XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B)));
+
+ // l = b(w) = (r02, r12, r22)
+ // d(A) = h(A) dot abs(r02, r12, r22)
+ // d(B) = h(B).z
+ d = XMVector3Dot(t, RX2);
+ d_A = XMVector3Dot(h_A, ARX2);
+ d_B = XMVectorSplatZ(h_B);
+ NoIntersection = XMVectorOrInt(NoIntersection,
+ XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B)));
+
+ // l = a(u) x b(u) = (0, -r20, r10)
+ // d(A) = h(A) dot abs(0, r20, r10)
+ // d(B) = h(B) dot abs(0, r02, r01)
+ d = XMVector3Dot(t, XMVectorPermute(RX0, XMVectorNegate(RX0)));
+ d_A = XMVector3Dot(h_A, XMVectorSwizzle(ARX0));
+ d_B = XMVector3Dot(h_B, XMVectorSwizzle(AR0X));
+ NoIntersection = XMVectorOrInt(NoIntersection,
+ XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B)));
+
+ // l = a(u) x b(v) = (0, -r21, r11)
+ // d(A) = h(A) dot abs(0, r21, r11)
+ // d(B) = h(B) dot abs(r02, 0, r00)
+ d = XMVector3Dot(t, XMVectorPermute(RX1, XMVectorNegate(RX1)));
+ d_A = XMVector3Dot(h_A, XMVectorSwizzle(ARX1));
+ d_B = XMVector3Dot(h_B, XMVectorSwizzle(AR0X));
+ NoIntersection = XMVectorOrInt(NoIntersection,
+ XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B)));
+
+ // l = a(u) x b(w) = (0, -r22, r12)
+ // d(A) = h(A) dot abs(0, r22, r12)
+ // d(B) = h(B) dot abs(r01, r00, 0)
+ d = XMVector3Dot(t, XMVectorPermute(RX2, XMVectorNegate(RX2)));
+ d_A = XMVector3Dot(h_A, XMVectorSwizzle(ARX2));
+ d_B = XMVector3Dot(h_B, XMVectorSwizzle(AR0X));
+ NoIntersection = XMVectorOrInt(NoIntersection,
+ XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B)));
+
+ // l = a(v) x b(u) = (r20, 0, -r00)
+ // d(A) = h(A) dot abs(r20, 0, r00)
+ // d(B) = h(B) dot abs(0, r12, r11)
+ d = XMVector3Dot(t, XMVectorPermute(RX0, XMVectorNegate(RX0)));
+ d_A = XMVector3Dot(h_A, XMVectorSwizzle(ARX0));
+ d_B = XMVector3Dot(h_B, XMVectorSwizzle