From 71b675f5cbca450477badcc03f088622e1b037fd Mon Sep 17 00:00:00 2001 From: Nick Blakely Date: Sat, 23 Jan 2021 15:40:09 -0800 Subject: [PATCH] Update to new SDK. --- GPatch/CopyBinaries.bat | 19 - GPatch/DotNetChecker.nsh | 127 - GPatch/GPatch.ico | Bin 2238 -> 0 bytes GPatch/GPatch.nsi | 128 - GPatch/Launcher/GPatch.ico | Bin 2238 -> 0 bytes GPatch/Launcher/Launcher.nsi | 75 - .../DirectXMath/.nuget/directxmath.nuspec | 31 + .../DirectXMath/.nuget/directxmath.targets | 11 + Sdk/External/DirectXMath/.nuget/icon.jpg | Bin 0 -> 3479 bytes .../DirectXMath/.nuget/signconfig.xml | 6 + .../DirectXMath/Extensions/DirectXMathAVX.h | 275 + .../DirectXMath/Extensions/DirectXMathAVX2.h | 1037 + .../DirectXMath/Extensions/DirectXMathBE.h | 95 + .../DirectXMath/Extensions/DirectXMathF16C.h | 471 + .../DirectXMath/Extensions/DirectXMathFMA3.h | 391 + .../DirectXMath/Extensions/DirectXMathFMA4.h | 415 + .../DirectXMath/Extensions/DirectXMathSSE3.h | 111 + .../DirectXMath/Extensions/DirectXMathSSE4.h | 417 + Sdk/External/DirectXMath/HISTORY.md | 172 + .../DirectXMath/Inc/DirectXCollision.h | 353 + .../DirectXMath/Inc/DirectXCollision.inl | 4816 ++++ Sdk/External/DirectXMath/Inc/DirectXColors.h | 165 + Sdk/External/DirectXMath/Inc/DirectXMath.h | 2242 ++ .../DirectXMath/Inc/DirectXMathConvert.inl | 2187 ++ .../DirectXMath/Inc/DirectXMathMatrix.inl | 3422 +++ .../DirectXMath/Inc/DirectXMathMisc.inl | 2452 ++ .../DirectXMath/Inc/DirectXMathVector.inl | 14819 ++++++++++++ .../DirectXMath/Inc/DirectXPackedVector.h | 1216 + .../DirectXMath/Inc/DirectXPackedVector.inl | 4438 ++++ Sdk/External/DirectXMath/LICENSE | 21 + Sdk/External/DirectXMath/README.md | 78 + Sdk/External/DirectXMath/SECURITY.md | 41 + Sdk/External/DirectXMath/SHMath/DirectXSH.cpp | 4905 ++++ Sdk/External/DirectXMath/SHMath/DirectXSH.h | 72 + .../DirectXMath/SHMath/DirectXSHD3D11.cpp | 376 + .../DirectXMath/SHMath/DirectXSHD3D12.cpp | 334 + .../Stereo3D/Stereo3DMatrixHelper.cpp | 257 + 
.../Stereo3D/Stereo3DMatrixHelper.h | 64 + Sdk/External/DirectXMath/XDSP/XDSP.h | 813 + Sdk/External/DirectXTK/.editorconfig | 9 + .../.nuget/directxtk_desktop_2017.nuspec | 78 + .../.nuget/directxtk_desktop_2017.targets | 29 + .../.nuget/directxtk_desktop_win10.nuspec | 73 + .../.nuget/directxtk_desktop_win10.targets | 29 + .../DirectXTK/.nuget/directxtk_uwp.nuspec | 77 + .../DirectXTK/.nuget/directxtk_uwp.targets | 29 + Sdk/External/DirectXTK/.nuget/icon.jpg | Bin 0 -> 3479 bytes .../DirectXTK/.nuget/signconfig_desktop.xml | 11 + .../DirectXTK/.nuget/signconfig_uwp.xml | 6 + Sdk/External/DirectXTK/.nuget/versioninfo.ps1 | 6 + Sdk/External/DirectXTK/Audio/AudioEngine.cpp | 1653 ++ .../DirectXTKAudio_Desktop_2017_Win7.vcxproj | 207 + ...XTKAudio_Desktop_2017_Win7.vcxproj.filters | 59 + .../DirectXTKAudio_Desktop_2017_Win8.vcxproj | 197 + ...XTKAudio_Desktop_2017_Win8.vcxproj.filters | 56 + .../DirectXTKAudio_Desktop_2019_Win7.vcxproj | 211 + ...XTKAudio_Desktop_2019_Win7.vcxproj.filters | 59 + .../DirectXTKAudio_Desktop_2019_Win8.vcxproj | 201 + ...XTKAudio_Desktop_2019_Win8.vcxproj.filters | 56 + .../Audio/DynamicSoundEffectInstance.cpp | 387 + Sdk/External/DirectXTK/Audio/SoundCommon.cpp | 798 + Sdk/External/DirectXTK/Audio/SoundCommon.h | 383 + Sdk/External/DirectXTK/Audio/SoundEffect.cpp | 622 + .../DirectXTK/Audio/SoundEffectInstance.cpp | 341 + .../DirectXTK/Audio/SoundStreamInstance.cpp | 849 + .../DirectXTK/Audio/WAVFileReader.cpp | 700 + Sdk/External/DirectXTK/Audio/WAVFileReader.h | 58 + Sdk/External/DirectXTK/Audio/WaveBank.cpp | 615 + .../DirectXTK/Audio/WaveBankReader.cpp | 1386 ++ Sdk/External/DirectXTK/Audio/WaveBankReader.h | 81 + Sdk/External/DirectXTK/Audio/packages.config | 4 + Sdk/External/DirectXTK/CMakeLists.txt | 234 + Sdk/External/DirectXTK/CMakeSettings.json | 64 + .../DirectXTK/DirectXTK_Desktop_2017.sln | 84 + .../DirectXTK/DirectXTK_Desktop_2017.vcxproj | 342 + .../DirectXTK_Desktop_2017.vcxproj.filters | 315 + 
.../DirectXTK_Desktop_2017_Win10.sln | 84 + .../DirectXTK_Desktop_2017_Win10.vcxproj | 434 + ...rectXTK_Desktop_2017_Win10.vcxproj.filters | 357 + .../DirectXTK/DirectXTK_Desktop_2017_Win7.sln | 73 + .../DirectXTK/DirectXTK_Desktop_2019.sln | 84 + .../DirectXTK/DirectXTK_Desktop_2019.vcxproj | 346 + .../DirectXTK_Desktop_2019.vcxproj.filters | 315 + .../DirectXTK_Desktop_2019_Win10.sln | 84 + .../DirectXTK_Desktop_2019_Win10.vcxproj | 440 + ...rectXTK_Desktop_2019_Win10.vcxproj.filters | 357 + .../DirectXTK/DirectXTK_Desktop_2019_Win7.sln | 73 + .../DirectXTK/DirectXTK_Windows10_2017.sln | 48 + .../DirectXTK_Windows10_2017.vcxproj | 527 + .../DirectXTK_Windows10_2017.vcxproj.filters | 357 + .../DirectXTK/DirectXTK_Windows10_2019.sln | 48 + .../DirectXTK_Windows10_2019.vcxproj | 535 + .../DirectXTK_Windows10_2019.vcxproj.filters | 357 + .../DirectXTK/DirectXTK_XboxOneXDK_2017.sln | 33 + .../DirectXTK_XboxOneXDK_2017.vcxproj | 351 + .../DirectXTK_XboxOneXDK_2017.vcxproj.filters | 365 + Sdk/External/DirectXTK/HISTORY.md | 456 + Sdk/External/DirectXTK/Inc/Audio.h | 754 + Sdk/External/DirectXTK/Inc/BufferHelpers.h | 160 + Sdk/External/DirectXTK/Inc/CommonStates.h | 66 + Sdk/External/DirectXTK/Inc/DDSTextureLoader.h | 157 + Sdk/External/DirectXTK/Inc/DirectXHelpers.h | 197 + Sdk/External/DirectXTK/Inc/Effects.h | 938 + Sdk/External/DirectXTK/Inc/GamePad.h | 303 + .../DirectXTK/Inc/GeometricPrimitive.h | 89 + Sdk/External/DirectXTK/Inc/GraphicsMemory.h | 52 + Sdk/External/DirectXTK/Inc/Keyboard.h | 496 + Sdk/External/DirectXTK/Inc/Model.h | 221 + Sdk/External/DirectXTK/Inc/Mouse.h | 147 + Sdk/External/DirectXTK/Inc/PostProcess.h | 209 + Sdk/External/DirectXTK/Inc/PrimitiveBatch.h | 141 + Sdk/External/DirectXTK/Inc/ScreenGrab.h | 48 + Sdk/External/DirectXTK/Inc/SimpleMath.h | 1080 + Sdk/External/DirectXTK/Inc/SimpleMath.inl | 3733 +++ Sdk/External/DirectXTK/Inc/SpriteBatch.h | 98 + Sdk/External/DirectXTK/Inc/SpriteFont.h | 89 + Sdk/External/DirectXTK/Inc/VertexTypes.h | 
490 + Sdk/External/DirectXTK/Inc/WICTextureLoader.h | 168 + .../DirectXTK/Inc/XboxDDSTextureLoader.h | 66 + Sdk/External/DirectXTK/LICENSE | 21 + .../MakeSpriteFont/BitmapImporter.cs | 121 + .../DirectXTK/MakeSpriteFont/BitmapUtils.cs | 240 + .../MakeSpriteFont/CharacterRegion.cs | 133 + .../MakeSpriteFont/CommandLineOptions.cs | 95 + .../MakeSpriteFont/CommandLineParser.cs | 249 + .../DirectXTK/MakeSpriteFont/Glyph.cs | 39 + .../DirectXTK/MakeSpriteFont/GlyphCropper.cs | 50 + .../DirectXTK/MakeSpriteFont/GlyphPacker.cs | 281 + .../DirectXTK/MakeSpriteFont/IFontImporter.cs | 21 + .../MakeSpriteFont/MakeSpriteFont.csproj | 60 + .../DirectXTK/MakeSpriteFont/Program.cs | 183 + .../MakeSpriteFont/Properties/AssemblyInfo.cs | 21 + .../MakeSpriteFont/SpriteFontWriter.cs | 268 + .../MakeSpriteFont/TrueTypeImporter.cs | 248 + Sdk/External/DirectXTK/README.md | 93 + Sdk/External/DirectXTK/SECURITY.md | 41 + Sdk/External/DirectXTK/Src/AlignedNew.h | 64 + .../DirectXTK/Src/AlphaTestEffect.cpp | 444 + Sdk/External/DirectXTK/Src/BasicEffect.cpp | 731 + .../DirectXTK/Src/BasicPostProcess.cpp | 606 + Sdk/External/DirectXTK/Src/Bezier.h | 196 + Sdk/External/DirectXTK/Src/BinaryReader.cpp | 90 + Sdk/External/DirectXTK/Src/BinaryReader.h | 72 + Sdk/External/DirectXTK/Src/BufferHelpers.cpp | 393 + Sdk/External/DirectXTK/Src/CommonStates.cpp | 361 + Sdk/External/DirectXTK/Src/DDS.h | 273 + .../DirectXTK/Src/DDSTextureLoader.cpp | 1335 + Sdk/External/DirectXTK/Src/DGSLEffect.cpp | 940 + .../DirectXTK/Src/DGSLEffectFactory.cpp | 609 + Sdk/External/DirectXTK/Src/DebugEffect.cpp | 350 + Sdk/External/DirectXTK/Src/DemandCreate.h | 48 + Sdk/External/DirectXTK/Src/DirectXHelpers.cpp | 54 + .../DirectXTK/Src/DualPostProcess.cpp | 346 + .../DirectXTK/Src/DualTextureEffect.cpp | 334 + Sdk/External/DirectXTK/Src/EffectCommon.cpp | 458 + Sdk/External/DirectXTK/Src/EffectCommon.h | 295 + Sdk/External/DirectXTK/Src/EffectFactory.cpp | 539 + .../DirectXTK/Src/EnvironmentMapEffect.cpp | 727 + 
Sdk/External/DirectXTK/Src/GamePad.cpp | 1753 ++ .../DirectXTK/Src/GeometricPrimitive.cpp | 773 + Sdk/External/DirectXTK/Src/Geometry.cpp | 1195 + Sdk/External/DirectXTK/Src/Geometry.h | 29 + Sdk/External/DirectXTK/Src/GraphicsMemory.cpp | 327 + Sdk/External/DirectXTK/Src/Keyboard.cpp | 642 + Sdk/External/DirectXTK/Src/LoaderHelpers.h | 1020 + Sdk/External/DirectXTK/Src/Model.cpp | 327 + Sdk/External/DirectXTK/Src/ModelLoadCMO.cpp | 914 + .../DirectXTK/Src/ModelLoadSDKMESH.cpp | 737 + Sdk/External/DirectXTK/Src/ModelLoadVBO.cpp | 202 + Sdk/External/DirectXTK/Src/Mouse.cpp | 1502 ++ .../DirectXTK/Src/NormalMapEffect.cpp | 507 + Sdk/External/DirectXTK/Src/PBREffect.cpp | 532 + .../DirectXTK/Src/PBREffectFactory.cpp | 301 + Sdk/External/DirectXTK/Src/PlatformHelpers.h | 86 + Sdk/External/DirectXTK/Src/PrimitiveBatch.cpp | 465 + Sdk/External/DirectXTK/Src/SDKMesh.h | 338 + Sdk/External/DirectXTK/Src/ScreenGrab.cpp | 678 + .../DirectXTK/Src/Shaders/AlphaTestEffect.fx | 129 + .../DirectXTK/Src/Shaders/BasicEffect.fx | 607 + Sdk/External/DirectXTK/Src/Shaders/Common.fxh | 58 + .../DirectXTK/Src/Shaders/CompileShaders.cmd | 310 + .../DirectXTK/Src/Shaders/DGSLEffect.fx | 290 + .../DirectXTK/Src/Shaders/DGSLLambert.hlsl | 174 + .../DirectXTK/Src/Shaders/DGSLPhong.hlsl | 207 + .../DirectXTK/Src/Shaders/DGSLUnlit.hlsl | 149 + .../DirectXTK/Src/Shaders/DebugEffect.fx | 134 + .../Src/Shaders/DualTextureEffect.fx | 115 + .../Src/Shaders/EnvironmentMapEffect.fx | 447 + .../DirectXTK/Src/Shaders/Lighting.fxh | 96 + .../DirectXTK/Src/Shaders/NormalMapEffect.fx | 193 + .../DirectXTK/Src/Shaders/PBRCommon.fxh | 170 + .../DirectXTK/Src/Shaders/PBREffect.fx | 272 + .../Src/Shaders/PixelPacking_Velocity.hlsli | 95 + .../DirectXTK/Src/Shaders/PostProcess.fx | 178 + .../DirectXTK/Src/Shaders/SkinnedEffect.fx | 389 + .../DirectXTK/Src/Shaders/SpriteEffect.fx | 30 + .../DirectXTK/Src/Shaders/Structures.fxh | 227 + Sdk/External/DirectXTK/Src/Shaders/ToneMap.fx | 228 + 
.../DirectXTK/Src/Shaders/Utilities.fxh | 113 + .../DirectXTK/Src/SharedResourcePool.h | 109 + Sdk/External/DirectXTK/Src/SimpleMath.cpp | 156 + Sdk/External/DirectXTK/Src/SkinnedEffect.cpp | 648 + Sdk/External/DirectXTK/Src/SpriteBatch.cpp | 1200 + Sdk/External/DirectXTK/Src/SpriteFont.cpp | 654 + Sdk/External/DirectXTK/Src/TeapotData.inc | 182 + .../DirectXTK/Src/ToneMapPostProcess.cpp | 435 + Sdk/External/DirectXTK/Src/VertexTypes.cpp | 173 + .../DirectXTK/Src/WICTextureLoader.cpp | 1218 + .../DirectXTK/Src/XboxDDSTextureLoader.cpp | 788 + Sdk/External/DirectXTK/Src/pch.cpp | 10 + Sdk/External/DirectXTK/Src/pch.h | 152 + Sdk/External/DirectXTK/Src/vbo.h | 36 + Sdk/External/DirectXTK/XWBTool/directx.ico | Bin 0 -> 25214 bytes Sdk/External/DirectXTK/XWBTool/xwbtool.cpp | 1805 ++ Sdk/External/DirectXTK/XWBTool/xwbtool.rc | 115 + .../XWBTool/xwbtool_Desktop_2017.vcxproj | 238 + .../xwbtool_Desktop_2017.vcxproj.filters | 13 + .../XWBTool/xwbtool_Desktop_2019.vcxproj | 244 + .../xwbtool_Desktop_2019.vcxproj.filters | 10 + Sdk/External/HopscotchMap/bhopscotch_map.h | 734 + Sdk/External/HopscotchMap/bhopscotch_set.h | 586 + .../HopscotchMap/hopscotch_growth_policy.h | 404 + Sdk/External/HopscotchMap/hopscotch_hash.h | 1883 ++ Sdk/External/HopscotchMap/hopscotch_map.h | 735 + Sdk/External/HopscotchMap/hopscotch_set.h | 592 + Sdk/External/beehive/LICENSE | 17 + Sdk/External/beehive/beehive.hpp | 726 + Sdk/External/entt/LICENSE | 21 + Sdk/External/entt/entt.hpp | 20185 ++++++++++++++++ .../AI/Public/BehaviorProcessorSystem.h | 19 + .../AI/Public/Behaviors/BehaviorBase.h | 45 + .../AI/Public/Behaviors/CombatBehavior.h | 18 + .../AI/Public/Behaviors/DodgeBehavior.h | 18 + .../Public/Behaviors/GetEquipmentBehavior.h | 24 + Sdk/Include/AI/Public/Behaviors/IBehavior.h | 99 + .../AI/Public/Behaviors/PatrolBehavior.h | 19 + Sdk/Include/AI/Public/Command.h | 31 + .../AI/Public/Components/BehaviorProcessor.h | 11 + .../Public/Components/CombatBehaviorState.h | 9 + 
.../AI/Public/Components/DodgeBehavior.cpp | 179 + .../AI/Public/Components/DodgeBehaviorState.h | 16 + .../Components/GetEquipmentBehaviorState.h | 10 + Sdk/Include/AI/Public/Components/Loadout.h | 26 + .../AI/Public/Components/MoveEnactor.h | 41 + .../AI/Public/Components/MoveEnactorDebug.h | 51 + .../Public/Components/PatrolBehaviorState.h | 9 + .../AI/Public/Components/PhysicsView.h | 16 + Sdk/Include/AI/Public/Components/Senses.h | 24 + Sdk/Include/AI/Public/Core.h | 27 + Sdk/Include/AI/Public/Goal.h | 53 + Sdk/Include/AI/Public/InputSystem.h | 15 + Sdk/Include/AI/Public/InputUtil.h | 14 + Sdk/Include/AI/Public/JetpackUtil.h | 12 + Sdk/Include/AI/Public/MoveEnactorSystem.h | 15 + Sdk/Include/AI/Public/MoveUtil.h | 21 + Sdk/Include/AI/Public/SensesSystem.h | 16 + Sdk/Include/ComponentBase.h | 119 - Sdk/Include/Core/Public/BBox.h | 8 + .../{ => Core/Public}/ConfigConstants.h | 0 Sdk/Include/{ => Core/Public}/ConfigEvents.h | 0 Sdk/Include/Core/Public/Core.h | 19 + Sdk/Include/Core/Public/DataTypes.h | 284 + Sdk/Include/Core/Public/GLUtil.h | 22 + Sdk/Include/Core/Public/IConfig.h | 36 + Sdk/Include/Core/Public/IEventSource.h | 13 + Sdk/Include/Core/Public/IExceptionHandler.h | 14 + Sdk/Include/Core/Public/IGameService.h | 24 + .../Core/Public/IGameServiceProvider.h | 63 + .../{ => Core/Public}/ITextLookupService.h | 10 +- Sdk/Include/Core/Public/MinMax.h | 11 + Sdk/Include/Core/Public/Plane.h | 6 + Sdk/Include/Core/Public/Quaternion.h | 6 + Sdk/Include/{ => Core/Public}/Version.h | 0 Sdk/Include/Core/Public/VersionUtil.h | 8 + Sdk/Include/DataTypes.h | 165 - Sdk/Include/ECS/Public/Component.h | 57 + Sdk/Include/ECS/Public/Core.h | 10 + Sdk/Include/ECS/Public/Entity.h | 77 + Sdk/Include/ECS/Public/EntityRegistry.h | 8 + Sdk/Include/ECS/Public/ISystem.h | 15 + Sdk/Include/ECS/Public/SystemManager.h | 23 + Sdk/Include/ECS/Public/SystemPriority.h | 13 + .../GameObject/Public/Components/Inventory.h | 57 + .../GameObject/Public/Components/Jetpack.h | 27 + 
.../Public/Components/JetpackDebug.h | 26 + .../GameObject/Public/Components/ObjectDead.h | 9 + .../GameObject/Public/Components/ObjectRef.h | 17 + Sdk/Include/GameObject/Public/Core.h | 11 + Sdk/Include/GameObject/Public/InventoryUtil.h | 13 + Sdk/Include/GameObject/Public/ObjectUtil.h | 11 + Sdk/Include/IComponent.h | 15 - Sdk/Include/IComponentContainer.h | 61 - Sdk/Include/IConfig.h | 44 - Sdk/Include/IEventSource.h | 10 - Sdk/Include/IExceptionHandler.h | 18 - Sdk/Include/IGameServerConsole.h | 23 - Sdk/Include/ImGui/Public/IImGuiLayer.h | 16 + Sdk/Include/ImGui/Public/IImGuiService.h | 24 + Sdk/Include/ImGui/Public/ImGuiInit.h | 3 + Sdk/Include/MasterServer/IGiantsApiClient.h | 24 - Sdk/Include/Navigation/Public/Core.h | 7 + Sdk/Include/Navigation/Public/NavMesh.h | 17 + Sdk/Include/Navigation/Public/Path.h | 96 + Sdk/Include/Navigation/Public/PathDebugDraw.h | 55 + Sdk/Include/Navigation/Public/PathUtil.h | 17 + .../{ => Network/Public}/GameServerEvents.h | 3 +- .../{ => Network/Public}/IGameServer.h | 23 +- .../Network/Public/IGameServerConsole.h | 16 + Sdk/Include/Network/Public/IGiantsApiClient.h | 22 + Sdk/Include/{ => Network/Public}/NetCommon.h | 8 + .../Public}/PlayerInfoResponse.h | 0 .../Public}/ServerInfoResponse.h | 2 +- ServerConsoleExample/ServerConsoleApp.cpp | 14 +- ServerConsoleExample/ServerConsoleApp.h | 5 +- ServerConsoleExample/ServerDialog.cpp | 20 +- ServerConsoleExample/ServerDialog.h | 18 +- 316 files changed, 136727 insertions(+), 878 deletions(-) delete mode 100644 GPatch/CopyBinaries.bat delete mode 100644 GPatch/DotNetChecker.nsh delete mode 100644 GPatch/GPatch.ico delete mode 100644 GPatch/GPatch.nsi delete mode 100644 GPatch/Launcher/GPatch.ico delete mode 100644 GPatch/Launcher/Launcher.nsi create mode 100644 Sdk/External/DirectXMath/.nuget/directxmath.nuspec create mode 100644 Sdk/External/DirectXMath/.nuget/directxmath.targets create mode 100644 Sdk/External/DirectXMath/.nuget/icon.jpg create mode 100644 
Sdk/External/DirectXMath/.nuget/signconfig.xml create mode 100644 Sdk/External/DirectXMath/Extensions/DirectXMathAVX.h create mode 100644 Sdk/External/DirectXMath/Extensions/DirectXMathAVX2.h create mode 100644 Sdk/External/DirectXMath/Extensions/DirectXMathBE.h create mode 100644 Sdk/External/DirectXMath/Extensions/DirectXMathF16C.h create mode 100644 Sdk/External/DirectXMath/Extensions/DirectXMathFMA3.h create mode 100644 Sdk/External/DirectXMath/Extensions/DirectXMathFMA4.h create mode 100644 Sdk/External/DirectXMath/Extensions/DirectXMathSSE3.h create mode 100644 Sdk/External/DirectXMath/Extensions/DirectXMathSSE4.h create mode 100644 Sdk/External/DirectXMath/HISTORY.md create mode 100644 Sdk/External/DirectXMath/Inc/DirectXCollision.h create mode 100644 Sdk/External/DirectXMath/Inc/DirectXCollision.inl create mode 100644 Sdk/External/DirectXMath/Inc/DirectXColors.h create mode 100644 Sdk/External/DirectXMath/Inc/DirectXMath.h create mode 100644 Sdk/External/DirectXMath/Inc/DirectXMathConvert.inl create mode 100644 Sdk/External/DirectXMath/Inc/DirectXMathMatrix.inl create mode 100644 Sdk/External/DirectXMath/Inc/DirectXMathMisc.inl create mode 100644 Sdk/External/DirectXMath/Inc/DirectXMathVector.inl create mode 100644 Sdk/External/DirectXMath/Inc/DirectXPackedVector.h create mode 100644 Sdk/External/DirectXMath/Inc/DirectXPackedVector.inl create mode 100644 Sdk/External/DirectXMath/LICENSE create mode 100644 Sdk/External/DirectXMath/README.md create mode 100644 Sdk/External/DirectXMath/SECURITY.md create mode 100644 Sdk/External/DirectXMath/SHMath/DirectXSH.cpp create mode 100644 Sdk/External/DirectXMath/SHMath/DirectXSH.h create mode 100644 Sdk/External/DirectXMath/SHMath/DirectXSHD3D11.cpp create mode 100644 Sdk/External/DirectXMath/SHMath/DirectXSHD3D12.cpp create mode 100644 Sdk/External/DirectXMath/Stereo3D/Stereo3DMatrixHelper.cpp create mode 100644 Sdk/External/DirectXMath/Stereo3D/Stereo3DMatrixHelper.h create mode 100644 
Sdk/External/DirectXMath/XDSP/XDSP.h create mode 100644 Sdk/External/DirectXTK/.editorconfig create mode 100644 Sdk/External/DirectXTK/.nuget/directxtk_desktop_2017.nuspec create mode 100644 Sdk/External/DirectXTK/.nuget/directxtk_desktop_2017.targets create mode 100644 Sdk/External/DirectXTK/.nuget/directxtk_desktop_win10.nuspec create mode 100644 Sdk/External/DirectXTK/.nuget/directxtk_desktop_win10.targets create mode 100644 Sdk/External/DirectXTK/.nuget/directxtk_uwp.nuspec create mode 100644 Sdk/External/DirectXTK/.nuget/directxtk_uwp.targets create mode 100644 Sdk/External/DirectXTK/.nuget/icon.jpg create mode 100644 Sdk/External/DirectXTK/.nuget/signconfig_desktop.xml create mode 100644 Sdk/External/DirectXTK/.nuget/signconfig_uwp.xml create mode 100644 Sdk/External/DirectXTK/.nuget/versioninfo.ps1 create mode 100644 Sdk/External/DirectXTK/Audio/AudioEngine.cpp create mode 100644 Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win7.vcxproj create mode 100644 Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win7.vcxproj.filters create mode 100644 Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win8.vcxproj create mode 100644 Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win8.vcxproj.filters create mode 100644 Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win7.vcxproj create mode 100644 Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win7.vcxproj.filters create mode 100644 Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win8.vcxproj create mode 100644 Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win8.vcxproj.filters create mode 100644 Sdk/External/DirectXTK/Audio/DynamicSoundEffectInstance.cpp create mode 100644 Sdk/External/DirectXTK/Audio/SoundCommon.cpp create mode 100644 Sdk/External/DirectXTK/Audio/SoundCommon.h create mode 100644 Sdk/External/DirectXTK/Audio/SoundEffect.cpp create mode 100644 Sdk/External/DirectXTK/Audio/SoundEffectInstance.cpp create mode 100644 
Sdk/External/DirectXTK/Audio/SoundStreamInstance.cpp create mode 100644 Sdk/External/DirectXTK/Audio/WAVFileReader.cpp create mode 100644 Sdk/External/DirectXTK/Audio/WAVFileReader.h create mode 100644 Sdk/External/DirectXTK/Audio/WaveBank.cpp create mode 100644 Sdk/External/DirectXTK/Audio/WaveBankReader.cpp create mode 100644 Sdk/External/DirectXTK/Audio/WaveBankReader.h create mode 100644 Sdk/External/DirectXTK/Audio/packages.config create mode 100644 Sdk/External/DirectXTK/CMakeLists.txt create mode 100644 Sdk/External/DirectXTK/CMakeSettings.json create mode 100644 Sdk/External/DirectXTK/DirectXTK_Desktop_2017.sln create mode 100644 Sdk/External/DirectXTK/DirectXTK_Desktop_2017.vcxproj create mode 100644 Sdk/External/DirectXTK/DirectXTK_Desktop_2017.vcxproj.filters create mode 100644 Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win10.sln create mode 100644 Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win10.vcxproj create mode 100644 Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win10.vcxproj.filters create mode 100644 Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win7.sln create mode 100644 Sdk/External/DirectXTK/DirectXTK_Desktop_2019.sln create mode 100644 Sdk/External/DirectXTK/DirectXTK_Desktop_2019.vcxproj create mode 100644 Sdk/External/DirectXTK/DirectXTK_Desktop_2019.vcxproj.filters create mode 100644 Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win10.sln create mode 100644 Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win10.vcxproj create mode 100644 Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win10.vcxproj.filters create mode 100644 Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win7.sln create mode 100644 Sdk/External/DirectXTK/DirectXTK_Windows10_2017.sln create mode 100644 Sdk/External/DirectXTK/DirectXTK_Windows10_2017.vcxproj create mode 100644 Sdk/External/DirectXTK/DirectXTK_Windows10_2017.vcxproj.filters create mode 100644 Sdk/External/DirectXTK/DirectXTK_Windows10_2019.sln create mode 100644 
Sdk/External/DirectXTK/DirectXTK_Windows10_2019.vcxproj create mode 100644 Sdk/External/DirectXTK/DirectXTK_Windows10_2019.vcxproj.filters create mode 100644 Sdk/External/DirectXTK/DirectXTK_XboxOneXDK_2017.sln create mode 100644 Sdk/External/DirectXTK/DirectXTK_XboxOneXDK_2017.vcxproj create mode 100644 Sdk/External/DirectXTK/DirectXTK_XboxOneXDK_2017.vcxproj.filters create mode 100644 Sdk/External/DirectXTK/HISTORY.md create mode 100644 Sdk/External/DirectXTK/Inc/Audio.h create mode 100644 Sdk/External/DirectXTK/Inc/BufferHelpers.h create mode 100644 Sdk/External/DirectXTK/Inc/CommonStates.h create mode 100644 Sdk/External/DirectXTK/Inc/DDSTextureLoader.h create mode 100644 Sdk/External/DirectXTK/Inc/DirectXHelpers.h create mode 100644 Sdk/External/DirectXTK/Inc/Effects.h create mode 100644 Sdk/External/DirectXTK/Inc/GamePad.h create mode 100644 Sdk/External/DirectXTK/Inc/GeometricPrimitive.h create mode 100644 Sdk/External/DirectXTK/Inc/GraphicsMemory.h create mode 100644 Sdk/External/DirectXTK/Inc/Keyboard.h create mode 100644 Sdk/External/DirectXTK/Inc/Model.h create mode 100644 Sdk/External/DirectXTK/Inc/Mouse.h create mode 100644 Sdk/External/DirectXTK/Inc/PostProcess.h create mode 100644 Sdk/External/DirectXTK/Inc/PrimitiveBatch.h create mode 100644 Sdk/External/DirectXTK/Inc/ScreenGrab.h create mode 100644 Sdk/External/DirectXTK/Inc/SimpleMath.h create mode 100644 Sdk/External/DirectXTK/Inc/SimpleMath.inl create mode 100644 Sdk/External/DirectXTK/Inc/SpriteBatch.h create mode 100644 Sdk/External/DirectXTK/Inc/SpriteFont.h create mode 100644 Sdk/External/DirectXTK/Inc/VertexTypes.h create mode 100644 Sdk/External/DirectXTK/Inc/WICTextureLoader.h create mode 100644 Sdk/External/DirectXTK/Inc/XboxDDSTextureLoader.h create mode 100644 Sdk/External/DirectXTK/LICENSE create mode 100644 Sdk/External/DirectXTK/MakeSpriteFont/BitmapImporter.cs create mode 100644 Sdk/External/DirectXTK/MakeSpriteFont/BitmapUtils.cs create mode 100644 
Sdk/External/DirectXTK/MakeSpriteFont/CharacterRegion.cs create mode 100644 Sdk/External/DirectXTK/MakeSpriteFont/CommandLineOptions.cs create mode 100644 Sdk/External/DirectXTK/MakeSpriteFont/CommandLineParser.cs create mode 100644 Sdk/External/DirectXTK/MakeSpriteFont/Glyph.cs create mode 100644 Sdk/External/DirectXTK/MakeSpriteFont/GlyphCropper.cs create mode 100644 Sdk/External/DirectXTK/MakeSpriteFont/GlyphPacker.cs create mode 100644 Sdk/External/DirectXTK/MakeSpriteFont/IFontImporter.cs create mode 100644 Sdk/External/DirectXTK/MakeSpriteFont/MakeSpriteFont.csproj create mode 100644 Sdk/External/DirectXTK/MakeSpriteFont/Program.cs create mode 100644 Sdk/External/DirectXTK/MakeSpriteFont/Properties/AssemblyInfo.cs create mode 100644 Sdk/External/DirectXTK/MakeSpriteFont/SpriteFontWriter.cs create mode 100644 Sdk/External/DirectXTK/MakeSpriteFont/TrueTypeImporter.cs create mode 100644 Sdk/External/DirectXTK/README.md create mode 100644 Sdk/External/DirectXTK/SECURITY.md create mode 100644 Sdk/External/DirectXTK/Src/AlignedNew.h create mode 100644 Sdk/External/DirectXTK/Src/AlphaTestEffect.cpp create mode 100644 Sdk/External/DirectXTK/Src/BasicEffect.cpp create mode 100644 Sdk/External/DirectXTK/Src/BasicPostProcess.cpp create mode 100644 Sdk/External/DirectXTK/Src/Bezier.h create mode 100644 Sdk/External/DirectXTK/Src/BinaryReader.cpp create mode 100644 Sdk/External/DirectXTK/Src/BinaryReader.h create mode 100644 Sdk/External/DirectXTK/Src/BufferHelpers.cpp create mode 100644 Sdk/External/DirectXTK/Src/CommonStates.cpp create mode 100644 Sdk/External/DirectXTK/Src/DDS.h create mode 100644 Sdk/External/DirectXTK/Src/DDSTextureLoader.cpp create mode 100644 Sdk/External/DirectXTK/Src/DGSLEffect.cpp create mode 100644 Sdk/External/DirectXTK/Src/DGSLEffectFactory.cpp create mode 100644 Sdk/External/DirectXTK/Src/DebugEffect.cpp create mode 100644 Sdk/External/DirectXTK/Src/DemandCreate.h create mode 100644 Sdk/External/DirectXTK/Src/DirectXHelpers.cpp create mode 
100644 Sdk/External/DirectXTK/Src/DualPostProcess.cpp create mode 100644 Sdk/External/DirectXTK/Src/DualTextureEffect.cpp create mode 100644 Sdk/External/DirectXTK/Src/EffectCommon.cpp create mode 100644 Sdk/External/DirectXTK/Src/EffectCommon.h create mode 100644 Sdk/External/DirectXTK/Src/EffectFactory.cpp create mode 100644 Sdk/External/DirectXTK/Src/EnvironmentMapEffect.cpp create mode 100644 Sdk/External/DirectXTK/Src/GamePad.cpp create mode 100644 Sdk/External/DirectXTK/Src/GeometricPrimitive.cpp create mode 100644 Sdk/External/DirectXTK/Src/Geometry.cpp create mode 100644 Sdk/External/DirectXTK/Src/Geometry.h create mode 100644 Sdk/External/DirectXTK/Src/GraphicsMemory.cpp create mode 100644 Sdk/External/DirectXTK/Src/Keyboard.cpp create mode 100644 Sdk/External/DirectXTK/Src/LoaderHelpers.h create mode 100644 Sdk/External/DirectXTK/Src/Model.cpp create mode 100644 Sdk/External/DirectXTK/Src/ModelLoadCMO.cpp create mode 100644 Sdk/External/DirectXTK/Src/ModelLoadSDKMESH.cpp create mode 100644 Sdk/External/DirectXTK/Src/ModelLoadVBO.cpp create mode 100644 Sdk/External/DirectXTK/Src/Mouse.cpp create mode 100644 Sdk/External/DirectXTK/Src/NormalMapEffect.cpp create mode 100644 Sdk/External/DirectXTK/Src/PBREffect.cpp create mode 100644 Sdk/External/DirectXTK/Src/PBREffectFactory.cpp create mode 100644 Sdk/External/DirectXTK/Src/PlatformHelpers.h create mode 100644 Sdk/External/DirectXTK/Src/PrimitiveBatch.cpp create mode 100644 Sdk/External/DirectXTK/Src/SDKMesh.h create mode 100644 Sdk/External/DirectXTK/Src/ScreenGrab.cpp create mode 100644 Sdk/External/DirectXTK/Src/Shaders/AlphaTestEffect.fx create mode 100644 Sdk/External/DirectXTK/Src/Shaders/BasicEffect.fx create mode 100644 Sdk/External/DirectXTK/Src/Shaders/Common.fxh create mode 100644 Sdk/External/DirectXTK/Src/Shaders/CompileShaders.cmd create mode 100644 Sdk/External/DirectXTK/Src/Shaders/DGSLEffect.fx create mode 100644 Sdk/External/DirectXTK/Src/Shaders/DGSLLambert.hlsl create mode 100644 
Sdk/External/DirectXTK/Src/Shaders/DGSLPhong.hlsl create mode 100644 Sdk/External/DirectXTK/Src/Shaders/DGSLUnlit.hlsl create mode 100644 Sdk/External/DirectXTK/Src/Shaders/DebugEffect.fx create mode 100644 Sdk/External/DirectXTK/Src/Shaders/DualTextureEffect.fx create mode 100644 Sdk/External/DirectXTK/Src/Shaders/EnvironmentMapEffect.fx create mode 100644 Sdk/External/DirectXTK/Src/Shaders/Lighting.fxh create mode 100644 Sdk/External/DirectXTK/Src/Shaders/NormalMapEffect.fx create mode 100644 Sdk/External/DirectXTK/Src/Shaders/PBRCommon.fxh create mode 100644 Sdk/External/DirectXTK/Src/Shaders/PBREffect.fx create mode 100644 Sdk/External/DirectXTK/Src/Shaders/PixelPacking_Velocity.hlsli create mode 100644 Sdk/External/DirectXTK/Src/Shaders/PostProcess.fx create mode 100644 Sdk/External/DirectXTK/Src/Shaders/SkinnedEffect.fx create mode 100644 Sdk/External/DirectXTK/Src/Shaders/SpriteEffect.fx create mode 100644 Sdk/External/DirectXTK/Src/Shaders/Structures.fxh create mode 100644 Sdk/External/DirectXTK/Src/Shaders/ToneMap.fx create mode 100644 Sdk/External/DirectXTK/Src/Shaders/Utilities.fxh create mode 100644 Sdk/External/DirectXTK/Src/SharedResourcePool.h create mode 100644 Sdk/External/DirectXTK/Src/SimpleMath.cpp create mode 100644 Sdk/External/DirectXTK/Src/SkinnedEffect.cpp create mode 100644 Sdk/External/DirectXTK/Src/SpriteBatch.cpp create mode 100644 Sdk/External/DirectXTK/Src/SpriteFont.cpp create mode 100644 Sdk/External/DirectXTK/Src/TeapotData.inc create mode 100644 Sdk/External/DirectXTK/Src/ToneMapPostProcess.cpp create mode 100644 Sdk/External/DirectXTK/Src/VertexTypes.cpp create mode 100644 Sdk/External/DirectXTK/Src/WICTextureLoader.cpp create mode 100644 Sdk/External/DirectXTK/Src/XboxDDSTextureLoader.cpp create mode 100644 Sdk/External/DirectXTK/Src/pch.cpp create mode 100644 Sdk/External/DirectXTK/Src/pch.h create mode 100644 Sdk/External/DirectXTK/Src/vbo.h create mode 100644 Sdk/External/DirectXTK/XWBTool/directx.ico create mode 100644 
Sdk/External/DirectXTK/XWBTool/xwbtool.cpp create mode 100644 Sdk/External/DirectXTK/XWBTool/xwbtool.rc create mode 100644 Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2017.vcxproj create mode 100644 Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2017.vcxproj.filters create mode 100644 Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2019.vcxproj create mode 100644 Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2019.vcxproj.filters create mode 100644 Sdk/External/HopscotchMap/bhopscotch_map.h create mode 100644 Sdk/External/HopscotchMap/bhopscotch_set.h create mode 100644 Sdk/External/HopscotchMap/hopscotch_growth_policy.h create mode 100644 Sdk/External/HopscotchMap/hopscotch_hash.h create mode 100644 Sdk/External/HopscotchMap/hopscotch_map.h create mode 100644 Sdk/External/HopscotchMap/hopscotch_set.h create mode 100644 Sdk/External/beehive/LICENSE create mode 100644 Sdk/External/beehive/beehive.hpp create mode 100644 Sdk/External/entt/LICENSE create mode 100644 Sdk/External/entt/entt.hpp create mode 100644 Sdk/Include/AI/Public/BehaviorProcessorSystem.h create mode 100644 Sdk/Include/AI/Public/Behaviors/BehaviorBase.h create mode 100644 Sdk/Include/AI/Public/Behaviors/CombatBehavior.h create mode 100644 Sdk/Include/AI/Public/Behaviors/DodgeBehavior.h create mode 100644 Sdk/Include/AI/Public/Behaviors/GetEquipmentBehavior.h create mode 100644 Sdk/Include/AI/Public/Behaviors/IBehavior.h create mode 100644 Sdk/Include/AI/Public/Behaviors/PatrolBehavior.h create mode 100644 Sdk/Include/AI/Public/Command.h create mode 100644 Sdk/Include/AI/Public/Components/BehaviorProcessor.h create mode 100644 Sdk/Include/AI/Public/Components/CombatBehaviorState.h create mode 100644 Sdk/Include/AI/Public/Components/DodgeBehavior.cpp create mode 100644 Sdk/Include/AI/Public/Components/DodgeBehaviorState.h create mode 100644 Sdk/Include/AI/Public/Components/GetEquipmentBehaviorState.h create mode 100644 Sdk/Include/AI/Public/Components/Loadout.h create mode 100644 
Sdk/Include/AI/Public/Components/MoveEnactor.h create mode 100644 Sdk/Include/AI/Public/Components/MoveEnactorDebug.h create mode 100644 Sdk/Include/AI/Public/Components/PatrolBehaviorState.h create mode 100644 Sdk/Include/AI/Public/Components/PhysicsView.h create mode 100644 Sdk/Include/AI/Public/Components/Senses.h create mode 100644 Sdk/Include/AI/Public/Core.h create mode 100644 Sdk/Include/AI/Public/Goal.h create mode 100644 Sdk/Include/AI/Public/InputSystem.h create mode 100644 Sdk/Include/AI/Public/InputUtil.h create mode 100644 Sdk/Include/AI/Public/JetpackUtil.h create mode 100644 Sdk/Include/AI/Public/MoveEnactorSystem.h create mode 100644 Sdk/Include/AI/Public/MoveUtil.h create mode 100644 Sdk/Include/AI/Public/SensesSystem.h delete mode 100644 Sdk/Include/ComponentBase.h create mode 100644 Sdk/Include/Core/Public/BBox.h rename Sdk/Include/{ => Core/Public}/ConfigConstants.h (100%) rename Sdk/Include/{ => Core/Public}/ConfigEvents.h (100%) create mode 100644 Sdk/Include/Core/Public/Core.h create mode 100644 Sdk/Include/Core/Public/DataTypes.h create mode 100644 Sdk/Include/Core/Public/GLUtil.h create mode 100644 Sdk/Include/Core/Public/IConfig.h create mode 100644 Sdk/Include/Core/Public/IEventSource.h create mode 100644 Sdk/Include/Core/Public/IExceptionHandler.h create mode 100644 Sdk/Include/Core/Public/IGameService.h create mode 100644 Sdk/Include/Core/Public/IGameServiceProvider.h rename Sdk/Include/{ => Core/Public}/ITextLookupService.h (79%) create mode 100644 Sdk/Include/Core/Public/MinMax.h create mode 100644 Sdk/Include/Core/Public/Plane.h create mode 100644 Sdk/Include/Core/Public/Quaternion.h rename Sdk/Include/{ => Core/Public}/Version.h (100%) create mode 100644 Sdk/Include/Core/Public/VersionUtil.h delete mode 100644 Sdk/Include/DataTypes.h create mode 100644 Sdk/Include/ECS/Public/Component.h create mode 100644 Sdk/Include/ECS/Public/Core.h create mode 100644 Sdk/Include/ECS/Public/Entity.h create mode 100644 
Sdk/Include/ECS/Public/EntityRegistry.h create mode 100644 Sdk/Include/ECS/Public/ISystem.h create mode 100644 Sdk/Include/ECS/Public/SystemManager.h create mode 100644 Sdk/Include/ECS/Public/SystemPriority.h create mode 100644 Sdk/Include/GameObject/Public/Components/Inventory.h create mode 100644 Sdk/Include/GameObject/Public/Components/Jetpack.h create mode 100644 Sdk/Include/GameObject/Public/Components/JetpackDebug.h create mode 100644 Sdk/Include/GameObject/Public/Components/ObjectDead.h create mode 100644 Sdk/Include/GameObject/Public/Components/ObjectRef.h create mode 100644 Sdk/Include/GameObject/Public/Core.h create mode 100644 Sdk/Include/GameObject/Public/InventoryUtil.h create mode 100644 Sdk/Include/GameObject/Public/ObjectUtil.h delete mode 100644 Sdk/Include/IComponent.h delete mode 100644 Sdk/Include/IComponentContainer.h delete mode 100644 Sdk/Include/IConfig.h delete mode 100644 Sdk/Include/IEventSource.h delete mode 100644 Sdk/Include/IExceptionHandler.h delete mode 100644 Sdk/Include/IGameServerConsole.h create mode 100644 Sdk/Include/ImGui/Public/IImGuiLayer.h create mode 100644 Sdk/Include/ImGui/Public/IImGuiService.h create mode 100644 Sdk/Include/ImGui/Public/ImGuiInit.h delete mode 100644 Sdk/Include/MasterServer/IGiantsApiClient.h create mode 100644 Sdk/Include/Navigation/Public/Core.h create mode 100644 Sdk/Include/Navigation/Public/NavMesh.h create mode 100644 Sdk/Include/Navigation/Public/Path.h create mode 100644 Sdk/Include/Navigation/Public/PathDebugDraw.h create mode 100644 Sdk/Include/Navigation/Public/PathUtil.h rename Sdk/Include/{ => Network/Public}/GameServerEvents.h (96%) rename Sdk/Include/{ => Network/Public}/IGameServer.h (68%) create mode 100644 Sdk/Include/Network/Public/IGameServerConsole.h create mode 100644 Sdk/Include/Network/Public/IGiantsApiClient.h rename Sdk/Include/{ => Network/Public}/NetCommon.h (96%) rename Sdk/Include/{MasterServer => Network/Public}/PlayerInfoResponse.h (100%) rename 
Sdk/Include/{MasterServer => Network/Public}/ServerInfoResponse.h (97%) diff --git a/GPatch/CopyBinaries.bat b/GPatch/CopyBinaries.bat deleted file mode 100644 index 9b290f9..0000000 --- a/GPatch/CopyBinaries.bat +++ /dev/null @@ -1,19 +0,0 @@ -xcopy "%GIANTS_PATH%\gg_dx7r.dll" "Files\" /Y -xcopy "%GIANTS_PATH%\gg_dx9r.dll" "Files\" /Y -xcopy "%GIANTS_PATH%\gg_null.dll" "Files\" /Y -xcopy "%GIANTS_PATH%\dedicated.exe" "Files\" /Y -xcopy "%GIANTS_PATH%\Giants.exe" "Files\" /Y -xcopy "%GIANTS_PATH%\GiantsMain.exe" "Files\" /Y -xcopy "%GIANTS_PATH%\GiantsDedicated.exe" "Files\" /Y -xcopy "%GIANTS_PATH%\gs_ds.dll" "Files\" /Y -xcopy "%GIANTS_PATH%\Giants.WebApi.Clients.dll" "Files\" /Y -xcopy "%GIANTS_PATH%\fmt.dll" "Files\" /Y -xcopy "%GIANTS_PATH%\crashrpt_lang.ini" "Files\" /Y -xcopy "%GIANTS_PATH%\CrashRpt1403.dll" "Files\" /Y -xcopy "%GIANTS_PATH%\CrashSender1403.exe" "Files\" /Y -xcopy "%GIANTS_PATH%\dbghelp.dll" "Files\" /Y -xcopy "%GIANTS_PATH%\cpprest_2_10.dll" "Files\" /Y -xcopy "%GIANTS_PATH%\Newtonsoft.Json.dll" "Files\" /Y -xcopy "%GIANTS_PATH%\zlib1.dll" "Files\" /Y - -pause \ No newline at end of file diff --git a/GPatch/DotNetChecker.nsh b/GPatch/DotNetChecker.nsh deleted file mode 100644 index abc84e5..0000000 --- a/GPatch/DotNetChecker.nsh +++ /dev/null @@ -1,127 +0,0 @@ -!macro CheckNetFramework FrameworkVersion - Var /GLOBAL dotNetUrl${FrameworkVersion} - Var /GLOBAL dotNetReadableVersion${FrameworkVersion} - - !ifndef DOTNET472_URL - !define DOTNET472_URL "https://go.microsoft.com/fwlink/?LinkId=863265" - !define DOTNET471_URL "https://go.microsoft.com/fwlink/?LinkId=852104" - !define DOTNET47_URL "https://go.microsoft.com/fwlink/?LinkId=825302" - !define DOTNET462_URL "https://go.microsoft.com/fwlink/?LinkId=780600" - !define DOTNET461_URL "https://go.microsoft.com/fwlink/?LinkId=671743" - !define DOTNET46_URL "https://go.microsoft.com/fwlink/?LinkId=528232" - !define DOTNET452_URL "https://go.microsoft.com/fwlink/?LinkId=397708" - !define 
DOTNET451_URL "https://go.microsoft.com/fwlink/?LinkId=322116" - !define DOTNET45_URL "https://go.microsoft.com/fwlink/?LinkId=225702" - !define DOTNET40Full_URL "https://www.microsoft.com/downloads/info.aspx?na=41&srcfamilyid=0a391abd-25c1-4fc0-919f-b21f31ab88b7&srcdisplaylang=en&u=http%3a%2f%2fdownload.microsoft.com%2fdownload%2f9%2f5%2fA%2f95A9616B-7A37-4AF6-BC36-D6EA96C8DAAE%2fdotNetFx40_Full_x86_x64.exe" - !define DOTNET40Client_URL "https://www.microsoft.com/downloads/info.aspx?na=41&srcfamilyid=e5ad0459-cbcc-4b4f-97b6-fb17111cf544&srcdisplaylang=en&u=http%3a%2f%2fdownload.microsoft.com%2fdownload%2f5%2f6%2f2%2f562A10F9-C9F4-4313-A044-9C94E0A8FAC8%2fdotNetFx40_Client_x86_x64.exe" - !define DOTNET35_URL "https://download.microsoft.com/download/2/0/e/20e90413-712f-438c-988e-fdaa79a8ac3d/dotnetfx35.exe" - !define DOTNET30_URL "https://download.microsoft.com/download/2/0/e/20e90413-712f-438c-988e-fdaa79a8ac3d/dotnetfx35.exe" - !define DOTNET20_URL "https://www.microsoft.com/downloads/info.aspx?na=41&srcfamilyid=0856eacb-4362-4b0d-8edd-aab15c5e04f5&srcdisplaylang=en&u=http%3a%2f%2fdownload.microsoft.com%2fdownload%2f5%2f6%2f7%2f567758a3-759e-473e-bf8f-52154438565a%2fdotnetfx.exe" - !define DOTNET11_URL "https://www.microsoft.com/downloads/info.aspx?na=41&srcfamilyid=262d25e3-f589-4842-8157-034d1e7cf3a3&srcdisplaylang=en&u=http%3a%2f%2fdownload.microsoft.com%2fdownload%2fa%2fa%2fc%2faac39226-8825-44ce-90e3-bf8203e74006%2fdotnetfx.exe" - !define DOTNET10_URL "https://www.microsoft.com/downloads/info.aspx?na=41&srcfamilyid=262d25e3-f589-4842-8157-034d1e7cf3a3&srcdisplaylang=en&u=http%3a%2f%2fdownload.microsoft.com%2fdownload%2fa%2fa%2fc%2faac39226-8825-44ce-90e3-bf8203e74006%2fdotnetfx.exe" - !endif - - ${If} ${FrameworkVersion} == "472" - StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET472_URL} - StrCpy $dotNetReadableVersion${FrameworkVersion} "4.7.2" - ${ElseIf} ${FrameworkVersion} == "471" - StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET471_URL} - StrCpy 
$dotNetReadableVersion${FrameworkVersion} "4.7.1" - ${ElseIf} ${FrameworkVersion} == "47" - StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET47_URL} - StrCpy $dotNetReadableVersion${FrameworkVersion} "4.7" - ${ElseIf} ${FrameworkVersion} == "462" - StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET462_URL} - StrCpy $dotNetReadableVersion${FrameworkVersion} "4.6.2" - ${ElseIf} ${FrameworkVersion} == "461" - StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET461_URL} - StrCpy $dotNetReadableVersion${FrameworkVersion} "4.6.1" - ${ElseIf} ${FrameworkVersion} == "46" - StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET46_URL} - StrCpy $dotNetReadableVersion${FrameworkVersion} "4.6" - ${ElseIf} ${FrameworkVersion} == "452" - StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET452_URL} - StrCpy $dotNetReadableVersion${FrameworkVersion} "4.52" - ${ElseIf} ${FrameworkVersion} == "451" - StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET451_URL} - StrCpy $dotNetReadableVersion${FrameworkVersion} "4.51" - ${ElseIf} ${FrameworkVersion} == "45" - StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET45_URL} - StrCpy $dotNetReadableVersion${FrameworkVersion} "4.5" - ${ElseIf} ${FrameworkVersion} == "40Full" - StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET40Full_URL} - StrCpy $dotNetReadableVersion${FrameworkVersion} "4.0 Full" - ${ElseIf} ${FrameworkVersion} == "40Client" - StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET40Client_URL} - StrCpy $dotNetReadableVersion${FrameworkVersion} "4.0 Client" - ${ElseIf} ${FrameworkVersion} == "35" - StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET35_URL} - StrCpy $dotNetReadableVersion${FrameworkVersion} "3.5" - ${ElseIf} ${FrameworkVersion} == "30" - StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET30_URL} - StrCpy $dotNetReadableVersion${FrameworkVersion} "3.0" - ${ElseIf} ${FrameworkVersion} == "20" - StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET20_URL} - StrCpy $dotNetReadableVersion${FrameworkVersion} "2.0" - ${ElseIf} ${FrameworkVersion} == "11" - StrCpy $dotNetUrl${FrameworkVersion} 
${DOTNET11_URL} - StrCpy $dotNetReadableVersion${FrameworkVersion} "1.1" - ${ElseIf} ${FrameworkVersion} == "10" - StrCpy $dotNetUrl${FrameworkVersion} ${DOTNET10_URL} - StrCpy $dotNetReadableVersion${FrameworkVersion} "1.0" - ${EndIf} - - DetailPrint "Checking .NET Framework version..." - - Push $0 - Push $1 - Push $2 - Push $3 - Push $4 - Push $5 - Push $6 - Push $7 - - DotNetChecker::IsDotNet${FrameworkVersion}Installed - Pop $0 - - ${If} $0 == "false" - ${OrIf} $0 == "f" ; if script is compiled in ANSI mode then we get only an "f" https://github.com/ReVolly/NsisDotNetChecker/issues/4 - DetailPrint ".NET Framework $dotNetReadableVersion${FrameworkVersion} not found, download is required for program to run." - Goto NoDotNET${FrameworkVersion} - ${Else} - DetailPrint ".NET Framework $dotNetReadableVersion${FrameworkVersion} found, no need to install." - Goto NewDotNET${FrameworkVersion} - ${EndIf} - -NoDotNET${FrameworkVersion}: - MessageBox MB_YESNOCANCEL|MB_ICONEXCLAMATION \ - ".NET Framework not installed. Required version: $dotNetReadableVersion${FrameworkVersion}.$\nInstall now?" \ - /SD IDYES IDYES InstallDotNET${FrameworkVersion} IDNO NewDotNET${FrameworkVersion} - goto GiveUpDotNET${FrameworkVersion} ;IDCANCEL - -InstallDotNET${FrameworkVersion}: - DetailPrint "Starting Microsoft .NET Framework v${NETVersion} Setup..." - ExecWait "$TEMP\${NETInstallerFileName}" - - DetailPrint "Completed .NET Framework install/update. Removing .NET Framework installer." - Delete "$TEMP\${NETInstallerFileName}" - - DetailPrint ".NET Framework installer removed." - goto NewDotNet${FrameworkVersion} - -GiveUpDotNET${FrameworkVersion}: - Abort "Installation canceled by user." - -NewDotNET${FrameworkVersion}: - DetailPrint "Proceeding with remainder of installation." 
- Pop $7 - Pop $6 - Pop $5 - Pop $4 - Pop $3 - Pop $2 - Pop $1 - Pop $0 - -!macroend diff --git a/GPatch/GPatch.ico b/GPatch/GPatch.ico deleted file mode 100644 index 012e6a834aa076522ce14dc8f9e70ec0d1235b2f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2238 zcmc(gdstOv7KdLj6;nhm!l*c&&=N11*JjAllq9JXyd)|r3Zsyi;;2n#P6~)17+|C+ zCTNHm38JEaSb?Izlnur;4Y`;Q7b7bX;he8^EFKSFfA^i|x7XUgcYSN`v-kHs8;s)QHndF0?}pBS&^Ga=@hLZrB#$yNK`uUbm&A{ zQL9Kb>pd-6trlHtE2>4LTDaGI4{b{`^*=Pz+lQ*%RlRgrU}io${_ z$t%hJv6AAuES!a1{uf`WYVFMmz;S6?dm zo5Wtpqe+*G?$=y2+jA)_&LQ_=4o!8JIGK`7W>ywK*_pJe&(YmE$K|u7<(B3{ublefj%Hug-8qcA_@r3P+Bjs2u8{=Y$i;0DavE*NgVbVJX*tdT_J7S_a zurHcf8}_p4$OpW+WjA|vMG+Md$$O%>&`8E?j=(n}obVlCBqxOu78JsW&|tzsg4q!q zOz3vW1GlryD~K011aLmzpP`HV3EH@Y>iSLicy3~-`v#&C)^p_0di*!8!_{LgqXYbi zjbDS`s@L%l^|V_>K!`7I`}$(!wTQ%YcbUHs+mZ9}8s^5STs~Mr%V4KvUvv6Qewvh8#}3e3kr1}GEHP8?FaOhyvO}^&|RW_{~PJM-Vd_&^n1F~u-|=QTPO2@ z@wcK;uD{NBaNaRna8kW=>Aks5?R}ennE!4(KL1lSGk)KaN$t7wu8V0S$B%bIXaS-)lT8(*G%Ygh-YhQ6u%&zuY0^v=e0+fFBr?}(N8tkruz z=d;Bd{yQ)H!ud{EDQk^e9tV_0V{@H{Jr$*fR5MW~+P}@Uhs;;15S|m@xl`IaWK6jX zPl|t~M%tU2nszLiaG_FvU5B5`uvQkz#-zJJMQ^40)c(Y9rBc3+<8@`$tGBhSL4lij)SXCqMI@Snhrg{I+al`55z;5B`Fk7{MR4xqrky5~&%WDUU{*gyt-Tm+F6wA`C)KC8~%#f3`@?w_mIzJXv zl{S8(l*tDnhFEp=2|Sl~rZA@@R1L0^*Pj}6*w~<;dzVMLKJn}e3Hd#DDHU@Ag1$av zfF-pW*f*-%;Eey5(R6(3fV9l|i1u90a%Zc5m^c+H diff --git a/GPatch/GPatch.nsi b/GPatch/GPatch.nsi deleted file mode 100644 index d5d9199..0000000 --- a/GPatch/GPatch.nsi +++ /dev/null @@ -1,128 +0,0 @@ -Unicode True -SetCompressor /SOLID zlib ; LZMA compresses about 20% better but is more likely to trigger AV false positives - -!define PRODUCT_NAME "Giants: Citizen Kabuto" -!define PRODUCT_VERSION "1.499" - -; MUI 1.67 compatible ------ -!include "MUI2.nsh" -!include "DotNetChecker.nsh" - -; MUI Settings -!define MUI_ABORTWARNING -!define MUI_ICON "GPatch.ico" - -; Welcome page -; Directory page -!insertmacro MUI_PAGE_DIRECTORY -; Instfiles page -!insertmacro 
MUI_PAGE_INSTFILES -; Finish page -;!define MUI_FINISHPAGE_SHOWREADME $INSTDIR\readme.txt - -!define MUI_FINISHPAGE_SHOWREADME_NOTCHECKED -!insertmacro MUI_PAGE_FINISH - -!define MUI_LANGDLL_REGISTRY_ROOT "HKCU" -!define MUI_LANGDLL_REGISTRY_KEY "Software\PlanetMoon\Giants" -!define MUI_LANGDLL_REGISTRY_VALUENAME "SetupLanguage" - -; Language files -!insertmacro MUI_LANGUAGE "English" -!insertmacro MUI_LANGUAGE "French" -!insertmacro MUI_LANGUAGE "German" -!insertmacro MUI_LANGUAGE "Italian" -!insertmacro MUI_LANGUAGE "Spanish" - -; Language selection settings -!define MUI_LANGDLL_WINDOWTITLE "Setup Language" - -!include LogicLib.nsh - -; MUI end ------ - -Name "${PRODUCT_NAME} ${PRODUCT_VERSION}" -OutFile "Output\GPatch1_499_0_0.exe" -InstallDir "$PROGRAMFILES\Giants\" -InstallDirRegKey HKCU "SOFTWARE\PlanetMoon\Giants" "DestDir" -ShowInstDetails hide - -;Request application privileges for Windows Vista+ -RequestExecutionLevel admin - -Section - SetDetailsView hide - SectionIn RO - SetOverwrite on - - nsExec::Exec "taskkill /F /IM Giants.exe" - nsExec::Exec "taskkill /F /IM GiantsMain.exe" - - ; Install DX redist for DX9 renderer - SetOutPath "$INSTDIR\Redist" - File /r "Files\Redist\*.*" - ExecWait "$INSTDIR\Redist\dxsetup.exe /silent" $0 - - ${If} $0 != 0 - MessageBox MB_OK "Setup failed to update DirectX ($0). Please visit www.microsoft.com and download the latest version of the DirectX end user redistributable." - ${EndIf} - - ExecWait "$INSTDIR\Redist\VC_redist.x86.exe /install /quiet /norestart /log $\"$Temp\GPatch_VCRedist.txt$\"" $0 - ${If} $0 != 0 - ${AndIf} $0 != 1638 ;0x666 - Newer version installed - MessageBox MB_OK "Setup failed to install the Visual C++ Runtime. Please visit www.microsoft.com and download the latest version of the Visual C++ 2019 redistributable." 
- ${EndIf} - - RMDir /r "$INSTDIR\Redist" ; Delete temporary files - - ; Delete old files - Delete $INSTDIR\bin\Shaders\*.* - Delete $INSTDIR\gg_dx7r.dll - Delete $INSTDIR\gg_dx8r.dll - Delete $INSTDIR\gg_dx9r.dll - Delete $INSTDIR\gg_null.dll - Delete $INSTDIR\Giants.exe - Delete $INSTDIR\BugTrap.dll - Delete $INSTDIR\GiantsMain.exe - Delete $INSTDIR\*.vso - Delete $INSTDIR\*.pso - - SetOutPath "$INSTDIR" - File /r "Files\*.*" - - ; remove old mods (may have compatibility issues) - Delete $INSTDIR\bin\worldlist2.bin - Delete $INSTDIR\bin\worldlist3.bin - Delete $INSTDIR\bin\worldlist4.bin - Delete $INSTDIR\bin\worldlist5.bin - Delete $INSTDIR\bin\mappack1.gzp - Delete $INSTDIR\bin\A-GRM1.gzp - -SectionEnd - -!define NETVersion "4.7.2" -!define NETInstallerFileName "NDP472-KB4054531-Web.exe" -!define NETInstallerPath "Files\Redist\NDP472-KB4054531-Web.exe" - -Section "MS .NET Framework v${NETVersion}" SecFramework - IfFileExists "$WINDIR\Microsoft.NET\Framework\v${NETVersion}" NETFrameworkInstalled 0 - File /oname=$TEMP\${NETInstallerFileName} "${NETInstallerPath}" - - !insertmacro CheckNetFramework 472 - Return - - NETFrameworkInstalled: - DetailPrint "Microsoft .NET Framework is already installed!" 
-SectionEnd - - -;-------------------------------- -;Installer Functions - -Function .onInit - - !insertmacro MUI_LANGDLL_DISPLAY - -FunctionEnd - -;-------------------------------- \ No newline at end of file diff --git a/GPatch/Launcher/GPatch.ico b/GPatch/Launcher/GPatch.ico deleted file mode 100644 index 012e6a834aa076522ce14dc8f9e70ec0d1235b2f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2238 zcmc(gdstOv7KdLj6;nhm!l*c&&=N11*JjAllq9JXyd)|r3Zsyi;;2n#P6~)17+|C+ zCTNHm38JEaSb?Izlnur;4Y`;Q7b7bX;he8^EFKSFfA^i|x7XUgcYSN`v-kHs8;s)QHndF0?}pBS&^Ga=@hLZrB#$yNK`uUbm&A{ zQL9Kb>pd-6trlHtE2>4LTDaGI4{b{`^*=Pz+lQ*%RlRgrU}io${_ z$t%hJv6AAuES!a1{uf`WYVFMmz;S6?dm zo5Wtpqe+*G?$=y2+jA)_&LQ_=4o!8JIGK`7W>ywK*_pJe&(YmE$K|u7<(B3{ublefj%Hug-8qcA_@r3P+Bjs2u8{=Y$i;0DavE*NgVbVJX*tdT_J7S_a zurHcf8}_p4$OpW+WjA|vMG+Md$$O%>&`8E?j=(n}obVlCBqxOu78JsW&|tzsg4q!q zOz3vW1GlryD~K011aLmzpP`HV3EH@Y>iSLicy3~-`v#&C)^p_0di*!8!_{LgqXYbi zjbDS`s@L%l^|V_>K!`7I`}$(!wTQ%YcbUHs+mZ9}8s^5STs~Mr%V4KvUvv6Qewvh8#}3e3kr1}GEHP8?FaOhyvO}^&|RW_{~PJM-Vd_&^n1F~u-|=QTPO2@ z@wcK;uD{NBaNaRna8kW=>Aks5?R}ennE!4(KL1lSGk)KaN$t7wu8V0S$B%bIXaS-)lT8(*G%Ygh-YhQ6u%&zuY0^v=e0+fFBr?}(N8tkruz z=d;Bd{yQ)H!ud{EDQk^e9tV_0V{@H{Jr$*fR5MW~+P}@Uhs;;15S|m@xl`IaWK6jX zPl|t~M%tU2nszLiaG_FvU5B5`uvQkz#-zJJMQ^40)c(Y9rBc3+<8@`$tGBhSL4lij)SXCqMI@Snhrg{I+al`55z;5B`Fk7{MR4xqrky5~&%WDUU{*gyt-Tm+F6wA`C)KC8~%#f3`@?w_mIzJXv zl{S8(l*tDnhFEp=2|Sl~rZA@@R1L0^*Pj}6*w~<;dzVMLKJn}e3Hd#DDHU@Ag1$av zfF-pW*f*-%;Eey5(R6(3fV9l|i1u90a%Zc5m^c+H diff --git a/GPatch/Launcher/Launcher.nsi b/GPatch/Launcher/Launcher.nsi deleted file mode 100644 index 5c0e3e2..0000000 --- a/GPatch/Launcher/Launcher.nsi +++ /dev/null @@ -1,75 +0,0 @@ -SetCompressor /SOLID lzma - -!define PRODUCT_NAME "Giants Launcher" -!define PRODUCT_VERSION "1.0.0.2" - -; MUI 1.67 compatible ------ -!include "MUI.nsh" - -; MUI Settings -!define MUI_ABORTWARNING -!define MUI_ICON "GPatch.ico" - -; Welcome page -;!insertmacro MUI_PAGE_WELCOME -; Directory page 
-!insertmacro MUI_PAGE_DIRECTORY -; Instfiles page -!insertmacro MUI_PAGE_INSTFILES - -!define MUI_LANGDLL_REGISTRY_ROOT "HKCU" -!define MUI_LANGDLL_REGISTRY_KEY "Software\PlanetMoon\Giants" -!define MUI_LANGDLL_REGISTRY_VALUENAME "SetupLanguage" - -; Language files -!insertmacro MUI_LANGUAGE "English" - -; MUI end ------ - -Name "Giants Launcher Update" -OutFile "LauncherUpdate_1002.exe" -InstallDir "C:\Program Files\Giants" -InstallDirRegKey HKCU "SOFTWARE\PlanetMoon\Giants" "DestDir" -ShowInstDetails hide - -;Request application privileges for Windows Vista -RequestExecutionLevel admin - -Section - SetDetailsView hide - SectionIn RO - SetOverwrite on - - - SetOutPath "$INSTDIR" - File /r "Giants.exe" - - -SectionEnd - -Function .onInit - Processes::KillProcess "Giants.exe" - Processes::FindProcess "Giants.exe" - ${If} $R0 == 1 - MessageBox MB_OK "Please close the Giants launcher before installing this update." - Abort - ${EndIf} - - ClearErrors - FileOpen $R0 "$INSTDIR\Giants.exe" w - ${If} ${Errors} - MessageBox MB_OK "Could not write to Giants.exe. Please ensure the Giants launcher is closed." - Abort - ${Else} - FileClose $R0 - ${EndIf} -FunctionEnd - -Function .onInstFailed - MessageBox MB_OK "Update failed. Please visit www.giantswd.org and download the latest version manually." -FunctionEnd - -Function .onInstSuccess - MessageBox MB_OK "Update complete!" - Exec "$INSTDIR\Giants.exe" -FunctionEnd \ No newline at end of file diff --git a/Sdk/External/DirectXMath/.nuget/directxmath.nuspec b/Sdk/External/DirectXMath/.nuget/directxmath.nuspec new file mode 100644 index 0000000..c7848ee --- /dev/null +++ b/Sdk/External/DirectXMath/.nuget/directxmath.nuspec @@ -0,0 +1,31 @@ + + + + directxmath + 0.0.0-SpecifyVersionOnCommandline + DirectXMath + Microsoft + microsoft,directxtk + DirectXMath is an all inline SIMD C++ linear algebra library for use in games and graphics apps. 
+ The DirectXMath API provides SIMD-friendly C++ types and functions for common linear algebra and graphics math operations common to DirectX applications. The library provides optimized versions for Windows 32-bit (x86), Windows 64-bit (x64), and Windows on ARM through SSE2 and ARM-NEON intrinsics support in the Visual Studio compiler. + Matches the August 2020 release. + http://go.microsoft.com/fwlink/?LinkID=615560 + images\icon.jpg + MIT + false + © Microsoft Corporation. All rights reserved. + C++ native DirectX math nativepackage + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXMath/.nuget/directxmath.targets b/Sdk/External/DirectXMath/.nuget/directxmath.targets new file mode 100644 index 0000000..0a31f57 --- /dev/null +++ b/Sdk/External/DirectXMath/.nuget/directxmath.targets @@ -0,0 +1,11 @@ + + + + + + HAS_DIRECTXMATH;%(PreprocessorDefinitions) + $(MSBuildThisFileDirectory)..\..\include;%(AdditionalIncludeDirectories) + + + + diff --git a/Sdk/External/DirectXMath/.nuget/icon.jpg b/Sdk/External/DirectXMath/.nuget/icon.jpg new file mode 100644 index 0000000000000000000000000000000000000000..08fe1faeb7f6e45d796cf1e67bf1cbb1347c514a GIT binary patch literal 3479 zcmbW$XHe5?mjLj82q7Rr2u1}(Y6PT8@1RJ((ximmf=CfjB1P&|i4>8JSGXbwVrWvN zBb`vCw;<9%dPy)y2mxMvcjxZR&VJb4^UV45oO$N_<~-+Q;$#kBGtkl30YD%S;BN{b_aR={-OXhp_WW z-DEgt>I~(7gphuk_?}Tfv#jO(ogpGp#>Mv;)0qnwIXEv}6%-P_CL${*ub`-;d`nAP zM^{hZz|7pj5@ls=gLb{^cF*0z6Z6>5KOpc)(DMlFi^!{3zLA% z9pZTx-=Q-Xkh1evNxy0Tmi_Nw&;DPse_;RRngn2A(CP5Na6kjtrwV8Pc}BQcs3RN^ zqB7U+Ui?6PBs5OkNy(>k_)is2(G@0jHLtCOS0Y}8aNO!gr(rF$N^Zmt{cf&BrNx?( z9Mf?vlnyKwWC`Vsw*mYRsHZdoN?XxtnDTKSk(O9tSs;+c364gpHQ^ATofK3Y<&VYf zDPvP_;AF|$jI{K^lUTY(K zn7ol&+vJ?@x!p3k5Qg%{L_nLq_y)VA%^lJCoSmpImpHmZ3b6aSpC&W#*ReWo%~tZ9 zYy>Um(b`C+!7j-vP3_udwd#}+zpdFfuGw~R-gRZvcQgh$?x=1|-w{OGYN4?N`84mG zKLJ{`=M;-@$$r}@+T}~T=+5o$y9eQOa=rF>b=A|#uf^ZoWue)HNAh4FHl3C8Q0Jyu zT?!pZDXVw(;zM^3T3($1;OckHl@4^((ho!BM8T+ZVi<0iTbcxOxIoN3{v--G#~z>$ 
z`-52f8_~oEZ|nLtW<0}XJY_g@eWhn^-{-)RC3!8NrDpa`o!()FUieD^&x>3+EZOb2 zh#cgBQcl6bz_1qY&8^}bY=pEyKDUk(8=%v4M?q;HWABISiK{FBpC* z@~Fi%UCbw5^|Y~>#cGG3m=#sN#n9k+^{P6fQe@p=?)^=u3P>YN^45C8QM??%&UL!y zh}LoJ$55F4o$^wvrs_*H$iMf_B7@)+EB3JGB)Junj!IFrc;z^$eCy5NPuQd4d}qjB z?3!B9Dl*1g35IL1D#-|Aq%^LyH6k;csr8vP={kqkg!q-}QlW>%O5Bv=z0mUxbpd1C zp997qA+Hu^Ig86l7L{zI%G}si6^!%+?a45bY4^(8fc8*~9WJJki-PVD?kn)l z@_gSnp2D_4RzjrLmC9i{@7L>lV4Qy1@jI0Y?oP)hoB+*DF*3)*&Ib36+lkS9Z#cT> zrR6Omeb818{1ncGcv$~rB(}uX1gpf%xx0-BLM!?-|6H@P$c&Mw_|h%DC+e{&ky<9& z!rhih^;JHfFY}f@mG~?Oc-gj$}>%ZBKOsxb_9h5?*OCWMaL?K1B0w6=h>g$sw?hFN*f~$kIm~x z{Db};GeV8{;yuo&-}O=T!;8N?)*z7cD(&xx{((`$4_Y_Z*lF|tn+#8UTI-ijfCZN2 zkI#*UWSeGt+V0Od9Q4byVmXdjd;$aJd|s*=O3zSYmexuYga*`Imx2W!HbwIo^mCl0 z3JA{yuf~bC2OVYXM0v|y;mmj-UX`h4=C)#%>ZH6Bjt#}m9~o9=o?V+vZY4I1)ftfL zQibt!I;`*eFf#~>?YuD~ebQHB+n4Ppz+YdA^aVq8@6v$c{su;EO%dUp2y>V0XECDf zA=adM?~>f%5SfpL0U^a`Nq#N{E;fX(p{Z`!^o*^QZDsp3{-I{h)g`Aa6gI?!k~nAR zHch5fMD2Z;#I9H0x8a51=xJ(c$|0)yu}Y731@B({_V zIS*1Qa*aQrGY9AzQwBAyqS)Qsm-L$972((IoF0(-l~HiWRMJ)zHYDngP&_7$bw8u7 z>CfPwPcy~44!b56RiBCKqW0rd z^lB|CwnlfuK{TyeikDK^XuT3xl&gM5FK+I1n|yerss8&VSu{x!PaC(mw!MalR;`*h zP6atox*856#LrOt$B)HJMYJztmfqBV4GX>a3OCJI%wtJ3Ekmo-m$2n!NqM zI48cN_?-y5e~{D&zSTx;piHEohpWt05jAUO4hc`0^ZuV5XU>p#c8 zh-3}%lUoYA`g+G^{L|xLbGN$s3|IFr2T4v^kw>GVWU;Z;8L82nsyIC)J|Tk;06iSc zzl=I&_<>rhD?g^A_&c%%_V2f&HX6jDh4BwZy5EIEx_|W$j2L1J2dKwQb5|5aEbIF* zrbEjVGm@0p$CR(Gt2zC5o0EjN-iCi}J^^Os*2H;ZOpI<^<&uswf1aRQ5}qfQPdaSP z{LT%Jx2=v>u(7>MRJvZxa=^v&(JPW@S>YECYjyi->=A`@9(4*SG9I}Sl;E+L-yov< z-fGgvAaT{lu6calw{^XMb%(BG8b}_Qd>2(Nq1yZzO|PKCy+ZTcTJEea{T}It{R}M0 zW8zH{yQqXT%1HZ5cKO6ym4@CXWN(8xw>->Gu=ZX=%6g-H+E+0#9BO)^*YIk{BNG4j zTf^_;RLm4!55Syn+`u3s!Gm03+?Br|mGzdd%WQ8v<-^juKhMtN0PmK_P-KYn^;dG# zaryX*){UmM#U1xYqp#6wx)RK4+CP3FRqt)9P^eYdFc&o2S^F)1a%5&s>K`MbhNdwzh(4!tY~Jy zq~gW;q;UhH>x%K5rx0VGjlohsJiQ{3go_TboLscKc|Q(alg7bb;~mo^7RVjjAGoep zQqag1Xcef@ObvU}1sQTwUZwU-RsU!`LRiu}f@uO?3tr-$Jt}{Pm){mV6iRCpbnZI* zCIE#NE0!4G7==Z7BM|2kS*D8=#xXW_yh%)p?n}3ZQesaRzN=@KCQaJ@^kD`XY+5JN 
zIqTSPd2v;D*=vJ3U++jPY&(`CeDbq5Ew-3{+2cOv^vvX}&SESnc?4k~V$kawlT@xA zSW9})w9rk^BJM?eOZ{BPC@up-BToRvPBSW@j3z;mf?_|sv?kCBKRJaAX{oz!#g`l! z?+p6ymFN!E%VbmBMmFo@|AxH1xAVm7r^aFL6CeC|CHz@jYpl8d8R6`_tn3(J;D1Q) H$+v$2rQD|6 literal 0 HcmV?d00001 diff --git a/Sdk/External/DirectXMath/.nuget/signconfig.xml b/Sdk/External/DirectXMath/.nuget/signconfig.xml new file mode 100644 index 0000000..f32a6a4 --- /dev/null +++ b/Sdk/External/DirectXMath/.nuget/signconfig.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathAVX.h b/Sdk/External/DirectXMath/Extensions/DirectXMathAVX.h new file mode 100644 index 0000000..bdcaec0 --- /dev/null +++ b/Sdk/External/DirectXMath/Extensions/DirectXMathAVX.h @@ -0,0 +1,275 @@ +//------------------------------------------------------------------------------------- +// DirectXMathAVX.h -- AVX (version 1) extensions for SIMD C++ Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__ +#error AVX not supported on ARM platform +#endif + +#include + +namespace DirectX +{ + +namespace AVX +{ + +inline bool XMVerifyAVXSupport() +{ + // Should return true for AMD Bulldozer, Intel "Sandy Bridge", and Intel "Ivy Bridge" or later processors + // with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012) + + // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx + int CPUInfo[4] = {-1}; +#if defined(__clang__) || defined(__GNUC__) + __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid( CPUInfo, 0 ); +#endif + + if ( CPUInfo[0] < 1 ) + return false; + +#if defined(__clang__) || defined(__GNUC__) + __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 1 ); +#endif + + // We check for AVX, OSXSAVE, SSSE4.1, and SSE3 + return ( (CPUInfo[2] & 0x18080001) == 0x18080001 ); +} + + +//------------------------------------------------------------------------------------- +// Vector +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMVectorReplicatePtr( _In_ const float *pValue ) +{ + return _mm_broadcast_ss( pValue ); +} + +inline XMVECTOR XM_CALLCONV XMVectorSplatX( FXMVECTOR V ) +{ + return _mm_permute_ps( V, _MM_SHUFFLE(0, 0, 0, 0) ); +} + +inline XMVECTOR XM_CALLCONV XMVectorSplatY( FXMVECTOR V ) +{ + return _mm_permute_ps( V, _MM_SHUFFLE(1, 1, 1, 1) ); +} + +inline XMVECTOR XM_CALLCONV XMVectorSplatZ( FXMVECTOR V ) +{ + return _mm_permute_ps( V, _MM_SHUFFLE(2, 2, 2, 2) ); +} + +inline XMVECTOR XM_CALLCONV XMVectorSplatW( FXMVECTOR V ) +{ + return _mm_permute_ps( V, _MM_SHUFFLE(3, 3, 3, 3) ); +} + +inline XMVECTOR 
XM_CALLCONV XMVectorSwizzle( FXMVECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3 ) +{ + assert( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) ); + _Analysis_assume_( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) ); + + unsigned int elem[4] = { E0, E1, E2, E3 }; + __m128i vControl = _mm_loadu_si128( reinterpret_cast(&elem[0]) ); + return _mm_permutevar_ps( V, vControl ); +} + +inline XMVECTOR XM_CALLCONV XMVectorPermute( FXMVECTOR V1, FXMVECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW ) +{ + assert( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 ); + _Analysis_assume_( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 ); + + static const XMVECTORU32 three = { { { 3, 3, 3, 3 } } }; + + XM_ALIGNED_DATA(16) unsigned int elem[4] = { PermuteX, PermuteY, PermuteZ, PermuteW }; + __m128i vControl = _mm_load_si128( reinterpret_cast(&elem[0]) ); + + __m128i vSelect = _mm_cmpgt_epi32( vControl, three ); + vControl = _mm_castps_si128( _mm_and_ps( _mm_castsi128_ps( vControl ), three ) ); + + __m128 shuffled1 = _mm_permutevar_ps( V1, vControl ); + __m128 shuffled2 = _mm_permutevar_ps( V2, vControl ); + + __m128 masked1 = _mm_andnot_ps( _mm_castsi128_ps( vSelect ), shuffled1 ); + __m128 masked2 = _mm_and_ps( _mm_castsi128_ps( vSelect ), shuffled2 ); + + return _mm_or_ps( masked1, masked2 ); +} + +inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2, uint32_t Elements) +{ + assert( Elements < 4 ); + _Analysis_assume_( Elements < 4 ); + return AVX::XMVectorPermute(V1, V2, Elements, ((Elements) + 1), ((Elements) + 2), ((Elements) + 3)); +} + +inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V, uint32_t Elements) +{ + assert( Elements < 4 ); + _Analysis_assume_( Elements < 4 ); + return AVX::XMVectorSwizzle( V, Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3 ); +} + +inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V, uint32_t Elements) +{ + 
assert( Elements < 4 ); + _Analysis_assume_( Elements < 4 ); + return AVX::XMVectorSwizzle( V, (4 - (Elements)) & 3, (5 - (Elements)) & 3, (6 - (Elements)) & 3, (7 - (Elements)) & 3 ); +} + + +//------------------------------------------------------------------------------------- +// Permute Templates +//------------------------------------------------------------------------------------- + +namespace Internal +{ + // Slow path fallback for permutes that do not map to a single SSE opcode. + template struct PermuteHelper + { + static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) + { + static const XMVECTORU32 selectMask = + { + WhichX ? 0xFFFFFFFF : 0, + WhichY ? 0xFFFFFFFF : 0, + WhichZ ? 0xFFFFFFFF : 0, + WhichW ? 0xFFFFFFFF : 0, + }; + + XMVECTOR shuffled1 = _mm_permute_ps(v1, Shuffle); + XMVECTOR shuffled2 = _mm_permute_ps(v2, Shuffle); + + XMVECTOR masked1 = _mm_andnot_ps(selectMask, shuffled1); + XMVECTOR masked2 = _mm_and_ps(selectMask, shuffled2); + + return _mm_or_ps(masked1, masked2); + } + }; + + // Fast path for permutes that only read from the first vector. + template struct PermuteHelper + { + static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { (v2); return _mm_permute_ps(v1, Shuffle); } + }; + + // Fast path for permutes that only read from the second vector. + template struct PermuteHelper + { + static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2){ (v1); return _mm_permute_ps(v2, Shuffle); } + }; + + // Fast path for permutes that read XY from the first vector, ZW from the second. + template struct PermuteHelper + { + static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v1, v2, Shuffle); } + }; + + // Fast path for permutes that read XY from the second vector, ZW from the first. 
+ template struct PermuteHelper + { + static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v2, v1, Shuffle); } + }; +}; + +// General permute template +template + inline XMVECTOR XM_CALLCONV XMVectorPermute(FXMVECTOR V1, FXMVECTOR V2) +{ + static_assert(PermuteX <= 7, "PermuteX template parameter out of range"); + static_assert(PermuteY <= 7, "PermuteY template parameter out of range"); + static_assert(PermuteZ <= 7, "PermuteZ template parameter out of range"); + static_assert(PermuteW <= 7, "PermuteW template parameter out of range"); + + const uint32_t Shuffle = _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3); + + const bool WhichX = PermuteX > 3; + const bool WhichY = PermuteY > 3; + const bool WhichZ = PermuteZ > 3; + const bool WhichW = PermuteW > 3; + + return AVX::Internal::PermuteHelper::Permute(V1, V2); +} + +// Special-case permute templates +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,3>(FXMVECTOR V1, FXMVECTOR) { return V1; } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,7>(FXMVECTOR, FXMVECTOR V2) { return V2; } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x1); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x2); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x3); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x4); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x5); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x6); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { 
return _mm_blend_ps(V1,V2,0x7); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x8); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x9); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xA); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xB); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xC); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xD); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xE); } + + +//------------------------------------------------------------------------------------- +// Swizzle Templates +//------------------------------------------------------------------------------------- + +// General swizzle template +template + inline XMVECTOR XM_CALLCONV XMVectorSwizzle(FXMVECTOR V) +{ + static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range"); + static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range"); + static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range"); + static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range"); + + return _mm_permute_ps( V, _MM_SHUFFLE( SwizzleW, SwizzleZ, SwizzleY, SwizzleX ) ); +} + +// Specialized swizzles +template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,1,2,3>(FXMVECTOR V) { return V; } +template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,0,2,2>(FXMVECTOR V) { return _mm_moveldup_ps(V); } +template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,1,3,3>(FXMVECTOR V) { return _mm_movehdup_ps(V); } + + 
+//------------------------------------------------------------------------------------- +// Other Templates +//------------------------------------------------------------------------------------- + +template + inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2) +{ + static_assert( Elements < 4, "Elements template parameter out of range" ); + return AVX::XMVectorPermute(V1, V2); +} + +template + inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V) +{ + static_assert( Elements < 4, "Elements template parameter out of range" ); + return AVX::XMVectorSwizzle(V); +} + +template + inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V) +{ + static_assert( Elements < 4, "Elements template parameter out of range" ); + return AVX::XMVectorSwizzle<(4 - Elements) & 3, (5 - Elements) & 3, (6 - Elements) & 3, (7 - Elements) & 3>(V); +} + +} // namespace AVX + +} // namespace DirectX; diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathAVX2.h b/Sdk/External/DirectXMath/Extensions/DirectXMathAVX2.h new file mode 100644 index 0000000..329849b --- /dev/null +++ b/Sdk/External/DirectXMath/Extensions/DirectXMathAVX2.h @@ -0,0 +1,1037 @@ +//------------------------------------------------------------------------------------- +// DirectXMathAVX2.h -- AVX2 extensions for SIMD C++ Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__ +#error AVX2 not supported on ARM platform +#endif + +#include +#include + +namespace DirectX +{ + +namespace AVX2 +{ + +inline bool XMVerifyAVX2Support() +{ + // Should return true for AMD "Excavator", Intel "Haswell" or later processors + // with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012) + + // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx + int CPUInfo[4] = {-1}; +#if defined(__clang__) || defined(__GNUC__) + __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 0); +#endif + + if ( CPUInfo[0] < 7 ) + return false; + +#if defined(__clang__) || defined(__GNUC__) + __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 1); +#endif + + // We check for F16C, FMA3, AVX, OSXSAVE, SSSE4.1, and SSE3 + if ( (CPUInfo[2] & 0x38081001) != 0x38081001 ) + return false; + +#if defined(__clang__) || defined(__GNUC__) + __cpuid_count(7, 0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuidex(CPUInfo, 7, 0); +#endif + + return ( (CPUInfo[1] & 0x20 ) == 0x20 ); +} + + +//------------------------------------------------------------------------------------- +// Vector +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMVectorReplicatePtr( _In_ const float *pValue ) +{ + return _mm_broadcast_ss( pValue ); +} + +inline XMVECTOR XM_CALLCONV XMVectorSplatX( FXMVECTOR V ) +{ + return _mm_broadcastss_ps( V ); +} + +inline XMVECTOR XM_CALLCONV XMVectorSplatY( FXMVECTOR V ) +{ + return _mm_permute_ps( V, _MM_SHUFFLE(1, 1, 1, 1) ); +} + +inline XMVECTOR XM_CALLCONV XMVectorSplatZ( FXMVECTOR V ) +{ + 
return _mm_permute_ps( V, _MM_SHUFFLE(2, 2, 2, 2) ); +} + +inline XMVECTOR XM_CALLCONV XMVectorSplatW( FXMVECTOR V ) +{ + return _mm_permute_ps( V, _MM_SHUFFLE(3, 3, 3, 3) ); +} + +inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd +( + FXMVECTOR V1, + FXMVECTOR V2, + FXMVECTOR V3 +) +{ + return _mm_fmadd_ps( V1, V2, V3 ); +} + +inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract +( + FXMVECTOR V1, + FXMVECTOR V2, + FXMVECTOR V3 +) +{ + return _mm_fnmadd_ps( V1, V2, V3 ); +} + +inline XMVECTOR XM_CALLCONV XMVectorSwizzle( FXMVECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3 ) +{ + assert( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) ); + _Analysis_assume_( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) ); + + unsigned int elem[4] = { E0, E1, E2, E3 }; + __m128i vControl = _mm_loadu_si128( reinterpret_cast(&elem[0]) ); + return _mm_permutevar_ps( V, vControl ); +} + +inline XMVECTOR XM_CALLCONV XMVectorPermute( FXMVECTOR V1, FXMVECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW ) +{ + assert( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 ); + _Analysis_assume_( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 ); + + static const XMVECTORU32 three = { { { 3, 3, 3, 3 } } }; + + XM_ALIGNED_DATA(16) unsigned int elem[4] = { PermuteX, PermuteY, PermuteZ, PermuteW }; + __m128i vControl = _mm_load_si128( reinterpret_cast(&elem[0]) ); + + __m128i vSelect = _mm_cmpgt_epi32( vControl, three ); + vControl = _mm_castps_si128( _mm_and_ps( _mm_castsi128_ps( vControl ), three ) ); + + __m128 shuffled1 = _mm_permutevar_ps( V1, vControl ); + __m128 shuffled2 = _mm_permutevar_ps( V2, vControl ); + + __m128 masked1 = _mm_andnot_ps( _mm_castsi128_ps( vSelect ), shuffled1 ); + __m128 masked2 = _mm_and_ps( _mm_castsi128_ps( vSelect ), shuffled2 ); + + return _mm_or_ps( masked1, masked2 ); +} + +inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2, uint32_t Elements) +{ + assert( 
Elements < 4 ); + _Analysis_assume_( Elements < 4 ); + return AVX2::XMVectorPermute(V1, V2, Elements, ((Elements) + 1), ((Elements) + 2), ((Elements) + 3)); +} + +inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V, uint32_t Elements) +{ + assert( Elements < 4 ); + _Analysis_assume_( Elements < 4 ); + return AVX2::XMVectorSwizzle( V, Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3 ); +} + +inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V, uint32_t Elements) +{ + assert( Elements < 4 ); + _Analysis_assume_( Elements < 4 ); + return AVX2::XMVectorSwizzle( V, (4 - (Elements)) & 3, (5 - (Elements)) & 3, (6 - (Elements)) & 3, (7 - (Elements)) & 3 ); +} + + +//------------------------------------------------------------------------------------- +// Vector2 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMVector2Transform +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_fmadd_ps( vResult, M.r[1], M.r[3] ); + XMVECTOR vTemp = _mm_broadcastss_ps(V); // X + vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); + return vResult; +} + +inline XMVECTOR XM_CALLCONV XMVector2TransformCoord +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_fmadd_ps( vResult, M.r[1], M.r[3] ); + XMVECTOR vTemp = _mm_broadcastss_ps(V); // X + vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); + XMVECTOR W = _mm_permute_ps(vResult,_MM_SHUFFLE(3,3,3,3)); + vResult = _mm_div_ps( vResult, W ); + return vResult; +} + +inline XMVECTOR XM_CALLCONV XMVector2TransformNormal +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_mul_ps( vResult, M.r[1] ); + XMVECTOR vTemp = _mm_broadcastss_ps(V); // X + vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); + return vResult; +} + + 
+//------------------------------------------------------------------------------------- +// Vector3 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMVector3Transform +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z + vResult = _mm_fmadd_ps( vResult, M.r[2], M.r[3] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult ); + vTemp = _mm_broadcastss_ps(V); // X + vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); + return vResult; +} + +inline XMVECTOR XM_CALLCONV XMVector3TransformCoord +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z + vResult = _mm_fmadd_ps( vResult, M.r[2], M.r[3] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult ); + vTemp = _mm_broadcastss_ps(V); // X + vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); + XMVECTOR W = _mm_permute_ps(vResult,_MM_SHUFFLE(3,3,3,3)); + vResult = _mm_div_ps( vResult, W ); + return vResult; +} + +inline XMVECTOR XM_CALLCONV XMVector3TransformNormal +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z + vResult = _mm_mul_ps( vResult, M.r[2] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult ); + vTemp = _mm_broadcastss_ps(V); // X + vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); + return vResult; +} + +XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2); + +inline XMVECTOR XM_CALLCONV XMVector3Project +( + FXMVECTOR V, + float ViewportX, + float ViewportY, + float ViewportWidth, + float ViewportHeight, + float ViewportMinZ, + float ViewportMaxZ, + CXMMATRIX Projection, + CXMMATRIX View, + CXMMATRIX World +) +{ + const float HalfViewportWidth = ViewportWidth * 0.5f; + const float 
HalfViewportHeight = ViewportHeight * 0.5f; + + XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, ViewportMaxZ - ViewportMinZ, 0.0f); + XMVECTOR Offset = XMVectorSet(ViewportX + HalfViewportWidth, ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f); + + XMMATRIX Transform = AVX2::XMMatrixMultiply(World, View); + Transform = AVX2::XMMatrixMultiply(Transform, Projection); + + XMVECTOR Result = AVX2::XMVector3TransformCoord(V, Transform); + + Result = AVX2::XMVectorMultiplyAdd(Result, Scale, Offset); + + return Result; +} + +inline XMVECTOR XM_CALLCONV XMVector3Unproject +( + FXMVECTOR V, + float ViewportX, + float ViewportY, + float ViewportWidth, + float ViewportHeight, + float ViewportMinZ, + float ViewportMaxZ, + CXMMATRIX Projection, + CXMMATRIX View, + CXMMATRIX World +) +{ + static const XMVECTORF32 D = { { { -1.0f, 1.0f, 0.0f, 0.0f } } }; + + XMVECTOR Scale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f); + Scale = XMVectorReciprocal(Scale); + + XMVECTOR Offset = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f); + Offset = AVX2::XMVectorMultiplyAdd(Scale, Offset, D.v); + + XMMATRIX Transform = AVX2::XMMatrixMultiply(World, View); + Transform = AVX2::XMMatrixMultiply(Transform, Projection); + Transform = XMMatrixInverse(nullptr, Transform); + + XMVECTOR Result = AVX2::XMVectorMultiplyAdd(V, Scale, Offset); + + return AVX2::XMVector3TransformCoord(Result, Transform); +} + + +//------------------------------------------------------------------------------------- +// Vector4 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMVector4Transform +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(3,3,3,3)); // W + vResult = _mm_mul_ps( vResult, M.r[3] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z + vResult = _mm_fmadd_ps( vTemp, M.r[2], vResult ); + vTemp = 
_mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult ); + vTemp = _mm_broadcastss_ps(V); // X + vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); + return vResult; +} + + +//------------------------------------------------------------------------------------- +// Matrix +//------------------------------------------------------------------------------------- + +inline XMMATRIX XM_CALLCONV XMMatrixMultiply +( + CXMMATRIX M1, + CXMMATRIX M2 +) +{ + XMMATRIX mResult; + // Use vW to hold the original row + XMVECTOR vW = M1.r[0]; + // Splat the component X,Y,Z then W + XMVECTOR vX = _mm_broadcastss_ps(vW); + XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + // Perform the operation on the first row + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + mResult.r[0] = vX; + // Repeat for the other 3 rows + vW = M1.r[1]; + vX = _mm_broadcastss_ps(vW); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + mResult.r[1] = vX; + vW = M1.r[2]; + vX = _mm_broadcastss_ps(vW); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + mResult.r[2] = vX; + vW = M1.r[3]; + vX = _mm_broadcastss_ps(vW); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = 
_mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + mResult.r[3] = vX; + return mResult; +} + +inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose +( + FXMMATRIX M1, + CXMMATRIX M2 +) +{ + // Use vW to hold the original row + XMVECTOR vW = M1.r[0]; + // Splat the component X,Y,Z then W + XMVECTOR vX = _mm_broadcastss_ps(vW); + XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + // Perform the operation on the first row + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + __m128 r0 = vX; + // Repeat for the other 3 rows + vW = M1.r[1]; + vX = _mm_broadcastss_ps(vW); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + __m128 r1 = vX; + vW = M1.r[2]; + vX = _mm_broadcastss_ps(vW); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + __m128 r2 = vX; + vW = M1.r[3]; + vX = _mm_broadcastss_ps(vW); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + __m128 r3 = vX; + + // x.x,x.y,y.x,y.y + XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0)); + // x.z,x.w,y.z,y.w + XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2)); + // 
z.x,z.y,w.x,w.y + XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0)); + // z.z,z.w,w.z,w.w + XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2)); + + XMMATRIX mResult; + // x.x,y.x,z.x,w.x + mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0)); + // x.y,y.y,z.y,w.y + mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1)); + // x.z,y.z,z.z,w.z + mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0)); + // x.w,y.w,z.w,w.w + mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1)); + return mResult; +} + + +//------------------------------------------------------------------------------------- +// Permute Templates +//------------------------------------------------------------------------------------- + +namespace Internal +{ + // Slow path fallback for permutes that do not map to a single SSE opcode. + template struct PermuteHelper + { + static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) + { + static const XMVECTORU32 selectMask = + { + WhichX ? 0xFFFFFFFF : 0, + WhichY ? 0xFFFFFFFF : 0, + WhichZ ? 0xFFFFFFFF : 0, + WhichW ? 0xFFFFFFFF : 0, + }; + + XMVECTOR shuffled1 = _mm_permute_ps(v1, Shuffle); + XMVECTOR shuffled2 = _mm_permute_ps(v2, Shuffle); + + XMVECTOR masked1 = _mm_andnot_ps(selectMask, shuffled1); + XMVECTOR masked2 = _mm_and_ps(selectMask, shuffled2); + + return _mm_or_ps(masked1, masked2); + } + }; + + // Fast path for permutes that only read from the first vector. + template struct PermuteHelper + { + static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { (v2); return _mm_permute_ps(v1, Shuffle); } + }; + + // Fast path for permutes that only read from the second vector. + template struct PermuteHelper + { + static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2){ (v1); return _mm_permute_ps(v2, Shuffle); } + }; + + // Fast path for permutes that read XY from the first vector, ZW from the second. 
+ template struct PermuteHelper + { + static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v1, v2, Shuffle); } + }; + + // Fast path for permutes that read XY from the second vector, ZW from the first. + template struct PermuteHelper + { + static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v2, v1, Shuffle); } + }; +}; + +// General permute template +template + inline XMVECTOR XM_CALLCONV XMVectorPermute(FXMVECTOR V1, FXMVECTOR V2) +{ + static_assert(PermuteX <= 7, "PermuteX template parameter out of range"); + static_assert(PermuteY <= 7, "PermuteY template parameter out of range"); + static_assert(PermuteZ <= 7, "PermuteZ template parameter out of range"); + static_assert(PermuteW <= 7, "PermuteW template parameter out of range"); + + const uint32_t Shuffle = _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3); + + const bool WhichX = PermuteX > 3; + const bool WhichY = PermuteY > 3; + const bool WhichZ = PermuteZ > 3; + const bool WhichW = PermuteW > 3; + + return AVX2::Internal::PermuteHelper::Permute(V1, V2); +} + +// Special-case permute templates +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,3>(FXMVECTOR V1, FXMVECTOR) { return V1; } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,7>(FXMVECTOR, FXMVECTOR V2) { return V2; } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x1); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x2); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x3); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x4); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return 
_mm_blend_ps(V1,V2,0x5); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x6); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x7); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x8); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x9); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xA); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xB); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xC); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xD); } +template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xE); } + + +//------------------------------------------------------------------------------------- +// Swizzle Templates +//------------------------------------------------------------------------------------- + +// General swizzle template +template + inline XMVECTOR XM_CALLCONV XMVectorSwizzle(FXMVECTOR V) +{ + static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range"); + static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range"); + static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range"); + static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range"); + + return _mm_permute_ps( V, _MM_SHUFFLE( SwizzleW, SwizzleZ, SwizzleY, SwizzleX ) ); +} + +// Specialized swizzles +template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,1,2,3>(FXMVECTOR V) { 
return V; } +template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,0,0,0>(FXMVECTOR V) { return _mm_broadcastss_ps(V); } +template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,0,2,2>(FXMVECTOR V) { return _mm_moveldup_ps(V); } +template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,1,3,3>(FXMVECTOR V) { return _mm_movehdup_ps(V); } + + +//------------------------------------------------------------------------------------- +// Other Templates +//------------------------------------------------------------------------------------- + +template + inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2) +{ + static_assert( Elements < 4, "Elements template parameter out of range" ); + return AVX2::XMVectorPermute(V1, V2); +} + +template + inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V) +{ + static_assert( Elements < 4, "Elements template parameter out of range" ); + return AVX2::XMVectorSwizzle(V); +} + +template + inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V) +{ + static_assert( Elements < 4, "Elements template parameter out of range" ); + return AVX2::XMVectorSwizzle<(4 - Elements) & 3, (5 - Elements) & 3, (6 - Elements) & 3, (7 - Elements) & 3>(V); +} + +//------------------------------------------------------------------------------------- +// Data conversion +//------------------------------------------------------------------------------------- + +inline float XMConvertHalfToFloat( PackedVector::HALF Value ) +{ + __m128i V1 = _mm_cvtsi32_si128( static_cast(Value) ); + __m128 V2 = _mm_cvtph_ps( V1 ); + return _mm_cvtss_f32( V2 ); +} + +inline PackedVector::HALF XMConvertFloatToHalf( float Value ) +{ + __m128 V1 = _mm_set_ss( Value ); + __m128i V2 = _mm_cvtps_ph( V1, 0 ); + return static_cast( _mm_cvtsi128_si32(V2) ); +} + +inline float* XMConvertHalfToFloatStream +( + _Out_writes_bytes_(sizeof(float)+OutputStride*(HalfCount-1)) float* pOutputStream, + _In_ size_t OutputStride, + 
_In_reads_bytes_(2+InputStride*(HalfCount-1)) const PackedVector::HALF* pInputStream, + _In_ size_t InputStride, + _In_ size_t HalfCount +) +{ + using namespace PackedVector; + + assert(pOutputStream); + assert(pInputStream); + + assert(InputStride >= sizeof(HALF)); + assert(OutputStride >= sizeof(float)); + + auto pHalf = reinterpret_cast(pInputStream); + auto pFloat = reinterpret_cast(pOutputStream); + + size_t i = 0; + size_t four = HalfCount >> 2; + if (four > 0) + { + if (InputStride == sizeof(HALF)) + { + if (OutputStride == sizeof(float)) + { + if ((reinterpret_cast(pFloat) & 0xF) == 0) + { + // Packed input, aligned & packed output + for (size_t j = 0; j < four; ++j) + { + __m128i HV = _mm_loadl_epi64(reinterpret_cast(pHalf)); + pHalf += InputStride * 4; + + __m128 FV = _mm_cvtph_ps(HV); + + _mm_stream_ps(reinterpret_cast(pFloat), FV); + pFloat += OutputStride * 4; + i += 4; + } + } + else + { + // Packed input, packed output + for (size_t j = 0; j < four; ++j) + { + __m128i HV = _mm_loadl_epi64(reinterpret_cast(pHalf)); + pHalf += InputStride * 4; + + __m128 FV = _mm_cvtph_ps(HV); + + _mm_storeu_ps(reinterpret_cast(pFloat), FV); + pFloat += OutputStride * 4; + i += 4; + } + } + } + else + { + // Packed input, scattered output + for (size_t j = 0; j < four; ++j) + { + __m128i HV = _mm_loadl_epi64(reinterpret_cast(pHalf)); + pHalf += InputStride * 4; + + __m128 FV = _mm_cvtph_ps(HV); + + _mm_store_ss(reinterpret_cast(pFloat), FV); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 1); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 2); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 3); + pFloat += OutputStride; + i += 4; + } + } + } + else if (OutputStride == sizeof(float)) + { + if ((reinterpret_cast(pFloat) & 0xF) == 0) + { + // Scattered input, aligned & packed output + for (size_t j = 0; j < four; ++j) + { + uint16_t H1 = *reinterpret_cast(pHalf); + pHalf += InputStride; + 
uint16_t H2 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H3 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H4 = *reinterpret_cast(pHalf); + pHalf += InputStride; + + __m128i HV = _mm_setzero_si128(); + HV = _mm_insert_epi16(HV, H1, 0); + HV = _mm_insert_epi16(HV, H2, 1); + HV = _mm_insert_epi16(HV, H3, 2); + HV = _mm_insert_epi16(HV, H4, 3); + __m128 FV = _mm_cvtph_ps(HV); + + _mm_stream_ps(reinterpret_cast(pFloat), FV); + pFloat += OutputStride * 4; + i += 4; + } + } + else + { + // Scattered input, packed output + for (size_t j = 0; j < four; ++j) + { + uint16_t H1 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H2 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H3 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H4 = *reinterpret_cast(pHalf); + pHalf += InputStride; + + __m128i HV = _mm_setzero_si128(); + HV = _mm_insert_epi16(HV, H1, 0); + HV = _mm_insert_epi16(HV, H2, 1); + HV = _mm_insert_epi16(HV, H3, 2); + HV = _mm_insert_epi16(HV, H4, 3); + __m128 FV = _mm_cvtph_ps(HV); + + _mm_storeu_ps(reinterpret_cast(pFloat), FV); + pFloat += OutputStride * 4; + i += 4; + } + + } + } + else + { + // Scattered input, scattered output + for (size_t j = 0; j < four; ++j) + { + uint16_t H1 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H2 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H3 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H4 = *reinterpret_cast(pHalf); + pHalf += InputStride; + + __m128i HV = _mm_setzero_si128(); + HV = _mm_insert_epi16(HV, H1, 0); + HV = _mm_insert_epi16(HV, H2, 1); + HV = _mm_insert_epi16(HV, H3, 2); + HV = _mm_insert_epi16(HV, H4, 3); + __m128 FV = _mm_cvtph_ps(HV); + + _mm_store_ss(reinterpret_cast(pFloat), FV); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 1); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 2); + pFloat += OutputStride; + 
*reinterpret_cast(pFloat) = _mm_extract_ps(FV, 3); + pFloat += OutputStride; + i += 4; + } + } + } + + for (; i < HalfCount; ++i) + { + *reinterpret_cast(pFloat) = XMConvertHalfToFloat(reinterpret_cast(pHalf)[0]); + pHalf += InputStride; + pFloat += OutputStride; + } + + return pOutputStream; +} + + +inline PackedVector::HALF* XMConvertFloatToHalfStream +( + _Out_writes_bytes_(2+OutputStride*(FloatCount-1)) PackedVector::HALF* pOutputStream, + _In_ size_t OutputStride, + _In_reads_bytes_(sizeof(float)+InputStride*(FloatCount-1)) const float* pInputStream, + _In_ size_t InputStride, + _In_ size_t FloatCount +) +{ + using namespace PackedVector; + + assert(pOutputStream); + assert(pInputStream); + + assert(InputStride >= sizeof(float)); + assert(OutputStride >= sizeof(HALF)); + + auto pFloat = reinterpret_cast(pInputStream); + auto pHalf = reinterpret_cast(pOutputStream); + + size_t i = 0; + size_t four = FloatCount >> 2; + if (four > 0) + { + if (InputStride == sizeof(float)) + { + if (OutputStride == sizeof(HALF)) + { + if ((reinterpret_cast(pFloat) & 0xF) == 0) + { + // Aligned and packed input, packed output + for (size_t j = 0; j < four; ++j) + { + __m128 FV = _mm_load_ps(reinterpret_cast(pFloat)); + pFloat += InputStride * 4; + + __m128i HV = _mm_cvtps_ph(FV, 0); + + _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV); + pHalf += OutputStride * 4; + i += 4; + } + } + else + { + // Packed input, packed output + for (size_t j = 0; j < four; ++j) + { + __m128 FV = _mm_loadu_ps(reinterpret_cast(pFloat)); + pFloat += InputStride * 4; + + __m128i HV = _mm_cvtps_ph(FV, 0); + + _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV); + pHalf += OutputStride * 4; + i += 4; + } + } + } + else + { + if ((reinterpret_cast(pFloat) & 0xF) == 0) + { + // Aligned & packed input, scattered output + for (size_t j = 0; j < four; ++j) + { + __m128 FV = _mm_load_ps(reinterpret_cast(pFloat)); + pFloat += InputStride * 4; + + __m128i HV = _mm_cvtps_ph(FV, 0); + + 
*reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 0)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 1)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 2)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 3)); + pHalf += OutputStride; + i += 4; + } + } + else + { + // Packed input, scattered output + for (size_t j = 0; j < four; ++j) + { + __m128 FV = _mm_loadu_ps(reinterpret_cast(pFloat)); + pFloat += InputStride * 4; + + __m128i HV = _mm_cvtps_ph(FV, 0); + + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 0)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 1)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 2)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 3)); + pHalf += OutputStride; + i += 4; + } + } + } + } + else if (OutputStride == sizeof(HALF)) + { + // Scattered input, packed output + for (size_t j = 0; j < four; ++j) + { + __m128 FV1 = _mm_load_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV2 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV3 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV4 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV = _mm_blend_ps(FV1, FV2, 0x2); + __m128 FT = _mm_blend_ps(FV3, FV4, 0x8); + FV = _mm_blend_ps(FV, FT, 0xC); + + __m128i HV = _mm_cvtps_ph(FV, 0); + + _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV); + pHalf += OutputStride * 4; + i += 4; + } + } + else + { + // Scattered input, scattered output + for (size_t j = 0; j < four; ++j) + { + __m128 FV1 = _mm_load_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV2 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV3 = 
_mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV4 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV = _mm_blend_ps(FV1, FV2, 0x2); + __m128 FT = _mm_blend_ps(FV3, FV4, 0x8); + FV = _mm_blend_ps(FV, FT, 0xC); + + __m128i HV = _mm_cvtps_ph(FV, 0); + + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 0)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 1)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 2)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 3)); + pHalf += OutputStride; + i += 4; + } + } + } + + for (; i < FloatCount; ++i) + { + *reinterpret_cast(pHalf) = XMConvertFloatToHalf(reinterpret_cast(pFloat)[0]); + pFloat += InputStride; + pHalf += OutputStride; + } + + return pOutputStream; +} + + +//------------------------------------------------------------------------------------- +// Half2 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMLoadHalf2( _In_ const PackedVector::XMHALF2* pSource ) +{ + assert(pSource); + __m128 V = _mm_load_ss( reinterpret_cast(pSource) ); + return _mm_cvtph_ps( _mm_castps_si128( V ) ); +} + +inline void XM_CALLCONV XMStoreHalf2( _Out_ PackedVector::XMHALF2* pDestination, _In_ FXMVECTOR V ) +{ + assert(pDestination); + __m128i V1 = _mm_cvtps_ph( V, 0 ); + _mm_store_ss( reinterpret_cast(pDestination), _mm_castsi128_ps(V1) ); +} + + +//------------------------------------------------------------------------------------- +// Half4 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMLoadHalf4( _In_ const PackedVector::XMHALF4* pSource ) +{ + assert(pSource); + __m128i V = _mm_loadl_epi64( reinterpret_cast(pSource) ); + return _mm_cvtph_ps( V ); +} + +inline void XM_CALLCONV XMStoreHalf4( _Out_ 
PackedVector::XMHALF4* pDestination, _In_ FXMVECTOR V ) +{ + assert(pDestination); + __m128i V1 = _mm_cvtps_ph( V, 0 ); + _mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), V1 ); +} + +} // namespace AVX2 + +} // namespace DirectX; diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathBE.h b/Sdk/External/DirectXMath/Extensions/DirectXMathBE.h new file mode 100644 index 0000000..dca2705 --- /dev/null +++ b/Sdk/External/DirectXMath/Extensions/DirectXMathBE.h @@ -0,0 +1,95 @@ +//------------------------------------------------------------------------------------- +// DirectXMathBE.h -- Big-endian swap extensions for SIMD C++ Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +#if (defined(_M_IX86) || defined(_M_X64) || __i386__ || __x86_64__) && !defined(_M_HYBRID_X86_ARM64) +#include +#endif + +#include + +namespace DirectX +{ + +inline XMVECTOR XM_CALLCONV XMVectorEndian +( + FXMVECTOR V +) +{ +#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + static const XMVECTORU32 idx = { { { 0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu } } }; + + uint8x8x2_t tbl; + tbl.val[0] = vreinterpret_u8_f32(vget_low_f32(V)); + tbl.val[1] = vreinterpret_u8_f32(vget_high_f32(V)); + + const uint8x8_t rL = vtbl2_u8(tbl, vget_low_u32(idx)); + const uint8x8_t rH = vtbl2_u8(tbl, vget_high_u32(idx)); + return vcombine_f32(vreinterpret_f32_u8(rL), vreinterpret_f32_u8(rH)); +#else + XMVECTORU32 E; + E.v = V; + uint32_t value = E.u[0]; + E.u[0] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) ); + value = E.u[1]; + E.u[1] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) ); + value = E.u[2]; + E.u[2] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 
0xFF0000) >> 8) | (value >> 24) ); + value = E.u[3]; + E.u[3] = ( (value << 24) | ((value & 0xFF00) << 8) | ((value & 0xFF0000) >> 8) | (value >> 24) ); + return E.v; +#endif +} + + +#if (defined(_M_IX86) || defined(_M_X64) || __i386__ || __x86_64__) && !defined(_M_HYBRID_X86_ARM64) +namespace SSSE3 +{ + +inline bool XMVerifySSSE3Support() +{ + // Should return true on AMD Bulldozer, Intel Core i7/i5/i3, Intel Atom, or later processors + + // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx + int CPUInfo[4] = { -1 }; +#if defined(__clang__) || defined(__GNUC__) + __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 0); +#endif + + if ( CPUInfo[0] < 1 ) + return false; + +#if defined(__clang__) || defined(__GNUC__) + __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 1); +#endif + + // Check for SSSE3 instruction set. + return ( (CPUInfo[2] & 0x200) != 0 ); +} + +inline XMVECTOR XM_CALLCONV XMVectorEndian +( + FXMVECTOR V +) +{ + static const XMVECTORU32 idx = { { { 0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu } } }; + + __m128i Result = _mm_shuffle_epi8( _mm_castps_si128(V), idx ); + return _mm_castsi128_ps( Result ); +} + +} // namespace SSSE3 +#endif // X86 || X64 + +} // namespace DirectX diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathF16C.h b/Sdk/External/DirectXMath/Extensions/DirectXMathF16C.h new file mode 100644 index 0000000..6305eca --- /dev/null +++ b/Sdk/External/DirectXMath/Extensions/DirectXMathF16C.h @@ -0,0 +1,471 @@ +//------------------------------------------------------------------------------------- +// DirectXMathF16C.h -- F16C/CVT16 extensions for SIMD C++ Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__ +#error F16C not supported on ARM platform +#endif + +#include +#include + +namespace DirectX +{ + +namespace F16C +{ + +inline bool XMVerifyF16CSupport() +{ + // Should return true for AMD "Piledriver" and Intel "Ivy Bridge" processors + // with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012) + + // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx + int CPUInfo[4] = { -1 }; +#if defined(__clang__) || defined(__GNUC__) + __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 0); +#endif + + if ( CPUInfo[0] < 1 ) + return false; + +#if defined(__clang__) || defined(__GNUC__) + __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 1); +#endif + + // We check for F16C, AVX, OSXSAVE, and SSE4.1 + return ( (CPUInfo[2] & 0x38080000 ) == 0x38080000 ); +} + + +//------------------------------------------------------------------------------------- +// Data conversion +//------------------------------------------------------------------------------------- + +inline float XMConvertHalfToFloat( PackedVector::HALF Value ) +{ + __m128i V1 = _mm_cvtsi32_si128( static_cast(Value) ); + __m128 V2 = _mm_cvtph_ps( V1 ); + return _mm_cvtss_f32( V2 ); +} + +inline PackedVector::HALF XMConvertFloatToHalf( float Value ) +{ + __m128 V1 = _mm_set_ss( Value ); + __m128i V2 = _mm_cvtps_ph( V1, 0 ); + return static_cast( _mm_cvtsi128_si32(V2) ); +} + +inline float* XMConvertHalfToFloatStream +( + _Out_writes_bytes_(sizeof(float) + OutputStride * (HalfCount - 1)) float* pOutputStream, + _In_ size_t OutputStride, + _In_reads_bytes_(2 + InputStride * (HalfCount - 1)) const PackedVector::HALF* 
pInputStream, + _In_ size_t InputStride, + _In_ size_t HalfCount +) +{ + using namespace PackedVector; + + assert(pOutputStream); + assert(pInputStream); + + assert(InputStride >= sizeof(HALF)); + assert(OutputStride >= sizeof(float)); + + auto pHalf = reinterpret_cast(pInputStream); + auto pFloat = reinterpret_cast(pOutputStream); + + size_t i = 0; + size_t four = HalfCount >> 2; + if (four > 0) + { + if (InputStride == sizeof(HALF)) + { + if (OutputStride == sizeof(float)) + { + if ((reinterpret_cast(pFloat) & 0xF) == 0) + { + // Packed input, aligned & packed output + for (size_t j = 0; j < four; ++j) + { + __m128i HV = _mm_loadl_epi64(reinterpret_cast(pHalf)); + pHalf += InputStride * 4; + + __m128 FV = _mm_cvtph_ps(HV); + + _mm_stream_ps(reinterpret_cast(pFloat), FV); + pFloat += OutputStride * 4; + i += 4; + } + } + else + { + // Packed input, packed output + for (size_t j = 0; j < four; ++j) + { + __m128i HV = _mm_loadl_epi64(reinterpret_cast(pHalf)); + pHalf += InputStride * 4; + + __m128 FV = _mm_cvtph_ps(HV); + + _mm_storeu_ps(reinterpret_cast(pFloat), FV); + pFloat += OutputStride * 4; + i += 4; + } + } + } + else + { + // Packed input, scattered output + for (size_t j = 0; j < four; ++j) + { + __m128i HV = _mm_loadl_epi64(reinterpret_cast(pHalf)); + pHalf += InputStride * 4; + + __m128 FV = _mm_cvtph_ps(HV); + + _mm_store_ss(reinterpret_cast(pFloat), FV); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 1); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 2); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 3); + pFloat += OutputStride; + i += 4; + } + } + } + else if (OutputStride == sizeof(float)) + { + if ((reinterpret_cast(pFloat) & 0xF) == 0) + { + // Scattered input, aligned & packed output + for (size_t j = 0; j < four; ++j) + { + uint16_t H1 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H2 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t 
H3 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H4 = *reinterpret_cast(pHalf); + pHalf += InputStride; + + __m128i HV = _mm_setzero_si128(); + HV = _mm_insert_epi16(HV, H1, 0); + HV = _mm_insert_epi16(HV, H2, 1); + HV = _mm_insert_epi16(HV, H3, 2); + HV = _mm_insert_epi16(HV, H4, 3); + __m128 FV = _mm_cvtph_ps(HV); + + _mm_stream_ps(reinterpret_cast(pFloat), FV); + pFloat += OutputStride * 4; + i += 4; + } + } + else + { + // Scattered input, packed output + for (size_t j = 0; j < four; ++j) + { + uint16_t H1 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H2 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H3 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H4 = *reinterpret_cast(pHalf); + pHalf += InputStride; + + __m128i HV = _mm_setzero_si128(); + HV = _mm_insert_epi16(HV, H1, 0); + HV = _mm_insert_epi16(HV, H2, 1); + HV = _mm_insert_epi16(HV, H3, 2); + HV = _mm_insert_epi16(HV, H4, 3); + __m128 FV = _mm_cvtph_ps(HV); + + _mm_storeu_ps(reinterpret_cast(pFloat), FV); + pFloat += OutputStride * 4; + i += 4; + } + + } + } + else + { + // Scattered input, scattered output + for (size_t j = 0; j < four; ++j) + { + uint16_t H1 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H2 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H3 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H4 = *reinterpret_cast(pHalf); + pHalf += InputStride; + + __m128i HV = _mm_setzero_si128(); + HV = _mm_insert_epi16(HV, H1, 0); + HV = _mm_insert_epi16(HV, H2, 1); + HV = _mm_insert_epi16(HV, H3, 2); + HV = _mm_insert_epi16(HV, H4, 3); + __m128 FV = _mm_cvtph_ps(HV); + + _mm_store_ss(reinterpret_cast(pFloat), FV); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 1); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 2); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 3); + pFloat += OutputStride; + i += 4; + } + } + 
} + + for (; i < HalfCount; ++i) + { + *reinterpret_cast(pFloat) = XMConvertHalfToFloat(reinterpret_cast(pHalf)[0]); + pHalf += InputStride; + pFloat += OutputStride; + } + + return pOutputStream; +} + + +inline PackedVector::HALF* XMConvertFloatToHalfStream +( + _Out_writes_bytes_(2 + OutputStride * (FloatCount - 1)) PackedVector::HALF* pOutputStream, + _In_ size_t OutputStride, + _In_reads_bytes_(sizeof(float) + InputStride * (FloatCount - 1)) const float* pInputStream, + _In_ size_t InputStride, + _In_ size_t FloatCount +) +{ + using namespace PackedVector; + + assert(pOutputStream); + assert(pInputStream); + + assert(InputStride >= sizeof(float)); + assert(OutputStride >= sizeof(HALF)); + + auto pFloat = reinterpret_cast(pInputStream); + auto pHalf = reinterpret_cast(pOutputStream); + + size_t i = 0; + size_t four = FloatCount >> 2; + if (four > 0) + { + if (InputStride == sizeof(float)) + { + if (OutputStride == sizeof(HALF)) + { + if ((reinterpret_cast(pFloat) & 0xF) == 0) + { + // Aligned and packed input, packed output + for (size_t j = 0; j < four; ++j) + { + __m128 FV = _mm_load_ps(reinterpret_cast(pFloat)); + pFloat += InputStride * 4; + + __m128i HV = _mm_cvtps_ph(FV, 0); + + _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV); + pHalf += OutputStride * 4; + i += 4; + } + } + else + { + // Packed input, packed output + for (size_t j = 0; j < four; ++j) + { + __m128 FV = _mm_loadu_ps(reinterpret_cast(pFloat)); + pFloat += InputStride * 4; + + __m128i HV = _mm_cvtps_ph(FV, 0); + + _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV); + pHalf += OutputStride * 4; + i += 4; + } + } + } + else + { + if ((reinterpret_cast(pFloat) & 0xF) == 0) + { + // Aligned & packed input, scattered output + for (size_t j = 0; j < four; ++j) + { + __m128 FV = _mm_load_ps(reinterpret_cast(pFloat)); + pFloat += InputStride * 4; + + __m128i HV = _mm_cvtps_ph(FV, 0); + + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 0)); + pHalf += OutputStride; + 
*reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 1)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 2)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 3)); + pHalf += OutputStride; + i += 4; + } + } + else + { + // Packed input, scattered output + for (size_t j = 0; j < four; ++j) + { + __m128 FV = _mm_loadu_ps(reinterpret_cast(pFloat)); + pFloat += InputStride * 4; + + __m128i HV = _mm_cvtps_ph(FV, 0); + + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 0)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 1)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 2)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 3)); + pHalf += OutputStride; + i += 4; + } + } + } + } + else if (OutputStride == sizeof(HALF)) + { + // Scattered input, packed output + for (size_t j = 0; j < four; ++j) + { + __m128 FV1 = _mm_load_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV2 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV3 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV4 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV = _mm_blend_ps(FV1, FV2, 0x2); + __m128 FT = _mm_blend_ps(FV3, FV4, 0x8); + FV = _mm_blend_ps(FV, FT, 0xC); + + __m128i HV = _mm_cvtps_ph(FV, 0); + + _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV); + pHalf += OutputStride * 4; + i += 4; + } + } + else + { + // Scattered input, scattered output + for (size_t j = 0; j < four; ++j) + { + __m128 FV1 = _mm_load_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV2 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV3 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV4 = 
_mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV = _mm_blend_ps(FV1, FV2, 0x2); + __m128 FT = _mm_blend_ps(FV3, FV4, 0x8); + FV = _mm_blend_ps(FV, FT, 0xC); + + __m128i HV = _mm_cvtps_ph(FV, 0); + + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 0)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 1)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 2)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 3)); + pHalf += OutputStride; + i += 4; + } + } + } + + for (; i < FloatCount; ++i) + { + *reinterpret_cast(pHalf) = XMConvertFloatToHalf(reinterpret_cast(pFloat)[0]); + pFloat += InputStride; + pHalf += OutputStride; + } + + return pOutputStream; +} + + +//------------------------------------------------------------------------------------- +// Half2 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMLoadHalf2( _In_ const PackedVector::XMHALF2* pSource ) +{ + assert(pSource); + __m128 V = _mm_load_ss( reinterpret_cast(pSource) ); + return _mm_cvtph_ps( _mm_castps_si128( V ) ); +} + +inline void XM_CALLCONV XMStoreHalf2( _Out_ PackedVector::XMHALF2* pDestination, _In_ FXMVECTOR V ) +{ + assert(pDestination); + __m128i V1 = _mm_cvtps_ph( V, 0 ); + _mm_store_ss( reinterpret_cast(pDestination), _mm_castsi128_ps(V1) ); +} + + +//------------------------------------------------------------------------------------- +// Half4 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMLoadHalf4( _In_ const PackedVector::XMHALF4* pSource ) +{ + assert(pSource); + __m128i V = _mm_loadl_epi64( reinterpret_cast(pSource) ); + return _mm_cvtph_ps( V ); +} + +inline void XM_CALLCONV XMStoreHalf4( _Out_ PackedVector::XMHALF4* pDestination, _In_ FXMVECTOR V ) +{ + assert(pDestination); + 
__m128i V1 = _mm_cvtps_ph( V, 0 ); + _mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), V1 ); +} + +} // namespace F16C + +} // namespace DirectX diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathFMA3.h b/Sdk/External/DirectXMath/Extensions/DirectXMathFMA3.h new file mode 100644 index 0000000..20c6a09 --- /dev/null +++ b/Sdk/External/DirectXMath/Extensions/DirectXMathFMA3.h @@ -0,0 +1,391 @@ +//------------------------------------------------------------------------------------- +// DirectXMathFMA3.h -- FMA3 extensions for SIMD C++ Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__ +#error FMA3 not supported on ARM platform +#endif + +#include + +namespace DirectX +{ + +namespace FMA3 +{ + +inline bool XMVerifyFMA3Support() +{ + // Should return true for AMD "Piledriver" and Intel "Haswell" processors + // with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012) + + // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx + int CPUInfo[4] = {-1}; +#if defined(__clang__) || defined(__GNUC__) + __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 0); +#endif + + if ( CPUInfo[0] < 1 ) + return false; + +#if defined(__clang__) || defined(__GNUC__) + __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 1); +#endif + + // We check for FMA3, AVX, OSXSAVE + return ( (CPUInfo[2] & 0x18001000) == 0x18001000 ); +} + + +//------------------------------------------------------------------------------------- +// Vector +//------------------------------------------------------------------------------------- +
+inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd +( + FXMVECTOR V1, + FXMVECTOR V2, + FXMVECTOR V3 +) +{ + return _mm_fmadd_ps( V1, V2, V3 ); +} + +inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract +( + FXMVECTOR V1, + FXMVECTOR V2, + FXMVECTOR V3 +) +{ + return _mm_fnmadd_ps( V1, V2, V3 ); +} + + +//------------------------------------------------------------------------------------- +// Vector2 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMVector2Transform +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_fmadd_ps( vResult, M.r[1], M.r[3] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X + vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); + return vResult; +} + +inline XMVECTOR XM_CALLCONV XMVector2TransformCoord +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_fmadd_ps( vResult, M.r[1], M.r[3] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X + vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); + XMVECTOR W = _mm_permute_ps(vResult,_MM_SHUFFLE(3,3,3,3)); + vResult = _mm_div_ps( vResult, W ); + return vResult; +} + +inline XMVECTOR XM_CALLCONV XMVector2TransformNormal +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_mul_ps( vResult, M.r[1] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X + vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); + return vResult; +} + + +//------------------------------------------------------------------------------------- +// Vector3 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMVector3Transform +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z + vResult = 
_mm_fmadd_ps( vResult, M.r[2], M.r[3] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult ); + vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X + vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); + return vResult; +} + +inline XMVECTOR XM_CALLCONV XMVector3TransformCoord +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z + vResult = _mm_fmadd_ps( vResult, M.r[2], M.r[3] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult ); + vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X + vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); + XMVECTOR W = _mm_permute_ps(vResult,_MM_SHUFFLE(3,3,3,3)); + vResult = _mm_div_ps( vResult, W ); + return vResult; +} + +inline XMVECTOR XM_CALLCONV XMVector3TransformNormal +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z + vResult = _mm_mul_ps( vResult, M.r[2] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult ); + vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X + vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); + return vResult; +} + +XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2); + +inline XMVECTOR XM_CALLCONV XMVector3Project +( + FXMVECTOR V, + float ViewportX, + float ViewportY, + float ViewportWidth, + float ViewportHeight, + float ViewportMinZ, + float ViewportMaxZ, + CXMMATRIX Projection, + CXMMATRIX View, + CXMMATRIX World +) +{ + const float HalfViewportWidth = ViewportWidth * 0.5f; + const float HalfViewportHeight = ViewportHeight * 0.5f; + + XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, ViewportMaxZ - ViewportMinZ, 0.0f); + XMVECTOR Offset = XMVectorSet(ViewportX + HalfViewportWidth, ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f); + + XMMATRIX Transform = 
FMA3::XMMatrixMultiply(World, View); + Transform = FMA3::XMMatrixMultiply(Transform, Projection); + + XMVECTOR Result = FMA3::XMVector3TransformCoord(V, Transform); + + Result = FMA3::XMVectorMultiplyAdd(Result, Scale, Offset); + + return Result; +} + +inline XMVECTOR XM_CALLCONV XMVector3Unproject +( + FXMVECTOR V, + float ViewportX, + float ViewportY, + float ViewportWidth, + float ViewportHeight, + float ViewportMinZ, + float ViewportMaxZ, + CXMMATRIX Projection, + CXMMATRIX View, + CXMMATRIX World +) +{ + static const XMVECTORF32 D = { { { -1.0f, 1.0f, 0.0f, 0.0f } } }; + + XMVECTOR Scale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f); + Scale = XMVectorReciprocal(Scale); + + XMVECTOR Offset = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f); + Offset = FMA3::XMVectorMultiplyAdd(Scale, Offset, D.v); + + XMMATRIX Transform = FMA3::XMMatrixMultiply(World, View); + Transform = FMA3::XMMatrixMultiply(Transform, Projection); + Transform = XMMatrixInverse(nullptr, Transform); + + XMVECTOR Result = FMA3::XMVectorMultiplyAdd(V, Scale, Offset); + + return FMA3::XMVector3TransformCoord(Result, Transform); +} + + +//------------------------------------------------------------------------------------- +// Vector4 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMVector4Transform +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(3,3,3,3)); // W + vResult = _mm_mul_ps( vResult, M.r[3] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z + vResult = _mm_fmadd_ps( vTemp, M.r[2], vResult ); + vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_fmadd_ps( vTemp, M.r[1], vResult ); + vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X + vResult = _mm_fmadd_ps( vTemp, M.r[0], vResult ); + return vResult; +} + + 
+//------------------------------------------------------------------------------------- +// Matrix +//------------------------------------------------------------------------------------- + +inline XMMATRIX XM_CALLCONV XMMatrixMultiply +( + CXMMATRIX M1, + CXMMATRIX M2 +) +{ + XMMATRIX mResult; + // Use vW to hold the original row + XMVECTOR vW = M1.r[0]; + // Splat the component X,Y,Z then W + XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + // Perform the operation on the first row + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + mResult.r[0] = vX; + // Repeat for the other 3 rows + vW = M1.r[1]; + vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + mResult.r[1] = vX; + vW = M1.r[2]; + vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + mResult.r[2] = vX; + vW = M1.r[3]; + vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + mResult.r[3] = vX; + return mResult; +} + +inline XMMATRIX 
XM_CALLCONV XMMatrixMultiplyTranspose +( + FXMMATRIX M1, + CXMMATRIX M2 +) +{ + // Use vW to hold the original row + XMVECTOR vW = M1.r[0]; + // Splat the component X,Y,Z then W + XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + // Perform the operation on the first row + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + __m128 r0 = vX; + // Repeat for the other 3 rows + vW = M1.r[1]; + vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + __m128 r1 = vX; + vW = M1.r[2]; + vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + __m128 r2 = vX; + vW = M1.r[3]; + vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_fmadd_ps(vY,M2.r[1],vX); + vX = _mm_fmadd_ps(vZ,M2.r[2],vX); + vX = _mm_fmadd_ps(vW,M2.r[3],vX); + __m128 r3 = vX; + + // x.x,x.y,y.x,y.y + XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0)); + // x.z,x.w,y.z,y.w + XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2)); + // z.x,z.y,w.x,w.y + XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0)); + // 
z.z,z.w,w.z,w.w + XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2)); + + XMMATRIX mResult; + // x.x,y.x,z.x,w.x + mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0)); + // x.y,y.y,z.y,w.y + mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1)); + // x.z,y.z,z.z,w.z + mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0)); + // x.w,y.w,z.w,w.w + mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1)); + return mResult; +} + +} // namespace FMA3 + +} // namespace DirectX; diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathFMA4.h b/Sdk/External/DirectXMath/Extensions/DirectXMathFMA4.h new file mode 100644 index 0000000..38783d1 --- /dev/null +++ b/Sdk/External/DirectXMath/Extensions/DirectXMathFMA4.h @@ -0,0 +1,415 @@ +//------------------------------------------------------------------------------------- +// DirectXMathFMA4.h -- FMA4 extensions for SIMD C++ Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__ +#error FMA4 not supported on ARM platform +#endif + +#include +#include + +#ifdef __GNUC__ +#include +#endif + +namespace DirectX +{ + +namespace FMA4 +{ + +inline bool XMVerifyFMA4Support() +{ + // Should return true for AMD Bulldozer processors + // with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012) + + // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx + int CPUInfo[4] = {-1}; +#if defined(__clang__) || defined(__GNUC__) + __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 0); +#endif + + if ( CPUInfo[0] < 1 ) + return false; + +#if defined(__clang__) || defined(__GNUC__) + __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 1); +#endif + + // We check for AVX, OSXSAVE (required to access FMA4) + if ( (CPUInfo[2] & 0x18000000) != 0x18000000 ) + return false; + +#if defined(__clang__) || defined(__GNUC__) + __cpuid(0x80000000, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 0x80000000); +#endif + + if ( uint32_t(CPUInfo[0]) < 0x80000001u ) + return false; + + // We check for FMA4 +#if defined(__clang__) || defined(__GNUC__) + __cpuid(0x80000001, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 0x80000001); +#endif + + return ( CPUInfo[2] & 0x10000 ); +} + + +//------------------------------------------------------------------------------------- +// Vector +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd +( + FXMVECTOR V1, + FXMVECTOR V2, + FXMVECTOR V3 +) +{ + return _mm_macc_ps( V1, V2, V3 ); +} + +inline XMVECTOR 
XM_CALLCONV XMVectorNegativeMultiplySubtract +( + FXMVECTOR V1, + FXMVECTOR V2, + FXMVECTOR V3 +) +{ + return _mm_nmacc_ps( V1, V2, V3 ); +} + + +//------------------------------------------------------------------------------------- +// Vector2 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMVector2Transform +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_macc_ps( vResult, M.r[1], M.r[3] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X + vResult = _mm_macc_ps( vTemp, M.r[0], vResult ); + return vResult; +} + +inline XMVECTOR XM_CALLCONV XMVector2TransformCoord +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_macc_ps( vResult, M.r[1], M.r[3] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X + vResult = _mm_macc_ps( vTemp, M.r[0], vResult ); + XMVECTOR W = _mm_permute_ps(vResult,_MM_SHUFFLE(3,3,3,3)); + vResult = _mm_div_ps( vResult, W ); + return vResult; +} + +inline XMVECTOR XM_CALLCONV XMVector2TransformNormal +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_mul_ps( vResult, M.r[1] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X + vResult = _mm_macc_ps( vTemp, M.r[0], vResult ); + return vResult; +} + + +//------------------------------------------------------------------------------------- +// Vector3 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMVector3Transform +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z + vResult = _mm_macc_ps( vResult, M.r[2], M.r[3] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_macc_ps( vTemp, M.r[1], vResult ); + vTemp = 
_mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X + vResult = _mm_macc_ps( vTemp, M.r[0], vResult ); + return vResult; +} + +inline XMVECTOR XM_CALLCONV XMVector3TransformCoord +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z + vResult = _mm_macc_ps( vResult, M.r[2], M.r[3] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_macc_ps( vTemp, M.r[1], vResult ); + vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X + vResult = _mm_macc_ps( vTemp, M.r[0], vResult ); + XMVECTOR W = _mm_permute_ps(vResult,_MM_SHUFFLE(3,3,3,3)); + vResult = _mm_div_ps( vResult, W ); + return vResult; +} + +inline XMVECTOR XM_CALLCONV XMVector3TransformNormal +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z + vResult = _mm_mul_ps( vResult, M.r[2] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_macc_ps( vTemp, M.r[1], vResult ); + vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X + vResult = _mm_macc_ps( vTemp, M.r[0], vResult ); + return vResult; +} + +XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2); + +inline XMVECTOR XM_CALLCONV XMVector3Project +( + FXMVECTOR V, + float ViewportX, + float ViewportY, + float ViewportWidth, + float ViewportHeight, + float ViewportMinZ, + float ViewportMaxZ, + CXMMATRIX Projection, + CXMMATRIX View, + CXMMATRIX World +) +{ + const float HalfViewportWidth = ViewportWidth * 0.5f; + const float HalfViewportHeight = ViewportHeight * 0.5f; + + XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, ViewportMaxZ - ViewportMinZ, 0.0f); + XMVECTOR Offset = XMVectorSet(ViewportX + HalfViewportWidth, ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f); + + XMMATRIX Transform = FMA4::XMMatrixMultiply(World, View); + Transform = FMA4::XMMatrixMultiply(Transform, Projection); + + XMVECTOR Result = FMA4::XMVector3TransformCoord(V, Transform); + + Result = 
FMA4::XMVectorMultiplyAdd(Result, Scale, Offset); + + return Result; +} + +inline XMVECTOR XM_CALLCONV XMVector3Unproject +( + FXMVECTOR V, + float ViewportX, + float ViewportY, + float ViewportWidth, + float ViewportHeight, + float ViewportMinZ, + float ViewportMaxZ, + CXMMATRIX Projection, + CXMMATRIX View, + CXMMATRIX World +) +{ + static const XMVECTORF32 D = { { { -1.0f, 1.0f, 0.0f, 0.0f } } }; + + XMVECTOR Scale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f); + Scale = XMVectorReciprocal(Scale); + + XMVECTOR Offset = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f); + Offset = FMA4::XMVectorMultiplyAdd(Scale, Offset, D.v); + + XMMATRIX Transform = FMA4::XMMatrixMultiply(World, View); + Transform = FMA4::XMMatrixMultiply(Transform, Projection); + Transform = XMMatrixInverse(nullptr, Transform); + + XMVECTOR Result = FMA4::XMVectorMultiplyAdd(V, Scale, Offset); + + return FMA4::XMVector3TransformCoord(Result, Transform); +} + + +//------------------------------------------------------------------------------------- +// Vector4 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMVector4Transform +( + FXMVECTOR V, + CXMMATRIX M +) +{ + XMVECTOR vResult = _mm_permute_ps(V,_MM_SHUFFLE(3,3,3,3)); // W + vResult = _mm_mul_ps( vResult, M.r[3] ); + XMVECTOR vTemp = _mm_permute_ps(V,_MM_SHUFFLE(2,2,2,2)); // Z + vResult = _mm_macc_ps( vTemp, M.r[2], vResult ); + vTemp = _mm_permute_ps(V,_MM_SHUFFLE(1,1,1,1)); // Y + vResult = _mm_macc_ps( vTemp, M.r[1], vResult ); + vTemp = _mm_permute_ps(V,_MM_SHUFFLE(0,0,0,0)); // X + vResult = _mm_macc_ps( vTemp, M.r[0], vResult ); + return vResult; +} + + +//------------------------------------------------------------------------------------- +// Matrix +//------------------------------------------------------------------------------------- + +inline XMMATRIX XM_CALLCONV XMMatrixMultiply +( + CXMMATRIX M1, + 
CXMMATRIX M2 +) +{ + XMMATRIX mResult; + // Use vW to hold the original row + XMVECTOR vW = M1.r[0]; + // Splat the component X,Y,Z then W + XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + XMVECTOR vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + // Perform the operation on the first row + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_macc_ps(vY,M2.r[1],vX); + vX = _mm_macc_ps(vZ,M2.r[2],vX); + vX = _mm_macc_ps(vW,M2.r[3],vX); + mResult.r[0] = vX; + // Repeat for the other 3 rows + vW = M1.r[1]; + vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_macc_ps(vY,M2.r[1],vX); + vX = _mm_macc_ps(vZ,M2.r[2],vX); + vX = _mm_macc_ps(vW,M2.r[3],vX); + mResult.r[1] = vX; + vW = M1.r[2]; + vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_macc_ps(vY,M2.r[1],vX); + vX = _mm_macc_ps(vZ,M2.r[2],vX); + vX = _mm_macc_ps(vW,M2.r[3],vX); + mResult.r[2] = vX; + vW = M1.r[3]; + vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_macc_ps(vY,M2.r[1],vX); + vX = _mm_macc_ps(vZ,M2.r[2],vX); + vX = _mm_macc_ps(vW,M2.r[3],vX); + mResult.r[3] = vX; + return mResult; +} + +inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose +( + FXMMATRIX M1, + CXMMATRIX M2 +) +{ + // Use vW to hold the original row + XMVECTOR vW = M1.r[0]; + // Splat the component X,Y,Z then W + XMVECTOR vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + XMVECTOR vY = 
_mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + XMVECTOR vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + // Perform the operation on the first row + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_macc_ps(vY,M2.r[1],vX); + vX = _mm_macc_ps(vZ,M2.r[2],vX); + vX = _mm_macc_ps(vW,M2.r[3],vX); + __m128 r0 = vX; + // Repeat for the other 3 rows + vW = M1.r[1]; + vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_macc_ps(vY,M2.r[1],vX); + vX = _mm_macc_ps(vZ,M2.r[2],vX); + vX = _mm_macc_ps(vW,M2.r[3],vX); + __m128 r1 = vX; + vW = M1.r[2]; + vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_macc_ps(vY,M2.r[1],vX); + vX = _mm_macc_ps(vZ,M2.r[2],vX); + vX = _mm_macc_ps(vW,M2.r[3],vX); + __m128 r2 = vX; + vW = M1.r[3]; + vX = _mm_permute_ps(vW,_MM_SHUFFLE(0,0,0,0)); + vY = _mm_permute_ps(vW,_MM_SHUFFLE(1,1,1,1)); + vZ = _mm_permute_ps(vW,_MM_SHUFFLE(2,2,2,2)); + vW = _mm_permute_ps(vW,_MM_SHUFFLE(3,3,3,3)); + vX = _mm_mul_ps(vX,M2.r[0]); + vX = _mm_macc_ps(vY,M2.r[1],vX); + vX = _mm_macc_ps(vZ,M2.r[2],vX); + vX = _mm_macc_ps(vW,M2.r[3],vX); + __m128 r3 = vX; + + // x.x,x.y,y.x,y.y + XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0)); + // x.z,x.w,y.z,y.w + XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2)); + // z.x,z.y,w.x,w.y + XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0)); + // z.z,z.w,w.z,w.w + XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2)); + + XMMATRIX mResult; + // x.x,y.x,z.x,w.x + mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0)); + // x.y,y.y,z.y,w.y + mResult.r[1] = _mm_shuffle_ps(vTemp1, 
vTemp2,_MM_SHUFFLE(3,1,3,1)); + // x.z,y.z,z.z,w.z + mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0)); + // x.w,y.w,z.w,w.w + mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1)); + return mResult; +} + +} // namespace FMA4 + +} // namespace DirectX; diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathSSE3.h b/Sdk/External/DirectXMath/Extensions/DirectXMathSSE3.h new file mode 100644 index 0000000..72a3dbb --- /dev/null +++ b/Sdk/External/DirectXMath/Extensions/DirectXMathSSE3.h @@ -0,0 +1,111 @@ +//------------------------------------------------------------------------------------- +// DirectXMathSSE3.h -- SSE3 extensions for SIMD C++ Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__ +#error SSE3 not supported on ARM platform +#endif + +#include <pmmintrin.h> + +#include <DirectXMath.h> + +namespace DirectX +{ + +namespace SSE3 +{ + +inline bool XMVerifySSE3Support() +{ + // Should return true on AMD Athlon 64, AMD Phenom, and Intel Pentium 4 or later processors + + // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx + int CPUInfo[4] = { -1 }; +#if defined(__clang__) || defined(__GNUC__) + __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 0); +#endif + if ( CPUInfo[0] < 1 ) + return false; + +#if defined(__clang__) || defined(__GNUC__) + __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 1); +#endif + + // We only check for SSE3 instruction set. SSSE3 instructions are not used.
+ return ( (CPUInfo[2] & 0x1) != 0 ); +} + +inline XMVECTOR XM_CALLCONV XMVector2Dot +( + FXMVECTOR V1, + FXMVECTOR V2 +) +{ + XMVECTOR vTemp = _mm_mul_ps(V1,V2); + vTemp = _mm_hadd_ps(vTemp,vTemp); + return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(0,0,0,0)); +} + +inline XMVECTOR XM_CALLCONV XMVector2LengthSq( FXMVECTOR V ) +{ + return SSE3::XMVector2Dot(V, V); +} + +inline XMVECTOR XM_CALLCONV XMVector3Dot +( + FXMVECTOR V1, + FXMVECTOR V2 +) +{ + XMVECTOR vTemp = _mm_mul_ps(V1,V2); + vTemp = _mm_and_ps( vTemp, g_XMMask3 ); + vTemp = _mm_hadd_ps(vTemp,vTemp); + return _mm_hadd_ps(vTemp,vTemp); +} + +inline XMVECTOR XM_CALLCONV XMVector3LengthSq( FXMVECTOR V ) +{ + return SSE3::XMVector3Dot(V, V); +} + +inline XMVECTOR XM_CALLCONV XMVector4Dot +( + FXMVECTOR V1, + FXMVECTOR V2 +) +{ + XMVECTOR vTemp = _mm_mul_ps(V1,V2); + vTemp = _mm_hadd_ps( vTemp, vTemp ); + return _mm_hadd_ps( vTemp, vTemp ); +} + +inline XMVECTOR XM_CALLCONV XMVector4LengthSq( FXMVECTOR V ) +{ + return SSE3::XMVector4Dot(V, V); +} + +inline XMVECTOR XM_CALLCONV XMVectorSwizzle_0022( FXMVECTOR V ) +{ + return _mm_moveldup_ps(V); +} + +inline XMVECTOR XM_CALLCONV XMVectorSwizzle_1133( FXMVECTOR V ) +{ + return _mm_movehdup_ps(V); +} + +} // namespace SSE3 + +} // namespace DirectX diff --git a/Sdk/External/DirectXMath/Extensions/DirectXMathSSE4.h b/Sdk/External/DirectXMath/Extensions/DirectXMathSSE4.h new file mode 100644 index 0000000..16378cb --- /dev/null +++ b/Sdk/External/DirectXMath/Extensions/DirectXMathSSE4.h @@ -0,0 +1,417 @@ +//------------------------------------------------------------------------------------- +// DirectXMathSSE4.h -- SSE4.1 extensions for SIMD C++ Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__ +#error SSE4 not supported on ARM platform +#endif + +#include <smmintrin.h> + +#include <DirectXMath.h> + +namespace DirectX +{ + +namespace SSE4 +{ + +inline bool XMVerifySSE4Support() +{ + // Should return true on AMD Bulldozer, Intel Core 2 ("Penryn"), and Intel Core i7 ("Nehalem") or later processors + + // See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx + int CPUInfo[4] = { -1 }; +#if defined(__clang__) || defined(__GNUC__) + __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 0); +#endif + if ( CPUInfo[0] < 1 ) + return false; + +#if defined(__clang__) || defined(__GNUC__) + __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 1); +#endif + + // We only check for SSE4.1 instruction set. SSE4.2 instructions are not used.
+ return ( (CPUInfo[2] & 0x80000) == 0x80000 ); +} + + +//------------------------------------------------------------------------------------- +// Vector +//------------------------------------------------------------------------------------- + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wundefined-reinterpret-cast" +#endif + +inline void XM_CALLCONV XMVectorGetYPtr(_Out_ float *y, _In_ FXMVECTOR V) +{ + assert( y != nullptr ); + *reinterpret_cast<int*>(y) = _mm_extract_ps( V, 1 ); +} + +inline void XM_CALLCONV XMVectorGetZPtr(_Out_ float *z, _In_ FXMVECTOR V) +{ + assert( z != nullptr ); + *reinterpret_cast<int*>(z) = _mm_extract_ps( V, 2 ); +} + +inline void XM_CALLCONV XMVectorGetWPtr(_Out_ float *w, _In_ FXMVECTOR V) +{ + assert( w != nullptr ); + *reinterpret_cast<int*>(w) = _mm_extract_ps( V, 3 ); +} + +inline uint32_t XM_CALLCONV XMVectorGetIntY(FXMVECTOR V) +{ + __m128i V1 = _mm_castps_si128( V ); + return static_cast<uint32_t>( _mm_extract_epi32( V1, 1 ) ); +} + +inline uint32_t XM_CALLCONV XMVectorGetIntZ(FXMVECTOR V) +{ + __m128i V1 = _mm_castps_si128( V ); + return static_cast<uint32_t>( _mm_extract_epi32( V1, 2 ) ); +} + +inline uint32_t XM_CALLCONV XMVectorGetIntW(FXMVECTOR V) +{ + __m128i V1 = _mm_castps_si128( V ); + return static_cast<uint32_t>( _mm_extract_epi32( V1, 3 ) ); +} + +inline void XM_CALLCONV XMVectorGetIntYPtr(_Out_ uint32_t *y, _In_ FXMVECTOR V) +{ + assert( y != nullptr ); + __m128i V1 = _mm_castps_si128( V ); + *y = static_cast<uint32_t>( _mm_extract_epi32( V1, 1 ) ); +} + +inline void XM_CALLCONV XMVectorGetIntZPtr(_Out_ uint32_t *z, _In_ FXMVECTOR V) +{ + assert( z != nullptr ); + __m128i V1 = _mm_castps_si128( V ); + *z = static_cast<uint32_t>( _mm_extract_epi32( V1, 2 ) ); +} + +inline void XM_CALLCONV XMVectorGetIntWPtr(_Out_ uint32_t *w, _In_ FXMVECTOR V) +{ + assert( w != nullptr ); + __m128i V1 = _mm_castps_si128( V ); + *w = static_cast<uint32_t>( _mm_extract_epi32( V1, 3 ) ); +} + +inline XMVECTOR XM_CALLCONV XMVectorSetY(FXMVECTOR V, float y) +{ + XMVECTOR vResult = _mm_set_ss(y); +
vResult = _mm_insert_ps( V, vResult, 0x10 ); + return vResult; +} + +inline XMVECTOR XM_CALLCONV XMVectorSetZ(FXMVECTOR V, float z) +{ + XMVECTOR vResult = _mm_set_ss(z); + vResult = _mm_insert_ps( V, vResult, 0x20 ); + return vResult; +} + +inline XMVECTOR XM_CALLCONV XMVectorSetW(FXMVECTOR V, float w) +{ + XMVECTOR vResult = _mm_set_ss(w); + vResult = _mm_insert_ps( V, vResult, 0x30 ); + return vResult; +} + +inline XMVECTOR XM_CALLCONV XMVectorSetIntY(FXMVECTOR V, uint32_t y) +{ + __m128i vResult = _mm_castps_si128( V ); + vResult = _mm_insert_epi32( vResult, static_cast<int>(y), 1 ); + return _mm_castsi128_ps( vResult ); +} + +inline XMVECTOR XM_CALLCONV XMVectorSetIntZ(FXMVECTOR V, uint32_t z) +{ + __m128i vResult = _mm_castps_si128( V ); + vResult = _mm_insert_epi32( vResult, static_cast<int>(z), 2 ); + return _mm_castsi128_ps( vResult ); +} + +inline XMVECTOR XM_CALLCONV XMVectorSetIntW(FXMVECTOR V, uint32_t w) +{ + __m128i vResult = _mm_castps_si128( V ); + vResult = _mm_insert_epi32( vResult, static_cast<int>(w), 3 ); + return _mm_castsi128_ps( vResult ); +} + +inline XMVECTOR XM_CALLCONV XMVectorRound( FXMVECTOR V ) +{ + return _mm_round_ps( V, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC ); +} + +inline XMVECTOR XM_CALLCONV XMVectorTruncate( FXMVECTOR V ) +{ + return _mm_round_ps( V, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC ); +} + +inline XMVECTOR XM_CALLCONV XMVectorFloor( FXMVECTOR V ) +{ + return _mm_floor_ps( V ); +} + +inline XMVECTOR XM_CALLCONV XMVectorCeiling( FXMVECTOR V ) +{ + return _mm_ceil_ps( V ); +} + + +//------------------------------------------------------------------------------------- +// Vector2 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMVector2Dot( FXMVECTOR V1, FXMVECTOR V2 ) +{ + return _mm_dp_ps( V1, V2, 0x3f ); +} + +inline XMVECTOR XM_CALLCONV XMVector2LengthSq( FXMVECTOR V ) +{ + return SSE4::XMVector2Dot(V, V); +} + +inline XMVECTOR XM_CALLCONV
XMVector2ReciprocalLengthEst( FXMVECTOR V ) +{ + XMVECTOR vTemp = _mm_dp_ps( V, V, 0x3f ); + return _mm_rsqrt_ps( vTemp ); +} + +inline XMVECTOR XM_CALLCONV XMVector2ReciprocalLength( FXMVECTOR V ) +{ + XMVECTOR vTemp = _mm_dp_ps( V, V, 0x3f ); + XMVECTOR vLengthSq = _mm_sqrt_ps( vTemp ); + return _mm_div_ps( g_XMOne, vLengthSq ); +} + +inline XMVECTOR XM_CALLCONV XMVector2LengthEst( FXMVECTOR V ) +{ + XMVECTOR vTemp = _mm_dp_ps( V, V, 0x3f ); + return _mm_sqrt_ps( vTemp ); +} + +inline XMVECTOR XM_CALLCONV XMVector2Length( FXMVECTOR V ) +{ + XMVECTOR vTemp = _mm_dp_ps( V, V, 0x3f ); + return _mm_sqrt_ps( vTemp ); +} + +inline XMVECTOR XM_CALLCONV XMVector2NormalizeEst( FXMVECTOR V ) +{ + XMVECTOR vTemp = _mm_dp_ps( V, V, 0x3f ); + XMVECTOR vResult = _mm_rsqrt_ps( vTemp ); + return _mm_mul_ps(vResult, V); +} + +inline XMVECTOR XM_CALLCONV XMVector2Normalize( FXMVECTOR V ) +{ + XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x3f ); + // Prepare for the division + XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Create zero with a single instruction + XMVECTOR vZeroMask = _mm_setzero_ps(); + // Test for a divide by zero (Must be FP to detect -0.0) + vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the elements to zero + vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity); + // Reciprocal mul to perform the normalization + vResult = _mm_div_ps(V,vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult,vZeroMask); + // Select qnan or result based on infinite length + XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN); + XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq); + vResult = _mm_or_ps(vTemp1,vTemp2); + return vResult; +} + + +//------------------------------------------------------------------------------------- +// Vector3 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMVector3Dot( FXMVECTOR V1, 
FXMVECTOR V2 ) +{ + return _mm_dp_ps( V1, V2, 0x7f ); +} + +inline XMVECTOR XM_CALLCONV XMVector3LengthSq( FXMVECTOR V ) +{ + return SSE4::XMVector3Dot(V, V); +} + +inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLengthEst( FXMVECTOR V ) +{ + XMVECTOR vTemp = _mm_dp_ps( V, V, 0x7f ); + return _mm_rsqrt_ps( vTemp ); +} + +inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLength( FXMVECTOR V ) +{ + XMVECTOR vTemp = _mm_dp_ps( V, V, 0x7f ); + XMVECTOR vLengthSq = _mm_sqrt_ps( vTemp ); + return _mm_div_ps( g_XMOne, vLengthSq ); +} + +inline XMVECTOR XM_CALLCONV XMVector3LengthEst( FXMVECTOR V ) +{ + XMVECTOR vTemp = _mm_dp_ps( V, V, 0x7f ); + return _mm_sqrt_ps( vTemp ); +} + +inline XMVECTOR XM_CALLCONV XMVector3Length( FXMVECTOR V ) +{ + XMVECTOR vTemp = _mm_dp_ps( V, V, 0x7f ); + return _mm_sqrt_ps( vTemp ); +} + +inline XMVECTOR XM_CALLCONV XMVector3NormalizeEst( FXMVECTOR V ) +{ + XMVECTOR vTemp = _mm_dp_ps( V, V, 0x7f ); + XMVECTOR vResult = _mm_rsqrt_ps( vTemp ); + return _mm_mul_ps(vResult, V); +} + +inline XMVECTOR XM_CALLCONV XMVector3Normalize( FXMVECTOR V ) +{ + XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0x7f ); + // Prepare for the division + XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Create zero with a single instruction + XMVECTOR vZeroMask = _mm_setzero_ps(); + // Test for a divide by zero (Must be FP to detect -0.0) + vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the elements to zero + vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity); + // Divide to perform the normalization + vResult = _mm_div_ps(V,vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult,vZeroMask); + // Select qnan or result based on infinite length + XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN); + XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq); + vResult = _mm_or_ps(vTemp1,vTemp2); + return vResult; +} + + 
+//------------------------------------------------------------------------------------- +// Vector4 +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMVector4Dot( FXMVECTOR V1, FXMVECTOR V2 ) +{ + return _mm_dp_ps( V1, V2, 0xff ); +} + +inline XMVECTOR XM_CALLCONV XMVector4LengthSq( FXMVECTOR V ) +{ + return SSE4::XMVector4Dot(V, V); +} + +inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLengthEst( FXMVECTOR V ) +{ + XMVECTOR vTemp = _mm_dp_ps( V, V, 0xff ); + return _mm_rsqrt_ps( vTemp ); +} + +inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLength( FXMVECTOR V ) +{ + XMVECTOR vTemp = _mm_dp_ps( V, V, 0xff ); + XMVECTOR vLengthSq = _mm_sqrt_ps( vTemp ); + return _mm_div_ps( g_XMOne, vLengthSq ); +} + +inline XMVECTOR XM_CALLCONV XMVector4LengthEst( FXMVECTOR V ) +{ + XMVECTOR vTemp = _mm_dp_ps( V, V, 0xff ); + return _mm_sqrt_ps( vTemp ); +} + +inline XMVECTOR XM_CALLCONV XMVector4Length( FXMVECTOR V ) +{ + XMVECTOR vTemp = _mm_dp_ps( V, V, 0xff ); + return _mm_sqrt_ps( vTemp ); +} + +inline XMVECTOR XM_CALLCONV XMVector4NormalizeEst( FXMVECTOR V ) +{ + XMVECTOR vTemp = _mm_dp_ps( V, V, 0xff ); + XMVECTOR vResult = _mm_rsqrt_ps( vTemp ); + return _mm_mul_ps(vResult, V); +} + +inline XMVECTOR XM_CALLCONV XMVector4Normalize( FXMVECTOR V ) +{ + XMVECTOR vLengthSq = _mm_dp_ps( V, V, 0xff ); + // Prepare for the division + XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Create zero with a single instruction + XMVECTOR vZeroMask = _mm_setzero_ps(); + // Test for a divide by zero (Must be FP to detect -0.0) + vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the elements to zero + vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity); + // Divide to perform the normalization + vResult = _mm_div_ps(V,vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult,vZeroMask); + // Select qnan or result based on 
infinite length + XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN); + XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq); + vResult = _mm_or_ps(vTemp1,vTemp2); + return vResult; +} + + +//------------------------------------------------------------------------------------- +// Plane +//------------------------------------------------------------------------------------- + +inline XMVECTOR XM_CALLCONV XMPlaneNormalizeEst( FXMVECTOR P ) +{ + XMVECTOR vTemp = _mm_dp_ps( P, P, 0x7f ); + XMVECTOR vResult = _mm_rsqrt_ps( vTemp ); + return _mm_mul_ps(vResult, P); +} + +inline XMVECTOR XM_CALLCONV XMPlaneNormalize( FXMVECTOR P ) +{ + XMVECTOR vLengthSq = _mm_dp_ps( P, P, 0x7f ); + // Prepare for the division + XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the elements to zero + vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity); + // Reciprocal mul to perform the normalization + vResult = _mm_div_ps(P,vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult,vLengthSq); + return vResult; +} + +} // namespace SSE4 + +} // namespace DirectX diff --git a/Sdk/External/DirectXMath/HISTORY.md b/Sdk/External/DirectXMath/HISTORY.md new file mode 100644 index 0000000..1d6f5fe --- /dev/null +++ b/Sdk/External/DirectXMath/HISTORY.md @@ -0,0 +1,172 @@ +# DirectXMath + +https://github.com/Microsoft/DirectXMath + +Release available for download on [GitHub](https://github.com/microsoft/DirectXMath/releases) + +## Release History + +### August 2020 (3.16) +* Added ``XMVectorLog10`` / ``XMVectorExp10`` +* Added ``XMColorRGBToYUV_UHD`` / ``XMColorYUVToRGB_UHD`` for Rec. 
2020 YUV +* Added optional ``rhcoords`` parameter for BoundingFrustum ``CreateFromMatrix`` +* Added use of Intel® Short Vector Math Library (SVML) supported by VS 2019 + * Opt-in with ``_XM_SVML_INTRINSICS_``; opt-out with ``_XM_DISABLE_INTEL_SVML_`` +* Fixed denorm handling for ``XMConvertFloatToHalf`` +* Fixed flush (too small for denorm) handling for ``XMStoreFloat3PK`` +* Fixed clamping bug in ``XMStoreByteN4`` +* Cleaned up ARM-NEON intrinsics type issues for improved portability on GNUC +* Fixed ``GXMVECTOR`` for x86 ``__vectorcall`` +* Code review + +### April 2020 (3.15) +* Added ``XMMatrixVectorTensorProduct`` for creating a matrix from two vectors +* Use of m256 registers and FMA3 with ``/arch:AVX2`` for stream and some matrix functions +* Optimized load/stores for SSE2 float2 & float3 functions +* Optimized some instruction choices for better AMD CPU support +* Improved conformance for clang/LLVM, GCC, and MinGW compilers +* Code review (``constexpr`` / ``noexcept`` usage) +* Retired VS 2015 support + +### August 2019 (3.14) +* Added float control around IsNan functions to resolve issue with VS 2019 with ``/fp:fast`` +* XMVerifyCPUSupport updated for clang/LLVM cpuid implementation on x86/x64 +* Added support for clang/LLVM built-in platform defines as well as the MSVC ones +* Cleaned up ARM-NEON intrinsics type issues for improved portability +* Removed unneeded malloc.h include in DirectXMath.h +* Whitespace cleanup + +### July 2018 (3.13) +* ``XMFLOAT3X4``, ``XMFLOAT3X4A``, and associated Load/Store functions +* Move/copy constructors and assignment operators for C++ types +* Minor fix for XMVectorClamp behavior with NaN +* Fixed compilation warnings with VS 2017 (15.7 update), Intel C++ 18.0 compiler, and clang 6 +* Retired VS 2013 support +* Minor code cleanup + +### February 2018 (3.12) +* ARM64 use of fused multiply-accumulate intrinsics +* Conformance fix for XMConvertFloatToHalf +* Minor code cleanup + +### June 2017 (3.11) +* AVX optimization
of XMMatrixMultiply and XMMatrixMultiplyTranspose +* AVX2 optimization for XMVectorSplatX +* FMA3 optimization of XMVectorMultiplyAdd and XMVectorNegativeMultiplySubtract (implied by /arch:AVX2) +* Conformance fixes to support compilation with Clang 3.7 + +### January 2017 (3.10) +* Added XMVectorSum for horizontal adds +* ARMv8 intrinsics use for ARM64 platform (division, rounding, half-precision conversion) +* Added SSE3 codepaths using opt-in ``_XM_SSE3_INTRINSICS_`` +* XMVectorRound fix for no-intrinsics to match round to nearest (even) +* XMStoreFloat3SE fix when max channel isn't a perfect power of 2 +* constexpr conformance fix and workaround for compiler bug in VS 2015 RTM +* Remove support for VS 2012 compilers +* Remove ``__vector4i`` deprecated type + +### June 2016 (3.09) +* Includes support for additional optimizations when built with /arch:AVX or /arch:AVX2 +* Added use of constexpr for type constructors, XMConvertToRadians, and XMConvertToDegrees +* Marked ``__vector4i``, ``XMXDEC4``, ``XMDECN4``, ``XMDEC4``, and associated Load & Store functions as deprecated. 
+ + These are vestiges of Xbox 360 support and will be removed in a future release +* Renamed parameter in XMMatrixPerspectiveFov* to reduce user confusion when relying on IntelliSense +* XMU565, XMUNIBBLE4 constructors take uint8_t instead of int8_t + +### May 2016 +* DirectXMath 3.08 released under the MIT license + +### November 2015 (3.08) +* Added use of ``_mm_sfence`` for Stream methods +* Fixed bug with non-uniform scaling transforms for BoundingOrientedBox +* Added asserts for Near/FarZ in XMMatrix* methods +* Added use of ``=default`` for PODs with VS 2013/2015 +* Additional SSE and ARM-NEON optimizations for PackedVector functions + +### April 2015 (3.07) +* Fix customer reported bugs in BoundingBox methods +* Fix customer reported bug in XMStoreFloat3SE +* Fix customer reported bug in XMVectorATan2, XMVectorATan2Est +* Fix customer reported bug in XMVectorRound + +### October 2013 (3.06) +* Fixed load/store of XMFLOAT3SE to properly match the ``DXGI_FORMAT_R9G9B9E5_SHAREDEXP`` +* Added ``XMLoadUDecN4_XR`` and ``XMStoreUDecN4_XR`` to match ``DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM`` +* Added ``XMColorRGBToSRGB`` and ``XMColorSRGBToRGB`` to convert linear RGB <-> sRGB + +### July 2013 (3.05) +* Use x86/x64 ``__vectorcall`` calling-convention when available (``XM_CALLCONV``, ``HXMVECTOR``, ``FXMMATRIX`` introduced) +* Fixed bug with XMVectorFloor and XMVectorCeiling when given whole odd numbers (i.e. 
105.0) +* Improved XMVectorRound algorithm +* ARM-NEON optimizations for XMVectorExp2, XMVectorLog2, XMVectorExpE, and XMVectorLogE +* ARM-NEON code paths use multiply-by-scalar intrinsics when supported +* Additional optimizations for ARM-NEON Stream functions +* Fixed potential warning C4723 using ``operator/`` or ``operator/=`` + +### March 2013 (3.04) +* ``XMVectorExp2``, ``XMVectorLog2``, ``XMVectorExpE``, and ``XMVectorLogE`` functions added to provide base-e support in addition to the existing base-2 support +* ``XMVectorExp`` and ``XMVectorLog`` are now aliases for XMVectorExp2 and XMVectorLog2 +* Additional optimizations for Stream functions +* XMVector3Cross now ensures w component is zero on ARM +* XMConvertHalfToFloat and XMConvertFloatToHalf now use IEEE 754 standard float16 behavior for INF/QNAN +* Updated matrix version Transform for BoundingOrientedBox and BoundingFrustum to handle scaling + +### March 2012 (3.03) +* *breaking change* Removed union members from XMMATRIX type to make it a fully 'opaque' type +* Marked single-parameter C++ constructors for XMFLOAT2, XMFLOAT2A, XMFLOAT3, XMFLOAT3A, XMFLOAT4, and XMFLOAT4A explicit + +### February 2012 (3.02) +* ARM-NEON intrinsics (selected by default for the ARM platform) +* Reworked XMVectorPermute, change of ``XM_PERMUTE_`` defines, removal of XMVectorPermuteControl +* Addition of ``XM_SWIZZLE_`` defines +* Optimizations for transcendental functions +* Template forms for permute, swizzle, shift-left, rotate-left, rotate-right, and insert +* Removal of deprecated types and functions + + ``XM_CACHE_LINE_SIZE`` define, XMVectorExpEst, XMVectorLogEst, XMVectorPowEst, XMVectorSinHEst, XMVectorCosHEst, XMVectorTanHEst, XMVector2InBoundsR, XMVector3InBoundsR, XMVector4InBoundsR +* Removed ``XM_STRICT_VECTOR4``; XMVECTOR in NO-INTRINSICS always defined without .x, .y, .z, .w, .v, or .u +* Additional bounding types +* SAL fixes and improvements + +### September 2011 (3.00) +* Renamed and reorganized the
headers +* Introduced C++ namespaces +* Removed the Xbox 360-specific GPU types + + HENDN3, XMHEND3, XMUHENDN3, XMUHEND3, XMDHENN3, XMDHEN3, XMUDHENN3, XMUDHEN3, XMXICON4, XMXICO4, XMICON4, XMICO4, XMUICON4, XMUICO4 + +### July 2012 (XNAMath 2.05) +* Template forms have been added for `XMVectorPermute`, `XMVectorSwizzle`, `XMVectorShiftLeft`, `XMVectorRotateLeft`, `XMVectorRotateRight`, and `XMVectorInsert` +* The `XM_STRICT_XMMATRIX` compilation define has been added for opaque `XMMATRIX`. +* Stream stride and count arguments have been changed to `size_t` +* The ``pDeterminant`` parameter of `XMMatrixInverse` is now optional +* Additional operator= overloads for `XMBYTEN4`, `XMBYTE4`, `XMUBYTEN4`, and `XMUBYTE4` types are now available + +### February 2011 (XNAMath 2.04) +* Addition of new data types and associated load-store functions: + + `XMBYTEN2, XMBYTE2, XMUBYTEN2, XMUBYTE2` + + `XMLoadByteN2, XMLoadByte2, XMLoadUByteN2, XMLoadUByte2` + + `XMStoreByteN2, XMStoreByte2, XMStoreUByteN2, XMStoreUByte2` + + `XMINT2, XMUINT2, XMINT3, XMUINT3, XMINT4, XMUINT4` + + `XMLoadSInt2, XMLoadUInt2, XMLoadSInt3, XMLoadUInt3, XMLoadSInt4, XMLoadUInt4` + + `XMStoreSInt2, XMStoreUInt2, XMStoreSInt3, XMStoreUInt3, XMStoreSInt4, XMStoreUInt4` +* Marked most single-parameter C++ constructors with `explicit` keyword +* Corrected range issues with SSE implementations of `XMVectorFloor` and `XMVectorCeiling` + + +### June 2010 (XNAMath 2.03) +* Addition of ``XMVectorDivide`` to optimize SSE2 vector division operations +* Unified handling of floating-point specials between the Windows SSE2 and no-intrinsics implementations +* Use of Visual Studio style SAL annotations +* Modifications to the C++ declarations for `XMFLOAT2A/3A/4A/4X3A/4X4A` to better support these types in C++ templates + +### February 2010 (XNAMath 2.02) +* Fixes to `XMStoreColor`, `XMQuaternionRotationMatrix`, `XMVectorATan2`, and `XMVectorATan2Est` + +### August 2009 (XNAMath 2.01) +* Adds ``XM_STRICT_VECTOR4``. 
This opt-in directive disallows the usage of XboxMath-like member accessors such as .x, .y, and .z. This makes it easier to write portable XNA Math code. +* Added conversion support for the following Windows graphics formats: + + 16-bit color formats (565, 555X, 5551) + + 4-bits per channel color formats (4444) + + Unique Direct3D 10/11 formats (``DXGI_FORMAT_R9G9B9E5_SHAREDEXP`` and ``DXGI_FORMAT_R11G11B10_FLOAT``) + +### March 2009 (XNAMath 2.00) +* Initial release (based on the Xbox 360 Xbox math library) diff --git a/Sdk/External/DirectXMath/Inc/DirectXCollision.h b/Sdk/External/DirectXMath/Inc/DirectXCollision.h new file mode 100644 index 0000000..94777bd --- /dev/null +++ b/Sdk/External/DirectXMath/Inc/DirectXCollision.h @@ -0,0 +1,353 @@ +//------------------------------------------------------------------------------------- +// DirectXCollision.h -- C++ Collision Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +#include "DirectXMath.h" + +namespace DirectX +{ + + enum ContainmentType + { + DISJOINT = 0, + INTERSECTS = 1, + CONTAINS = 2 + }; + + enum PlaneIntersectionType + { + FRONT = 0, + INTERSECTING = 1, + BACK = 2 + }; + + struct BoundingBox; + struct BoundingOrientedBox; + struct BoundingFrustum; + +#pragma warning(push) +#pragma warning(disable:4324 4820) + // C4324: alignment padding warnings + // C4820: Off by default noise + + //------------------------------------------------------------------------------------- + // Bounding sphere + //------------------------------------------------------------------------------------- + struct BoundingSphere + { + XMFLOAT3 Center; // Center of the sphere. + float Radius; // Radius of the sphere. 
+ + // Creators + BoundingSphere() noexcept : Center(0, 0, 0), Radius(1.f) {} + + BoundingSphere(const BoundingSphere&) = default; + BoundingSphere& operator=(const BoundingSphere&) = default; + + BoundingSphere(BoundingSphere&&) = default; + BoundingSphere& operator=(BoundingSphere&&) = default; + + constexpr BoundingSphere(_In_ const XMFLOAT3& center, _In_ float radius) noexcept + : Center(center), Radius(radius) {} + + // Methods + void XM_CALLCONV Transform(_Out_ BoundingSphere& Out, _In_ FXMMATRIX M) const noexcept; + void XM_CALLCONV Transform(_Out_ BoundingSphere& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept; + // Transform the sphere + + ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept; + ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept; + ContainmentType Contains(_In_ const BoundingSphere& sh) const noexcept; + ContainmentType Contains(_In_ const BoundingBox& box) const noexcept; + ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept; + ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept; + + bool Intersects(_In_ const BoundingSphere& sh) const noexcept; + bool Intersects(_In_ const BoundingBox& box) const noexcept; + bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept; + bool Intersects(_In_ const BoundingFrustum& fr) const noexcept; + + bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept; + // Triangle-sphere test + + PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const noexcept; + // Plane-sphere test + + bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept; + // Ray-sphere test + + ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2, + _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ 
HXMVECTOR Plane5) const noexcept; + // Test sphere against six planes (see BoundingFrustum::GetPlanes) + + // Static methods + static void CreateMerged(_Out_ BoundingSphere& Out, _In_ const BoundingSphere& S1, _In_ const BoundingSphere& S2) noexcept; + + static void CreateFromBoundingBox(_Out_ BoundingSphere& Out, _In_ const BoundingBox& box) noexcept; + static void CreateFromBoundingBox(_Out_ BoundingSphere& Out, _In_ const BoundingOrientedBox& box) noexcept; + + static void CreateFromPoints(_Out_ BoundingSphere& Out, _In_ size_t Count, + _In_reads_bytes_(sizeof(XMFLOAT3) + Stride * (Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride) noexcept; + + static void CreateFromFrustum(_Out_ BoundingSphere& Out, _In_ const BoundingFrustum& fr) noexcept; + }; + + //------------------------------------------------------------------------------------- + // Axis-aligned bounding box + //------------------------------------------------------------------------------------- + struct BoundingBox + { + static const size_t CORNER_COUNT = 8; + + XMFLOAT3 Center; // Center of the box. + XMFLOAT3 Extents; // Distance from the center to each side. 
+ + // Creators + BoundingBox() noexcept : Center(0, 0, 0), Extents(1.f, 1.f, 1.f) {} + + BoundingBox(const BoundingBox&) = default; + BoundingBox& operator=(const BoundingBox&) = default; + + BoundingBox(BoundingBox&&) = default; + BoundingBox& operator=(BoundingBox&&) = default; + + constexpr BoundingBox(_In_ const XMFLOAT3& center, _In_ const XMFLOAT3& extents) noexcept + : Center(center), Extents(extents) {} + + // Methods + void XM_CALLCONV Transform(_Out_ BoundingBox& Out, _In_ FXMMATRIX M) const noexcept; + void XM_CALLCONV Transform(_Out_ BoundingBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept; + + void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const noexcept; + // Gets the 8 corners of the box + + ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept; + ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept; + ContainmentType Contains(_In_ const BoundingSphere& sh) const noexcept; + ContainmentType Contains(_In_ const BoundingBox& box) const noexcept; + ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept; + ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept; + + bool Intersects(_In_ const BoundingSphere& sh) const noexcept; + bool Intersects(_In_ const BoundingBox& box) const noexcept; + bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept; + bool Intersects(_In_ const BoundingFrustum& fr) const noexcept; + + bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept; + // Triangle-Box test + + PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const noexcept; + // Plane-box test + + bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept; + // Ray-Box test + + ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR 
Plane2, + _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept; + // Test box against six planes (see BoundingFrustum::GetPlanes) + + // Static methods + static void CreateMerged(_Out_ BoundingBox& Out, _In_ const BoundingBox& b1, _In_ const BoundingBox& b2) noexcept; + + static void CreateFromSphere(_Out_ BoundingBox& Out, _In_ const BoundingSphere& sh) noexcept; + + static void XM_CALLCONV CreateFromPoints(_Out_ BoundingBox& Out, _In_ FXMVECTOR pt1, _In_ FXMVECTOR pt2) noexcept; + static void CreateFromPoints(_Out_ BoundingBox& Out, _In_ size_t Count, + _In_reads_bytes_(sizeof(XMFLOAT3) + Stride * (Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride) noexcept; + }; + + //------------------------------------------------------------------------------------- + // Oriented bounding box + //------------------------------------------------------------------------------------- + struct BoundingOrientedBox + { + static const size_t CORNER_COUNT = 8; + + XMFLOAT3 Center; // Center of the box. + XMFLOAT3 Extents; // Distance from the center to each side. + XMFLOAT4 Orientation; // Unit quaternion representing rotation (box -> world). 
+ + // Creators + BoundingOrientedBox() noexcept : Center(0, 0, 0), Extents(1.f, 1.f, 1.f), Orientation(0, 0, 0, 1.f) {} + + BoundingOrientedBox(const BoundingOrientedBox&) = default; + BoundingOrientedBox& operator=(const BoundingOrientedBox&) = default; + + BoundingOrientedBox(BoundingOrientedBox&&) = default; + BoundingOrientedBox& operator=(BoundingOrientedBox&&) = default; + + constexpr BoundingOrientedBox(_In_ const XMFLOAT3& _Center, _In_ const XMFLOAT3& _Extents, _In_ const XMFLOAT4& _Orientation) noexcept + : Center(_Center), Extents(_Extents), Orientation(_Orientation) {} + + // Methods + void XM_CALLCONV Transform(_Out_ BoundingOrientedBox& Out, _In_ FXMMATRIX M) const noexcept; + void XM_CALLCONV Transform(_Out_ BoundingOrientedBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept; + + void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const noexcept; + // Gets the 8 corners of the box + + ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept; + ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept; + ContainmentType Contains(_In_ const BoundingSphere& sh) const noexcept; + ContainmentType Contains(_In_ const BoundingBox& box) const noexcept; + ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept; + ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept; + + bool Intersects(_In_ const BoundingSphere& sh) const noexcept; + bool Intersects(_In_ const BoundingBox& box) const noexcept; + bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept; + bool Intersects(_In_ const BoundingFrustum& fr) const noexcept; + + bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept; + // Triangle-OrientedBox test + + PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const noexcept; + // Plane-OrientedBox test + + bool XM_CALLCONV Intersects(_In_ 
FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept; + // Ray-OrientedBox test + + ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2, + _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept; + // Test OrientedBox against six planes (see BoundingFrustum::GetPlanes) + + // Static methods + static void CreateFromBoundingBox(_Out_ BoundingOrientedBox& Out, _In_ const BoundingBox& box) noexcept; + + static void CreateFromPoints(_Out_ BoundingOrientedBox& Out, _In_ size_t Count, + _In_reads_bytes_(sizeof(XMFLOAT3) + Stride * (Count - 1)) const XMFLOAT3* pPoints, _In_ size_t Stride) noexcept; + }; + + //------------------------------------------------------------------------------------- + // Bounding frustum + //------------------------------------------------------------------------------------- + struct BoundingFrustum + { + static const size_t CORNER_COUNT = 8; + + XMFLOAT3 Origin; // Origin of the frustum (and projection). + XMFLOAT4 Orientation; // Quaternion representing rotation. + + float RightSlope; // Positive X (X/Z) + float LeftSlope; // Negative X + float TopSlope; // Positive Y (Y/Z) + float BottomSlope; // Negative Y + float Near, Far; // Z of the near plane and far plane. 
+ + // Creators + BoundingFrustum() noexcept : + Origin(0, 0, 0), Orientation(0, 0, 0, 1.f), RightSlope(1.f), LeftSlope(-1.f), + TopSlope(1.f), BottomSlope(-1.f), Near(0), Far(1.f) {} + + BoundingFrustum(const BoundingFrustum&) = default; + BoundingFrustum& operator=(const BoundingFrustum&) = default; + + BoundingFrustum(BoundingFrustum&&) = default; + BoundingFrustum& operator=(BoundingFrustum&&) = default; + + constexpr BoundingFrustum(_In_ const XMFLOAT3& _Origin, _In_ const XMFLOAT4& _Orientation, + _In_ float _RightSlope, _In_ float _LeftSlope, _In_ float _TopSlope, _In_ float _BottomSlope, + _In_ float _Near, _In_ float _Far) noexcept + : Origin(_Origin), Orientation(_Orientation), + RightSlope(_RightSlope), LeftSlope(_LeftSlope), TopSlope(_TopSlope), BottomSlope(_BottomSlope), + Near(_Near), Far(_Far) {} + BoundingFrustum(_In_ CXMMATRIX Projection, bool rhcoords = false) noexcept; + + // Methods + void XM_CALLCONV Transform(_Out_ BoundingFrustum& Out, _In_ FXMMATRIX M) const noexcept; + void XM_CALLCONV Transform(_Out_ BoundingFrustum& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) const noexcept; + + void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const noexcept; + // Gets the 8 corners of the frustum + + ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept; + ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept; + ContainmentType Contains(_In_ const BoundingSphere& sp) const noexcept; + ContainmentType Contains(_In_ const BoundingBox& box) const noexcept; + ContainmentType Contains(_In_ const BoundingOrientedBox& box) const noexcept; + ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept; + // Frustum-Frustum test + + bool Intersects(_In_ const BoundingSphere& sh) const noexcept; + bool Intersects(_In_ const BoundingBox& box) const noexcept; + bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept; + bool 
Intersects(_In_ const BoundingFrustum& fr) const noexcept; + + bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) const noexcept; + // Triangle-Frustum test + + PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR Plane) const noexcept; + // Plane-Frustum test + + bool XM_CALLCONV Intersects(_In_ FXMVECTOR rayOrigin, _In_ FXMVECTOR Direction, _Out_ float& Dist) const noexcept; + // Ray-Frustum test + + ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2, + _In_ GXMVECTOR Plane3, _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept; + // Test frustum against six planes (see BoundingFrustum::GetPlanes) + + void GetPlanes(_Out_opt_ XMVECTOR* NearPlane, _Out_opt_ XMVECTOR* FarPlane, _Out_opt_ XMVECTOR* RightPlane, + _Out_opt_ XMVECTOR* LeftPlane, _Out_opt_ XMVECTOR* TopPlane, _Out_opt_ XMVECTOR* BottomPlane) const noexcept; + // Create 6 Planes representation of Frustum + + // Static methods + static void XM_CALLCONV CreateFromMatrix(_Out_ BoundingFrustum& Out, _In_ FXMMATRIX Projection, bool rhcoords = false) noexcept; + }; + + //----------------------------------------------------------------------------- + // Triangle intersection testing routines. 
+ //----------------------------------------------------------------------------- + namespace TriangleTests + { + bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _In_ FXMVECTOR V0, _In_ GXMVECTOR V1, _In_ HXMVECTOR V2, _Out_ float& Dist) noexcept; + // Ray-Triangle + + bool XM_CALLCONV Intersects(_In_ FXMVECTOR A0, _In_ FXMVECTOR A1, _In_ FXMVECTOR A2, _In_ GXMVECTOR B0, _In_ HXMVECTOR B1, _In_ HXMVECTOR B2) noexcept; + // Triangle-Triangle + + PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2, _In_ GXMVECTOR Plane) noexcept; + // Plane-Triangle + + ContainmentType XM_CALLCONV ContainedBy(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2, + _In_ GXMVECTOR Plane0, _In_ HXMVECTOR Plane1, _In_ HXMVECTOR Plane2, + _In_ CXMVECTOR Plane3, _In_ CXMVECTOR Plane4, _In_ CXMVECTOR Plane5) noexcept; + // Test a triangle against six planes at once (see BoundingFrustum::GetPlanes) + } + +#pragma warning(pop) + + /**************************************************************************** + * + * Implementation + * + ****************************************************************************/ + +#pragma warning(push) +#pragma warning(disable : 4068 4365 4616 6001) + // C4068/4616: ignore unknown pragmas + // C4365: Off by default noise + // C6001: False positives + +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes") +#pragma prefast(disable : 26495, "Union initialization confuses /analyze") +#endif + +#include "DirectXCollision.inl" + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + +#pragma warning(pop) + +} // namespace DirectX + diff --git a/Sdk/External/DirectXMath/Inc/DirectXCollision.inl b/Sdk/External/DirectXMath/Inc/DirectXCollision.inl new file mode 100644 index 0000000..c65ef54 --- /dev/null +++ b/Sdk/External/DirectXMath/Inc/DirectXCollision.inl @@ -0,0 +1,4816 @@ 
+//------------------------------------------------------------------------------------- +// DirectXCollision.inl -- C++ Collision Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +XMGLOBALCONST XMVECTORF32 g_BoxOffset[8] = +{ + { { { -1.0f, -1.0f, 1.0f, 0.0f } } }, + { { { 1.0f, -1.0f, 1.0f, 0.0f } } }, + { { { 1.0f, 1.0f, 1.0f, 0.0f } } }, + { { { -1.0f, 1.0f, 1.0f, 0.0f } } }, + { { { -1.0f, -1.0f, -1.0f, 0.0f } } }, + { { { 1.0f, -1.0f, -1.0f, 0.0f } } }, + { { { 1.0f, 1.0f, -1.0f, 0.0f } } }, + { { { -1.0f, 1.0f, -1.0f, 0.0f } } }, +}; + +XMGLOBALCONST XMVECTORF32 g_RayEpsilon = { { { 1e-20f, 1e-20f, 1e-20f, 1e-20f } } }; +XMGLOBALCONST XMVECTORF32 g_RayNegEpsilon = { { { -1e-20f, -1e-20f, -1e-20f, -1e-20f } } }; +XMGLOBALCONST XMVECTORF32 g_FltMin = { { { -FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX } } }; +XMGLOBALCONST XMVECTORF32 g_FltMax = { { { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX } } }; + +namespace Internal +{ + + //----------------------------------------------------------------------------- + // Return true if any of the elements of a 3 vector are equal to 0xffffffff. + // Slightly more efficient than using XMVector3EqualInt. + //----------------------------------------------------------------------------- + inline bool XMVector3AnyTrue(_In_ FXMVECTOR V) noexcept + { + // Duplicate the fourth element from the first element. + XMVECTOR C = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X>(V); + + return XMComparisonAnyTrue(XMVector4EqualIntR(C, XMVectorTrueInt())); + } + + + //----------------------------------------------------------------------------- + // Return true if all of the elements of a 3 vector are equal to 0xffffffff. + // Slightly more efficient than using XMVector3EqualInt.
+ //----------------------------------------------------------------------------- + inline bool XMVector3AllTrue(_In_ FXMVECTOR V) noexcept + { + // Duplicate the fourth element from the first element. + XMVECTOR C = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X>(V); + + return XMComparisonAllTrue(XMVector4EqualIntR(C, XMVectorTrueInt())); + } + +#if defined(_PREFAST_) || !defined(NDEBUG) + + XMGLOBALCONST XMVECTORF32 g_UnitVectorEpsilon = { { { 1.0e-4f, 1.0e-4f, 1.0e-4f, 1.0e-4f } } }; + XMGLOBALCONST XMVECTORF32 g_UnitQuaternionEpsilon = { { { 1.0e-4f, 1.0e-4f, 1.0e-4f, 1.0e-4f } } }; + XMGLOBALCONST XMVECTORF32 g_UnitPlaneEpsilon = { { { 1.0e-4f, 1.0e-4f, 1.0e-4f, 1.0e-4f } } }; + + //----------------------------------------------------------------------------- + // Return true if the vector is a unit vector (length == 1). + //----------------------------------------------------------------------------- + inline bool XMVector3IsUnit(_In_ FXMVECTOR V) noexcept + { + XMVECTOR Difference = XMVectorSubtract(XMVector3Length(V), XMVectorSplatOne()); + return XMVector4Less(XMVectorAbs(Difference), g_UnitVectorEpsilon); + } + + //----------------------------------------------------------------------------- + // Return true if the quaternion is a unit quaternion. + //----------------------------------------------------------------------------- + inline bool XMQuaternionIsUnit(_In_ FXMVECTOR Q) noexcept + { + XMVECTOR Difference = XMVectorSubtract(XMVector4Length(Q), XMVectorSplatOne()); + return XMVector4Less(XMVectorAbs(Difference), g_UnitQuaternionEpsilon); + } + + //----------------------------------------------------------------------------- + // Return true if the plane is a unit plane.
+ //----------------------------------------------------------------------------- + inline bool XMPlaneIsUnit(_In_ FXMVECTOR Plane) noexcept + { + XMVECTOR Difference = XMVectorSubtract(XMVector3Length(Plane), XMVectorSplatOne()); + return XMVector4Less(XMVectorAbs(Difference), g_UnitPlaneEpsilon); + } + +#endif // _PREFAST_ || !NDEBUG + + //----------------------------------------------------------------------------- + inline XMVECTOR XMPlaneTransform(_In_ FXMVECTOR Plane, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation) noexcept + { + XMVECTOR vNormal = XMVector3Rotate(Plane, Rotation); + XMVECTOR vD = XMVectorSubtract(XMVectorSplatW(Plane), XMVector3Dot(vNormal, Translation)); + + return XMVectorInsert<0, 0, 0, 0, 1>(vNormal, vD); + } + + //----------------------------------------------------------------------------- + // Return the point on the line segment (S1, S2) nearest the point P. + //----------------------------------------------------------------------------- + inline XMVECTOR PointOnLineSegmentNearestPoint(_In_ FXMVECTOR S1, _In_ FXMVECTOR S2, _In_ FXMVECTOR P) noexcept + { + XMVECTOR Dir = XMVectorSubtract(S2, S1); + XMVECTOR Projection = XMVectorSubtract(XMVector3Dot(P, Dir), XMVector3Dot(S1, Dir)); + XMVECTOR LengthSq = XMVector3Dot(Dir, Dir); + + XMVECTOR t = XMVectorMultiply(Projection, XMVectorReciprocal(LengthSq)); + XMVECTOR Point = XMVectorMultiplyAdd(t, Dir, S1); + + // t < 0 + XMVECTOR SelectS1 = XMVectorLess(Projection, XMVectorZero()); + Point = XMVectorSelect(Point, S1, SelectS1); + + // t > 1 + XMVECTOR SelectS2 = XMVectorGreater(Projection, LengthSq); + Point = XMVectorSelect(Point, S2, SelectS2); + + return Point; + } + + //----------------------------------------------------------------------------- + // Test if the point (P) on the plane of the triangle is inside the triangle + // (V0, V1, V2).
+ //----------------------------------------------------------------------------- + inline XMVECTOR XM_CALLCONV PointOnPlaneInsideTriangle(_In_ FXMVECTOR P, _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ GXMVECTOR V2) noexcept + { + // Compute the triangle normal. + XMVECTOR N = XMVector3Cross(XMVectorSubtract(V2, V0), XMVectorSubtract(V1, V0)); + + // Compute the cross products of the vector from the base of each edge to + // the point with each edge vector. + XMVECTOR C0 = XMVector3Cross(XMVectorSubtract(P, V0), XMVectorSubtract(V1, V0)); + XMVECTOR C1 = XMVector3Cross(XMVectorSubtract(P, V1), XMVectorSubtract(V2, V1)); + XMVECTOR C2 = XMVector3Cross(XMVectorSubtract(P, V2), XMVectorSubtract(V0, V2)); + + // If the cross product points in the same direction as the normal then the + // point is inside the edge (it is zero if it is on the edge). + XMVECTOR Zero = XMVectorZero(); + XMVECTOR Inside0 = XMVectorGreaterOrEqual(XMVector3Dot(C0, N), Zero); + XMVECTOR Inside1 = XMVectorGreaterOrEqual(XMVector3Dot(C1, N), Zero); + XMVECTOR Inside2 = XMVectorGreaterOrEqual(XMVector3Dot(C2, N), Zero); + + // If the point is inside all of the edges it is inside.
+ return XMVectorAndInt(XMVectorAndInt(Inside0, Inside1), Inside2); + } + + //----------------------------------------------------------------------------- + inline bool SolveCubic(_In_ float e, _In_ float f, _In_ float g, _Out_ float* t, _Out_ float* u, _Out_ float* v) noexcept + { + float p, q, h, rc, d, theta, costh3, sinth3; + + p = f - e * e / 3.0f; + q = g - e * f / 3.0f + e * e * e * 2.0f / 27.0f; + h = q * q / 4.0f + p * p * p / 27.0f; + + if (h > 0) + { + *t = *u = *v = 0.f; + return false; // only one real root + } + + if ((h == 0) && (q == 0)) // all the same root + { + *t = -e / 3; + *u = -e / 3; + *v = -e / 3; + + return true; + } + + d = sqrtf(q * q / 4.0f - h); + if (d < 0) + rc = -powf(-d, 1.0f / 3.0f); + else + rc = powf(d, 1.0f / 3.0f); + + theta = XMScalarACos(-q / (2.0f * d)); + costh3 = XMScalarCos(theta / 3.0f); + sinth3 = sqrtf(3.0f) * XMScalarSin(theta / 3.0f); + *t = 2.0f * rc * costh3 - e / 3.0f; + *u = -rc * (costh3 + sinth3) - e / 3.0f; + *v = -rc * (costh3 - sinth3) - e / 3.0f; + + return true; + } + + //----------------------------------------------------------------------------- + inline XMVECTOR CalculateEigenVector(_In_ float m11, _In_ float m12, _In_ float m13, + _In_ float m22, _In_ float m23, _In_ float m33, _In_ float e) noexcept + { + float fTmp[3]; + fTmp[0] = m12 * m23 - m13 * (m22 - e); + fTmp[1] = m13 * m12 - m23 * (m11 - e); + fTmp[2] = (m11 - e) * (m22 - e) - m12 * m12; + + XMVECTOR vTmp = XMLoadFloat3(reinterpret_cast(fTmp)); + + if (XMVector3Equal(vTmp, XMVectorZero())) // planar or linear + { + float f1, f2, f3; + + // we only have one equation - find a valid one + if ((m11 - e != 0) || (m12 != 0) || (m13 != 0)) + { + f1 = m11 - e; f2 = m12; f3 = m13; + } + else if ((m12 != 0) || (m22 - e != 0) || (m23 != 0)) + { + f1 = m12; f2 = m22 - e; f3 = m23; + } + else if ((m13 != 0) || (m23 != 0) || (m33 - e != 0)) + { + f1 = m13; f2 = m23; f3 = m33 - e; + } + else + { + // error, we'll just make something up - we have NO 
context + f1 = 1.0f; f2 = 0.0f; f3 = 0.0f; + } + + if (f1 == 0) + vTmp = XMVectorSetX(vTmp, 0.0f); + else + vTmp = XMVectorSetX(vTmp, 1.0f); + + if (f2 == 0) + vTmp = XMVectorSetY(vTmp, 0.0f); + else + vTmp = XMVectorSetY(vTmp, 1.0f); + + if (f3 == 0) + { + vTmp = XMVectorSetZ(vTmp, 0.0f); + // recalculate y to make equation work + if (m12 != 0) + vTmp = XMVectorSetY(vTmp, -f1 / f2); + } + else + { + vTmp = XMVectorSetZ(vTmp, (f2 - f1) / f3); + } + } + + if (XMVectorGetX(XMVector3LengthSq(vTmp)) > 1e-5f) + { + return XMVector3Normalize(vTmp); + } + else + { + // Multiply by a value large enough to make the vector non-zero. + vTmp = XMVectorScale(vTmp, 1e5f); + return XMVector3Normalize(vTmp); + } + } + + //----------------------------------------------------------------------------- + inline bool CalculateEigenVectors(_In_ float m11, _In_ float m12, _In_ float m13, + _In_ float m22, _In_ float m23, _In_ float m33, + _In_ float e1, _In_ float e2, _In_ float e3, + _Out_ XMVECTOR* pV1, _Out_ XMVECTOR* pV2, _Out_ XMVECTOR* pV3) noexcept + { + *pV1 = DirectX::Internal::CalculateEigenVector(m11, m12, m13, m22, m23, m33, e1); + *pV2 = DirectX::Internal::CalculateEigenVector(m11, m12, m13, m22, m23, m33, e2); + *pV3 = DirectX::Internal::CalculateEigenVector(m11, m12, m13, m22, m23, m33, e3); + + bool v1z = false; + bool v2z = false; + bool v3z = false; + + XMVECTOR Zero = XMVectorZero(); + + if (XMVector3Equal(*pV1, Zero)) + v1z = true; + + if (XMVector3Equal(*pV2, Zero)) + v2z = true; + + if (XMVector3Equal(*pV3, Zero)) + v3z = true; + + bool e12 = (fabsf(XMVectorGetX(XMVector3Dot(*pV1, *pV2))) > 0.1f); // check for non-orthogonal vectors + bool e13 = (fabsf(XMVectorGetX(XMVector3Dot(*pV1, *pV3))) > 0.1f); + bool e23 = (fabsf(XMVectorGetX(XMVector3Dot(*pV2, *pV3))) > 0.1f); + + if ((v1z && v2z && v3z) || (e12 && e13 && e23) || + (e12 && v3z) || (e13 && v2z) || (e23 && v1z)) // all eigenvectors are 0- any basis set + { + *pV1 = g_XMIdentityR0.v; + *pV2 = g_XMIdentityR1.v; 
+ *pV3 = g_XMIdentityR2.v; + return true; + } + + if (v1z && v2z) + { + XMVECTOR vTmp = XMVector3Cross(g_XMIdentityR1, *pV3); + if (XMVectorGetX(XMVector3LengthSq(vTmp)) < 1e-5f) + { + vTmp = XMVector3Cross(g_XMIdentityR0, *pV3); + } + *pV1 = XMVector3Normalize(vTmp); + *pV2 = XMVector3Cross(*pV3, *pV1); + return true; + } + + if (v3z && v1z) + { + XMVECTOR vTmp = XMVector3Cross(g_XMIdentityR1, *pV2); + if (XMVectorGetX(XMVector3LengthSq(vTmp)) < 1e-5f) + { + vTmp = XMVector3Cross(g_XMIdentityR0, *pV2); + } + *pV3 = XMVector3Normalize(vTmp); + *pV1 = XMVector3Cross(*pV2, *pV3); + return true; + } + + if (v2z && v3z) + { + XMVECTOR vTmp = XMVector3Cross(g_XMIdentityR1, *pV1); + if (XMVectorGetX(XMVector3LengthSq(vTmp)) < 1e-5f) + { + vTmp = XMVector3Cross(g_XMIdentityR0, *pV1); + } + *pV2 = XMVector3Normalize(vTmp); + *pV3 = XMVector3Cross(*pV1, *pV2); + return true; + } + + if ((v1z) || e12) + { + *pV1 = XMVector3Cross(*pV2, *pV3); + return true; + } + + if ((v2z) || e23) + { + *pV2 = XMVector3Cross(*pV3, *pV1); + return true; + } + + if ((v3z) || e13) + { + *pV3 = XMVector3Cross(*pV1, *pV2); + return true; + } + + return true; + } + + //----------------------------------------------------------------------------- + inline bool CalculateEigenVectorsFromCovarianceMatrix(_In_ float Cxx, _In_ float Cyy, _In_ float Czz, + _In_ float Cxy, _In_ float Cxz, _In_ float Cyz, + _Out_ XMVECTOR* pV1, _Out_ XMVECTOR* pV2, _Out_ XMVECTOR* pV3) noexcept + { + // Calculate the eigenvalues by solving a cubic equation. 
+        float e = -(Cxx + Cyy + Czz);
+        float f = Cxx * Cyy + Cyy * Czz + Czz * Cxx - Cxy * Cxy - Cxz * Cxz - Cyz * Cyz;
+        float g = Cxy * Cxy * Czz + Cxz * Cxz * Cyy + Cyz * Cyz * Cxx - Cxy * Cyz * Cxz * 2.0f - Cxx * Cyy * Czz;
+
+        float ev1, ev2, ev3;
+        if (!DirectX::Internal::SolveCubic(e, f, g, &ev1, &ev2, &ev3))
+        {
+            // set them to arbitrary orthonormal basis set
+            *pV1 = g_XMIdentityR0.v;
+            *pV2 = g_XMIdentityR1.v;
+            *pV3 = g_XMIdentityR2.v;
+            return false;
+        }
+
+        return DirectX::Internal::CalculateEigenVectors(Cxx, Cxy, Cxz, Cyy, Cyz, Czz, ev1, ev2, ev3, pV1, pV2, pV3);
+    }
+
+    //-----------------------------------------------------------------------------
+    // Classify a triangle against a single plane.
+    //   V0, V1, V2 - triangle vertices; each is dotted with Plane as a 4-vector,
+    //                so presumably w must be 1 (TODO confirm against callers).
+    //   Plane      - plane coefficients.
+    //   Outside    - receives a mask that is true when the smallest of the
+    //                three distances is greater than zero.
+    //   Inside     - receives a mask that is true when the largest of the
+    //                three distances is less than zero.
+    //-----------------------------------------------------------------------------
+    inline void XM_CALLCONV FastIntersectTrianglePlane(
+        FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2,
+        GXMVECTOR Plane,
+        XMVECTOR& Outside, XMVECTOR& Inside) noexcept
+    {
+        // Signed distance of every vertex from the plane.
+        XMVECTOR Dist0 = XMVector4Dot(V0, Plane);
+        XMVECTOR Dist1 = XMVector4Dot(V1, Plane);
+        XMVECTOR Dist2 = XMVector4Dot(V2, Plane);
+
+        XMVECTOR MinDist = XMVectorMin(Dist0, Dist1);
+        MinDist = XMVectorMin(MinDist, Dist2);
+
+        XMVECTOR MaxDist = XMVectorMax(Dist0, Dist1);
+        MaxDist = XMVectorMax(MaxDist, Dist2);
+
+        XMVECTOR Zero = XMVectorZero();
+
+        // Outside the plane?
+        Outside = XMVectorGreater(MinDist, Zero);
+
+        // Fully inside the plane?
+        Inside = XMVectorLess(MaxDist, Zero);
+    }
+
+    //-----------------------------------------------------------------------------
+    // Classify a sphere against a single plane.
+    //   Center  - sphere center, dotted with Plane as a 4-vector (the callers
+    //             visible in this file set w to one first).
+    //   Radius  - sphere radius, compared component-wise with the distance.
+    //   Outside - receives (distance > Radius).
+    //   Inside  - receives (distance < -Radius).
+    //-----------------------------------------------------------------------------
+    inline void FastIntersectSpherePlane(_In_ FXMVECTOR Center, _In_ FXMVECTOR Radius, _In_ FXMVECTOR Plane,
+        _Out_ XMVECTOR& Outside, _Out_ XMVECTOR& Inside) noexcept
+    {
+        XMVECTOR Dist = XMVector4Dot(Center, Plane);
+
+        // Outside the plane?
+        Outside = XMVectorGreater(Dist, Radius);
+
+        // Fully inside the plane?
+        Inside = XMVectorLess(Dist, XMVectorNegate(Radius));
+    }
+
+    //-----------------------------------------------------------------------------
+    // Classify an axis-aligned box (Center, Extents) against a single plane.
+    //   Outside - receives (distance > projected radius).
+    //   Inside  - receives (distance < -projected radius).
+    //-----------------------------------------------------------------------------
+    inline void FastIntersectAxisAlignedBoxPlane(_In_ FXMVECTOR Center, _In_ FXMVECTOR Extents, _In_ FXMVECTOR Plane,
+        _Out_ XMVECTOR& Outside, _Out_ XMVECTOR& Inside) noexcept
+    {
+        // Compute the distance to the center of the box.
+        XMVECTOR Dist = XMVector4Dot(Center, Plane);
+
+        // Project the axes of the box onto the normal of the plane.  Half the
+        // length of the projection (sometimes called the "radius") is equal to
+        // h(u) * abs(n dot b(u)) + h(v) * abs(n dot b(v)) + h(w) * abs(n dot b(w))
+        // where h(i) are extents of the box, n is the plane normal, and b(i) are the
+        // axes of the box. In this case b(i) = [(1,0,0), (0,1,0), (0,0,1)].
+        XMVECTOR Radius = XMVector3Dot(Extents, XMVectorAbs(Plane));
+
+        // Outside the plane?
+        Outside = XMVectorGreater(Dist, Radius);
+
+        // Fully inside the plane?
+        Inside = XMVectorLess(Dist, XMVectorNegate(Radius));
+    }
+
+    //-----------------------------------------------------------------------------
+    // Classify an oriented box against a single plane.
+    //   Center, Extents     - box center and half-sizes along its own axes.
+    //   Axis0, Axis1, Axis2 - the three box axes.
+    //   Outside             - receives (distance > projected radius).
+    //   Inside              - receives (distance < -projected radius).
+    //-----------------------------------------------------------------------------
+    inline void XM_CALLCONV FastIntersectOrientedBoxPlane(
+        _In_ FXMVECTOR Center, _In_ FXMVECTOR Extents, _In_ FXMVECTOR Axis0,
+        _In_ GXMVECTOR Axis1,
+        _In_ HXMVECTOR Axis2, _In_ HXMVECTOR Plane,
+        _Out_ XMVECTOR& Outside, _Out_ XMVECTOR& Inside) noexcept
+    {
+        // Compute the distance to the center of the box.
+        XMVECTOR Dist = XMVector4Dot(Center, Plane);
+
+        // Project the axes of the box onto the normal of the plane.  Half the
+        // length of the projection (sometimes called the "radius") is equal to
+        // h(u) * abs(n dot b(u)) + h(v) * abs(n dot b(v)) + h(w) * abs(n dot b(w))
+        // where h(i) are extents of the box, n is the plane normal, and b(i) are the
+        // axes of the box.
+        // The three (n dot b(i)) values are packed into x, y and z of one vector
+        // so that a single dot product with Extents finishes the sum.
+        XMVECTOR Radius = XMVector3Dot(Plane, Axis0);
+        Radius = XMVectorInsert<0, 0, 1, 0, 0>(Radius, XMVector3Dot(Plane, Axis1));
+        Radius = XMVectorInsert<0, 0, 0, 1, 0>(Radius, XMVector3Dot(Plane, Axis2));
+        Radius = XMVector3Dot(Extents, XMVectorAbs(Radius));
+
+        // Outside the plane?
+        Outside = XMVectorGreater(Dist, Radius);
+
+        // Fully inside the plane?
+        Inside = XMVectorLess(Dist, XMVectorNegate(Radius));
+    }
+
+    //-----------------------------------------------------------------------------
+    // Classify the eight corner points of a frustum against a single plane.
+    //   Point0..Point7 - the corner points.
+    //   Plane          - xyz is used as the normal, w as the plane offset.
+    //   Outside        - receives (smallest projection > -Plane.w).
+    //   Inside         - receives (largest projection < -Plane.w).
+    //-----------------------------------------------------------------------------
+    inline void XM_CALLCONV FastIntersectFrustumPlane(
+        _In_ FXMVECTOR Point0, _In_ FXMVECTOR Point1, _In_ FXMVECTOR Point2,
+        _In_ GXMVECTOR Point3,
+        _In_ HXMVECTOR Point4, _In_ HXMVECTOR Point5,
+        _In_ CXMVECTOR Point6, _In_ CXMVECTOR Point7, _In_ CXMVECTOR Plane,
+        _Out_ XMVECTOR& Outside, _Out_ XMVECTOR& Inside) noexcept
+    {
+        // Find the min/max projection of the frustum onto the plane normal.
+        XMVECTOR Min, Max, Dist;
+
+        Min = Max = XMVector3Dot(Plane, Point0);
+
+        Dist = XMVector3Dot(Plane, Point1);
+        Min = XMVectorMin(Min, Dist);
+        Max = XMVectorMax(Max, Dist);
+
+        Dist = XMVector3Dot(Plane, Point2);
+        Min = XMVectorMin(Min, Dist);
+        Max = XMVectorMax(Max, Dist);
+
+        Dist = XMVector3Dot(Plane, Point3);
+        Min = XMVectorMin(Min, Dist);
+        Max = XMVectorMax(Max, Dist);
+
+        Dist = XMVector3Dot(Plane, Point4);
+        Min = XMVectorMin(Min, Dist);
+        Max = XMVectorMax(Max, Dist);
+
+        Dist = XMVector3Dot(Plane, Point5);
+        Min = XMVectorMin(Min, Dist);
+        Max = XMVectorMax(Max, Dist);
+
+        Dist = XMVector3Dot(Plane, Point6);
+        Min = XMVectorMin(Min, Dist);
+        Max = XMVectorMax(Max, Dist);
+
+        Dist = XMVector3Dot(Plane, Point7);
+        Min = XMVectorMin(Min, Dist);
+        Max = XMVectorMax(Max, Dist);
+
+        // The projections above ignore the plane offset, so compare against -w.
+        XMVECTOR PlaneDist = XMVectorNegate(XMVectorSplatW(Plane));
+
+        // Outside the plane?
+        Outside = XMVectorGreater(Min, PlaneDist);
+
+        // Fully inside the plane?
+        Inside = XMVectorLess(Max, PlaneDist);
+    }
+
+} // namespace Internal
+
+
+/****************************************************************************
+ *
+ * BoundingSphere
+ *
+ ****************************************************************************/
+
+ //-----------------------------------------------------------------------------
+ // Transform a sphere by an angle preserving transform.
+ // The radius is scaled by the length of the longest of the first three rows
+ // of M (xyz parts only).
+ //-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void XM_CALLCONV BoundingSphere::Transform(BoundingSphere& Out, FXMMATRIX M) const noexcept
+{
+    // Load the center of the sphere.
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+
+    // Transform the center of the sphere.
+    XMVECTOR C = XMVector3Transform(vCenter, M);
+
+    // Squared length of each of the first three matrix rows.
+    XMVECTOR dX = XMVector3Dot(M.r[0], M.r[0]);
+    XMVECTOR dY = XMVector3Dot(M.r[1], M.r[1]);
+    XMVECTOR dZ = XMVector3Dot(M.r[2], M.r[2]);
+
+    XMVECTOR d = XMVectorMax(dX, XMVectorMax(dY, dZ));
+
+    // Store the transformed center.
+    XMStoreFloat3(&Out.Center, C);
+
+    // Scale the radius of the sphere.
+    float Scale = sqrtf(XMVectorGetX(d));
+    Out.Radius = Radius * Scale;
+}
+
+// Transform a sphere by a uniform Scale, then a Rotation (passed to
+// XMVector3Rotate), then a Translation. The radius is multiplied by Scale.
+_Use_decl_annotations_
+inline void XM_CALLCONV BoundingSphere::Transform(BoundingSphere& Out, float Scale, FXMVECTOR Rotation, FXMVECTOR Translation) const noexcept
+{
+    // Load the center of the sphere.
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+
+    // Transform the center of the sphere.
+    vCenter = XMVectorAdd(XMVector3Rotate(XMVectorScale(vCenter, Scale), Rotation), Translation);
+
+    // Store the transformed center.
+    XMStoreFloat3(&Out.Center, vCenter);
+
+    // Scale the radius of the sphere.
+    Out.Radius = Radius * Scale;
+}
+
+
+//-----------------------------------------------------------------------------
+// Point in sphere test.
+//-----------------------------------------------------------------------------
+// Returns CONTAINS when the squared distance from Point to the sphere center
+// does not exceed the squared radius, DISJOINT otherwise.
+_Use_decl_annotations_
+inline ContainmentType XM_CALLCONV BoundingSphere::Contains(FXMVECTOR Point) const noexcept
+{
+    const XMVECTOR sphereCenter = XMLoadFloat3(&Center);
+    const XMVECTOR radius = XMVectorReplicatePtr(&Radius);
+    const XMVECTOR radiusSq = XMVectorMultiply(radius, radius);
+
+    const XMVECTOR toPoint = XMVectorSubtract(Point, sphereCenter);
+    const XMVECTOR distSq = XMVector3LengthSq(toPoint);
+
+    if (XMVector3LessOrEqual(distSq, radiusSq))
+        return CONTAINS;
+
+    return DISJOINT;
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle in sphere test
+// DISJOINT when the triangle does not touch the sphere at all, CONTAINS when
+// all three vertices lie within the radius, INTERSECTS otherwise.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType XM_CALLCONV BoundingSphere::Contains(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept
+{
+    // Cheap rejection first: no overlap means nothing can be contained.
+    if (!Intersects(V0, V1, V2))
+        return DISJOINT;
+
+    const XMVECTOR sphereCenter = XMLoadFloat3(&Center);
+    const XMVECTOR radius = XMVectorReplicatePtr(&Radius);
+    const XMVECTOR radiusSq = XMVectorMultiply(radius, radius);
+
+    // One mask per vertex: is that vertex within the radius?
+    const XMVECTOR in0 = XMVectorLessOrEqual(XMVector3LengthSq(XMVectorSubtract(V0, sphereCenter)), radiusSq);
+    const XMVECTOR in1 = XMVectorLessOrEqual(XMVector3LengthSq(XMVectorSubtract(V1, sphereCenter)), radiusSq);
+    const XMVECTOR in2 = XMVectorLessOrEqual(XMVector3LengthSq(XMVectorSubtract(V2, sphereCenter)), radiusSq);
+
+    const XMVECTOR allIn = XMVectorAndInt(XMVectorAndInt(in0, in1), in2);
+
+    if (XMVector3EqualInt(allIn, XMVectorTrueInt()))
+        return CONTAINS;
+
+    return INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere in sphere test.
+//-----------------------------------------------------------------------------
+// With d the distance between the two centers: DISJOINT when r1 + r2 < d,
+// CONTAINS when r1 - r2 >= d, INTERSECTS otherwise.
+_Use_decl_annotations_
+inline ContainmentType BoundingSphere::Contains(const BoundingSphere& sh) const noexcept
+{
+    XMVECTOR Center1 = XMLoadFloat3(&Center);
+    float r1 = Radius;
+
+    XMVECTOR Center2 = XMLoadFloat3(&sh.Center);
+    float r2 = sh.Radius;
+
+    XMVECTOR V = XMVectorSubtract(Center2, Center1);
+
+    XMVECTOR Dist = XMVector3Length(V);
+
+    float d = XMVectorGetX(Dist);
+
+    return (r1 + r2 >= d) ? ((r1 - r2 >= d) ? CONTAINS : INTERSECTS) : DISJOINT;
+}
+
+
+//-----------------------------------------------------------------------------
+// Axis-aligned box in sphere test
+// DISJOINT when the box does not touch the sphere, CONTAINS when every box
+// corner lies within the radius, INTERSECTS otherwise.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingSphere::Contains(const BoundingBox& box) const noexcept
+{
+    if (!box.Intersects(*this))
+        return DISJOINT;
+
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vRadius = XMVectorReplicatePtr(&Radius);
+    XMVECTOR RadiusSq = XMVectorMultiply(vRadius, vRadius);
+
+    XMVECTOR boxCenter = XMLoadFloat3(&box.Center);
+    XMVECTOR boxExtents = XMLoadFloat3(&box.Extents);
+
+    XMVECTOR InsideAll = XMVectorTrueInt();
+
+    // Box center relative to the sphere center, so each corner computed below
+    // is already the vector from the sphere center to that corner.
+    XMVECTOR offset = XMVectorSubtract(boxCenter, vCenter);
+
+    // g_BoxOffset is defined elsewhere; presumably the per-corner sign
+    // vectors (TODO confirm).
+    for (size_t i = 0; i < BoundingBox::CORNER_COUNT; ++i)
+    {
+        XMVECTOR C = XMVectorMultiplyAdd(boxExtents, g_BoxOffset[i], offset);
+        XMVECTOR d = XMVector3LengthSq(C);
+        InsideAll = XMVectorAndInt(InsideAll, XMVectorLessOrEqual(d, RadiusSq));
+    }
+
+    return (XMVector3EqualInt(InsideAll, XMVectorTrueInt())) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Oriented box in sphere test
+// DISJOINT when the box does not touch the sphere, CONTAINS when every
+// rotated box corner lies within the radius, INTERSECTS otherwise.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingSphere::Contains(const BoundingOrientedBox& box) const noexcept
+{
+    if (!box.Intersects(*this))
+        return DISJOINT;
+
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vRadius = XMVectorReplicatePtr(&Radius);
+    XMVECTOR RadiusSq = XMVectorMultiply(vRadius, vRadius);
+
+    XMVECTOR boxCenter = XMLoadFloat3(&box.Center);
+    XMVECTOR boxExtents = XMLoadFloat3(&box.Extents);
+    XMVECTOR boxOrientation = XMLoadFloat4(&box.Orientation);
+
+    assert(DirectX::Internal::XMQuaternionIsUnit(boxOrientation));
+
+    XMVECTOR InsideAll = XMVectorTrueInt();
+
+    for (size_t i = 0; i < BoundingOrientedBox::CORNER_COUNT; ++i)
+    {
+        // Corner = rotate(extents * corner offset) + box center.
+        XMVECTOR C = XMVectorAdd(XMVector3Rotate(XMVectorMultiply(boxExtents, g_BoxOffset[i]), boxOrientation), boxCenter);
+        XMVECTOR d = XMVector3LengthSq(XMVectorSubtract(vCenter, C));
+        InsideAll = XMVectorAndInt(InsideAll, XMVectorLessOrEqual(d, RadiusSq));
+    }
+
+    return (XMVector3EqualInt(InsideAll, XMVectorTrueInt())) ? CONTAINS : INTERSECTS;
+
+}
+
+
+//-----------------------------------------------------------------------------
+// Frustum in sphere test
+// DISJOINT when the frustum does not touch the sphere, CONTAINS when all
+// frustum corners lie within the radius, INTERSECTS otherwise.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingSphere::Contains(const BoundingFrustum& fr) const noexcept
+{
+    if (!fr.Intersects(*this))
+        return DISJOINT;
+
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vRadius = XMVectorReplicatePtr(&Radius);
+    XMVECTOR RadiusSq = XMVectorMultiply(vRadius, vRadius);
+
+    XMVECTOR vOrigin = XMLoadFloat3(&fr.Origin);
+    XMVECTOR vOrientation = XMLoadFloat4(&fr.Orientation);
+
+    assert(DirectX::Internal::XMQuaternionIsUnit(vOrientation));
+
+    // Build the corners of the frustum.
+    // Each (slope, slope, 1) direction is scaled by the near or far distance
+    // to give a corner in the frustum's local space.
+    XMVECTOR vRightTop = XMVectorSet(fr.RightSlope, fr.TopSlope, 1.0f, 0.0f);
+    XMVECTOR vRightBottom = XMVectorSet(fr.RightSlope, fr.BottomSlope, 1.0f, 0.0f);
+    XMVECTOR vLeftTop = XMVectorSet(fr.LeftSlope, fr.TopSlope, 1.0f, 0.0f);
+    XMVECTOR vLeftBottom = XMVectorSet(fr.LeftSlope, fr.BottomSlope, 1.0f, 0.0f);
+    XMVECTOR vNear = XMVectorReplicatePtr(&fr.Near);
+    XMVECTOR vFar = XMVectorReplicatePtr(&fr.Far);
+
+    XMVECTOR Corners[BoundingFrustum::CORNER_COUNT];
+    Corners[0] = XMVectorMultiply(vRightTop, vNear);
+    Corners[1] = XMVectorMultiply(vRightBottom, vNear);
+    Corners[2] = XMVectorMultiply(vLeftTop, vNear);
+    Corners[3] = XMVectorMultiply(vLeftBottom, vNear);
+    Corners[4] = XMVectorMultiply(vRightTop, vFar);
+    Corners[5] = XMVectorMultiply(vRightBottom, vFar);
+    Corners[6] = XMVectorMultiply(vLeftTop, vFar);
+    Corners[7] = XMVectorMultiply(vLeftBottom, vFar);
+
+    XMVECTOR InsideAll = XMVectorTrueInt();
+    for (size_t i = 0; i < BoundingFrustum::CORNER_COUNT; ++i)
+    {
+        // Move the local-space corner into the sphere's space before measuring.
+        XMVECTOR C = XMVectorAdd(XMVector3Rotate(Corners[i], vOrientation), vOrigin);
+        XMVECTOR d = XMVector3LengthSq(XMVectorSubtract(vCenter, C));
+        InsideAll = XMVectorAndInt(InsideAll, XMVectorLessOrEqual(d, RadiusSq));
+    }
+
+    return (XMVector3EqualInt(InsideAll, XMVectorTrueInt())) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere vs. sphere test.
+// True when the squared distance between the centers does not exceed the
+// square of the summed radii.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingSphere::Intersects(const BoundingSphere& sh) const noexcept
+{
+    // Load A.
+    XMVECTOR vCenterA = XMLoadFloat3(&Center);
+    XMVECTOR vRadiusA = XMVectorReplicatePtr(&Radius);
+
+    // Load B.
+    XMVECTOR vCenterB = XMLoadFloat3(&sh.Center);
+    XMVECTOR vRadiusB = XMVectorReplicatePtr(&sh.Radius);
+
+    // Distance squared between centers.
+    XMVECTOR Delta = XMVectorSubtract(vCenterB, vCenterA);
+    XMVECTOR DistanceSquared = XMVector3LengthSq(Delta);
+
+    // Sum of the radii squared.
+    XMVECTOR RadiusSquared = XMVectorAdd(vRadiusA, vRadiusB);
+    RadiusSquared = XMVectorMultiply(RadiusSquared, RadiusSquared);
+
+    return XMVector3LessOrEqual(DistanceSquared, RadiusSquared);
+}
+
+
+//-----------------------------------------------------------------------------
+// Box vs. sphere test.
+// Both overloads forward to the box's own sphere test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingSphere::Intersects(const BoundingBox& box) const noexcept
+{
+    return box.Intersects(*this);
+}
+
+_Use_decl_annotations_
+inline bool BoundingSphere::Intersects(const BoundingOrientedBox& box) const noexcept
+{
+    return box.Intersects(*this);
+}
+
+
+//-----------------------------------------------------------------------------
+// Frustum vs. sphere test.
+// Forwards to the frustum's own sphere test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingSphere::Intersects(const BoundingFrustum& fr) const noexcept
+{
+    return fr.Intersects(*this);
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle vs sphere test
+// The triangle must not be degenerate (asserted below).
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool XM_CALLCONV BoundingSphere::Intersects(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept
+{
+    // Load the sphere.
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vRadius = XMVectorReplicatePtr(&Radius);
+
+    // Compute the plane of the triangle (has to be normalized).
+    XMVECTOR N = XMVector3Normalize(XMVector3Cross(XMVectorSubtract(V1, V0), XMVectorSubtract(V2, V0)));
+
+    // Assert that the triangle is not degenerate.
+    assert(!XMVector3Equal(N, XMVectorZero()));
+
+    // Signed distance from the sphere center to the plane of the triangle.
+    XMVECTOR Dist = XMVector3Dot(XMVectorSubtract(vCenter, V0), N);
+
+    // If the center of the sphere is farther from the plane of the triangle than
+    // the radius of the sphere, then there cannot be an intersection.
+    XMVECTOR NoIntersection = XMVectorLess(Dist, XMVectorNegate(vRadius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Dist, vRadius));
+
+    // Project the center of the sphere onto the plane of the triangle.
+    XMVECTOR Point = XMVectorNegativeMultiplySubtract(N, Dist, vCenter);
+
+    // Is it inside all the edges? If so we intersect because the distance
+    // to the plane is less than the radius.
+    XMVECTOR Intersection = DirectX::Internal::PointOnPlaneInsideTriangle(Point, V0, V1, V2);
+
+    // Find the nearest point on each edge.
+    XMVECTOR RadiusSq = XMVectorMultiply(vRadius, vRadius);
+
+    // Edge 0,1
+    Point = DirectX::Internal::PointOnLineSegmentNearestPoint(V0, V1, vCenter);
+
+    // If the distance from the center of the sphere to the point is no more
+    // than the radius of the sphere then it must intersect.
+    Intersection = XMVectorOrInt(Intersection, XMVectorLessOrEqual(XMVector3LengthSq(XMVectorSubtract(vCenter, Point)), RadiusSq));
+
+    // Edge 1,2
+    Point = DirectX::Internal::PointOnLineSegmentNearestPoint(V1, V2, vCenter);
+
+    // If the distance from the center of the sphere to the point is no more
+    // than the radius of the sphere then it must intersect.
+    Intersection = XMVectorOrInt(Intersection, XMVectorLessOrEqual(XMVector3LengthSq(XMVectorSubtract(vCenter, Point)), RadiusSq));
+
+    // Edge 2,0
+    Point = DirectX::Internal::PointOnLineSegmentNearestPoint(V2, V0, vCenter);
+
+    // If the distance from the center of the sphere to the point is no more
+    // than the radius of the sphere then it must intersect.
+    Intersection = XMVectorOrInt(Intersection, XMVectorLessOrEqual(XMVector3LengthSq(XMVectorSubtract(vCenter, Point)), RadiusSq));
+
+    // Final answer is (Intersection AND NOT NoIntersection).
+    return XMVector4EqualInt(XMVectorAndCInt(Intersection, NoIntersection), XMVectorTrueInt());
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere-plane intersection
+// Plane must be normalized (asserted below). Returns FRONT, BACK or
+// INTERSECTING.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PlaneIntersectionType XM_CALLCONV BoundingSphere::Intersects(FXMVECTOR Plane) const noexcept
+{
+    assert(DirectX::Internal::XMPlaneIsUnit(Plane));
+
+    // Load the sphere.
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vRadius = XMVectorReplicatePtr(&Radius);
+
+    // Set w of the center to one so we can dot4 with a plane.
+    vCenter = XMVectorInsert<0, 0, 0, 0, 1>(vCenter, XMVectorSplatOne());
+
+    XMVECTOR Outside, Inside;
+    DirectX::Internal::FastIntersectSpherePlane(vCenter, vRadius, Plane, Outside, Inside);
+
+    // The sphere is farther than its radius on the positive side of the plane.
+    if (XMVector4EqualInt(Outside, XMVectorTrueInt()))
+        return FRONT;
+
+    // The sphere is farther than its radius on the negative side of the plane.
+    if (XMVector4EqualInt(Inside, XMVectorTrueInt()))
+        return BACK;
+
+    // Neither fully in front nor fully behind: the sphere straddles the plane.
+    return INTERSECTING;
+}
+
+
+//-----------------------------------------------------------------------------
+// Compute the intersection of a ray (Origin, Direction) with a sphere.
+// Direction must be unit length (asserted below). On a hit, returns true and
+// writes the ray parameter of the chosen intersection to Dist; otherwise
+// returns false and sets Dist to 0.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool XM_CALLCONV BoundingSphere::Intersects(FXMVECTOR Origin, FXMVECTOR Direction, float& Dist) const noexcept
+{
+    assert(DirectX::Internal::XMVector3IsUnit(Direction));
+
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vRadius = XMVectorReplicatePtr(&Radius);
+
+    // l is the vector from the ray origin to the center of the sphere.
+    XMVECTOR l = XMVectorSubtract(vCenter, Origin);
+
+    // s is the projection of the l onto the ray direction.
+    XMVECTOR s = XMVector3Dot(l, Direction);
+
+    XMVECTOR l2 = XMVector3Dot(l, l);
+
+    XMVECTOR r2 = XMVectorMultiply(vRadius, vRadius);
+
+    // m2 is squared distance from the center of the sphere to the projection.
+    XMVECTOR m2 = XMVectorNegativeMultiplySubtract(s, s, l2);
+
+    XMVECTOR NoIntersection;
+
+    // If the ray origin is outside the sphere and the center of the sphere is
+    // behind the ray origin there is no intersection.
+    NoIntersection = XMVectorAndInt(XMVectorLess(s, XMVectorZero()), XMVectorGreater(l2, r2));
+
+    // If the squared distance from the center of the sphere to the projection
+    // is greater than the radius squared the ray will miss the sphere.
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(m2, r2));
+
+    // The ray hits the sphere, compute the nearest intersection point.
+    // (Computed unconditionally; the result is only used when there is a hit.)
+    XMVECTOR q = XMVectorSqrt(XMVectorSubtract(r2, m2));
+    XMVECTOR t1 = XMVectorSubtract(s, q);
+    XMVECTOR t2 = XMVectorAdd(s, q);
+
+    // When the origin is inside the sphere take the far root t2, else t1.
+    XMVECTOR OriginInside = XMVectorLessOrEqual(l2, r2);
+    XMVECTOR t = XMVectorSelect(t1, t2, OriginInside);
+
+    if (XMVector4NotEqualInt(NoIntersection, XMVectorTrueInt()))
+    {
+        // Store the x-component to Dist.
+        XMStoreFloat(&Dist, t);
+        return true;
+    }
+
+    Dist = 0.f;
+    return false;
+}
+
+
+//-----------------------------------------------------------------------------
+// Test a sphere vs 6 planes (typically forming a frustum).
+// DISJOINT when the sphere is outside any plane, CONTAINS when it is inside
+// all six, INTERSECTS otherwise.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType XM_CALLCONV BoundingSphere::ContainedBy(
+    FXMVECTOR Plane0, FXMVECTOR Plane1, FXMVECTOR Plane2,
+    GXMVECTOR Plane3,
+    HXMVECTOR Plane4, HXMVECTOR Plane5) const noexcept
+{
+    // Load the sphere.
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vRadius = XMVectorReplicatePtr(&Radius);
+
+    // Set w of the center to one so we can dot4 with a plane.
+    vCenter = XMVectorInsert<0, 0, 0, 0, 1>(vCenter, XMVectorSplatOne());
+
+    XMVECTOR Outside, Inside;
+
+    // Test against each plane.
+    DirectX::Internal::FastIntersectSpherePlane(vCenter, vRadius, Plane0, Outside, Inside);
+
+    XMVECTOR AnyOutside = Outside;
+    XMVECTOR AllInside = Inside;
+
+    DirectX::Internal::FastIntersectSpherePlane(vCenter, vRadius, Plane1, Outside, Inside);
+    AnyOutside = XMVectorOrInt(AnyOutside, Outside);
+    AllInside = XMVectorAndInt(AllInside, Inside);
+
+    DirectX::Internal::FastIntersectSpherePlane(vCenter, vRadius, Plane2, Outside, Inside);
+    AnyOutside = XMVectorOrInt(AnyOutside, Outside);
+    AllInside = XMVectorAndInt(AllInside, Inside);
+
+    DirectX::Internal::FastIntersectSpherePlane(vCenter, vRadius, Plane3, Outside, Inside);
+    AnyOutside = XMVectorOrInt(AnyOutside, Outside);
+    AllInside = XMVectorAndInt(AllInside, Inside);
+
+    DirectX::Internal::FastIntersectSpherePlane(vCenter, vRadius, Plane4, Outside, Inside);
+    AnyOutside = XMVectorOrInt(AnyOutside, Outside);
+    AllInside = XMVectorAndInt(AllInside, Inside);
+
+    DirectX::Internal::FastIntersectSpherePlane(vCenter, vRadius, Plane5, Outside, Inside);
+    AnyOutside = XMVectorOrInt(AnyOutside, Outside);
+    AllInside = XMVectorAndInt(AllInside, Inside);
+
+    // If the sphere is outside any plane it is outside.
+    if (XMVector4EqualInt(AnyOutside, XMVectorTrueInt()))
+        return DISJOINT;
+
+    // If the sphere is inside all planes it is inside.
+    if (XMVector4EqualInt(AllInside, XMVectorTrueInt()))
+        return CONTAINS;
+
+    // The sphere is not inside all planes or outside a plane, it may intersect.
+    return INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Creates a bounding sphere that contains two other bounding spheres
+// If one input already contains the other, that input is copied to Out.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingSphere::CreateMerged(BoundingSphere& Out, const BoundingSphere& S1, const BoundingSphere& S2) noexcept
+{
+    XMVECTOR Center1 = XMLoadFloat3(&S1.Center);
+    float r1 = S1.Radius;
+
+    XMVECTOR Center2 = XMLoadFloat3(&S2.Center);
+    float r2 = S2.Radius;
+
+    XMVECTOR V = XMVectorSubtract(Center2, Center1);
+
+    XMVECTOR Dist = XMVector3Length(V);
+
+    float d = XMVectorGetX(Dist);
+
+    // Overlapping spheres: check whether one fully encloses the other.
+    if (r1 + r2 >= d)
+    {
+        if (r1 - r2 >= d)
+        {
+            Out = S1;
+            return;
+        }
+        else if (r2 - r1 >= d)
+        {
+            Out = S2;
+            return;
+        }
+    }
+
+    // Unit direction from S1 to S2. d is non-zero here: with d == 0 one of the
+    // two enclosure tests above always succeeds.
+    XMVECTOR N = XMVectorDivide(V, Dist);
+
+    // Extent of both spheres along N, measured from Center1.
+    float t1 = XMMin(-r1, d - r2);
+    float t2 = XMMax(r1, d + r2);
+    float t_5 = (t2 - t1) * 0.5f;
+
+    // The merged center is the midpoint of [t1, t2]; the radius is half its length.
+    XMVECTOR NCenter = XMVectorAdd(Center1, XMVectorMultiply(N, XMVectorReplicate(t_5 + t1)));
+
+    XMStoreFloat3(&Out.Center, NCenter);
+    Out.Radius = t_5;
+}
+
+
+//-----------------------------------------------------------------------------
+// Create sphere circumscribing bounding box
+// The sphere shares the box center; its radius is the length of the extents
+// vector, i.e. the distance from the center to a corner.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingSphere::CreateFromBoundingBox(BoundingSphere& Out, const BoundingBox& box) noexcept
+{
+    Out.Center = box.Center;
+    XMVECTOR vExtents = XMLoadFloat3(&box.Extents);
+    Out.Radius = XMVectorGetX(XMVector3Length(vExtents));
+}
+
+_Use_decl_annotations_
+inline void BoundingSphere::CreateFromBoundingBox(BoundingSphere& Out, const BoundingOrientedBox& box) noexcept
+{
+    // Bounding box orientation is irrelevant because a sphere is rotationally invariant
+    Out.Center = box.Center;
+    XMVECTOR vExtents = XMLoadFloat3(&box.Extents);
+    Out.Radius = XMVectorGetX(XMVector3Length(vExtents));
+}
+
+
+//-----------------------------------------------------------------------------
+// Find the approximate smallest enclosing bounding sphere for a set of
+// points. Exact computation of the smallest enclosing bounding sphere is
+// possible but is slower and requires a more complex algorithm.
+// The algorithm is based on Jack Ritter, "An Efficient Bounding Sphere",
+// Graphics Gems.
+//
+//   Out     - receives the resulting sphere.
+//   Count   - number of points; must be greater than zero (asserted).
+//   pPoints - address of the first point; must not be null (asserted).
+//   Stride  - distance in BYTES from one point to the next.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingSphere::CreateFromPoints(BoundingSphere& Out, size_t Count, const XMFLOAT3* pPoints, size_t Stride) noexcept
+{
+    assert(Count > 0);
+    assert(pPoints);
+
+    // Find the points with minimum and maximum x, y, and z
+    XMVECTOR MinX, MaxX, MinY, MaxY, MinZ, MaxZ;
+
+    MinX = MaxX = MinY = MaxY = MinZ = MaxZ = XMLoadFloat3(pPoints);
+
+    for (size_t i = 1; i < Count; ++i)
+    {
+        // Stride is in bytes, so step through the buffer as raw bytes and
+        // cast back to the point type.
+        XMVECTOR Point = XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(reinterpret_cast<const unsigned char*>(pPoints) + i * Stride));
+
+        float px = XMVectorGetX(Point);
+        float py = XMVectorGetY(Point);
+        float pz = XMVectorGetZ(Point);
+
+        if (px < XMVectorGetX(MinX))
+            MinX = Point;
+
+        if (px > XMVectorGetX(MaxX))
+            MaxX = Point;
+
+        if (py < XMVectorGetY(MinY))
+            MinY = Point;
+
+        if (py > XMVectorGetY(MaxY))
+            MaxY = Point;
+
+        if (pz < XMVectorGetZ(MinZ))
+            MinZ = Point;
+
+        if (pz > XMVectorGetZ(MaxZ))
+            MaxZ = Point;
+    }
+
+    // Use the min/max pair that are farthest apart to form the initial sphere.
+    XMVECTOR DeltaX = XMVectorSubtract(MaxX, MinX);
+    XMVECTOR DistX = XMVector3Length(DeltaX);
+
+    XMVECTOR DeltaY = XMVectorSubtract(MaxY, MinY);
+    XMVECTOR DistY = XMVector3Length(DeltaY);
+
+    XMVECTOR DeltaZ = XMVectorSubtract(MaxZ, MinZ);
+    XMVECTOR DistZ = XMVector3Length(DeltaZ);
+
+    XMVECTOR vCenter;
+    XMVECTOR vRadius;
+
+    if (XMVector3Greater(DistX, DistY))
+    {
+        if (XMVector3Greater(DistX, DistZ))
+        {
+            // Use min/max x.
+            vCenter = XMVectorLerp(MaxX, MinX, 0.5f);
+            vRadius = XMVectorScale(DistX, 0.5f);
+        }
+        else
+        {
+            // Use min/max z.
+            vCenter = XMVectorLerp(MaxZ, MinZ, 0.5f);
+            vRadius = XMVectorScale(DistZ, 0.5f);
+        }
+    }
+    else // Y >= X
+    {
+        if (XMVector3Greater(DistY, DistZ))
+        {
+            // Use min/max y.
+            vCenter = XMVectorLerp(MaxY, MinY, 0.5f);
+            vRadius = XMVectorScale(DistY, 0.5f);
+        }
+        else
+        {
+            // Use min/max z.
+            vCenter = XMVectorLerp(MaxZ, MinZ, 0.5f);
+            vRadius = XMVectorScale(DistZ, 0.5f);
+        }
+    }
+
+    // Add any points not inside the sphere.
+    for (size_t i = 0; i < Count; ++i)
+    {
+        XMVECTOR Point = XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(reinterpret_cast<const unsigned char*>(pPoints) + i * Stride));
+
+        XMVECTOR Delta = XMVectorSubtract(Point, vCenter);
+
+        XMVECTOR Dist = XMVector3Length(Delta);
+
+        if (XMVector3Greater(Dist, vRadius))
+        {
+            // Adjust sphere to include the new point.
+            // The new radius is the mean of the old radius and the distance
+            // to the point; the center moves towards the point by the
+            // fraction (1 - newRadius / Dist) of Delta. Dist is strictly
+            // positive here because it exceeds the radius.
+            vRadius = XMVectorScale(XMVectorAdd(vRadius, Dist), 0.5f);
+            vCenter = XMVectorAdd(vCenter, XMVectorMultiply(XMVectorSubtract(XMVectorReplicate(1.0f), XMVectorDivide(vRadius, Dist)), Delta));
+        }
+    }
+
+    XMStoreFloat3(&Out.Center, vCenter);
+    XMStoreFloat(&Out.Radius, vRadius);
+}
+
+
+//-----------------------------------------------------------------------------
+// Create sphere containing frustum
+// Fetches the frustum corners and fits a sphere to them with
+// CreateFromPoints.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingSphere::CreateFromFrustum(BoundingSphere& Out, const BoundingFrustum& fr) noexcept
+{
+    XMFLOAT3 Corners[BoundingFrustum::CORNER_COUNT];
+    fr.GetCorners(Corners);
+    CreateFromPoints(Out, BoundingFrustum::CORNER_COUNT, Corners, sizeof(XMFLOAT3));
+}
+
+
+/****************************************************************************
+ *
+ * BoundingBox
+ *
+ ****************************************************************************/
+
+ //-----------------------------------------------------------------------------
+ // Transform an axis aligned box by an angle preserving transform.
+ //-----------------------------------------------------------------------------
+// Transforms all corners of the box by M and stores the axis-aligned box
+// that encloses the transformed corners in Out.
+_Use_decl_annotations_
+inline void XM_CALLCONV BoundingBox::Transform(BoundingBox& Out, FXMMATRIX M) const noexcept
+{
+    // Load center and extents.
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vExtents = XMLoadFloat3(&Extents);
+
+    // Compute and transform the corners and find new min/max bounds.
+    XMVECTOR Corner = XMVectorMultiplyAdd(vExtents, g_BoxOffset[0], vCenter);
+    Corner = XMVector3Transform(Corner, M);
+
+    XMVECTOR Min, Max;
+    Min = Max = Corner;
+
+    for (size_t i = 1; i < CORNER_COUNT; ++i)
+    {
+        Corner = XMVectorMultiplyAdd(vExtents, g_BoxOffset[i], vCenter);
+        Corner = XMVector3Transform(Corner, M);
+
+        Min = XMVectorMin(Min, Corner);
+        Max = XMVectorMax(Max, Corner);
+    }
+
+    // Store center and extents.
+    XMStoreFloat3(&Out.Center, XMVectorScale(XMVectorAdd(Min, Max), 0.5f));
+    XMStoreFloat3(&Out.Extents, XMVectorScale(XMVectorSubtract(Max, Min), 0.5f));
+}
+
+// Same as above, but the transform is given as a uniform Scale, then a unit
+// quaternion Rotation (asserted), then a Translation.
+_Use_decl_annotations_
+inline void XM_CALLCONV BoundingBox::Transform(BoundingBox& Out, float Scale, FXMVECTOR Rotation, FXMVECTOR Translation) const noexcept
+{
+    assert(DirectX::Internal::XMQuaternionIsUnit(Rotation));
+
+    // Load center and extents.
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vExtents = XMLoadFloat3(&Extents);
+
+    XMVECTOR VectorScale = XMVectorReplicate(Scale);
+
+    // Compute and transform the corners and find new min/max bounds.
+    XMVECTOR Corner = XMVectorMultiplyAdd(vExtents, g_BoxOffset[0], vCenter);
+    Corner = XMVectorAdd(XMVector3Rotate(XMVectorMultiply(Corner, VectorScale), Rotation), Translation);
+
+    XMVECTOR Min, Max;
+    Min = Max = Corner;
+
+    for (size_t i = 1; i < CORNER_COUNT; ++i)
+    {
+        Corner = XMVectorMultiplyAdd(vExtents, g_BoxOffset[i], vCenter);
+        Corner = XMVectorAdd(XMVector3Rotate(XMVectorMultiply(Corner, VectorScale), Rotation), Translation);
+
+        Min = XMVectorMin(Min, Corner);
+        Max = XMVectorMax(Max, Corner);
+    }
+
+    // Store center and extents.
+    XMStoreFloat3(&Out.Center, XMVectorScale(XMVectorAdd(Min, Max), 0.5f));
+    XMStoreFloat3(&Out.Extents, XMVectorScale(XMVectorSubtract(Max, Min), 0.5f));
+}
+
+
+//-----------------------------------------------------------------------------
+// Get the corner points of the box
+// Corners must be non-null (asserted); CORNER_COUNT entries are written.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingBox::GetCorners(XMFLOAT3* Corners) const noexcept
+{
+    assert(Corners != nullptr);
+
+    // Load the box
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vExtents = XMLoadFloat3(&Extents);
+
+    for (size_t i = 0; i < CORNER_COUNT; ++i)
+    {
+        XMVECTOR C = XMVectorMultiplyAdd(vExtents, g_BoxOffset[i], vCenter);
+        XMStoreFloat3(&Corners[i], C);
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+// Point in axis-aligned box test
+// Returns CONTAINS or DISJOINT; the point is tested relative to the box
+// center against the extents.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType XM_CALLCONV BoundingBox::Contains(FXMVECTOR Point) const noexcept
+{
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vExtents = XMLoadFloat3(&Extents);
+
+    return XMVector3InBounds(XMVectorSubtract(Point, vCenter), vExtents) ? CONTAINS : DISJOINT;
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle in axis-aligned box test
+// DISJOINT when the triangle does not touch the box, CONTAINS when all three
+// vertices are within the extents on x, y and z, INTERSECTS otherwise.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType XM_CALLCONV BoundingBox::Contains(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept
+{
+    if (!Intersects(V0, V1, V2))
+        return DISJOINT;
+
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vExtents = XMLoadFloat3(&Extents);
+
+    // Per-axis |vertex - center| <= extents, accumulated over the vertices.
+    XMVECTOR d = XMVectorAbs(XMVectorSubtract(V0, vCenter));
+    XMVECTOR Inside = XMVectorLessOrEqual(d, vExtents);
+
+    d = XMVectorAbs(XMVectorSubtract(V1, vCenter));
+    Inside = XMVectorAndInt(Inside, XMVectorLessOrEqual(d, vExtents));
+
+    d = XMVectorAbs(XMVectorSubtract(V2, vCenter));
+    Inside = XMVectorAndInt(Inside, XMVectorLessOrEqual(d, vExtents));
+
+    return (XMVector3EqualInt(Inside, XMVectorTrueInt())) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere in axis-aligned box test
+// DISJOINT when the squared distance from the sphere center to the box
+// exceeds the squared radius; otherwise CONTAINS or INTERSECTS depending on
+// the per-axis checks at the end.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingBox::Contains(const BoundingSphere& sh) const noexcept
+{
+    XMVECTOR SphereCenter = XMLoadFloat3(&sh.Center);
+    XMVECTOR SphereRadius = XMVectorReplicatePtr(&sh.Radius);
+
+    XMVECTOR BoxCenter = XMLoadFloat3(&Center);
+    XMVECTOR BoxExtents = XMLoadFloat3(&Extents);
+
+    XMVECTOR BoxMin = XMVectorSubtract(BoxCenter, BoxExtents);
+    XMVECTOR BoxMax = XMVectorAdd(BoxCenter, BoxExtents);
+
+    // Find the distance to the nearest point on the box.
+    // for each i in (x, y, z)
+    // if (SphereCenter(i) < BoxMin(i)) d2 += (SphereCenter(i) - BoxMin(i)) ^ 2
+    // else if (SphereCenter(i) > BoxMax(i)) d2 += (SphereCenter(i) - BoxMax(i)) ^ 2
+
+    XMVECTOR d = XMVectorZero();
+
+    // Compute d for each dimension.
+    XMVECTOR LessThanMin = XMVectorLess(SphereCenter, BoxMin);
+    XMVECTOR GreaterThanMax = XMVectorGreater(SphereCenter, BoxMax);
+
+    XMVECTOR MinDelta = XMVectorSubtract(SphereCenter, BoxMin);
+    XMVECTOR MaxDelta = XMVectorSubtract(SphereCenter, BoxMax);
+
+    // Choose value for each dimension based on the comparison.
+    d = XMVectorSelect(d, MinDelta, LessThanMin);
+    d = XMVectorSelect(d, MaxDelta, GreaterThanMax);
+
+    // Use a dot-product to square them and sum them together.
+    XMVECTOR d2 = XMVector3Dot(d, d);
+
+    if (XMVector3Greater(d2, XMVectorMultiply(SphereRadius, SphereRadius)))
+        return DISJOINT;
+
+    // Contained only if, on every axis, the center is at least one radius
+    // away from both faces and the box width (max - min) exceeds the radius.
+    XMVECTOR InsideAll = XMVectorLessOrEqual(XMVectorAdd(BoxMin, SphereRadius), SphereCenter);
+    InsideAll = XMVectorAndInt(InsideAll, XMVectorLessOrEqual(SphereCenter, XMVectorSubtract(BoxMax, SphereRadius)));
+    InsideAll = XMVectorAndInt(InsideAll, XMVectorGreater(XMVectorSubtract(BoxMax, BoxMin), SphereRadius));
+
+    return (XMVector3EqualInt(InsideAll, XMVectorTrueInt())) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Axis-aligned box in axis-aligned box test
+// A is this box, B is the argument.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingBox::Contains(const BoundingBox& box) const noexcept
+{
+    XMVECTOR CenterA = XMLoadFloat3(&Center);
+    XMVECTOR ExtentsA = XMLoadFloat3(&Extents);
+
+    XMVECTOR CenterB = XMLoadFloat3(&box.Center);
+    XMVECTOR ExtentsB = XMLoadFloat3(&box.Extents);
+
+    XMVECTOR MinA = XMVectorSubtract(CenterA, ExtentsA);
+    XMVECTOR MaxA = XMVectorAdd(CenterA, ExtentsA);
+
+    XMVECTOR MinB = XMVectorSubtract(CenterB, ExtentsB);
+    XMVECTOR MaxB = XMVectorAdd(CenterB, ExtentsB);
+
+    // for each i in (x, y, z) if a_min(i) > b_max(i) or b_min(i) > a_max(i) then return false
+    XMVECTOR Disjoint = XMVectorOrInt(XMVectorGreater(MinA, MaxB), XMVectorGreater(MinB, MaxA));
+
+    if (DirectX::Internal::XMVector3AnyTrue(Disjoint))
+        return DISJOINT;
+
+    // for each i in (x, y, z) if a_min(i) <= b_min(i) and b_max(i) <= a_max(i) then A contains B
+    XMVECTOR Inside = XMVectorAndInt(XMVectorLessOrEqual(MinA, MinB), XMVectorLessOrEqual(MaxB, MaxA));
+
+    return DirectX::Internal::XMVector3AllTrue(Inside) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Oriented box in axis-aligned box test
+// DISJOINT when the boxes do not touch, CONTAINS when every corner of the
+// oriented box is within this box's extents, INTERSECTS otherwise.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingBox::Contains(const BoundingOrientedBox& box) const noexcept
+{
+    if (!box.Intersects(*this))
+        return DISJOINT;
+
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vExtents = XMLoadFloat3(&Extents);
+
+    // Subtract off the AABB center to remove a subtract below
+    XMVECTOR oCenter = XMVectorSubtract(XMLoadFloat3(&box.Center), vCenter);
+
+    XMVECTOR oExtents = XMLoadFloat3(&box.Extents);
+    XMVECTOR oOrientation = XMLoadFloat4(&box.Orientation);
+
+    assert(DirectX::Internal::XMQuaternionIsUnit(oOrientation));
+
+    XMVECTOR Inside = XMVectorTrueInt();
+
+    for (size_t i = 0; i < BoundingOrientedBox::CORNER_COUNT; ++i)
+    {
+        // C is the oriented-box corner relative to this box's center.
+        XMVECTOR C = XMVectorAdd(XMVector3Rotate(XMVectorMultiply(oExtents, g_BoxOffset[i]), oOrientation), oCenter);
+        XMVECTOR d = XMVectorAbs(C);
+        Inside = XMVectorAndInt(Inside, XMVectorLessOrEqual(d, vExtents));
+    }
+
+    return (XMVector3EqualInt(Inside, XMVectorTrueInt())) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Frustum in axis-aligned box test
+// DISJOINT when the frustum does not touch the box, CONTAINS when every
+// frustum corner is within the extents, INTERSECTS otherwise.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingBox::Contains(const BoundingFrustum& fr) const noexcept
+{
+    if (!fr.Intersects(*this))
+        return DISJOINT;
+
+    XMFLOAT3 Corners[BoundingFrustum::CORNER_COUNT];
+    fr.GetCorners(Corners);
+
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vExtents = XMLoadFloat3(&Extents);
+
+    XMVECTOR Inside = XMVectorTrueInt();
+
+    for (size_t i = 0; i < BoundingFrustum::CORNER_COUNT; ++i)
+    {
+        XMVECTOR Point = XMLoadFloat3(&Corners[i]);
+        XMVECTOR d = XMVectorAbs(XMVectorSubtract(Point, vCenter));
+        Inside = XMVectorAndInt(Inside, XMVectorLessOrEqual(d, vExtents));
+    }
+
+    return (XMVector3EqualInt(Inside, XMVectorTrueInt())) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere vs axis-aligned box test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingBox::Intersects(const BoundingSphere& sh) const noexcept
+{
+    XMVECTOR SphereCenter = XMLoadFloat3(&sh.Center);
+    XMVECTOR SphereRadius = XMVectorReplicatePtr(&sh.Radius);
+
+    XMVECTOR BoxCenter = XMLoadFloat3(&Center);
+    XMVECTOR BoxExtents = XMLoadFloat3(&Extents);
+
+    XMVECTOR BoxMin = XMVectorSubtract(BoxCenter, BoxExtents);
+    XMVECTOR BoxMax = XMVectorAdd(BoxCenter, BoxExtents);
+
+    // Find the distance to the nearest point on the box.
+    // for each i in (x, y, z)
+    // if (SphereCenter(i) < BoxMin(i)) d2 += (SphereCenter(i) - BoxMin(i)) ^ 2
+    // else if (SphereCenter(i) > BoxMax(i)) d2 += (SphereCenter(i) - BoxMax(i)) ^ 2
+
+    XMVECTOR d = XMVectorZero();
+
+    // Compute d for each dimension.
+ XMVECTOR LessThanMin = XMVectorLess(SphereCenter, BoxMin); + XMVECTOR GreaterThanMax = XMVectorGreater(SphereCenter, BoxMax); + + XMVECTOR MinDelta = XMVectorSubtract(SphereCenter, BoxMin); + XMVECTOR MaxDelta = XMVectorSubtract(SphereCenter, BoxMax); + + // Choose value for each dimension based on the comparison. + d = XMVectorSelect(d, MinDelta, LessThanMin); + d = XMVectorSelect(d, MaxDelta, GreaterThanMax); + + // Use a dot-product to square them and sum them together. + XMVECTOR d2 = XMVector3Dot(d, d); + + return XMVector3LessOrEqual(d2, XMVectorMultiply(SphereRadius, SphereRadius)); +} + + +//----------------------------------------------------------------------------- +// Axis-aligned box vs. axis-aligned box test +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline bool BoundingBox::Intersects(const BoundingBox& box) const noexcept +{ + XMVECTOR CenterA = XMLoadFloat3(&Center); + XMVECTOR ExtentsA = XMLoadFloat3(&Extents); + + XMVECTOR CenterB = XMLoadFloat3(&box.Center); + XMVECTOR ExtentsB = XMLoadFloat3(&box.Extents); + + XMVECTOR MinA = XMVectorSubtract(CenterA, ExtentsA); + XMVECTOR MaxA = XMVectorAdd(CenterA, ExtentsA); + + XMVECTOR MinB = XMVectorSubtract(CenterB, ExtentsB); + XMVECTOR MaxB = XMVectorAdd(CenterB, ExtentsB); + + // for each i in (x, y, z) if a_min(i) > b_max(i) or b_min(i) > a_max(i) then return false + XMVECTOR Disjoint = XMVectorOrInt(XMVectorGreater(MinA, MaxB), XMVectorGreater(MinB, MaxA)); + + return !DirectX::Internal::XMVector3AnyTrue(Disjoint); +} + + +//----------------------------------------------------------------------------- +// Oriented box vs. 
axis-aligned box test
+//-----------------------------------------------------------------------------
+// Returns the result of the oriented box's own test against this box
+// (BoundingOrientedBox::Intersects(const BoundingBox&)).
+_Use_decl_annotations_
+inline bool BoundingBox::Intersects(const BoundingOrientedBox& box) const noexcept
+{
+    return box.Intersects(*this);
+}
+
+
+//-----------------------------------------------------------------------------
+// Frustum vs. axis-aligned box test
+//-----------------------------------------------------------------------------
+// Returns the result of the frustum's own Intersects test against this box.
+_Use_decl_annotations_
+inline bool BoundingBox::Intersects(const BoundingFrustum& fr) const noexcept
+{
+    return fr.Intersects(*this);
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle vs. axis aligned box test
+//-----------------------------------------------------------------------------
+// Separating-axis test between this box and the triangle (V0, V1, V2).
+// Thirteen candidate axes are checked:
+//   * the three box axes (early-out, done as an AABB vs. triangle-AABB test),
+//   * the triangle plane normal,
+//   * the nine cross products of a box axis with a triangle edge.
+// The triangle must not be degenerate (asserted in debug builds).
+// Returns false as soon as the box axes separate the shapes; otherwise returns
+// true unless the accumulated NoIntersection mask is set in every lane.
+_Use_decl_annotations_
+inline bool XM_CALLCONV BoundingBox::Intersects(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept
+{
+    XMVECTOR Zero = XMVectorZero();
+
+    // Load the box.
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vExtents = XMLoadFloat3(&Extents);
+
+    XMVECTOR BoxMin = XMVectorSubtract(vCenter, vExtents);
+    XMVECTOR BoxMax = XMVectorAdd(vCenter, vExtents);
+
+    // Test the axes of the box (in effect test the AAB against the minimal AAB
+    // around the triangle).
+    XMVECTOR TriMin = XMVectorMin(XMVectorMin(V0, V1), V2);
+    XMVECTOR TriMax = XMVectorMax(XMVectorMax(V0, V1), V2);
+
+    // for each i in (x, y, z) if a_min(i) > b_max(i) or b_min(i) > a_max(i) then disjoint
+    XMVECTOR Disjoint = XMVectorOrInt(XMVectorGreater(TriMin, BoxMax), XMVectorGreater(BoxMin, TriMax));
+    if (DirectX::Internal::XMVector3AnyTrue(Disjoint))
+        return false;
+
+    // Test the plane of the triangle.
+    XMVECTOR Normal = XMVector3Cross(XMVectorSubtract(V1, V0), XMVectorSubtract(V2, V0));
+    XMVECTOR Dist = XMVector3Dot(Normal, V0);
+
+    // Assert that the triangle is not degenerate.
+    assert(!XMVector3Equal(Normal, Zero));
+
+    // for each i in (x, y, z) if n(i) >= 0 then v_min(i)=b_min(i), v_max(i)=b_max(i)
+    // else v_min(i)=b_max(i), v_max(i)=b_min(i)
+    XMVECTOR NormalSelect = XMVectorGreater(Normal, Zero);
+    XMVECTOR V_Min = XMVectorSelect(BoxMax, BoxMin, NormalSelect);
+    XMVECTOR V_Max = XMVectorSelect(BoxMin, BoxMax, NormalSelect);
+
+    // if n dot v_min + d > 0 || n dot v_max + d < 0 then disjoint
+    XMVECTOR MinDist = XMVector3Dot(V_Min, Normal);
+    XMVECTOR MaxDist = XMVector3Dot(V_Max, Normal);
+
+    XMVECTOR NoIntersection = XMVectorGreater(MinDist, Dist);
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(MaxDist, Dist));
+
+    // Move the box center to zero to simplify the following tests.
+    XMVECTOR TV0 = XMVectorSubtract(V0, vCenter);
+    XMVECTOR TV1 = XMVectorSubtract(V1, vCenter);
+    XMVECTOR TV2 = XMVectorSubtract(V2, vCenter);
+
+    // Test the edge/edge axes (3*3).
+    XMVECTOR e0 = XMVectorSubtract(TV1, TV0);
+    XMVECTOR e1 = XMVectorSubtract(TV2, TV1);
+    XMVECTOR e2 = XMVectorSubtract(TV0, TV2);
+
+    // Make w zero.
+    // The permutes below read the w lane of each edge (XM_PERMUTE_0W) to
+    // produce the zero component of every cross-product axis.
+    e0 = XMVectorInsert<0, 0, 0, 0, 1>(e0, Zero);
+    e1 = XMVectorInsert<0, 0, 0, 0, 1>(e1, Zero);
+    e2 = XMVectorInsert<0, 0, 0, 0, 1>(e2, Zero);
+
+    XMVECTOR Axis;
+    XMVECTOR p0, p1, p2;
+    XMVECTOR Min, Max;
+    XMVECTOR Radius;
+
+    // In every XMVectorPermute below the first operand is the edge e and the
+    // second is -e, so XM_PERMUTE_0* picks a lane of e and XM_PERMUTE_1* picks
+    // a lane of -e. The w lane of Axis is never used by the 3D dot products.
+
+    // Axis == (1,0,0) x e0 = (0, -e0.z, e0.y)
+    Axis = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_0X>(e0, XMVectorNegate(e0));
+    p0 = XMVector3Dot(TV0, Axis);
+    // p1 = XMVector3Dot( V1, Axis ); // p1 = p0;
+    p2 = XMVector3Dot(TV2, Axis);
+    Min = XMVectorMin(p0, p2);
+    Max = XMVectorMax(p0, p2);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (1,0,0) x e1 = (0, -e1.z, e1.y)
+    Axis = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_0X>(e1, XMVectorNegate(e1));
+    p0 = XMVector3Dot(TV0, Axis);
+    p1 = XMVector3Dot(TV1, Axis);
+    // p2 = XMVector3Dot( V2, Axis ); // p2 = p1;
+    Min = XMVectorMin(p0, p1);
+    Max = XMVectorMax(p0, p1);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (1,0,0) x e2 = (0, -e2.z, e2.y)
+    Axis = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_0X>(e2, XMVectorNegate(e2));
+    p0 = XMVector3Dot(TV0, Axis);
+    p1 = XMVector3Dot(TV1, Axis);
+    // p2 = XMVector3Dot( V2, Axis ); // p2 = p0;
+    Min = XMVectorMin(p0, p1);
+    Max = XMVectorMax(p0, p1);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (0,1,0) x e0 = (e0.z, 0, -e0.x)
+    Axis = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_0Y>(e0, XMVectorNegate(e0));
+    p0 = XMVector3Dot(TV0, Axis);
+    // p1 = XMVector3Dot( V1, Axis ); // p1 = p0;
+    p2 = XMVector3Dot(TV2, Axis);
+    Min = XMVectorMin(p0, p2);
+    Max = XMVectorMax(p0, p2);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (0,1,0) x e1 = (e1.z, 0, -e1.x)
+    Axis = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_0Y>(e1, XMVectorNegate(e1));
+    p0 = XMVector3Dot(TV0, Axis);
+    p1 = XMVector3Dot(TV1, Axis);
+    // p2 = XMVector3Dot( V2, Axis ); // p2 = p1;
+    Min = XMVectorMin(p0, p1);
+    Max = XMVectorMax(p0, p1);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (0,1,0) x e2 = (e2.z, 0, -e2.x)
+    Axis = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_0Y>(e2, XMVectorNegate(e2));
+    p0 = XMVector3Dot(TV0, Axis);
+    p1 = XMVector3Dot(TV1, Axis);
+    // p2 = XMVector3Dot( V2, Axis ); // p2 = p0;
+    Min = XMVectorMin(p0, p1);
+    Max = XMVectorMax(p0, p1);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (0,0,1) x e0 = (-e0.y, e0.x, 0)
+    Axis = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0Z>(e0, XMVectorNegate(e0));
+    p0 = XMVector3Dot(TV0, Axis);
+    // p1 = XMVector3Dot( V1, Axis ); // p1 = p0;
+    p2 = XMVector3Dot(TV2, Axis);
+    Min = XMVectorMin(p0, p2);
+    Max = XMVectorMax(p0, p2);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (0,0,1) x e1 = (-e1.y, e1.x, 0)
+    Axis = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0Z>(e1, XMVectorNegate(e1));
+    p0 = XMVector3Dot(TV0, Axis);
+    p1 = XMVector3Dot(TV1, Axis);
+    // p2 = XMVector3Dot( V2, Axis ); // p2 = p1;
+    Min = XMVectorMin(p0, p1);
+    Max = XMVectorMax(p0, p1);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Axis == (0,0,1) x e2 = (-e2.y, e2.x, 0)
+    Axis = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0Z>(e2, XMVectorNegate(e2));
+    p0 = XMVector3Dot(TV0, Axis);
+    p1 = XMVector3Dot(TV1, Axis);
+    // p2 = XMVector3Dot( V2, Axis ); // p2 = p0;
+    Min = XMVectorMin(p0, p1);
+    Max = XMVectorMax(p0, p1);
+    Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius));
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(Max, XMVectorNegate(Radius)));
+
+    // Intersecting unless some axis test set the separation mask.
+    return XMVector4NotEqualInt(NoIntersection, XMVectorTrueInt());
+}
+
+
+//-----------------------------------------------------------------------------
+// Classifies this box against a single plane. The plane must be normalized
+// (asserted in debug builds). Returns FRONT when the helper reports the box as
+// outside the plane, BACK when it reports it as inside, and INTERSECTING
+// otherwise.
+// NOTE(review): which half-space counts as "outside" is decided by
+// FastIntersectAxisAlignedBoxPlane, which is not visible here - confirm.
+_Use_decl_annotations_
+inline PlaneIntersectionType XM_CALLCONV BoundingBox::Intersects(FXMVECTOR Plane) const noexcept
+{
+    assert(DirectX::Internal::XMPlaneIsUnit(Plane));
+
+    // Load the box.
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vExtents = XMLoadFloat3(&Extents);
+
+    // Set w of the center to one so we can dot4 with a plane.
+    vCenter = XMVectorInsert<0, 0, 0, 0, 1>(vCenter, XMVectorSplatOne());
+
+    XMVECTOR Outside, Inside;
+    DirectX::Internal::FastIntersectAxisAlignedBoxPlane(vCenter, vExtents, Plane, Outside, Inside);
+
+    // If the box is outside the plane it is in front of it.
+    if (XMVector4EqualInt(Outside, XMVectorTrueInt()))
+        return FRONT;
+
+    // If the box is inside the plane it is behind it.
+    if (XMVector4EqualInt(Inside, XMVectorTrueInt()))
+        return BACK;
+
+    // The box is neither fully inside nor fully outside, so it straddles the plane.
+    return INTERSECTING;
+}
+
+
+//-----------------------------------------------------------------------------
+// Compute the intersection of a ray (Origin, Direction) with an axis aligned
+// box using the slabs method.
+//-----------------------------------------------------------------------------
+// Direction must be a unit vector (asserted in debug builds).
+// On a hit, returns true and writes the x component of the reduced t_min (the
+// largest per-axis slab entry parameter) to Dist. On a miss, returns false and
+// sets Dist to 0.
+// NOTE(review): only t_max < 0 is rejected, so Dist does not appear to be
+// clamped to zero when the origin lies inside the box - confirm with callers.
+_Use_decl_annotations_
+inline bool XM_CALLCONV BoundingBox::Intersects(FXMVECTOR Origin, FXMVECTOR Direction, float& Dist) const noexcept
+{
+    assert(DirectX::Internal::XMVector3IsUnit(Direction));
+
+    // Load the box.
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vExtents = XMLoadFloat3(&Extents);
+
+    // Adjust ray origin to be relative to center of the box
+    // (TOrigin is the vector from the ray origin to the box center).
+    XMVECTOR TOrigin = XMVectorSubtract(vCenter, Origin);
+
+    // Compute the dot product against each axis of the box.
+    // Since the axes are (1,0,0), (0,1,0), (0,0,1) no computation is necessary.
+    XMVECTOR AxisDotOrigin = TOrigin;
+    XMVECTOR AxisDotDirection = Direction;
+
+    // if (fabs(AxisDotDirection) <= Epsilon) the ray is nearly parallel to the slab.
+    XMVECTOR IsParallel = XMVectorLessOrEqual(XMVectorAbs(AxisDotDirection), g_RayEpsilon);
+
+    // Test against all three axes simultaneously.
+    XMVECTOR InverseAxisDotDirection = XMVectorReciprocal(AxisDotDirection);
+    XMVECTOR t1 = XMVectorMultiply(XMVectorSubtract(AxisDotOrigin, vExtents), InverseAxisDotDirection);
+    XMVECTOR t2 = XMVectorMultiply(XMVectorAdd(AxisDotOrigin, vExtents), InverseAxisDotDirection);
+
+    // Compute the max of min(t1,t2) and the min of max(t1,t2) ensuring we don't
+    // use the results from any directions parallel to the slab.
+    // Parallel lanes are replaced by g_FltMin / g_FltMax so they cannot win
+    // the max / min reductions below.
+    XMVECTOR t_min = XMVectorSelect(XMVectorMin(t1, t2), g_FltMin, IsParallel);
+    XMVECTOR t_max = XMVectorSelect(XMVectorMax(t1, t2), g_FltMax, IsParallel);
+
+    // t_min.x = maximum( t_min.x, t_min.y, t_min.z );
+    // t_max.x = minimum( t_max.x, t_max.y, t_max.z );
+    t_min = XMVectorMax(t_min, XMVectorSplatY(t_min));  // x = max(x,y)
+    t_min = XMVectorMax(t_min, XMVectorSplatZ(t_min));  // x = max(max(x,y),z)
+    t_max = XMVectorMin(t_max, XMVectorSplatY(t_max));  // x = min(x,y)
+    t_max = XMVectorMin(t_max, XMVectorSplatZ(t_max));  // x = min(min(x,y),z)
+
+    // if ( t_min > t_max ) return false;
+    XMVECTOR NoIntersection = XMVectorGreater(XMVectorSplatX(t_min), XMVectorSplatX(t_max));
+
+    // if ( t_max < 0.0f ) return false;
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(XMVectorSplatX(t_max), XMVectorZero()));
+
+    // if (IsParallel && (-Extents > AxisDotOrigin || Extents < AxisDotOrigin)) return false;
+    // (XMVectorAndCInt presumably yields IsParallel AND NOT ParallelOverlap,
+    // matching the pseudo-code above - confirm against the DirectXMath docs.)
+    XMVECTOR ParallelOverlap = XMVectorInBounds(AxisDotOrigin, vExtents);
+    NoIntersection = XMVectorOrInt(NoIntersection, XMVectorAndCInt(IsParallel, ParallelOverlap));
+
+    if (!DirectX::Internal::XMVector3AnyTrue(NoIntersection))
+    {
+        // Store the x-component of t_min to Dist
+        XMStoreFloat(&Dist, t_min);
+        return true;
+    }
+
+    Dist = 0.f;
+    return false;
+}
+
+
+//-----------------------------------------------------------------------------
+// Test an axis aligned box vs 6 planes (typically forming a frustum).
+//-----------------------------------------------------------------------------
+// Runs FastIntersectAxisAlignedBoxPlane once per plane, OR-ing the "outside"
+// masks and AND-ing the "inside" masks. Returns DISJOINT when the combined
+// outside mask is fully set, CONTAINS when the combined inside mask is fully
+// set, and INTERSECTS otherwise.
+_Use_decl_annotations_
+inline ContainmentType XM_CALLCONV BoundingBox::ContainedBy(
+    FXMVECTOR Plane0, FXMVECTOR Plane1, FXMVECTOR Plane2,
+    GXMVECTOR Plane3,
+    HXMVECTOR Plane4, HXMVECTOR Plane5) const noexcept
+{
+    // Load the box.
+    XMVECTOR vCenter = XMLoadFloat3(&Center);
+    XMVECTOR vExtents = XMLoadFloat3(&Extents);
+
+    // Set w of the center to one so we can dot4 with a plane.
+    vCenter = XMVectorInsert<0, 0, 0, 0, 1>(vCenter, XMVectorSplatOne());
+
+    XMVECTOR Outside, Inside;
+
+    // Test against each plane.
+    DirectX::Internal::FastIntersectAxisAlignedBoxPlane(vCenter, vExtents, Plane0, Outside, Inside);
+
+    // The first plane seeds both accumulators.
+    XMVECTOR AnyOutside = Outside;
+    XMVECTOR AllInside = Inside;
+
+    DirectX::Internal::FastIntersectAxisAlignedBoxPlane(vCenter, vExtents, Plane1, Outside, Inside);
+    AnyOutside = XMVectorOrInt(AnyOutside, Outside);
+    AllInside = XMVectorAndInt(AllInside, Inside);
+
+    DirectX::Internal::FastIntersectAxisAlignedBoxPlane(vCenter, vExtents, Plane2, Outside, Inside);
+    AnyOutside = XMVectorOrInt(AnyOutside, Outside);
+    AllInside = XMVectorAndInt(AllInside, Inside);
+
+    DirectX::Internal::FastIntersectAxisAlignedBoxPlane(vCenter, vExtents, Plane3, Outside, Inside);
+    AnyOutside = XMVectorOrInt(AnyOutside, Outside);
+    AllInside = XMVectorAndInt(AllInside, Inside);
+
+    DirectX::Internal::FastIntersectAxisAlignedBoxPlane(vCenter, vExtents, Plane4, Outside, Inside);
+    AnyOutside = XMVectorOrInt(AnyOutside, Outside);
+    AllInside = XMVectorAndInt(AllInside, Inside);
+
+    DirectX::Internal::FastIntersectAxisAlignedBoxPlane(vCenter, vExtents, Plane5, Outside, Inside);
+    AnyOutside = XMVectorOrInt(AnyOutside, Outside);
+    AllInside = XMVectorAndInt(AllInside, Inside);
+
+    // If the box is outside any plane it is outside.
+    if (XMVector4EqualInt(AnyOutside, XMVectorTrueInt()))
+        return DISJOINT;
+
+    // If the box is inside all planes it is inside.
+    if (XMVector4EqualInt(AllInside, XMVectorTrueInt()))
+        return CONTAINS;
+
+    // The box is not inside all planes or outside a plane, it may intersect.
+    return INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Create axis-aligned box that contains two other bounding boxes
+//-----------------------------------------------------------------------------
+// Out receives the box spanning the component-wise minimum and maximum
+// corners of b1 and b2.
+_Use_decl_annotations_
+inline void BoundingBox::CreateMerged(BoundingBox& Out, const BoundingBox& b1, const BoundingBox& b2) noexcept
+{
+    XMVECTOR b1Center = XMLoadFloat3(&b1.Center);
+    XMVECTOR b1Extents = XMLoadFloat3(&b1.Extents);
+
+    XMVECTOR b2Center = XMLoadFloat3(&b2.Center);
+    XMVECTOR b2Extents = XMLoadFloat3(&b2.Extents);
+
+    // Smallest minimum corner of the two boxes.
+    XMVECTOR Min = XMVectorSubtract(b1Center, b1Extents);
+    Min = XMVectorMin(Min, XMVectorSubtract(b2Center, b2Extents));
+
+    // Largest maximum corner of the two boxes.
+    XMVECTOR Max = XMVectorAdd(b1Center, b1Extents);
+    Max = XMVectorMax(Max, XMVectorAdd(b2Center, b2Extents));
+
+    assert(XMVector3LessOrEqual(Min, Max));
+
+    // Center is the midpoint of the corners, extents are half their difference.
+    XMStoreFloat3(&Out.Center, XMVectorScale(XMVectorAdd(Min, Max), 0.5f));
+    XMStoreFloat3(&Out.Extents, XMVectorScale(XMVectorSubtract(Max, Min), 0.5f));
+}
+
+
+//-----------------------------------------------------------------------------
+// Create axis-aligned box that contains a bounding sphere
+//-----------------------------------------------------------------------------
+// Out receives the box whose corners are the sphere center minus / plus the
+// radius on every axis.
+_Use_decl_annotations_
+inline void BoundingBox::CreateFromSphere(BoundingBox& Out, const BoundingSphere& sh) noexcept
+{
+    XMVECTOR spCenter = XMLoadFloat3(&sh.Center);
+    XMVECTOR shRadius = XMVectorReplicatePtr(&sh.Radius);
+
+    XMVECTOR Min = XMVectorSubtract(spCenter, shRadius);
+    XMVECTOR Max = XMVectorAdd(spCenter, shRadius);
+
+    assert(XMVector3LessOrEqual(Min, Max));
+
+    // Center is the midpoint of the corners, extents are half their difference.
+    XMStoreFloat3(&Out.Center, XMVectorScale(XMVectorAdd(Min, Max), 0.5f));
+    XMStoreFloat3(&Out.Extents, XMVectorScale(XMVectorSubtract(Max, Min), 0.5f));
+}
+
+
+//-----------------------------------------------------------------------------
+// Create axis-aligned box from min/max points
+//-----------------------------------------------------------------------------
+// pt1 and pt2 may be given in any order: the component-wise minimum and
+// maximum are taken before the center and extents are stored in Out.
+_Use_decl_annotations_
+inline void XM_CALLCONV BoundingBox::CreateFromPoints(BoundingBox& Out, FXMVECTOR pt1, FXMVECTOR pt2) noexcept
+{
+    XMVECTOR Min = XMVectorMin(pt1, pt2);
+    XMVECTOR Max = XMVectorMax(pt1, pt2);
+
+    // Store center and extents.
+    XMStoreFloat3(&Out.Center, XMVectorScale(XMVectorAdd(Min, Max), 0.5f));
+    XMStoreFloat3(&Out.Extents, XMVectorScale(XMVectorSubtract(Max, Min), 0.5f));
+}
+
+
+//-----------------------------------------------------------------------------
+// Find the minimum axis aligned bounding box containing a set of points.
+//-----------------------------------------------------------------------------
+// Count   - number of points; must be greater than zero (asserted).
+// pPoints - first point; must not be null (asserted).
+// Stride  - distance in bytes between the start of consecutive points, so the
+//           points may be embedded in a larger vertex structure.
+_Use_decl_annotations_
+inline void BoundingBox::CreateFromPoints(BoundingBox& Out, size_t Count, const XMFLOAT3* pPoints, size_t Stride) noexcept
+{
+    assert(Count > 0);
+    assert(pPoints);
+
+    // Find the minimum and maximum x, y, and z
+    XMVECTOR vMin, vMax;
+
+    // Seed both running bounds with the first point.
+    vMin = vMax = XMLoadFloat3(pPoints);
+
+    for (size_t i = 1; i < Count; ++i)
+    {
+        // Step through the buffer in bytes, then view the address as a point.
+        XMVECTOR Point = XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(reinterpret_cast<const uint8_t*>(pPoints) + i * Stride));
+
+        vMin = XMVectorMin(vMin, Point);
+        vMax = XMVectorMax(vMax, Point);
+    }
+
+    // Store center and extents.
+    XMStoreFloat3(&Out.Center, XMVectorScale(XMVectorAdd(vMin, vMax), 0.5f));
+    XMStoreFloat3(&Out.Extents, XMVectorScale(XMVectorSubtract(vMax, vMin), 0.5f));
+}
+
+
+/****************************************************************************
+ *
+ * BoundingOrientedBox
+ *
+ ****************************************************************************/
+
+ //-----------------------------------------------------------------------------
+ // Transform an oriented box by an angle preserving transform.
+ //----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline void XM_CALLCONV BoundingOrientedBox::Transform(BoundingOrientedBox& Out, FXMMATRIX M) const noexcept +{ + // Load the box. + XMVECTOR vCenter = XMLoadFloat3(&Center); + XMVECTOR vExtents = XMLoadFloat3(&Extents); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(vOrientation)); + + // Composite the box rotation and the transform rotation. + XMMATRIX nM; + nM.r[0] = XMVector3Normalize(M.r[0]); + nM.r[1] = XMVector3Normalize(M.r[1]); + nM.r[2] = XMVector3Normalize(M.r[2]); + nM.r[3] = g_XMIdentityR3; + XMVECTOR Rotation = XMQuaternionRotationMatrix(nM); + vOrientation = XMQuaternionMultiply(vOrientation, Rotation); + + // Transform the center. + vCenter = XMVector3Transform(vCenter, M); + + // Scale the box extents. + XMVECTOR dX = XMVector3Length(M.r[0]); + XMVECTOR dY = XMVector3Length(M.r[1]); + XMVECTOR dZ = XMVector3Length(M.r[2]); + + XMVECTOR VectorScale = XMVectorSelect(dY, dX, g_XMSelect1000); + VectorScale = XMVectorSelect(dZ, VectorScale, g_XMSelect1100); + vExtents = XMVectorMultiply(vExtents, VectorScale); + + // Store the box. + XMStoreFloat3(&Out.Center, vCenter); + XMStoreFloat3(&Out.Extents, vExtents); + XMStoreFloat4(&Out.Orientation, vOrientation); +} + +_Use_decl_annotations_ +inline void XM_CALLCONV BoundingOrientedBox::Transform(BoundingOrientedBox& Out, float Scale, FXMVECTOR Rotation, FXMVECTOR Translation) const noexcept +{ + assert(DirectX::Internal::XMQuaternionIsUnit(Rotation)); + + // Load the box. + XMVECTOR vCenter = XMLoadFloat3(&Center); + XMVECTOR vExtents = XMLoadFloat3(&Extents); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(vOrientation)); + + // Composite the box rotation and the transform rotation. + vOrientation = XMQuaternionMultiply(vOrientation, Rotation); + + // Transform the center. 
+ XMVECTOR VectorScale = XMVectorReplicate(Scale); + vCenter = XMVectorAdd(XMVector3Rotate(XMVectorMultiply(vCenter, VectorScale), Rotation), Translation); + + // Scale the box extents. + vExtents = XMVectorMultiply(vExtents, VectorScale); + + // Store the box. + XMStoreFloat3(&Out.Center, vCenter); + XMStoreFloat3(&Out.Extents, vExtents); + XMStoreFloat4(&Out.Orientation, vOrientation); +} + + +//----------------------------------------------------------------------------- +// Get the corner points of the box +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline void BoundingOrientedBox::GetCorners(XMFLOAT3* Corners) const noexcept +{ + assert(Corners != nullptr); + + // Load the box + XMVECTOR vCenter = XMLoadFloat3(&Center); + XMVECTOR vExtents = XMLoadFloat3(&Extents); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(vOrientation)); + + for (size_t i = 0; i < CORNER_COUNT; ++i) + { + XMVECTOR C = XMVectorAdd(XMVector3Rotate(XMVectorMultiply(vExtents, g_BoxOffset[i]), vOrientation), vCenter); + XMStoreFloat3(&Corners[i], C); + } +} + + +//----------------------------------------------------------------------------- +// Point in oriented box test. +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline ContainmentType XM_CALLCONV BoundingOrientedBox::Contains(FXMVECTOR Point) const noexcept +{ + XMVECTOR vCenter = XMLoadFloat3(&Center); + XMVECTOR vExtents = XMLoadFloat3(&Extents); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + // Transform the point to be local to the box. + XMVECTOR TPoint = XMVector3InverseRotate(XMVectorSubtract(Point, vCenter), vOrientation); + + return XMVector3InBounds(TPoint, vExtents) ? 
CONTAINS : DISJOINT; +} + + +//----------------------------------------------------------------------------- +// Triangle in oriented bounding box +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline ContainmentType XM_CALLCONV BoundingOrientedBox::Contains(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept +{ + // Load the box center & orientation. + XMVECTOR vCenter = XMLoadFloat3(&Center); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + // Transform the triangle vertices into the space of the box. + XMVECTOR TV0 = XMVector3InverseRotate(XMVectorSubtract(V0, vCenter), vOrientation); + XMVECTOR TV1 = XMVector3InverseRotate(XMVectorSubtract(V1, vCenter), vOrientation); + XMVECTOR TV2 = XMVector3InverseRotate(XMVectorSubtract(V2, vCenter), vOrientation); + + BoundingBox box; + box.Center = XMFLOAT3(0.0f, 0.0f, 0.0f); + box.Extents = Extents; + + // Use the triangle vs axis aligned box intersection routine. + return box.Contains(TV0, TV1, TV2); +} + + +//----------------------------------------------------------------------------- +// Sphere in oriented bounding box +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline ContainmentType BoundingOrientedBox::Contains(const BoundingSphere& sh) const noexcept +{ + XMVECTOR SphereCenter = XMLoadFloat3(&sh.Center); + XMVECTOR SphereRadius = XMVectorReplicatePtr(&sh.Radius); + + XMVECTOR BoxCenter = XMLoadFloat3(&Center); + XMVECTOR BoxExtents = XMLoadFloat3(&Extents); + XMVECTOR BoxOrientation = XMLoadFloat4(&Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(BoxOrientation)); + + // Transform the center of the sphere to be local to the box. + // BoxMin = -BoxExtents + // BoxMax = +BoxExtents + SphereCenter = XMVector3InverseRotate(XMVectorSubtract(SphereCenter, BoxCenter), BoxOrientation); + + // Find the distance to the nearest point on the box. 
+ // for each i in (x, y, z) + // if (SphereCenter(i) < BoxMin(i)) d2 += (SphereCenter(i) - BoxMin(i)) ^ 2 + // else if (SphereCenter(i) > BoxMax(i)) d2 += (SphereCenter(i) - BoxMax(i)) ^ 2 + + XMVECTOR d = XMVectorZero(); + + // Compute d for each dimension. + XMVECTOR LessThanMin = XMVectorLess(SphereCenter, XMVectorNegate(BoxExtents)); + XMVECTOR GreaterThanMax = XMVectorGreater(SphereCenter, BoxExtents); + + XMVECTOR MinDelta = XMVectorAdd(SphereCenter, BoxExtents); + XMVECTOR MaxDelta = XMVectorSubtract(SphereCenter, BoxExtents); + + // Choose value for each dimension based on the comparison. + d = XMVectorSelect(d, MinDelta, LessThanMin); + d = XMVectorSelect(d, MaxDelta, GreaterThanMax); + + // Use a dot-product to square them and sum them together. + XMVECTOR d2 = XMVector3Dot(d, d); + XMVECTOR SphereRadiusSq = XMVectorMultiply(SphereRadius, SphereRadius); + + if (XMVector4Greater(d2, SphereRadiusSq)) + return DISJOINT; + + // See if we are completely inside the box + XMVECTOR SMin = XMVectorSubtract(SphereCenter, SphereRadius); + XMVECTOR SMax = XMVectorAdd(SphereCenter, SphereRadius); + + return (XMVector3InBounds(SMin, BoxExtents) && XMVector3InBounds(SMax, BoxExtents)) ? CONTAINS : INTERSECTS; +} + + +//----------------------------------------------------------------------------- +// Axis aligned box vs. oriented box. Constructs an oriented box and uses +// the oriented box vs. oriented box test. +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline ContainmentType BoundingOrientedBox::Contains(const BoundingBox& box) const noexcept +{ + // Make the axis aligned box oriented and do an OBB vs OBB test. 
+ BoundingOrientedBox obox(box.Center, box.Extents, XMFLOAT4(0.f, 0.f, 0.f, 1.f)); + return Contains(obox); +} + + +//----------------------------------------------------------------------------- +// Oriented bounding box in oriented bounding box +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline ContainmentType BoundingOrientedBox::Contains(const BoundingOrientedBox& box) const noexcept +{ + if (!Intersects(box)) + return DISJOINT; + + // Load the boxes + XMVECTOR aCenter = XMLoadFloat3(&Center); + XMVECTOR aExtents = XMLoadFloat3(&Extents); + XMVECTOR aOrientation = XMLoadFloat4(&Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(aOrientation)); + + XMVECTOR bCenter = XMLoadFloat3(&box.Center); + XMVECTOR bExtents = XMLoadFloat3(&box.Extents); + XMVECTOR bOrientation = XMLoadFloat4(&box.Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(bOrientation)); + + XMVECTOR offset = XMVectorSubtract(bCenter, aCenter); + + for (size_t i = 0; i < CORNER_COUNT; ++i) + { + // Cb = rotate( bExtents * corneroffset[i], bOrientation ) + bcenter + // Ca = invrotate( Cb - aCenter, aOrientation ) + + XMVECTOR C = XMVectorAdd(XMVector3Rotate(XMVectorMultiply(bExtents, g_BoxOffset[i]), bOrientation), offset); + C = XMVector3InverseRotate(C, aOrientation); + + if (!XMVector3InBounds(C, aExtents)) + return INTERSECTS; + } + + return CONTAINS; +} + + +//----------------------------------------------------------------------------- +// Frustum in oriented bounding box +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline ContainmentType BoundingOrientedBox::Contains(const BoundingFrustum& fr) const noexcept +{ + if (!fr.Intersects(*this)) + return DISJOINT; + + XMFLOAT3 Corners[BoundingFrustum::CORNER_COUNT]; + fr.GetCorners(Corners); + + // Load the box + XMVECTOR vCenter = XMLoadFloat3(&Center); + XMVECTOR vExtents = XMLoadFloat3(&Extents); + 
XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(vOrientation)); + + for (size_t i = 0; i < BoundingFrustum::CORNER_COUNT; ++i) + { + XMVECTOR C = XMVector3InverseRotate(XMVectorSubtract(XMLoadFloat3(&Corners[i]), vCenter), vOrientation); + + if (!XMVector3InBounds(C, vExtents)) + return INTERSECTS; + } + + return CONTAINS; +} + + +//----------------------------------------------------------------------------- +// Sphere vs. oriented box test +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline bool BoundingOrientedBox::Intersects(const BoundingSphere& sh) const noexcept +{ + XMVECTOR SphereCenter = XMLoadFloat3(&sh.Center); + XMVECTOR SphereRadius = XMVectorReplicatePtr(&sh.Radius); + + XMVECTOR BoxCenter = XMLoadFloat3(&Center); + XMVECTOR BoxExtents = XMLoadFloat3(&Extents); + XMVECTOR BoxOrientation = XMLoadFloat4(&Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(BoxOrientation)); + + // Transform the center of the sphere to be local to the box. + // BoxMin = -BoxExtents + // BoxMax = +BoxExtents + SphereCenter = XMVector3InverseRotate(XMVectorSubtract(SphereCenter, BoxCenter), BoxOrientation); + + // Find the distance to the nearest point on the box. + // for each i in (x, y, z) + // if (SphereCenter(i) < BoxMin(i)) d2 += (SphereCenter(i) - BoxMin(i)) ^ 2 + // else if (SphereCenter(i) > BoxMax(i)) d2 += (SphereCenter(i) - BoxMax(i)) ^ 2 + + XMVECTOR d = XMVectorZero(); + + // Compute d for each dimension. + XMVECTOR LessThanMin = XMVectorLess(SphereCenter, XMVectorNegate(BoxExtents)); + XMVECTOR GreaterThanMax = XMVectorGreater(SphereCenter, BoxExtents); + + XMVECTOR MinDelta = XMVectorAdd(SphereCenter, BoxExtents); + XMVECTOR MaxDelta = XMVectorSubtract(SphereCenter, BoxExtents); + + // Choose value for each dimension based on the comparison. 
+ d = XMVectorSelect(d, MinDelta, LessThanMin); + d = XMVectorSelect(d, MaxDelta, GreaterThanMax); + + // Use a dot-product to square them and sum them together. + XMVECTOR d2 = XMVector3Dot(d, d); + + return XMVector4LessOrEqual(d2, XMVectorMultiply(SphereRadius, SphereRadius)) ? true : false; +} + + +//----------------------------------------------------------------------------- +// Axis aligned box vs. oriented box. Constructs an oriented box and uses +// the oriented box vs. oriented box test. +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline bool BoundingOrientedBox::Intersects(const BoundingBox& box) const noexcept +{ + // Make the axis aligned box oriented and do an OBB vs OBB test. + BoundingOrientedBox obox(box.Center, box.Extents, XMFLOAT4(0.f, 0.f, 0.f, 1.f)); + return Intersects(obox); +} + + +//----------------------------------------------------------------------------- +// Fast oriented box / oriented box intersection test using the separating axis +// theorem. +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline bool BoundingOrientedBox::Intersects(const BoundingOrientedBox& box) const noexcept +{ + // Build the 3x3 rotation matrix that defines the orientation of B relative to A. + XMVECTOR A_quat = XMLoadFloat4(&Orientation); + XMVECTOR B_quat = XMLoadFloat4(&box.Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(A_quat)); + assert(DirectX::Internal::XMQuaternionIsUnit(B_quat)); + + XMVECTOR Q = XMQuaternionMultiply(A_quat, XMQuaternionConjugate(B_quat)); + XMMATRIX R = XMMatrixRotationQuaternion(Q); + + // Compute the translation of B relative to A. + XMVECTOR A_cent = XMLoadFloat3(&Center); + XMVECTOR B_cent = XMLoadFloat3(&box.Center); + XMVECTOR t = XMVector3InverseRotate(XMVectorSubtract(B_cent, A_cent), A_quat); + + // + // h(A) = extents of A. + // h(B) = extents of B. 
+ // + // a(u) = axes of A = (1,0,0), (0,1,0), (0,0,1) + // b(u) = axes of B relative to A = (r00,r10,r20), (r01,r11,r21), (r02,r12,r22) + // + // For each possible separating axis l: + // d(A) = sum (for i = u,v,w) h(A)(i) * abs( a(i) dot l ) + // d(B) = sum (for i = u,v,w) h(B)(i) * abs( b(i) dot l ) + // if abs( t dot l ) > d(A) + d(B) then disjoint + // + + // Load extents of A and B. + XMVECTOR h_A = XMLoadFloat3(&Extents); + XMVECTOR h_B = XMLoadFloat3(&box.Extents); + + // Rows. Note R[0,1,2]X.w = 0. + XMVECTOR R0X = R.r[0]; + XMVECTOR R1X = R.r[1]; + XMVECTOR R2X = R.r[2]; + + R = XMMatrixTranspose(R); + + // Columns. Note RX[0,1,2].w = 0. + XMVECTOR RX0 = R.r[0]; + XMVECTOR RX1 = R.r[1]; + XMVECTOR RX2 = R.r[2]; + + // Absolute value of rows. + XMVECTOR AR0X = XMVectorAbs(R0X); + XMVECTOR AR1X = XMVectorAbs(R1X); + XMVECTOR AR2X = XMVectorAbs(R2X); + + // Absolute value of columns. + XMVECTOR ARX0 = XMVectorAbs(RX0); + XMVECTOR ARX1 = XMVectorAbs(RX1); + XMVECTOR ARX2 = XMVectorAbs(RX2); + + // Test each of the 15 possible separating axes. 
+ XMVECTOR d, d_A, d_B; + + // l = a(u) = (1, 0, 0) + // t dot l = t.x + // d(A) = h(A).x + // d(B) = h(B) dot abs(r00, r01, r02) + d = XMVectorSplatX(t); + d_A = XMVectorSplatX(h_A); + d_B = XMVector3Dot(h_B, AR0X); + XMVECTOR NoIntersection = XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B)); + + // l = a(v) = (0, 1, 0) + // t dot l = t.y + // d(A) = h(A).y + // d(B) = h(B) dot abs(r10, r11, r12) + d = XMVectorSplatY(t); + d_A = XMVectorSplatY(h_A); + d_B = XMVector3Dot(h_B, AR1X); + NoIntersection = XMVectorOrInt(NoIntersection, + XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); + + // l = a(w) = (0, 0, 1) + // t dot l = t.z + // d(A) = h(A).z + // d(B) = h(B) dot abs(r20, r21, r22) + d = XMVectorSplatZ(t); + d_A = XMVectorSplatZ(h_A); + d_B = XMVector3Dot(h_B, AR2X); + NoIntersection = XMVectorOrInt(NoIntersection, + XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); + + // l = b(u) = (r00, r10, r20) + // d(A) = h(A) dot abs(r00, r10, r20) + // d(B) = h(B).x + d = XMVector3Dot(t, RX0); + d_A = XMVector3Dot(h_A, ARX0); + d_B = XMVectorSplatX(h_B); + NoIntersection = XMVectorOrInt(NoIntersection, + XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); + + // l = b(v) = (r01, r11, r21) + // d(A) = h(A) dot abs(r01, r11, r21) + // d(B) = h(B).y + d = XMVector3Dot(t, RX1); + d_A = XMVector3Dot(h_A, ARX1); + d_B = XMVectorSplatY(h_B); + NoIntersection = XMVectorOrInt(NoIntersection, + XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); + + // l = b(w) = (r02, r12, r22) + // d(A) = h(A) dot abs(r02, r12, r22) + // d(B) = h(B).z + d = XMVector3Dot(t, RX2); + d_A = XMVector3Dot(h_A, ARX2); + d_B = XMVectorSplatZ(h_B); + NoIntersection = XMVectorOrInt(NoIntersection, + XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); + + // l = a(u) x b(u) = (0, -r20, r10) + // d(A) = h(A) dot abs(0, r20, r10) + // d(B) = h(B) dot abs(0, r02, r01) + d = XMVector3Dot(t, XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_0X>(RX0, XMVectorNegate(RX0))); + d_A = XMVector3Dot(h_A, 
XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_X>(ARX0)); + d_B = XMVector3Dot(h_B, XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_X>(AR0X)); + NoIntersection = XMVectorOrInt(NoIntersection, + XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); + + // l = a(u) x b(v) = (0, -r21, r11) + // d(A) = h(A) dot abs(0, r21, r11) + // d(B) = h(B) dot abs(r02, 0, r00) + d = XMVector3Dot(t, XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_0X>(RX1, XMVectorNegate(RX1))); + d_A = XMVector3Dot(h_A, XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_X>(ARX1)); + d_B = XMVector3Dot(h_B, XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Y>(AR0X)); + NoIntersection = XMVectorOrInt(NoIntersection, + XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); + + // l = a(u) x b(w) = (0, -r22, r12) + // d(A) = h(A) dot abs(0, r22, r12) + // d(B) = h(B) dot abs(r01, r00, 0) + d = XMVector3Dot(t, XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_0X>(RX2, XMVectorNegate(RX2))); + d_A = XMVector3Dot(h_A, XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_X>(ARX2)); + d_B = XMVector3Dot(h_B, XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_Z>(AR0X)); + NoIntersection = XMVectorOrInt(NoIntersection, + XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); + + // l = a(v) x b(u) = (r20, 0, -r00) + // d(A) = h(A) dot abs(r20, 0, r00) + // d(B) = h(B) dot abs(0, r12, r11) + d = XMVector3Dot(t, XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_0Y>(RX0, XMVectorNegate(RX0))); + d_A = XMVector3Dot(h_A, XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Y>(ARX0)); + d_B = XMVector3Dot(h_B, XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_X>(AR1X)); + NoIntersection = XMVectorOrInt(NoIntersection, + XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); + + // l = a(v) x b(v) = (r21, 0, -r01) + // d(A) = h(A) dot abs(r21, 0, r01) + // d(B) = h(B) dot abs(r12, 0, r10) + d = XMVector3Dot(t, XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_0Y>(RX1, XMVectorNegate(RX1))); + d_A = XMVector3Dot(h_A, XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Y>(ARX1)); + d_B = XMVector3Dot(h_B, XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Y>(AR1X)); + NoIntersection = XMVectorOrInt(NoIntersection, + XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); + + // l = a(v) x b(w) = (r22, 0, -r02) + // d(A) = h(A) dot abs(r22, 0, r02) + // d(B) = h(B) dot abs(r11, r10, 0) + d = XMVector3Dot(t, XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_0Y>(RX2, XMVectorNegate(RX2))); + d_A = XMVector3Dot(h_A, XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Y>(ARX2)); + d_B = XMVector3Dot(h_B, 
XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_Z>(AR1X)); + NoIntersection = XMVectorOrInt(NoIntersection, + XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); + + // l = a(w) x b(u) = (-r10, r00, 0) + // d(A) = h(A) dot abs(r10, r00, 0) + // d(B) = h(B) dot abs(0, r22, r21) + d = XMVector3Dot(t, XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0Z>(RX0, XMVectorNegate(RX0))); + d_A = XMVector3Dot(h_A, XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_Z>(ARX0)); + d_B = XMVector3Dot(h_B, XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_X>(AR2X)); + NoIntersection = XMVectorOrInt(NoIntersection, + XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); + + // l = a(w) x b(v) = (-r11, r01, 0) + // d(A) = h(A) dot abs(r11, r01, 0) + // d(B) = h(B) dot abs(r22, 0, r20) + d = XMVector3Dot(t, XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0Z>(RX1, XMVectorNegate(RX1))); + d_A = XMVector3Dot(h_A, XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_Z>(ARX1)); + d_B = XMVector3Dot(h_B, XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Y>(AR2X)); + NoIntersection = XMVectorOrInt(NoIntersection, + XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); + + // l = a(w) x b(w) = (-r12, r02, 0) + // d(A) = h(A) dot abs(r12, r02, 0) + // d(B) = h(B) dot abs(r21, r20, 0) + d = XMVector3Dot(t, XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0Z>(RX2, XMVectorNegate(RX2))); + d_A = XMVector3Dot(h_A, XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_Z>(ARX2)); + d_B = XMVector3Dot(h_B, XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_Z>(AR2X)); + NoIntersection = XMVectorOrInt(NoIntersection, + XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); + + // No separating axis found, boxes must intersect. + return XMVector4NotEqualInt(NoIntersection, XMVectorTrueInt()) ? true : false; +} + + +//----------------------------------------------------------------------------- +// Frustum vs. oriented box test +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline bool BoundingOrientedBox::Intersects(const BoundingFrustum& fr) const noexcept +{ + return fr.Intersects(*this); +} + + +//----------------------------------------------------------------------------- +// Triangle vs. oriented box test. 
+//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline bool XM_CALLCONV BoundingOrientedBox::Intersects(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept +{ + // Load the box center & orientation. + XMVECTOR vCenter = XMLoadFloat3(&Center); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + // Transform the triangle vertices into the space of the box. + XMVECTOR TV0 = XMVector3InverseRotate(XMVectorSubtract(V0, vCenter), vOrientation); + XMVECTOR TV1 = XMVector3InverseRotate(XMVectorSubtract(V1, vCenter), vOrientation); + XMVECTOR TV2 = XMVector3InverseRotate(XMVectorSubtract(V2, vCenter), vOrientation); + + BoundingBox box; + box.Center = XMFLOAT3(0.0f, 0.0f, 0.0f); + box.Extents = Extents; + + // Use the triangle vs axis aligned box intersection routine. + return box.Intersects(TV0, TV1, TV2); +} + + +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline PlaneIntersectionType XM_CALLCONV BoundingOrientedBox::Intersects(FXMVECTOR Plane) const noexcept +{ + assert(DirectX::Internal::XMPlaneIsUnit(Plane)); + + // Load the box. + XMVECTOR vCenter = XMLoadFloat3(&Center); + XMVECTOR vExtents = XMLoadFloat3(&Extents); + XMVECTOR BoxOrientation = XMLoadFloat4(&Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(BoxOrientation)); + + // Set w of the center to one so we can dot4 with a plane. + vCenter = XMVectorInsert<0, 0, 0, 0, 1>(vCenter, XMVectorSplatOne()); + + // Build the 3x3 rotation matrix that defines the box axes. + XMMATRIX R = XMMatrixRotationQuaternion(BoxOrientation); + + XMVECTOR Outside, Inside; + DirectX::Internal::FastIntersectOrientedBoxPlane(vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane, Outside, Inside); + + // If the box is outside any plane it is outside. + if (XMVector4EqualInt(Outside, XMVectorTrueInt())) + return FRONT; + + // If the box is inside all planes it is inside. 
+ if (XMVector4EqualInt(Inside, XMVectorTrueInt())) + return BACK; + + // The box is not inside all planes or outside a plane it intersects. + return INTERSECTING; +} + + +//----------------------------------------------------------------------------- +// Compute the intersection of a ray (Origin, Direction) with an oriented box +// using the slabs method. +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline bool XM_CALLCONV BoundingOrientedBox::Intersects(FXMVECTOR Origin, FXMVECTOR Direction, float& Dist) const noexcept +{ + assert(DirectX::Internal::XMVector3IsUnit(Direction)); + + static const XMVECTORU32 SelectY = { { { XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0 } } }; + static const XMVECTORU32 SelectZ = { { { XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0 } } }; + + // Load the box. + XMVECTOR vCenter = XMLoadFloat3(&Center); + XMVECTOR vExtents = XMLoadFloat3(&Extents); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(vOrientation)); + + // Get the box's normalized side directions. + XMMATRIX R = XMMatrixRotationQuaternion(vOrientation); + + // Adjust ray origin to be relative to center of the box. + XMVECTOR TOrigin = XMVectorSubtract(vCenter, Origin); + + // Compute the dot product against each axis of the box. + XMVECTOR AxisDotOrigin = XMVector3Dot(R.r[0], TOrigin); + AxisDotOrigin = XMVectorSelect(AxisDotOrigin, XMVector3Dot(R.r[1], TOrigin), SelectY); + AxisDotOrigin = XMVectorSelect(AxisDotOrigin, XMVector3Dot(R.r[2], TOrigin), SelectZ); + + XMVECTOR AxisDotDirection = XMVector3Dot(R.r[0], Direction); + AxisDotDirection = XMVectorSelect(AxisDotDirection, XMVector3Dot(R.r[1], Direction), SelectY); + AxisDotDirection = XMVectorSelect(AxisDotDirection, XMVector3Dot(R.r[2], Direction), SelectZ); + + // if (fabs(AxisDotDirection) <= Epsilon) the ray is nearly parallel to the slab. 
+ XMVECTOR IsParallel = XMVectorLessOrEqual(XMVectorAbs(AxisDotDirection), g_RayEpsilon); + + // Test against all three axes simultaneously. + XMVECTOR InverseAxisDotDirection = XMVectorReciprocal(AxisDotDirection); + XMVECTOR t1 = XMVectorMultiply(XMVectorSubtract(AxisDotOrigin, vExtents), InverseAxisDotDirection); + XMVECTOR t2 = XMVectorMultiply(XMVectorAdd(AxisDotOrigin, vExtents), InverseAxisDotDirection); + + // Compute the max of min(t1,t2) and the min of max(t1,t2) ensuring we don't + // use the results from any directions parallel to the slab. + XMVECTOR t_min = XMVectorSelect(XMVectorMin(t1, t2), g_FltMin, IsParallel); + XMVECTOR t_max = XMVectorSelect(XMVectorMax(t1, t2), g_FltMax, IsParallel); + + // t_min.x = maximum( t_min.x, t_min.y, t_min.z ); + // t_max.x = minimum( t_max.x, t_max.y, t_max.z ); + t_min = XMVectorMax(t_min, XMVectorSplatY(t_min)); // x = max(x,y) + t_min = XMVectorMax(t_min, XMVectorSplatZ(t_min)); // x = max(max(x,y),z) + t_max = XMVectorMin(t_max, XMVectorSplatY(t_max)); // x = min(x,y) + t_max = XMVectorMin(t_max, XMVectorSplatZ(t_max)); // x = min(min(x,y),z) + + // if ( t_min > t_max ) return false; + XMVECTOR NoIntersection = XMVectorGreater(XMVectorSplatX(t_min), XMVectorSplatX(t_max)); + + // if ( t_max < 0.0f ) return false; + NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(XMVectorSplatX(t_max), XMVectorZero())); + + // if (IsParallel && (-Extents > AxisDotOrigin || Extents < AxisDotOrigin)) return false; + XMVECTOR ParallelOverlap = XMVectorInBounds(AxisDotOrigin, vExtents); + NoIntersection = XMVectorOrInt(NoIntersection, XMVectorAndCInt(IsParallel, ParallelOverlap)); + + if (!DirectX::Internal::XMVector3AnyTrue(NoIntersection)) + { + // Store the x-component to Dist + XMStoreFloat(&Dist, t_min); + return true; + } + + Dist = 0.f; + return false; +} + + +//----------------------------------------------------------------------------- +// Test an oriented box vs 6 planes (typically forming a frustum). 
+//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline ContainmentType XM_CALLCONV BoundingOrientedBox::ContainedBy( + FXMVECTOR Plane0, FXMVECTOR Plane1, FXMVECTOR Plane2, + GXMVECTOR Plane3, + HXMVECTOR Plane4, HXMVECTOR Plane5) const noexcept +{ + // Load the box. + XMVECTOR vCenter = XMLoadFloat3(&Center); + XMVECTOR vExtents = XMLoadFloat3(&Extents); + XMVECTOR BoxOrientation = XMLoadFloat4(&Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(BoxOrientation)); + + // Set w of the center to one so we can dot4 with a plane. + vCenter = XMVectorInsert<0, 0, 0, 0, 1>(vCenter, XMVectorSplatOne()); + + // Build the 3x3 rotation matrix that defines the box axes. + XMMATRIX R = XMMatrixRotationQuaternion(BoxOrientation); + + XMVECTOR Outside, Inside; + + // Test against each plane. + DirectX::Internal::FastIntersectOrientedBoxPlane(vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane0, Outside, Inside); + + XMVECTOR AnyOutside = Outside; + XMVECTOR AllInside = Inside; + + DirectX::Internal::FastIntersectOrientedBoxPlane(vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane1, Outside, Inside); + AnyOutside = XMVectorOrInt(AnyOutside, Outside); + AllInside = XMVectorAndInt(AllInside, Inside); + + DirectX::Internal::FastIntersectOrientedBoxPlane(vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane2, Outside, Inside); + AnyOutside = XMVectorOrInt(AnyOutside, Outside); + AllInside = XMVectorAndInt(AllInside, Inside); + + DirectX::Internal::FastIntersectOrientedBoxPlane(vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane3, Outside, Inside); + AnyOutside = XMVectorOrInt(AnyOutside, Outside); + AllInside = XMVectorAndInt(AllInside, Inside); + + DirectX::Internal::FastIntersectOrientedBoxPlane(vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane4, Outside, Inside); + AnyOutside = XMVectorOrInt(AnyOutside, Outside); + AllInside = XMVectorAndInt(AllInside, Inside); + + DirectX::Internal::FastIntersectOrientedBoxPlane(vCenter, 
vExtents, R.r[0], R.r[1], R.r[2], Plane5, Outside, Inside); + AnyOutside = XMVectorOrInt(AnyOutside, Outside); + AllInside = XMVectorAndInt(AllInside, Inside); + + // If the box is outside any plane it is outside. + if (XMVector4EqualInt(AnyOutside, XMVectorTrueInt())) + return DISJOINT; + + // If the box is inside all planes it is inside. + if (XMVector4EqualInt(AllInside, XMVectorTrueInt())) + return CONTAINS; + + // The box is not inside all planes or outside a plane, it may intersect. + return INTERSECTS; +} + + +//----------------------------------------------------------------------------- +// Create oriented bounding box from axis-aligned bounding box +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline void BoundingOrientedBox::CreateFromBoundingBox(BoundingOrientedBox& Out, const BoundingBox& box) noexcept +{ + Out.Center = box.Center; + Out.Extents = box.Extents; + Out.Orientation = XMFLOAT4(0.f, 0.f, 0.f, 1.f); +} + + +//----------------------------------------------------------------------------- +// Find the approximate minimum oriented bounding box containing a set of +// points. Exact computation of minimum oriented bounding box is possible but +// is slower and requires a more complex algorithm. +// The algorithm works by computing the inertia tensor of the points and then +// using the eigenvectors of the inertia tensor as the axes of the box. +// Computing the inertia tensor of the convex hull of the points will usually +// result in better bounding box but the computation is more complex. +// Exact computation of the minimum oriented bounding box is possible but the +// best known algorithm is O(N^3) and is significantly more complex to implement. 
+//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline void BoundingOrientedBox::CreateFromPoints(BoundingOrientedBox& Out, size_t Count, const XMFLOAT3* pPoints, size_t Stride) noexcept +{ + assert(Count > 0); + assert(pPoints != nullptr); + + XMVECTOR CenterOfMass = XMVectorZero(); + + // Compute the center of mass of the points. + for (size_t i = 0; i < Count; ++i) + { + XMVECTOR Point = XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(reinterpret_cast<const uint8_t*>(pPoints) + i * Stride)); + + CenterOfMass = XMVectorAdd(CenterOfMass, Point); + } + + CenterOfMass = XMVectorMultiply(CenterOfMass, XMVectorReciprocal(XMVectorReplicate(float(Count)))); + + // Compute the inertia tensor of the points around the center of mass. + // Using the center of mass is not strictly necessary, but will hopefully + // improve the stability of finding the eigenvectors. + XMVECTOR XX_YY_ZZ = XMVectorZero(); + XMVECTOR XY_XZ_YZ = XMVectorZero(); + + for (size_t i = 0; i < Count; ++i) + { + XMVECTOR Point = XMVectorSubtract(XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(reinterpret_cast<const uint8_t*>(pPoints) + i * Stride)), CenterOfMass); + + XX_YY_ZZ = XMVectorAdd(XX_YY_ZZ, XMVectorMultiply(Point, Point)); + + XMVECTOR XXY = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>(Point); + XMVECTOR YZZ = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_W>(Point); + + XY_XZ_YZ = XMVectorAdd(XY_XZ_YZ, XMVectorMultiply(XXY, YZZ)); + } + + XMVECTOR v1, v2, v3; + + // Compute the eigenvectors of the inertia tensor. + DirectX::Internal::CalculateEigenVectorsFromCovarianceMatrix(XMVectorGetX(XX_YY_ZZ), XMVectorGetY(XX_YY_ZZ), + XMVectorGetZ(XX_YY_ZZ), + XMVectorGetX(XY_XZ_YZ), XMVectorGetY(XY_XZ_YZ), + XMVectorGetZ(XY_XZ_YZ), + &v1, &v2, &v3); + + // Put them in a matrix. 
+ XMMATRIX R; + + R.r[0] = XMVectorSetW(v1, 0.f); + R.r[1] = XMVectorSetW(v2, 0.f); + R.r[2] = XMVectorSetW(v3, 0.f); + R.r[3] = g_XMIdentityR3.v; + + // Multiply by -1 to convert the matrix into a right handed coordinate + // system (Det ~= 1) in case the eigenvectors form a left handed + // coordinate system (Det ~= -1) because XMQuaternionRotationMatrix only + // works on right handed matrices. + XMVECTOR Det = XMMatrixDeterminant(R); + + if (XMVector4Less(Det, XMVectorZero())) + { + R.r[0] = XMVectorMultiply(R.r[0], g_XMNegativeOne.v); + R.r[1] = XMVectorMultiply(R.r[1], g_XMNegativeOne.v); + R.r[2] = XMVectorMultiply(R.r[2], g_XMNegativeOne.v); + } + + // Get the rotation quaternion from the matrix. + XMVECTOR vOrientation = XMQuaternionRotationMatrix(R); + + // Make sure it is normal (in case the vectors are slightly non-orthogonal). + vOrientation = XMQuaternionNormalize(vOrientation); + + // Rebuild the rotation matrix from the quaternion. + R = XMMatrixRotationQuaternion(vOrientation); + + // Build the rotation into the rotated space. + XMMATRIX InverseR = XMMatrixTranspose(R); + + // Find the minimum OBB using the eigenvectors as the axes. + XMVECTOR vMin, vMax; + + vMin = vMax = XMVector3TransformNormal(XMLoadFloat3(pPoints), InverseR); + + for (size_t i = 1; i < Count; ++i) + { + XMVECTOR Point = XMVector3TransformNormal(XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(reinterpret_cast<const uint8_t*>(pPoints) + i * Stride)), + InverseR); + + vMin = XMVectorMin(vMin, Point); + vMax = XMVectorMax(vMax, Point); + } + + // Rotate the center into world space. + XMVECTOR vCenter = XMVectorScale(XMVectorAdd(vMin, vMax), 0.5f); + vCenter = XMVector3TransformNormal(vCenter, R); + + // Store center, extents, and orientation. 
+ XMStoreFloat3(&Out.Center, vCenter); + XMStoreFloat3(&Out.Extents, XMVectorScale(XMVectorSubtract(vMax, vMin), 0.5f)); + XMStoreFloat4(&Out.Orientation, vOrientation); +} + + +/**************************************************************************** + * + * BoundingFrustum + * + ****************************************************************************/ + +_Use_decl_annotations_ +inline BoundingFrustum::BoundingFrustum(CXMMATRIX Projection, bool rhcoords) noexcept +{ + CreateFromMatrix(*this, Projection, rhcoords); +} + + +//----------------------------------------------------------------------------- +// Transform a frustum by an angle preserving transform. +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline void XM_CALLCONV BoundingFrustum::Transform(BoundingFrustum& Out, FXMMATRIX M) const noexcept +{ + // Load the frustum. + XMVECTOR vOrigin = XMLoadFloat3(&Origin); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(vOrientation)); + + // Composite the frustum rotation and the transform rotation + XMMATRIX nM; + nM.r[0] = XMVector3Normalize(M.r[0]); + nM.r[1] = XMVector3Normalize(M.r[1]); + nM.r[2] = XMVector3Normalize(M.r[2]); + nM.r[3] = g_XMIdentityR3; + XMVECTOR Rotation = XMQuaternionRotationMatrix(nM); + vOrientation = XMQuaternionMultiply(vOrientation, Rotation); + + // Transform the center. + vOrigin = XMVector3Transform(vOrigin, M); + + // Store the frustum. + XMStoreFloat3(&Out.Origin, vOrigin); + XMStoreFloat4(&Out.Orientation, vOrientation); + + // Scale the near and far distances (the slopes remain the same). + XMVECTOR dX = XMVector3Dot(M.r[0], M.r[0]); + XMVECTOR dY = XMVector3Dot(M.r[1], M.r[1]); + XMVECTOR dZ = XMVector3Dot(M.r[2], M.r[2]); + + XMVECTOR d = XMVectorMax(dX, XMVectorMax(dY, dZ)); + float Scale = sqrtf(XMVectorGetX(d)); + + Out.Near = Near * Scale; + Out.Far = Far * Scale; + + // Copy the slopes. 
+ Out.RightSlope = RightSlope; + Out.LeftSlope = LeftSlope; + Out.TopSlope = TopSlope; + Out.BottomSlope = BottomSlope; +} + +_Use_decl_annotations_ +inline void XM_CALLCONV BoundingFrustum::Transform(BoundingFrustum& Out, float Scale, FXMVECTOR Rotation, FXMVECTOR Translation) const noexcept +{ + assert(DirectX::Internal::XMQuaternionIsUnit(Rotation)); + + // Load the frustum. + XMVECTOR vOrigin = XMLoadFloat3(&Origin); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(vOrientation)); + + // Composite the frustum rotation and the transform rotation. + vOrientation = XMQuaternionMultiply(vOrientation, Rotation); + + // Transform the origin. + vOrigin = XMVectorAdd(XMVector3Rotate(XMVectorScale(vOrigin, Scale), Rotation), Translation); + + // Store the frustum. + XMStoreFloat3(&Out.Origin, vOrigin); + XMStoreFloat4(&Out.Orientation, vOrientation); + + // Scale the near and far distances (the slopes remain the same). + Out.Near = Near * Scale; + Out.Far = Far * Scale; + + // Copy the slopes. + Out.RightSlope = RightSlope; + Out.LeftSlope = LeftSlope; + Out.TopSlope = TopSlope; + Out.BottomSlope = BottomSlope; +} + + +//----------------------------------------------------------------------------- +// Get the corner points of the frustum +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline void BoundingFrustum::GetCorners(XMFLOAT3* Corners) const noexcept +{ + assert(Corners != nullptr); + + // Load origin and orientation of the frustum. + XMVECTOR vOrigin = XMLoadFloat3(&Origin); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(vOrientation)); + + // Build the corners of the frustum. 
+ XMVECTOR vRightTop = XMVectorSet(RightSlope, TopSlope, 1.0f, 0.0f); + XMVECTOR vRightBottom = XMVectorSet(RightSlope, BottomSlope, 1.0f, 0.0f); + XMVECTOR vLeftTop = XMVectorSet(LeftSlope, TopSlope, 1.0f, 0.0f); + XMVECTOR vLeftBottom = XMVectorSet(LeftSlope, BottomSlope, 1.0f, 0.0f); + XMVECTOR vNear = XMVectorReplicatePtr(&Near); + XMVECTOR vFar = XMVectorReplicatePtr(&Far); + + // Returns 8 corners position of bounding frustum. + // Near Far + // 0----1 4----5 + // | | | | + // | | | | + // 3----2 7----6 + + XMVECTOR vCorners[CORNER_COUNT]; + vCorners[0] = XMVectorMultiply(vLeftTop, vNear); + vCorners[1] = XMVectorMultiply(vRightTop, vNear); + vCorners[2] = XMVectorMultiply(vRightBottom, vNear); + vCorners[3] = XMVectorMultiply(vLeftBottom, vNear); + vCorners[4] = XMVectorMultiply(vLeftTop, vFar); + vCorners[5] = XMVectorMultiply(vRightTop, vFar); + vCorners[6] = XMVectorMultiply(vRightBottom, vFar); + vCorners[7] = XMVectorMultiply(vLeftBottom, vFar); + + for (size_t i = 0; i < CORNER_COUNT; ++i) + { + XMVECTOR C = XMVectorAdd(XMVector3Rotate(vCorners[i], vOrientation), vOrigin); + XMStoreFloat3(&Corners[i], C); + } +} + + +//----------------------------------------------------------------------------- +// Point in frustum test. +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline ContainmentType XM_CALLCONV BoundingFrustum::Contains(FXMVECTOR Point) const noexcept +{ + // Build frustum planes. + XMVECTOR Planes[6]; + Planes[0] = XMVectorSet(0.0f, 0.0f, -1.0f, Near); + Planes[1] = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); + Planes[2] = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); + Planes[3] = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); + Planes[4] = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); + Planes[5] = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); + + // Load origin and orientation. 
+ XMVECTOR vOrigin = XMLoadFloat3(&Origin); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + assert(DirectX::Internal::XMQuaternionIsUnit(vOrientation)); + + // Transform point into local space of frustum. + XMVECTOR TPoint = XMVector3InverseRotate(XMVectorSubtract(Point, vOrigin), vOrientation); + + // Set w to one. + TPoint = XMVectorInsert<0, 0, 0, 0, 1>(TPoint, XMVectorSplatOne()); + + XMVECTOR Zero = XMVectorZero(); + XMVECTOR Outside = Zero; + + // Test point against each plane of the frustum. + for (size_t i = 0; i < 6; ++i) + { + XMVECTOR Dot = XMVector4Dot(TPoint, Planes[i]); + Outside = XMVectorOrInt(Outside, XMVectorGreater(Dot, Zero)); + } + + return XMVector4NotEqualInt(Outside, XMVectorTrueInt()) ? CONTAINS : DISJOINT; +} + + +//----------------------------------------------------------------------------- +// Triangle vs frustum test. +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline ContainmentType XM_CALLCONV BoundingFrustum::Contains(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept +{ + // Load origin and orientation of the frustum. 
+ XMVECTOR vOrigin = XMLoadFloat3(&Origin); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + // Create 6 planes (do it inline to encourage use of registers) + XMVECTOR NearPlane = XMVectorSet(0.0f, 0.0f, -1.0f, Near); + NearPlane = DirectX::Internal::XMPlaneTransform(NearPlane, vOrientation, vOrigin); + NearPlane = XMPlaneNormalize(NearPlane); + + XMVECTOR FarPlane = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); + FarPlane = DirectX::Internal::XMPlaneTransform(FarPlane, vOrientation, vOrigin); + FarPlane = XMPlaneNormalize(FarPlane); + + XMVECTOR RightPlane = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); + RightPlane = DirectX::Internal::XMPlaneTransform(RightPlane, vOrientation, vOrigin); + RightPlane = XMPlaneNormalize(RightPlane); + + XMVECTOR LeftPlane = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); + LeftPlane = DirectX::Internal::XMPlaneTransform(LeftPlane, vOrientation, vOrigin); + LeftPlane = XMPlaneNormalize(LeftPlane); + + XMVECTOR TopPlane = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); + TopPlane = DirectX::Internal::XMPlaneTransform(TopPlane, vOrientation, vOrigin); + TopPlane = XMPlaneNormalize(TopPlane); + + XMVECTOR BottomPlane = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); + BottomPlane = DirectX::Internal::XMPlaneTransform(BottomPlane, vOrientation, vOrigin); + BottomPlane = XMPlaneNormalize(BottomPlane); + + return TriangleTests::ContainedBy(V0, V1, V2, NearPlane, FarPlane, RightPlane, LeftPlane, TopPlane, BottomPlane); +} + + +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline ContainmentType BoundingFrustum::Contains(const BoundingSphere& sh) const noexcept +{ + // Load origin and orientation of the frustum. 
+ XMVECTOR vOrigin = XMLoadFloat3(&Origin); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + // Create 6 planes (do it inline to encourage use of registers) + XMVECTOR NearPlane = XMVectorSet(0.0f, 0.0f, -1.0f, Near); + NearPlane = DirectX::Internal::XMPlaneTransform(NearPlane, vOrientation, vOrigin); + NearPlane = XMPlaneNormalize(NearPlane); + + XMVECTOR FarPlane = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); + FarPlane = DirectX::Internal::XMPlaneTransform(FarPlane, vOrientation, vOrigin); + FarPlane = XMPlaneNormalize(FarPlane); + + XMVECTOR RightPlane = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); + RightPlane = DirectX::Internal::XMPlaneTransform(RightPlane, vOrientation, vOrigin); + RightPlane = XMPlaneNormalize(RightPlane); + + XMVECTOR LeftPlane = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); + LeftPlane = DirectX::Internal::XMPlaneTransform(LeftPlane, vOrientation, vOrigin); + LeftPlane = XMPlaneNormalize(LeftPlane); + + XMVECTOR TopPlane = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); + TopPlane = DirectX::Internal::XMPlaneTransform(TopPlane, vOrientation, vOrigin); + TopPlane = XMPlaneNormalize(TopPlane); + + XMVECTOR BottomPlane = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); + BottomPlane = DirectX::Internal::XMPlaneTransform(BottomPlane, vOrientation, vOrigin); + BottomPlane = XMPlaneNormalize(BottomPlane); + + return sh.ContainedBy(NearPlane, FarPlane, RightPlane, LeftPlane, TopPlane, BottomPlane); +} + + +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline ContainmentType BoundingFrustum::Contains(const BoundingBox& box) const noexcept +{ + // Load origin and orientation of the frustum. 
+ XMVECTOR vOrigin = XMLoadFloat3(&Origin); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + // Create 6 planes (do it inline to encourage use of registers) + XMVECTOR NearPlane = XMVectorSet(0.0f, 0.0f, -1.0f, Near); + NearPlane = DirectX::Internal::XMPlaneTransform(NearPlane, vOrientation, vOrigin); + NearPlane = XMPlaneNormalize(NearPlane); + + XMVECTOR FarPlane = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); + FarPlane = DirectX::Internal::XMPlaneTransform(FarPlane, vOrientation, vOrigin); + FarPlane = XMPlaneNormalize(FarPlane); + + XMVECTOR RightPlane = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); + RightPlane = DirectX::Internal::XMPlaneTransform(RightPlane, vOrientation, vOrigin); + RightPlane = XMPlaneNormalize(RightPlane); + + XMVECTOR LeftPlane = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); + LeftPlane = DirectX::Internal::XMPlaneTransform(LeftPlane, vOrientation, vOrigin); + LeftPlane = XMPlaneNormalize(LeftPlane); + + XMVECTOR TopPlane = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); + TopPlane = DirectX::Internal::XMPlaneTransform(TopPlane, vOrientation, vOrigin); + TopPlane = XMPlaneNormalize(TopPlane); + + XMVECTOR BottomPlane = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); + BottomPlane = DirectX::Internal::XMPlaneTransform(BottomPlane, vOrientation, vOrigin); + BottomPlane = XMPlaneNormalize(BottomPlane); + + return box.ContainedBy(NearPlane, FarPlane, RightPlane, LeftPlane, TopPlane, BottomPlane); +} + + +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline ContainmentType BoundingFrustum::Contains(const BoundingOrientedBox& box) const noexcept +{ + // Load origin and orientation of the frustum. 
+ XMVECTOR vOrigin = XMLoadFloat3(&Origin); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + // Create 6 planes (do it inline to encourage use of registers) + XMVECTOR NearPlane = XMVectorSet(0.0f, 0.0f, -1.0f, Near); + NearPlane = DirectX::Internal::XMPlaneTransform(NearPlane, vOrientation, vOrigin); + NearPlane = XMPlaneNormalize(NearPlane); + + XMVECTOR FarPlane = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); + FarPlane = DirectX::Internal::XMPlaneTransform(FarPlane, vOrientation, vOrigin); + FarPlane = XMPlaneNormalize(FarPlane); + + XMVECTOR RightPlane = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); + RightPlane = DirectX::Internal::XMPlaneTransform(RightPlane, vOrientation, vOrigin); + RightPlane = XMPlaneNormalize(RightPlane); + + XMVECTOR LeftPlane = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); + LeftPlane = DirectX::Internal::XMPlaneTransform(LeftPlane, vOrientation, vOrigin); + LeftPlane = XMPlaneNormalize(LeftPlane); + + XMVECTOR TopPlane = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); + TopPlane = DirectX::Internal::XMPlaneTransform(TopPlane, vOrientation, vOrigin); + TopPlane = XMPlaneNormalize(TopPlane); + + XMVECTOR BottomPlane = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); + BottomPlane = DirectX::Internal::XMPlaneTransform(BottomPlane, vOrientation, vOrigin); + BottomPlane = XMPlaneNormalize(BottomPlane); + + return box.ContainedBy(NearPlane, FarPlane, RightPlane, LeftPlane, TopPlane, BottomPlane); +} + + +//----------------------------------------------------------------------------- +_Use_decl_annotations_ +inline ContainmentType BoundingFrustum::Contains(const BoundingFrustum& fr) const noexcept +{ + // Load origin and orientation of the frustum. 
+ XMVECTOR vOrigin = XMLoadFloat3(&Origin); + XMVECTOR vOrientation = XMLoadFloat4(&Orientation); + + // Create 6 planes (do it inline to encourage use of registers) + XMVECTOR NearPlane = XMVectorSet(0.0f, 0.0f, -1.0f, Near); + NearPlane = DirectX::Internal::XMPlaneTransform(NearPlane, vOrientation, vOrigin); + NearPlane = XMPlaneNormalize(NearPlane); + + XMVECTOR FarPlane = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); + FarPlane = DirectX::Internal::XMPlaneTransform(FarPlane, vOrientation, vOrigin); + FarPlane = XMPlaneNormalize(FarPlane); + + XMVECTOR RightPlane = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); + RightPlane = DirectX::Internal::XMPlaneTransform(RightPlane, vOrientation, vOrigin); + RightPlane = XMPlaneNormalize(RightPlane); + + XMVECTOR LeftPlane = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); + LeftPlane = DirectX::Internal::XMPlaneTransform(LeftPlane, vOrientation, vOrigin); + LeftPlane = XMPlaneNormalize(LeftPlane); + + XMVECTOR TopPlane = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); + TopPlane = DirectX::Internal::XMPlaneTransform(TopPlane, vOrientation, vOrigin); + TopPlane = XMPlaneNormalize(TopPlane); + + XMVECTOR BottomPlane = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); + BottomPlane = DirectX::Internal::XMPlaneTransform(BottomPlane, vOrientation, vOrigin); + BottomPlane = XMPlaneNormalize(BottomPlane); + + return fr.ContainedBy(NearPlane, FarPlane, RightPlane, LeftPlane, TopPlane, BottomPlane); +} + + +//----------------------------------------------------------------------------- +// Exact sphere vs frustum test. 
// The algorithm first checks the sphere against
// the planes of the frustum, then if the plane checks were indeterminate finds
// the nearest feature (plane, line, point) on the frustum to the center of the
// sphere and compares the distance to the nearest feature to the radius of the
// sphere
//
// sh: sphere to test (Center and Radius are read).
// Returns true if the sphere is found to touch or overlap the frustum,
// false otherwise.
//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingFrustum::Intersects(const BoundingSphere& sh) const noexcept
{
    XMVECTOR Zero = XMVectorZero();

    // Build the frustum planes.
    // Index order used throughout: 0 near, 1 far, 2 right, 3 left, 4 top, 5 bottom.
    XMVECTOR Planes[6];
    Planes[0] = XMVectorSet(0.0f, 0.0f, -1.0f, Near);
    Planes[1] = XMVectorSet(0.0f, 0.0f, 1.0f, -Far);
    Planes[2] = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f);
    Planes[3] = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f);
    Planes[4] = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f);
    Planes[5] = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f);

    // Normalize the planes so we can compare to the sphere radius.
    // (Planes 0 and 1 are written with unit-length normals already, so only
    // the four slope planes are normalized.)
    Planes[2] = XMVector3Normalize(Planes[2]);
    Planes[3] = XMVector3Normalize(Planes[3]);
    Planes[4] = XMVector3Normalize(Planes[4]);
    Planes[5] = XMVector3Normalize(Planes[5]);

    // Load origin and orientation of the frustum.
    XMVECTOR vOrigin = XMLoadFloat3(&Origin);
    XMVECTOR vOrientation = XMLoadFloat4(&Orientation);

    assert(DirectX::Internal::XMQuaternionIsUnit(vOrientation));

    // Load the sphere.
    XMVECTOR vCenter = XMLoadFloat3(&sh.Center);
    XMVECTOR vRadius = XMVectorReplicatePtr(&sh.Radius);

    // Transform the center of the sphere into the local space of frustum.
    vCenter = XMVector3InverseRotate(XMVectorSubtract(vCenter, vOrigin), vOrientation);

    // Set w of the center to one so we can dot4 with the plane.
    vCenter = XMVectorInsert<0, 0, 0, 0, 1>(vCenter, XMVectorSplatOne());

    // Check against each plane of the frustum.
    XMVECTOR Outside = XMVectorFalseInt();
    XMVECTOR InsideAll = XMVectorTrueInt();
    XMVECTOR CenterInsideAll = XMVectorTrueInt();

    // Signed distance of the sphere center to each plane; kept for the
    // nearest-face search further down.
    XMVECTOR Dist[6];

    for (size_t i = 0; i < 6; ++i)
    {
        Dist[i] = XMVector4Dot(vCenter, Planes[i]);

        // Outside the plane?
        Outside = XMVectorOrInt(Outside, XMVectorGreater(Dist[i], vRadius));

        // Fully inside the plane?
        InsideAll = XMVectorAndInt(InsideAll, XMVectorLessOrEqual(Dist[i], XMVectorNegate(vRadius)));

        // Check if the center is inside the plane.
        CenterInsideAll = XMVectorAndInt(CenterInsideAll, XMVectorLessOrEqual(Dist[i], Zero));
    }

    // If the sphere is outside any of the planes it is outside.
    if (XMVector4EqualInt(Outside, XMVectorTrueInt()))
        return false;

    // If the sphere is inside all planes it is fully inside.
    if (XMVector4EqualInt(InsideAll, XMVectorTrueInt()))
        return true;

    // If the center of the sphere is inside all planes and the sphere intersects
    // one or more planes then it must intersect.
    if (XMVector4EqualInt(CenterInsideAll, XMVectorTrueInt()))
        return true;

    // The sphere may be outside the frustum or intersecting the frustum.
    // Find the nearest feature (face, edge, or corner) on the frustum
    // to the sphere.

    // The faces adjacent to each face are (row index = face index into Planes):
    static const size_t adjacent_faces[6][4] =
    {
        { 2, 3, 4, 5 },    // 0
        { 2, 3, 4, 5 },    // 1
        { 0, 1, 4, 5 },    // 2
        { 0, 1, 4, 5 },    // 3
        { 0, 1, 2, 3 },    // 4
        { 0, 1, 2, 3 }
    };  // 5

    XMVECTOR Intersects = XMVectorFalseInt();

    // Check to see if the nearest feature is one of the planes.
    for (size_t i = 0; i < 6; ++i)
    {
        // Find the nearest point on the plane to the center of the sphere.
        XMVECTOR Point = XMVectorNegativeMultiplySubtract(Planes[i], Dist[i], vCenter);

        // Set w of the point to one.
        Point = XMVectorInsert<0, 0, 0, 0, 1>(Point, XMVectorSplatOne());

        // If the point is inside the face (inside the adjacent planes) then
        // this plane is the nearest feature.
        XMVECTOR InsideFace = XMVectorTrueInt();

        for (size_t j = 0; j < 4; j++)
        {
            size_t plane_index = adjacent_faces[i][j];

            InsideFace = XMVectorAndInt(InsideFace,
                XMVectorLessOrEqual(XMVector4Dot(Point, Planes[plane_index]), Zero));
        }

        // Since we have already checked distance from the plane we know that the
        // sphere must intersect if this plane is the nearest feature.
        // Only planes the center lies in front of (Dist > 0) are candidates.
        Intersects = XMVectorOrInt(Intersects,
            XMVectorAndInt(XMVectorGreater(Dist[i], Zero), InsideFace));
    }

    if (XMVector4EqualInt(Intersects, XMVectorTrueInt()))
        return true;

    // Build the corners of the frustum.
    // The four direction vectors have z = 1, so scaling by Near / Far yields
    // the corner positions on the near and far planes in frustum-local space.
    XMVECTOR vRightTop = XMVectorSet(RightSlope, TopSlope, 1.0f, 0.0f);
    XMVECTOR vRightBottom = XMVectorSet(RightSlope, BottomSlope, 1.0f, 0.0f);
    XMVECTOR vLeftTop = XMVectorSet(LeftSlope, TopSlope, 1.0f, 0.0f);
    XMVECTOR vLeftBottom = XMVectorSet(LeftSlope, BottomSlope, 1.0f, 0.0f);
    XMVECTOR vNear = XMVectorReplicatePtr(&Near);
    XMVECTOR vFar = XMVectorReplicatePtr(&Far);

    XMVECTOR Corners[CORNER_COUNT];
    Corners[0] = XMVectorMultiply(vRightTop, vNear);
    Corners[1] = XMVectorMultiply(vRightBottom, vNear);
    Corners[2] = XMVectorMultiply(vLeftTop, vNear);
    Corners[3] = XMVectorMultiply(vLeftBottom, vNear);
    Corners[4] = XMVectorMultiply(vRightTop, vFar);
    Corners[5] = XMVectorMultiply(vRightBottom, vFar);
    Corners[6] = XMVectorMultiply(vLeftTop, vFar);
    Corners[7] = XMVectorMultiply(vLeftBottom, vFar);

    // The Edges are (pairs of indices into Corners):
    static const size_t edges[12][2] =
    {
        { 0, 1 }, { 2, 3 }, { 0, 2 }, { 1, 3 },    // Near plane
        { 4, 5 }, { 6, 7 }, { 4, 6 }, { 5, 7 },    // Far plane
        { 0, 4 }, { 1, 5 }, { 2, 6 }, { 3, 7 },
    }; // Near to far

    // Compare squared distances so no square root is needed.
    XMVECTOR RadiusSq = XMVectorMultiply(vRadius, vRadius);

    // Check to see if the nearest feature is one of the edges (or corners).
    for (size_t i = 0; i < 12; ++i)
    {
        size_t ei0 = edges[i][0];
        size_t ei1 = edges[i][1];

        // Find the nearest point on the edge to the center of the sphere.
        // The corners of the frustum are included as the endpoints of the edges.
        XMVECTOR Point = DirectX::Internal::PointOnLineSegmentNearestPoint(Corners[ei0], Corners[ei1], vCenter);

        XMVECTOR Delta = XMVectorSubtract(vCenter, Point);

        XMVECTOR DistSq = XMVector3Dot(Delta, Delta);

        // If the distance to the center of the sphere to the point is less than
        // the radius of the sphere then it must intersect.
        Intersects = XMVectorOrInt(Intersects, XMVectorLessOrEqual(DistSq, RadiusSq));
    }

    if (XMVector4EqualInt(Intersects, XMVectorTrueInt()))
        return true;

    // The sphere must be outside the frustum.
    return false;
}


//-----------------------------------------------------------------------------
// Exact axis aligned box vs frustum test.  Constructs an oriented box and uses
// the oriented box vs frustum test.
//
// box: axis-aligned box to test (Center and Extents are read).
// Returns whatever the oriented-box overload returns for the equivalent box.
//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingFrustum::Intersects(const BoundingBox& box) const noexcept
{
    // Make the axis aligned box oriented and do an OBB vs frustum test.
    // (0, 0, 0, 1) is the identity quaternion, i.e. no rotation.
    BoundingOrientedBox obox(box.Center, box.Extents, XMFLOAT4(0.f, 0.f, 0.f, 1.f));
    return Intersects(obox);
}


//-----------------------------------------------------------------------------
// Exact oriented box vs frustum test.
//
// Separating-axis style test carried out in the frustum's local space:
//   1. the 6 frustum planes,
//   2. the 3 box axes,
//   3. the 3 * 6 cross products of box axes and frustum edge directions.
//
// box: oriented box to test (Center, Extents and Orientation are read).
// Returns false as soon as a separating axis is found, true otherwise.
//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingFrustum::Intersects(const BoundingOrientedBox& box) const noexcept
{
    // Select masks used to pack three separate dot products into the
    // x, y and z lanes of one vector.
    static const XMVECTORU32 SelectY = { { { XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0 } } };
    static const XMVECTORU32 SelectZ = { { { XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0 } } };

    XMVECTOR Zero = XMVectorZero();

    // Build the frustum planes.
    // These are left un-normalized: Dist and Radius below are both computed
    // against the same Planes[i], so they are compared on the same scale.
    XMVECTOR Planes[6];
    Planes[0] = XMVectorSet(0.0f, 0.0f, -1.0f, Near);
    Planes[1] = XMVectorSet(0.0f, 0.0f, 1.0f, -Far);
    Planes[2] = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f);
    Planes[3] = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f);
    Planes[4] = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f);
    Planes[5] = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f);

    // Load origin and orientation of the frustum.
    XMVECTOR vOrigin = XMLoadFloat3(&Origin);
    XMVECTOR FrustumOrientation = XMLoadFloat4(&Orientation);

    assert(DirectX::Internal::XMQuaternionIsUnit(FrustumOrientation));

    // Load the box.
    XMVECTOR Center = XMLoadFloat3(&box.Center);
    XMVECTOR Extents = XMLoadFloat3(&box.Extents);
    XMVECTOR BoxOrientation = XMLoadFloat4(&box.Orientation);

    assert(DirectX::Internal::XMQuaternionIsUnit(BoxOrientation));

    // Transform the oriented box into the space of the frustum in order to
    // minimize the number of transforms we have to do.
    Center = XMVector3InverseRotate(XMVectorSubtract(Center, vOrigin), FrustumOrientation);
    BoxOrientation = XMQuaternionMultiply(BoxOrientation, XMQuaternionConjugate(FrustumOrientation));

    // Set w of the center to one so we can dot4 with the plane.
    Center = XMVectorInsert<0, 0, 0, 0, 1>(Center, XMVectorSplatOne());

    // Build the 3x3 rotation matrix that defines the box axes.
    XMMATRIX R = XMMatrixRotationQuaternion(BoxOrientation);

    // Check against each plane of the frustum.
    XMVECTOR Outside = XMVectorFalseInt();
    XMVECTOR InsideAll = XMVectorTrueInt();
    XMVECTOR CenterInsideAll = XMVectorTrueInt();

    for (size_t i = 0; i < 6; ++i)
    {
        // Compute the distance to the center of the box.
        XMVECTOR Dist = XMVector4Dot(Center, Planes[i]);

        // Project the axes of the box onto the normal of the plane.  Half the
        // length of the projection (sometime called the "radius") is equal to
        // h(u) * abs(n dot b(u)) + h(v) * abs(n dot b(v)) + h(w) * abs(n dot b(w))
        // where h(i) are extents of the box, n is the plane normal, and b(i) are the
        // axes of the box.
        XMVECTOR Radius = XMVector3Dot(Planes[i], R.r[0]);
        Radius = XMVectorSelect(Radius, XMVector3Dot(Planes[i], R.r[1]), SelectY);
        Radius = XMVectorSelect(Radius, XMVector3Dot(Planes[i], R.r[2]), SelectZ);
        Radius = XMVector3Dot(Extents, XMVectorAbs(Radius));

        // Outside the plane?
        Outside = XMVectorOrInt(Outside, XMVectorGreater(Dist, Radius));

        // Fully inside the plane?
        InsideAll = XMVectorAndInt(InsideAll, XMVectorLessOrEqual(Dist, XMVectorNegate(Radius)));

        // Check if the center is inside the plane.
        CenterInsideAll = XMVectorAndInt(CenterInsideAll, XMVectorLessOrEqual(Dist, Zero));
    }

    // If the box is outside any of the planes it is outside.
    if (XMVector4EqualInt(Outside, XMVectorTrueInt()))
        return false;

    // If the box is inside all planes it is fully inside.
    if (XMVector4EqualInt(InsideAll, XMVectorTrueInt()))
        return true;

    // If the center of the box is inside all planes and the box intersects
    // one or more planes then it must intersect.
    if (XMVector4EqualInt(CenterInsideAll, XMVectorTrueInt()))
        return true;

    // Build the corners of the frustum.
    // (Frustum-local space: slope direction vectors scaled by Near / Far.)
    XMVECTOR vRightTop = XMVectorSet(RightSlope, TopSlope, 1.0f, 0.0f);
    XMVECTOR vRightBottom = XMVectorSet(RightSlope, BottomSlope, 1.0f, 0.0f);
    XMVECTOR vLeftTop = XMVectorSet(LeftSlope, TopSlope, 1.0f, 0.0f);
    XMVECTOR vLeftBottom = XMVectorSet(LeftSlope, BottomSlope, 1.0f, 0.0f);
    XMVECTOR vNear = XMVectorReplicatePtr(&Near);
    XMVECTOR vFar = XMVectorReplicatePtr(&Far);

    XMVECTOR Corners[CORNER_COUNT];
    Corners[0] = XMVectorMultiply(vRightTop, vNear);
    Corners[1] = XMVectorMultiply(vRightBottom, vNear);
    Corners[2] = XMVectorMultiply(vLeftTop, vNear);
    Corners[3] = XMVectorMultiply(vLeftBottom, vNear);
    Corners[4] = XMVectorMultiply(vRightTop, vFar);
    Corners[5] = XMVectorMultiply(vRightBottom, vFar);
    Corners[6] = XMVectorMultiply(vLeftTop, vFar);
    Corners[7] = XMVectorMultiply(vLeftBottom, vFar);

    // Test against box axes (3)
    {
        // Find the min/max values of the projection of the frustum onto each axis.
        // Lane x/y/z holds the projection onto box axis 0/1/2 respectively.
        XMVECTOR FrustumMin, FrustumMax;

        FrustumMin = XMVector3Dot(Corners[0], R.r[0]);
        FrustumMin = XMVectorSelect(FrustumMin, XMVector3Dot(Corners[0], R.r[1]), SelectY);
        FrustumMin = XMVectorSelect(FrustumMin, XMVector3Dot(Corners[0], R.r[2]), SelectZ);
        FrustumMax = FrustumMin;

        // NOTE(review): the bound is BoundingOrientedBox::CORNER_COUNT although the
        // array indexed is the frustum's Corners[CORNER_COUNT]; this relies on the
        // two constants being equal - confirm against the class declarations.
        for (size_t i = 1; i < BoundingOrientedBox::CORNER_COUNT; ++i)
        {
            XMVECTOR Temp = XMVector3Dot(Corners[i], R.r[0]);
            Temp = XMVectorSelect(Temp, XMVector3Dot(Corners[i], R.r[1]), SelectY);
            Temp = XMVectorSelect(Temp, XMVector3Dot(Corners[i], R.r[2]), SelectZ);

            FrustumMin = XMVectorMin(FrustumMin, Temp);
            FrustumMax = XMVectorMax(FrustumMax, Temp);
        }

        // Project the center of the box onto the axes.
        XMVECTOR BoxDist = XMVector3Dot(Center, R.r[0]);
        BoxDist = XMVectorSelect(BoxDist, XMVector3Dot(Center, R.r[1]), SelectY);
        BoxDist = XMVectorSelect(BoxDist, XMVector3Dot(Center, R.r[2]), SelectZ);

        // The projection of the box onto the axis is just its Center and Extents.
        // if (min > box_max || max < box_min) reject;
        XMVECTOR Result = XMVectorOrInt(XMVectorGreater(FrustumMin, XMVectorAdd(BoxDist, Extents)),
            XMVectorLess(FrustumMax, XMVectorSubtract(BoxDist, Extents)));

        if (DirectX::Internal::XMVector3AnyTrue(Result))
            return false;
    }

    // Test against edge/edge axes (3*6).
    // Frustum edge directions: the four side edges plus the two directions
    // spanned by the near/far rectangles.
    XMVECTOR FrustumEdgeAxis[6];

    FrustumEdgeAxis[0] = vRightTop;
    FrustumEdgeAxis[1] = vRightBottom;
    FrustumEdgeAxis[2] = vLeftTop;
    FrustumEdgeAxis[3] = vLeftBottom;
    FrustumEdgeAxis[4] = XMVectorSubtract(vRightTop, vLeftTop);
    FrustumEdgeAxis[5] = XMVectorSubtract(vLeftBottom, vLeftTop);

    // Outside is reused as the accumulator here; it did not trip the early-out
    // after the plane loop above.
    for (size_t i = 0; i < 3; ++i)
    {
        for (size_t j = 0; j < 6; j++)
        {
            // Compute the axis we are going to test.
            XMVECTOR Axis = XMVector3Cross(R.r[i], FrustumEdgeAxis[j]);

            // Find the min/max values of the projection of the frustum onto the axis.
            XMVECTOR FrustumMin, FrustumMax;

            FrustumMin = FrustumMax = XMVector3Dot(Axis, Corners[0]);

            for (size_t k = 1; k < CORNER_COUNT; k++)
            {
                XMVECTOR Temp = XMVector3Dot(Axis, Corners[k]);
                FrustumMin = XMVectorMin(FrustumMin, Temp);
                FrustumMax = XMVectorMax(FrustumMax, Temp);
            }

            // Project the center of the box onto the axis.
            XMVECTOR Dist = XMVector3Dot(Center, Axis);

            // Project the axes of the box onto the axis to find the "radius" of the box.
            XMVECTOR Radius = XMVector3Dot(Axis, R.r[0]);
            Radius = XMVectorSelect(Radius, XMVector3Dot(Axis, R.r[1]), SelectY);
            Radius = XMVectorSelect(Radius, XMVector3Dot(Axis, R.r[2]), SelectZ);
            Radius = XMVector3Dot(Extents, XMVectorAbs(Radius));

            // if (center > max + radius || center < min - radius) reject;
            Outside = XMVectorOrInt(Outside, XMVectorGreater(Dist, XMVectorAdd(FrustumMax, Radius)));
            Outside = XMVectorOrInt(Outside, XMVectorLess(Dist, XMVectorSubtract(FrustumMin, Radius)));
        }
    }

    if (XMVector4EqualInt(Outside, XMVectorTrueInt()))
        return false;

    // If we did not find a separating plane then the box must intersect the frustum.
    return true;
}


//-----------------------------------------------------------------------------
// Exact frustum vs frustum test.
//
// Naming used in the body: "B" is this frustum, "A" is the argument fr.
// A is transformed into B's local space, then the test looks for a separating
// axis among B's 6 plane normals, A's 6 plane normals and the 6 * 6 cross
// products of their edge directions.
//
// fr: the other frustum.
// Returns false as soon as a separating axis is found, true otherwise.
//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingFrustum::Intersects(const BoundingFrustum& fr) const noexcept
{
    // Load origin and orientation of frustum B.
    XMVECTOR OriginB = XMLoadFloat3(&Origin);
    XMVECTOR OrientationB = XMLoadFloat4(&Orientation);

    assert(DirectX::Internal::XMQuaternionIsUnit(OrientationB));

    // Build the planes of frustum B.
    // Normals and distances are kept in separate arrays (AxisB / PlaneDistB).
    XMVECTOR AxisB[6];
    AxisB[0] = XMVectorSet(0.0f, 0.0f, -1.0f, 0.0f);
    AxisB[1] = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f);
    AxisB[2] = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f);
    AxisB[3] = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f);
    AxisB[4] = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f);
    AxisB[5] = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f);

    XMVECTOR PlaneDistB[6];
    PlaneDistB[0] = XMVectorNegate(XMVectorReplicatePtr(&Near));
    PlaneDistB[1] = XMVectorReplicatePtr(&Far);
    PlaneDistB[2] = XMVectorZero();
    PlaneDistB[3] = XMVectorZero();
    PlaneDistB[4] = XMVectorZero();
    PlaneDistB[5] = XMVectorZero();

    // Load origin and orientation of frustum A.
    XMVECTOR OriginA = XMLoadFloat3(&fr.Origin);
    XMVECTOR OrientationA = XMLoadFloat4(&fr.Orientation);

    assert(DirectX::Internal::XMQuaternionIsUnit(OrientationA));

    // Transform frustum A into the space of the frustum B in order to
    // minimize the number of transforms we have to do.
    OriginA = XMVector3InverseRotate(XMVectorSubtract(OriginA, OriginB), OrientationB);
    OrientationA = XMQuaternionMultiply(OrientationA, XMQuaternionConjugate(OrientationB));

    // Build the corners of frustum A (in the local space of B).
    XMVECTOR RightTopA = XMVectorSet(fr.RightSlope, fr.TopSlope, 1.0f, 0.0f);
    XMVECTOR RightBottomA = XMVectorSet(fr.RightSlope, fr.BottomSlope, 1.0f, 0.0f);
    XMVECTOR LeftTopA = XMVectorSet(fr.LeftSlope, fr.TopSlope, 1.0f, 0.0f);
    XMVECTOR LeftBottomA = XMVectorSet(fr.LeftSlope, fr.BottomSlope, 1.0f, 0.0f);
    XMVECTOR NearA = XMVectorReplicatePtr(&fr.Near);
    XMVECTOR FarA = XMVectorReplicatePtr(&fr.Far);

    RightTopA = XMVector3Rotate(RightTopA, OrientationA);
    RightBottomA = XMVector3Rotate(RightBottomA, OrientationA);
    LeftTopA = XMVector3Rotate(LeftTopA, OrientationA);
    LeftBottomA = XMVector3Rotate(LeftBottomA, OrientationA);

    // corner = rotated direction * near-or-far distance + A's origin.
    XMVECTOR CornersA[CORNER_COUNT];
    CornersA[0] = XMVectorMultiplyAdd(RightTopA, NearA, OriginA);
    CornersA[1] = XMVectorMultiplyAdd(RightBottomA, NearA, OriginA);
    CornersA[2] = XMVectorMultiplyAdd(LeftTopA, NearA, OriginA);
    CornersA[3] = XMVectorMultiplyAdd(LeftBottomA, NearA, OriginA);
    CornersA[4] = XMVectorMultiplyAdd(RightTopA, FarA, OriginA);
    CornersA[5] = XMVectorMultiplyAdd(RightBottomA, FarA, OriginA);
    CornersA[6] = XMVectorMultiplyAdd(LeftTopA, FarA, OriginA);
    CornersA[7] = XMVectorMultiplyAdd(LeftBottomA, FarA, OriginA);

    // Check frustum A against each plane of frustum B.
    XMVECTOR Outside = XMVectorFalseInt();
    XMVECTOR InsideAll = XMVectorTrueInt();

    for (size_t i = 0; i < 6; ++i)
    {
        // Find the min/max projection of the frustum onto the plane normal.
        XMVECTOR Min, Max;

        Min = Max = XMVector3Dot(AxisB[i], CornersA[0]);

        for (size_t j = 1; j < CORNER_COUNT; j++)
        {
            XMVECTOR Temp = XMVector3Dot(AxisB[i], CornersA[j]);
            Min = XMVectorMin(Min, Temp);
            Max = XMVectorMax(Max, Temp);
        }

        // Outside the plane?
        Outside = XMVectorOrInt(Outside, XMVectorGreater(Min, PlaneDistB[i]));

        // Fully inside the plane?
        InsideAll = XMVectorAndInt(InsideAll, XMVectorLessOrEqual(Max, PlaneDistB[i]));
    }

    // If the frustum A is outside any of the planes of frustum B it is outside.
    if (XMVector4EqualInt(Outside, XMVectorTrueInt()))
        return false;

    // If frustum A is inside all planes of frustum B it is fully inside.
    if (XMVector4EqualInt(InsideAll, XMVectorTrueInt()))
        return true;

    // Build the corners of frustum B.
    // (B is the reference space, so no rotation or origin offset is applied.)
    XMVECTOR RightTopB = XMVectorSet(RightSlope, TopSlope, 1.0f, 0.0f);
    XMVECTOR RightBottomB = XMVectorSet(RightSlope, BottomSlope, 1.0f, 0.0f);
    XMVECTOR LeftTopB = XMVectorSet(LeftSlope, TopSlope, 1.0f, 0.0f);
    XMVECTOR LeftBottomB = XMVectorSet(LeftSlope, BottomSlope, 1.0f, 0.0f);
    XMVECTOR NearB = XMVectorReplicatePtr(&Near);
    XMVECTOR FarB = XMVectorReplicatePtr(&Far);

    XMVECTOR CornersB[BoundingFrustum::CORNER_COUNT];
    CornersB[0] = XMVectorMultiply(RightTopB, NearB);
    CornersB[1] = XMVectorMultiply(RightBottomB, NearB);
    CornersB[2] = XMVectorMultiply(LeftTopB, NearB);
    CornersB[3] = XMVectorMultiply(LeftBottomB, NearB);
    CornersB[4] = XMVectorMultiply(RightTopB, FarB);
    CornersB[5] = XMVectorMultiply(RightBottomB, FarB);
    CornersB[6] = XMVectorMultiply(LeftTopB, FarB);
    CornersB[7] = XMVectorMultiply(LeftBottomB, FarB);

    // Build the planes of frustum A (in the local space of B).
    XMVECTOR AxisA[6];
    XMVECTOR PlaneDistA[6];

    // NOTE: the value written to AxisA[1] here is never read; it is replaced
    // below by the negation of the rotated AxisA[0].
    AxisA[0] = XMVectorSet(0.0f, 0.0f, -1.0f, 0.0f);
    AxisA[1] = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f);
    AxisA[2] = XMVectorSet(1.0f, 0.0f, -fr.RightSlope, 0.0f);
    AxisA[3] = XMVectorSet(-1.0f, 0.0f, fr.LeftSlope, 0.0f);
    AxisA[4] = XMVectorSet(0.0f, 1.0f, -fr.TopSlope, 0.0f);
    AxisA[5] = XMVectorSet(0.0f, -1.0f, fr.BottomSlope, 0.0f);

    AxisA[0] = XMVector3Rotate(AxisA[0], OrientationA);
    AxisA[1] = XMVectorNegate(AxisA[0]);
    AxisA[2] = XMVector3Rotate(AxisA[2], OrientationA);
    AxisA[3] = XMVector3Rotate(AxisA[3], OrientationA);
    AxisA[4] = XMVector3Rotate(AxisA[4], OrientationA);
    AxisA[5] = XMVector3Rotate(AxisA[5], OrientationA);

    PlaneDistA[0] = XMVector3Dot(AxisA[0], CornersA[0]);  // Re-use corner on near plane.
    PlaneDistA[1] = XMVector3Dot(AxisA[1], CornersA[4]);  // Re-use corner on far plane.
    PlaneDistA[2] = XMVector3Dot(AxisA[2], OriginA);
    PlaneDistA[3] = XMVector3Dot(AxisA[3], OriginA);
    PlaneDistA[4] = XMVector3Dot(AxisA[4], OriginA);
    PlaneDistA[5] = XMVector3Dot(AxisA[5], OriginA);

    // Check each axis of frustum A for a separating plane (6).
    for (size_t i = 0; i < 6; ++i)
    {
        // Find the minimum projection of the frustum onto the plane normal.
        XMVECTOR Min;

        Min = XMVector3Dot(AxisA[i], CornersB[0]);

        for (size_t j = 1; j < CORNER_COUNT; j++)
        {
            XMVECTOR Temp = XMVector3Dot(AxisA[i], CornersB[j]);
            Min = XMVectorMin(Min, Temp);
        }

        // Outside the plane?
        Outside = XMVectorOrInt(Outside, XMVectorGreater(Min, PlaneDistA[i]));
    }

    // If the frustum B is outside any of the planes of frustum A it is outside.
    if (XMVector4EqualInt(Outside, XMVectorTrueInt()))
        return false;

    // Check edge/edge axes (6 * 6).
    XMVECTOR FrustumEdgeAxisA[6];
    FrustumEdgeAxisA[0] = RightTopA;
    FrustumEdgeAxisA[1] = RightBottomA;
    FrustumEdgeAxisA[2] = LeftTopA;
    FrustumEdgeAxisA[3] = LeftBottomA;
    FrustumEdgeAxisA[4] = XMVectorSubtract(RightTopA, LeftTopA);
    FrustumEdgeAxisA[5] = XMVectorSubtract(LeftBottomA, LeftTopA);

    XMVECTOR FrustumEdgeAxisB[6];
    FrustumEdgeAxisB[0] = RightTopB;
    FrustumEdgeAxisB[1] = RightBottomB;
    FrustumEdgeAxisB[2] = LeftTopB;
    FrustumEdgeAxisB[3] = LeftBottomB;
    FrustumEdgeAxisB[4] = XMVectorSubtract(RightTopB, LeftTopB);
    FrustumEdgeAxisB[5] = XMVectorSubtract(LeftBottomB, LeftTopB);

    for (size_t i = 0; i < 6; ++i)
    {
        for (size_t j = 0; j < 6; j++)
        {
            // Compute the axis we are going to test.
            XMVECTOR Axis = XMVector3Cross(FrustumEdgeAxisA[i], FrustumEdgeAxisB[j]);

            // Find the min/max values of the projection of both frustums onto the axis.
            XMVECTOR MinA, MaxA;
            XMVECTOR MinB, MaxB;

            MinA = MaxA = XMVector3Dot(Axis, CornersA[0]);
            MinB = MaxB = XMVector3Dot(Axis, CornersB[0]);

            for (size_t k = 1; k < CORNER_COUNT; k++)
            {
                XMVECTOR TempA = XMVector3Dot(Axis, CornersA[k]);
                MinA = XMVectorMin(MinA, TempA);
                MaxA = XMVectorMax(MaxA, TempA);

                XMVECTOR TempB = XMVector3Dot(Axis, CornersB[k]);
                MinB = XMVectorMin(MinB, TempB);
                MaxB = XMVectorMax(MaxB, TempB);
            }

            // if (MinA > MaxB || MinB > MaxA) reject
            Outside = XMVectorOrInt(Outside, XMVectorGreater(MinA, MaxB));
            Outside = XMVectorOrInt(Outside, XMVectorGreater(MinB, MaxA));
        }
    }

    // If there is a separating plane, then the frustums do not intersect.
    if (XMVector4EqualInt(Outside, XMVectorTrueInt()))
        return false;

    // If we did not find a separating plane then the frustums intersect.
    return true;
}


//-----------------------------------------------------------------------------
// Triangle vs frustum test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool XM_CALLCONV BoundingFrustum::Intersects(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept
+{
+    // Triangle vs. frustum separating-axis test, run entirely in frustum-local space. Build the frustum planes (NOTE: D is negated from the usual).
+    XMVECTOR Planes[6];
+    Planes[0] = XMVectorSet(0.0f, 0.0f, -1.0f, -Near);
+    Planes[1] = XMVectorSet(0.0f, 0.0f, 1.0f, Far);
+    Planes[2] = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f);
+    Planes[3] = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f);
+    Planes[4] = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f);
+    Planes[5] = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f);
+
+    // Load origin and orientation of the frustum.
+    XMVECTOR vOrigin = XMLoadFloat3(&Origin);
+    XMVECTOR vOrientation = XMLoadFloat4(&Orientation);
+
+    assert(DirectX::Internal::XMQuaternionIsUnit(vOrientation));
+
+    // Transform triangle into the local space of frustum.
+    XMVECTOR TV0 = XMVector3InverseRotate(XMVectorSubtract(V0, vOrigin), vOrientation);
+    XMVECTOR TV1 = XMVector3InverseRotate(XMVectorSubtract(V1, vOrigin), vOrientation);
+    XMVECTOR TV2 = XMVector3InverseRotate(XMVectorSubtract(V2, vOrigin), vOrientation);
+
+    // Test each vertex of the triangle against the frustum planes.
+    XMVECTOR Outside = XMVectorFalseInt();
+    XMVECTOR InsideAll = XMVectorTrueInt();
+
+    for (size_t i = 0; i < 6; ++i)
+    {
+        XMVECTOR Dist0 = XMVector3Dot(TV0, Planes[i]);
+        XMVECTOR Dist1 = XMVector3Dot(TV1, Planes[i]);
+        XMVECTOR Dist2 = XMVector3Dot(TV2, Planes[i]);
+
+        XMVECTOR MinDist = XMVectorMin(Dist0, Dist1);
+        MinDist = XMVectorMin(MinDist, Dist2);
+        XMVECTOR MaxDist = XMVectorMax(Dist0, Dist1);
+        MaxDist = XMVectorMax(MaxDist, Dist2);
+
+        XMVECTOR PlaneDist = XMVectorSplatW(Planes[i]);
+
+        // Outside the plane?
+        Outside = XMVectorOrInt(Outside, XMVectorGreater(MinDist, PlaneDist));
+
+        // Fully inside the plane?
+        InsideAll = XMVectorAndInt(InsideAll, XMVectorLessOrEqual(MaxDist, PlaneDist));
+    }
+
+    // If the triangle is outside any of the planes it is outside.
+    if (XMVector4EqualInt(Outside, XMVectorTrueInt()))
+        return false;
+
+    // If the triangle is inside all planes it is fully inside.
+    if (XMVector4EqualInt(InsideAll, XMVectorTrueInt()))
+        return true;
+
+    // Build the corners of the frustum (frustum-local space: no rotation or origin offset is applied).
+    XMVECTOR vRightTop = XMVectorSet(RightSlope, TopSlope, 1.0f, 0.0f);
+    XMVECTOR vRightBottom = XMVectorSet(RightSlope, BottomSlope, 1.0f, 0.0f);
+    XMVECTOR vLeftTop = XMVectorSet(LeftSlope, TopSlope, 1.0f, 0.0f);
+    XMVECTOR vLeftBottom = XMVectorSet(LeftSlope, BottomSlope, 1.0f, 0.0f);
+    XMVECTOR vNear = XMVectorReplicatePtr(&Near);
+    XMVECTOR vFar = XMVectorReplicatePtr(&Far);
+
+    XMVECTOR Corners[CORNER_COUNT];
+    Corners[0] = XMVectorMultiply(vRightTop, vNear);
+    Corners[1] = XMVectorMultiply(vRightBottom, vNear);
+    Corners[2] = XMVectorMultiply(vLeftTop, vNear);
+    Corners[3] = XMVectorMultiply(vLeftBottom, vNear);
+    Corners[4] = XMVectorMultiply(vRightTop, vFar);
+    Corners[5] = XMVectorMultiply(vRightBottom, vFar);
+    Corners[6] = XMVectorMultiply(vLeftTop, vFar);
+    Corners[7] = XMVectorMultiply(vLeftBottom, vFar);
+
+    // Test the plane of the triangle. Use the transformed vertices (TV*): Corners[] are frustum-local, so both shapes must be in that space.
+    XMVECTOR Normal = XMVector3Cross(XMVectorSubtract(TV1, TV0), XMVectorSubtract(TV2, TV0));
+    XMVECTOR Dist = XMVector3Dot(Normal, TV0);
+
+    XMVECTOR MinDist, MaxDist;
+    MinDist = MaxDist = XMVector3Dot(Corners[0], Normal);
+    for (size_t i = 1; i < CORNER_COUNT; ++i)
+    {
+        XMVECTOR Temp = XMVector3Dot(Corners[i], Normal);
+        MinDist = XMVectorMin(MinDist, Temp);
+        MaxDist = XMVectorMax(MaxDist, Temp);
+    }
+
+    Outside = XMVectorOrInt(XMVectorGreater(MinDist, Dist), XMVectorLess(MaxDist, Dist));
+    if (XMVector4EqualInt(Outside, XMVectorTrueInt()))
+        return false;
+
+    // Check the edge/edge axes (3*6), again with the frustum-local triangle.
+    XMVECTOR TriangleEdgeAxis[3];
+    TriangleEdgeAxis[0] = XMVectorSubtract(TV1, TV0);
+    TriangleEdgeAxis[1] = XMVectorSubtract(TV2, TV1);
+    TriangleEdgeAxis[2] = XMVectorSubtract(TV0, TV2);
+
+    XMVECTOR FrustumEdgeAxis[6];
+    FrustumEdgeAxis[0] = vRightTop;
+    FrustumEdgeAxis[1] = vRightBottom;
+    FrustumEdgeAxis[2] = vLeftTop;
+    FrustumEdgeAxis[3] = vLeftBottom;
+    FrustumEdgeAxis[4] = XMVectorSubtract(vRightTop, vLeftTop);
+    FrustumEdgeAxis[5] = XMVectorSubtract(vLeftBottom, vLeftTop);
+
+    for (size_t i = 0; i < 3; ++i)
+    {
+        for (size_t j = 0; j < 6; j++)
+        {
+            // Compute the axis we are going to test.
+            XMVECTOR Axis = XMVector3Cross(TriangleEdgeAxis[i], FrustumEdgeAxis[j]);
+
+            // Find the min/max of the projection of the triangle onto the axis.
+            XMVECTOR MinA, MaxA;
+
+            XMVECTOR Dist0 = XMVector3Dot(TV0, Axis);
+            XMVECTOR Dist1 = XMVector3Dot(TV1, Axis);
+            XMVECTOR Dist2 = XMVector3Dot(TV2, Axis);
+
+            MinA = XMVectorMin(Dist0, Dist1);
+            MinA = XMVectorMin(MinA, Dist2);
+            MaxA = XMVectorMax(Dist0, Dist1);
+            MaxA = XMVectorMax(MaxA, Dist2);
+
+            // Find the min/max of the projection of the frustum onto the axis.
+            XMVECTOR MinB, MaxB;
+
+            MinB = MaxB = XMVector3Dot(Axis, Corners[0]);
+
+            for (size_t k = 1; k < CORNER_COUNT; k++)
+            {
+                XMVECTOR Temp = XMVector3Dot(Axis, Corners[k]);
+                MinB = XMVectorMin(MinB, Temp);
+                MaxB = XMVectorMax(MaxB, Temp);
+            }
+
+            // if (MinA > MaxB || MinB > MaxA) reject;
+            Outside = XMVectorOrInt(Outside, XMVectorGreater(MinA, MaxB));
+            Outside = XMVectorOrInt(Outside, XMVectorGreater(MinB, MaxA));
+        }
+    }
+
+    if (XMVector4EqualInt(Outside, XMVectorTrueInt()))
+        return false;
+
+    // If we did not find a separating plane then the triangle must intersect the frustum.
+    return true;
+}
+
+
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PlaneIntersectionType XM_CALLCONV BoundingFrustum::Intersects(FXMVECTOR Plane) const noexcept
+{
+    assert(DirectX::Internal::XMPlaneIsUnit(Plane));
+
+    // Load origin and orientation of the frustum.
+    XMVECTOR vOrigin = XMLoadFloat3(&Origin);
+    XMVECTOR vOrientation = XMLoadFloat4(&Orientation);
+
+    assert(DirectX::Internal::XMQuaternionIsUnit(vOrientation));
+
+    // Set w of the origin to one so we can dot4 with a plane.
+    vOrigin = XMVectorInsert<0, 0, 0, 0, 1>(vOrigin, XMVectorSplatOne());
+
+    // Build the corners of the frustum (in world space).
+    XMVECTOR RightTop = XMVectorSet(RightSlope, TopSlope, 1.0f, 0.0f);
+    XMVECTOR RightBottom = XMVectorSet(RightSlope, BottomSlope, 1.0f, 0.0f);
+    XMVECTOR LeftTop = XMVectorSet(LeftSlope, TopSlope, 1.0f, 0.0f);
+    XMVECTOR LeftBottom = XMVectorSet(LeftSlope, BottomSlope, 1.0f, 0.0f);
+    XMVECTOR vNear = XMVectorReplicatePtr(&Near);
+    XMVECTOR vFar = XMVectorReplicatePtr(&Far);
+
+    RightTop = XMVector3Rotate(RightTop, vOrientation);
+    RightBottom = XMVector3Rotate(RightBottom, vOrientation);
+    LeftTop = XMVector3Rotate(LeftTop, vOrientation);
+    LeftBottom = XMVector3Rotate(LeftBottom, vOrientation);
+
+    XMVECTOR Corners0 = XMVectorMultiplyAdd(RightTop, vNear, vOrigin);
+    XMVECTOR Corners1 = XMVectorMultiplyAdd(RightBottom, vNear, vOrigin);
+    XMVECTOR Corners2 = XMVectorMultiplyAdd(LeftTop, vNear, vOrigin);
+    XMVECTOR Corners3 = XMVectorMultiplyAdd(LeftBottom, vNear, vOrigin);
+    XMVECTOR Corners4 = XMVectorMultiplyAdd(RightTop, vFar, vOrigin);
+    XMVECTOR Corners5 = XMVectorMultiplyAdd(RightBottom, vFar, vOrigin);
+    XMVECTOR Corners6 = XMVectorMultiplyAdd(LeftTop, vFar, vOrigin);
+    XMVECTOR Corners7 = XMVectorMultiplyAdd(LeftBottom, vFar, vOrigin);
+
+    XMVECTOR Outside, Inside;
+    DirectX::Internal::FastIntersectFrustumPlane(Corners0, Corners1, Corners2, Corners3,
+        Corners4, Corners5, Corners6, Corners7,
+        Plane, Outside, Inside);
+
+    // If the frustum is outside any plane it is outside.
+    if (XMVector4EqualInt(Outside, XMVectorTrueInt()))
+        return FRONT;
+
+    // If the frustum is inside all planes it is inside.
+    if (XMVector4EqualInt(Inside, XMVectorTrueInt()))
+        return BACK;
+
+    // The frustum is not inside all planes or outside a plane it intersects.
+    return INTERSECTING;
+}
+
+
+//-----------------------------------------------------------------------------
+// Ray vs. frustum test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool XM_CALLCONV BoundingFrustum::Intersects(FXMVECTOR rayOrigin, FXMVECTOR Direction, float& Dist) const noexcept
+{
+    // If ray starts inside the frustum, return a distance of 0 for the hit
+    if (Contains(rayOrigin) == CONTAINS)
+    {
+        Dist = 0.0f;
+        return true;
+    }
+
+    // Build the frustum planes.
+    XMVECTOR Planes[6];
+    Planes[0] = XMVectorSet(0.0f, 0.0f, -1.0f, Near);
+    Planes[1] = XMVectorSet(0.0f, 0.0f, 1.0f, -Far);
+    Planes[2] = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f);
+    Planes[3] = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f);
+    Planes[4] = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f);
+    Planes[5] = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f);
+
+    // Load origin and orientation of the frustum.
+    XMVECTOR frOrigin = XMLoadFloat3(&Origin);
+    XMVECTOR frOrientation = XMLoadFloat4(&Orientation);
+
+    // This algorithm based on "Fast Ray-Convex Polyhedron Intersection," in James Arvo, ed., Graphics Gems II pp. 247-250
+    float tnear = -FLT_MAX;
+    float tfar = FLT_MAX;
+
+    for (size_t i = 0; i < 6; ++i)
+    {
+        XMVECTOR Plane = DirectX::Internal::XMPlaneTransform(Planes[i], frOrientation, frOrigin);
+        Plane = XMPlaneNormalize(Plane);
+
+        XMVECTOR AxisDotOrigin = XMPlaneDotCoord(Plane, rayOrigin);
+        XMVECTOR AxisDotDirection = XMVector3Dot(Plane, Direction);
+
+        if (XMVector3LessOrEqual(XMVectorAbs(AxisDotDirection), g_RayEpsilon))
+        {
+            // Ray is parallel to plane - check if ray origin is inside plane's half-space.
+            if (XMVector3Greater(AxisDotOrigin, g_XMZero))
+            {
+                // Ray origin is outside half-space.
+                Dist = 0.f;
+                return false;
+            }
+        }
+        else
+        {
+            // Ray not parallel - get distance to plane.
+            float vd = XMVectorGetX(AxisDotDirection);
+            float vn = XMVectorGetX(AxisDotOrigin);
+            float t = -vn / vd;
+            if (vd < 0.0f)
+            {
+                // Front face - T is a near point.
+                if (t > tfar)
+                {
+                    Dist = 0.f;
+                    return false;
+                }
+                if (t > tnear)
+                {
+                    // Hit near face.
+                    tnear = t;
+                }
+            }
+            else
+            {
+                // back face - T is far point.
+                if (t < tnear)
+                {
+                    Dist = 0.f;
+                    return false;
+                }
+                if (t < tfar)
+                {
+                    // Hit far face.
+                    tfar = t;
+                }
+            }
+        }
+    }
+
+    // Survived all tests.
+    // Note: if ray originates on polyhedron, may want to change 0.0f to some
+    // epsilon to avoid intersecting the originating face.
+    float distance = (tnear >= 0.0f) ? tnear : tfar;
+    if (distance >= 0.0f)
+    {
+        Dist = distance;
+        return true;
+    }
+
+    Dist = 0.f;
+    return false;
+}
+
+
+//-----------------------------------------------------------------------------
+// Test a frustum vs 6 planes (typically forming another frustum).
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType XM_CALLCONV BoundingFrustum::ContainedBy(
+    FXMVECTOR Plane0, FXMVECTOR Plane1, FXMVECTOR Plane2,
+    GXMVECTOR Plane3,
+    HXMVECTOR Plane4, HXMVECTOR Plane5) const noexcept
+{
+    // Load origin and orientation of the frustum.
+    XMVECTOR vOrigin = XMLoadFloat3(&Origin);
+    XMVECTOR vOrientation = XMLoadFloat4(&Orientation);
+
+    assert(DirectX::Internal::XMQuaternionIsUnit(vOrientation));
+
+    // Set w of the origin to one so we can dot4 with a plane.
+    vOrigin = XMVectorInsert<0, 0, 0, 0, 1>(vOrigin, XMVectorSplatOne());
+
+    // Build the corners of the frustum (in world space).
+    XMVECTOR RightTop = XMVectorSet(RightSlope, TopSlope, 1.0f, 0.0f);
+    XMVECTOR RightBottom = XMVectorSet(RightSlope, BottomSlope, 1.0f, 0.0f);
+    XMVECTOR LeftTop = XMVectorSet(LeftSlope, TopSlope, 1.0f, 0.0f);
+    XMVECTOR LeftBottom = XMVectorSet(LeftSlope, BottomSlope, 1.0f, 0.0f);
+    XMVECTOR vNear = XMVectorReplicatePtr(&Near);
+    XMVECTOR vFar = XMVectorReplicatePtr(&Far);
+
+    RightTop = XMVector3Rotate(RightTop, vOrientation);
+    RightBottom = XMVector3Rotate(RightBottom, vOrientation);
+    LeftTop = XMVector3Rotate(LeftTop, vOrientation);
+    LeftBottom = XMVector3Rotate(LeftBottom, vOrientation);
+
+    XMVECTOR Corners0 = XMVectorMultiplyAdd(RightTop, vNear, vOrigin);
+    XMVECTOR Corners1 = XMVectorMultiplyAdd(RightBottom, vNear, vOrigin);
+    XMVECTOR Corners2 = XMVectorMultiplyAdd(LeftTop, vNear, vOrigin);
+    XMVECTOR Corners3 = XMVectorMultiplyAdd(LeftBottom, vNear, vOrigin);
+    XMVECTOR Corners4 = XMVectorMultiplyAdd(RightTop, vFar, vOrigin);
+    XMVECTOR Corners5 = XMVectorMultiplyAdd(RightBottom, vFar, vOrigin);
+    XMVECTOR Corners6 = XMVectorMultiplyAdd(LeftTop, vFar, vOrigin);
+    XMVECTOR Corners7 = XMVectorMultiplyAdd(LeftBottom, vFar, vOrigin);
+
+    XMVECTOR Outside, Inside;
+
+    // Test against each plane.
+    DirectX::Internal::FastIntersectFrustumPlane(Corners0, Corners1, Corners2, Corners3,
+        Corners4, Corners5, Corners6, Corners7,
+        Plane0, Outside, Inside);
+
+    XMVECTOR AnyOutside = Outside;
+    XMVECTOR AllInside = Inside;
+
+    DirectX::Internal::FastIntersectFrustumPlane(Corners0, Corners1, Corners2, Corners3,
+        Corners4, Corners5, Corners6, Corners7,
+        Plane1, Outside, Inside);
+
+    AnyOutside = XMVectorOrInt(AnyOutside, Outside);
+    AllInside = XMVectorAndInt(AllInside, Inside);
+
+    DirectX::Internal::FastIntersectFrustumPlane(Corners0, Corners1, Corners2, Corners3,
+        Corners4, Corners5, Corners6, Corners7,
+        Plane2, Outside, Inside);
+
+    AnyOutside = XMVectorOrInt(AnyOutside, Outside);
+    AllInside = XMVectorAndInt(AllInside, Inside);
+
+    DirectX::Internal::FastIntersectFrustumPlane(Corners0, Corners1, Corners2, Corners3,
+        Corners4, Corners5, Corners6, Corners7,
+        Plane3, Outside, Inside);
+
+    AnyOutside = XMVectorOrInt(AnyOutside, Outside);
+    AllInside = XMVectorAndInt(AllInside, Inside);
+
+    DirectX::Internal::FastIntersectFrustumPlane(Corners0, Corners1, Corners2, Corners3,
+        Corners4, Corners5, Corners6, Corners7,
+        Plane4, Outside, Inside);
+
+    AnyOutside = XMVectorOrInt(AnyOutside, Outside);
+    AllInside = XMVectorAndInt(AllInside, Inside);
+
+    DirectX::Internal::FastIntersectFrustumPlane(Corners0, Corners1, Corners2, Corners3,
+        Corners4, Corners5, Corners6, Corners7,
+        Plane5, Outside, Inside);
+
+    AnyOutside = XMVectorOrInt(AnyOutside, Outside);
+    AllInside = XMVectorAndInt(AllInside, Inside);
+
+    // If the frustum is outside any plane it is outside.
+    if (XMVector4EqualInt(AnyOutside, XMVectorTrueInt()))
+        return DISJOINT;
+
+    // If the frustum is inside all planes it is inside.
+    if (XMVector4EqualInt(AllInside, XMVectorTrueInt()))
+        return CONTAINS;
+
+    // The frustum is not inside all planes or outside a plane, it may intersect.
+    return INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Build the 6 frustum planes from a frustum.
+//
+// The intended use for these routines is for fast culling to a view frustum.
+// When the volume being tested against a view frustum is small relative to the
+// view frustum it is usually either inside all six planes of the frustum
+// (CONTAINS) or outside one of the planes of the frustum (DISJOINT). If neither
+// of these cases is true then it may or may not be intersecting the frustum
+// (INTERSECTS)
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingFrustum::GetPlanes(XMVECTOR* NearPlane, XMVECTOR* FarPlane, XMVECTOR* RightPlane,
+    XMVECTOR* LeftPlane, XMVECTOR* TopPlane, XMVECTOR* BottomPlane) const noexcept
+{
+    // Load origin and orientation of the frustum.
+    XMVECTOR vOrigin = XMLoadFloat3(&Origin);
+    XMVECTOR vOrientation = XMLoadFloat4(&Orientation);
+
+    if (NearPlane)
+    {
+        XMVECTOR vNearPlane = XMVectorSet(0.0f, 0.0f, -1.0f, Near);
+        vNearPlane = DirectX::Internal::XMPlaneTransform(vNearPlane, vOrientation, vOrigin);
+        *NearPlane = XMPlaneNormalize(vNearPlane);
+    }
+
+    if (FarPlane)
+    {
+        XMVECTOR vFarPlane = XMVectorSet(0.0f, 0.0f, 1.0f, -Far);
+        vFarPlane = DirectX::Internal::XMPlaneTransform(vFarPlane, vOrientation, vOrigin);
+        *FarPlane = XMPlaneNormalize(vFarPlane);
+    }
+
+    if (RightPlane)
+    {
+        XMVECTOR vRightPlane = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f);
+        vRightPlane = DirectX::Internal::XMPlaneTransform(vRightPlane, vOrientation, vOrigin);
+        *RightPlane = XMPlaneNormalize(vRightPlane);
+    }
+
+    if (LeftPlane)
+    {
+        XMVECTOR vLeftPlane = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f);
+        vLeftPlane = DirectX::Internal::XMPlaneTransform(vLeftPlane, vOrientation, vOrigin);
+        *LeftPlane = XMPlaneNormalize(vLeftPlane);
+    }
+
+    if (TopPlane)
+    {
+        XMVECTOR vTopPlane = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f);
+        vTopPlane = DirectX::Internal::XMPlaneTransform(vTopPlane, vOrientation, vOrigin);
+        *TopPlane = XMPlaneNormalize(vTopPlane);
+    }
+
+    if (BottomPlane)
+    {
+        XMVECTOR vBottomPlane = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f);
+        vBottomPlane = DirectX::Internal::XMPlaneTransform(vBottomPlane, vOrientation, vOrigin);
+        *BottomPlane = XMPlaneNormalize(vBottomPlane);
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+// Build a frustum from a perspective projection matrix. The matrix may only
+// contain a projection; any rotation, translation or scale will cause the
+// constructed frustum to be incorrect.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void XM_CALLCONV BoundingFrustum::CreateFromMatrix(BoundingFrustum& Out, FXMMATRIX Projection, bool rhcoords) noexcept
+{
+    // Corners of the projection frustum in homogeneous space.
+    static XMVECTORF32 HomogenousPoints[6] =
+    {
+        { { { 1.0f, 0.0f, 1.0f, 1.0f } } }, // right (at far plane)
+        { { { -1.0f, 0.0f, 1.0f, 1.0f } } }, // left
+        { { { 0.0f, 1.0f, 1.0f, 1.0f } } }, // top
+        { { { 0.0f, -1.0f, 1.0f, 1.0f } } }, // bottom
+
+        { { { 0.0f, 0.0f, 0.0f, 1.0f } } }, // near
+        { { { 0.0f, 0.0f, 1.0f, 1.0f } } } // far
+    };
+
+    XMVECTOR Determinant;
+    XMMATRIX matInverse = XMMatrixInverse(&Determinant, Projection);
+
+    // Compute the frustum corners in world space.
+    XMVECTOR Points[6];
+
+    for (size_t i = 0; i < 6; ++i)
+    {
+        // Transform point.
+        Points[i] = XMVector4Transform(HomogenousPoints[i], matInverse);
+    }
+
+    Out.Origin = XMFLOAT3(0.0f, 0.0f, 0.0f);
+    Out.Orientation = XMFLOAT4(0.0f, 0.0f, 0.0f, 1.0f);
+
+    // Compute the slopes.
+    Points[0] = XMVectorMultiply(Points[0], XMVectorReciprocal(XMVectorSplatZ(Points[0])));
+    Points[1] = XMVectorMultiply(Points[1], XMVectorReciprocal(XMVectorSplatZ(Points[1])));
+    Points[2] = XMVectorMultiply(Points[2], XMVectorReciprocal(XMVectorSplatZ(Points[2])));
+    Points[3] = XMVectorMultiply(Points[3], XMVectorReciprocal(XMVectorSplatZ(Points[3])));
+
+    Out.RightSlope = XMVectorGetX(Points[0]);
+    Out.LeftSlope = XMVectorGetX(Points[1]);
+    Out.TopSlope = XMVectorGetY(Points[2]);
+    Out.BottomSlope = XMVectorGetY(Points[3]);
+
+    // Compute near and far.
+    Points[4] = XMVectorMultiply(Points[4], XMVectorReciprocal(XMVectorSplatW(Points[4])));
+    Points[5] = XMVectorMultiply(Points[5], XMVectorReciprocal(XMVectorSplatW(Points[5])));
+
+    if (rhcoords)
+    {
+        Out.Near = XMVectorGetZ(Points[5]);
+        Out.Far = XMVectorGetZ(Points[4]);
+    }
+    else
+    {
+        Out.Near = XMVectorGetZ(Points[4]);
+        Out.Far = XMVectorGetZ(Points[5]);
+    }
+}
+
+
+/****************************************************************************
+ *
+ * TriangleTests
+ *
+ ****************************************************************************/
+
+namespace TriangleTests
+{
+
+    //-----------------------------------------------------------------------------
+    // Compute the intersection of a ray (Origin, Direction) with a triangle
+    // (V0, V1, V2). Return true if there is an intersection and also set Dist
+    // to the distance along the ray to the intersection.
+    //
+    // The algorithm is based on Moller, Tomas and Trumbore, "Fast, Minimum Storage
+    // Ray-Triangle Intersection", Journal of Graphics Tools, vol. 2, no. 1,
+    // pp 21-28, 1997.
+    //-----------------------------------------------------------------------------
+    _Use_decl_annotations_
+    inline bool XM_CALLCONV Intersects(
+        FXMVECTOR Origin, FXMVECTOR Direction, FXMVECTOR V0,
+        GXMVECTOR V1,
+        HXMVECTOR V2, float& Dist) noexcept
+    {
+        assert(DirectX::Internal::XMVector3IsUnit(Direction));
+
+        XMVECTOR Zero = XMVectorZero();
+
+        XMVECTOR e1 = XMVectorSubtract(V1, V0);
+        XMVECTOR e2 = XMVectorSubtract(V2, V0);
+
+        // p = Direction ^ e2;
+        XMVECTOR p = XMVector3Cross(Direction, e2);
+
+        // det = e1 * p;
+        XMVECTOR det = XMVector3Dot(e1, p);
+
+        XMVECTOR u, v, t;
+
+        if (XMVector3GreaterOrEqual(det, g_RayEpsilon))
+        {
+            // Determinant is positive (front side of the triangle).
+            XMVECTOR s = XMVectorSubtract(Origin, V0);
+
+            // u = s * p;
+            u = XMVector3Dot(s, p);
+
+            XMVECTOR NoIntersection = XMVectorLess(u, Zero);
+            NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(u, det));
+
+            // q = s ^ e1;
+            XMVECTOR q = XMVector3Cross(s, e1);
+
+            // v = Direction * q;
+            v = XMVector3Dot(Direction, q);
+
+            NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(v, Zero));
+            NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(XMVectorAdd(u, v), det));
+
+            // t = e2 * q;
+            t = XMVector3Dot(e2, q);
+
+            NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(t, Zero));
+
+            if (XMVector4EqualInt(NoIntersection, XMVectorTrueInt()))
+            {
+                Dist = 0.f;
+                return false;
+            }
+        }
+        else if (XMVector3LessOrEqual(det, g_RayNegEpsilon))
+        {
+            // Determinant is negative (back side of the triangle); every comparison below is mirrored.
+            XMVECTOR s = XMVectorSubtract(Origin, V0);
+
+            // u = s * p;
+            u = XMVector3Dot(s, p);
+
+            XMVECTOR NoIntersection = XMVectorGreater(u, Zero);
+            NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(u, det));
+
+            // q = s ^ e1;
+            XMVECTOR q = XMVector3Cross(s, e1);
+
+            // v = Direction * q;
+            v = XMVector3Dot(Direction, q);
+
+            NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(v, Zero));
+            NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(XMVectorAdd(u, v), det));
+
+            // t = e2 * q;
+            t = XMVector3Dot(e2, q);
+
+            NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(t, Zero));
+
+            if (XMVector4EqualInt(NoIntersection, XMVectorTrueInt()))
+            {
+                Dist = 0.f;
+                return false;
+            }
+        }
+        else
+        {
+            // Parallel ray (|det| is below the ray epsilon).
+            Dist = 0.f;
+            return false;
+        }
+
+        t = XMVectorDivide(t, det);
+
+        // (u / det) and (v / det) are the barycentric coordinates of the intersection.
+
+        // Store the x-component of t in Dist.
+        XMStoreFloat(&Dist, t);
+
+        return true;
+    }
+
+
+    //-----------------------------------------------------------------------------
+    // Test if two triangles intersect.
+    //
+    // The final test of algorithm is based on Shen, Heng, and Tang, "A Fast
+    // Triangle-Triangle Overlap Test Using Signed Distances", Journal of Graphics
+    // Tools, vol. 8, no. 1, pp 17-23, 2003 and Guigue and Devillers, "Fast and
+    // Robust Triangle-Triangle Overlap Test Using Orientation Predicates", Journal
+    // of Graphics Tools, vol. 8, no. 1, pp 25-32, 2003.
+    //
+    // The final test could be considered an edge-edge separating plane test with
+    // the 9 possible cases narrowed down to the only two pairs of edges that can
+    // actually result in a separation.
+    //-----------------------------------------------------------------------------
+    _Use_decl_annotations_
+    inline bool XM_CALLCONV Intersects(FXMVECTOR A0, FXMVECTOR A1, FXMVECTOR A2, GXMVECTOR B0, HXMVECTOR B1, HXMVECTOR B2) noexcept
+    {
+        static const XMVECTORU32 SelectY = { { { XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0 } } };
+        static const XMVECTORU32 SelectZ = { { { XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0 } } };
+        static const XMVECTORU32 Select0111 = { { { XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_1 } } };
+        static const XMVECTORU32 Select1011 = { { { XM_SELECT_1, XM_SELECT_0, XM_SELECT_1, XM_SELECT_1 } } };
+        static const XMVECTORU32 Select1101 = { { { XM_SELECT_1, XM_SELECT_1, XM_SELECT_0, XM_SELECT_1 } } };
+
+        XMVECTOR Zero = XMVectorZero();
+
+        // Compute the normal of triangle A.
+        XMVECTOR N1 = XMVector3Cross(XMVectorSubtract(A1, A0), XMVectorSubtract(A2, A0));
+
+        // Assert that the triangle is not degenerate.
+        assert(!XMVector3Equal(N1, Zero));
+
+        // Test points of B against the plane of A (x/y/z of BDist hold the signed distances of B0/B1/B2).
+        XMVECTOR BDist = XMVector3Dot(N1, XMVectorSubtract(B0, A0));
+        BDist = XMVectorSelect(BDist, XMVector3Dot(N1, XMVectorSubtract(B1, A0)), SelectY);
+        BDist = XMVectorSelect(BDist, XMVector3Dot(N1, XMVectorSubtract(B2, A0)), SelectZ);
+
+        // Ensure robustness with co-planar triangles by zeroing small distances.
+        uint32_t BDistIsZeroCR;
+        XMVECTOR BDistIsZero = XMVectorGreaterR(&BDistIsZeroCR, g_RayEpsilon, XMVectorAbs(BDist));
+        BDist = XMVectorSelect(BDist, Zero, BDistIsZero);
+
+        uint32_t BDistIsLessCR;
+        XMVECTOR BDistIsLess = XMVectorGreaterR(&BDistIsLessCR, Zero, BDist);
+
+        uint32_t BDistIsGreaterCR;
+        XMVECTOR BDistIsGreater = XMVectorGreaterR(&BDistIsGreaterCR, BDist, Zero);
+
+        // If all the points are on the same side we don't intersect.
+        if (XMComparisonAllTrue(BDistIsLessCR) || XMComparisonAllTrue(BDistIsGreaterCR))
+            return false;
+
+        // Compute the normal of triangle B.
+        XMVECTOR N2 = XMVector3Cross(XMVectorSubtract(B1, B0), XMVectorSubtract(B2, B0));
+
+        // Assert that the triangle is not degenerate.
+        assert(!XMVector3Equal(N2, Zero));
+
+        // Test points of A against the plane of B (x/y/z of ADist hold the signed distances of A0/A1/A2).
+        XMVECTOR ADist = XMVector3Dot(N2, XMVectorSubtract(A0, B0));
+        ADist = XMVectorSelect(ADist, XMVector3Dot(N2, XMVectorSubtract(A1, B0)), SelectY);
+        ADist = XMVectorSelect(ADist, XMVector3Dot(N2, XMVectorSubtract(A2, B0)), SelectZ);
+
+        // Ensure robustness with co-planar triangles by zeroing small distances (the mask must come from ADist, not BDist).
+        uint32_t ADistIsZeroCR;
+        XMVECTOR ADistIsZero = XMVectorGreaterR(&ADistIsZeroCR, g_RayEpsilon, XMVectorAbs(ADist));
+        ADist = XMVectorSelect(ADist, Zero, ADistIsZero);
+
+        uint32_t ADistIsLessCR;
+        XMVECTOR ADistIsLess = XMVectorGreaterR(&ADistIsLessCR, Zero, ADist);
+
+        uint32_t ADistIsGreaterCR;
+        XMVECTOR ADistIsGreater = XMVectorGreaterR(&ADistIsGreaterCR, ADist, Zero);
+
+        // If all the points are on the same side we don't intersect.
+        if (XMComparisonAllTrue(ADistIsLessCR) || XMComparisonAllTrue(ADistIsGreaterCR))
+            return false;
+
+        // Special case for co-planar triangles.
+        if (XMComparisonAllTrue(ADistIsZeroCR) || XMComparisonAllTrue(BDistIsZeroCR))
+        {
+            XMVECTOR Axis, Dist, MinDist;
+
+            // Edge (A0, A1): compute an axis perpendicular to the edge (points out).
+            Axis = XMVector3Cross(N1, XMVectorSubtract(A1, A0));
+            Dist = XMVector3Dot(Axis, A0);
+
+            // Test points of B against the axis.
+            MinDist = XMVector3Dot(B0, Axis);
+            MinDist = XMVectorMin(MinDist, XMVector3Dot(B1, Axis));
+            MinDist = XMVectorMin(MinDist, XMVector3Dot(B2, Axis));
+            if (XMVector4GreaterOrEqual(MinDist, Dist))
+                return false;
+
+            // Edge (A1, A2)
+            Axis = XMVector3Cross(N1, XMVectorSubtract(A2, A1));
+            Dist = XMVector3Dot(Axis, A1);
+
+            MinDist = XMVector3Dot(B0, Axis);
+            MinDist = XMVectorMin(MinDist, XMVector3Dot(B1, Axis));
+            MinDist = XMVectorMin(MinDist, XMVector3Dot(B2, Axis));
+            if (XMVector4GreaterOrEqual(MinDist, Dist))
+                return false;
+
+            // Edge (A2, A0)
+            Axis = XMVector3Cross(N1, XMVectorSubtract(A0, A2));
+            Dist = XMVector3Dot(Axis, A2);
+
+            MinDist = XMVector3Dot(B0, Axis);
+            MinDist = XMVectorMin(MinDist, XMVector3Dot(B1, Axis));
+            MinDist = XMVectorMin(MinDist, XMVector3Dot(B2, Axis));
+            if (XMVector4GreaterOrEqual(MinDist, Dist))
+                return false;
+
+            // Edge (B0, B1)
+            Axis = XMVector3Cross(N2, XMVectorSubtract(B1, B0));
+            Dist = XMVector3Dot(Axis, B0);
+
+            MinDist = XMVector3Dot(A0, Axis);
+            MinDist = XMVectorMin(MinDist, XMVector3Dot(A1, Axis));
+            MinDist = XMVectorMin(MinDist, XMVector3Dot(A2, Axis));
+            if (XMVector4GreaterOrEqual(MinDist, Dist))
+                return false;
+
+            // Edge (B1, B2)
+            Axis = XMVector3Cross(N2, XMVectorSubtract(B2, B1));
+            Dist = XMVector3Dot(Axis, B1);
+
+            MinDist = XMVector3Dot(A0, Axis);
+            MinDist = XMVectorMin(MinDist, XMVector3Dot(A1, Axis));
+            MinDist = XMVectorMin(MinDist, XMVector3Dot(A2, Axis));
+            if (XMVector4GreaterOrEqual(MinDist, Dist))
+                return false;
+
+            // Edge (B2, B0)
+            Axis = XMVector3Cross(N2, XMVectorSubtract(B0, B2));
+            Dist = XMVector3Dot(Axis, B2);
+
+            MinDist = XMVector3Dot(A0, Axis);
+            MinDist = XMVectorMin(MinDist, XMVector3Dot(A1, Axis));
+            MinDist = XMVectorMin(MinDist, XMVector3Dot(A2, Axis));
+            if (XMVector4GreaterOrEqual(MinDist, Dist))
+                return false;
+
+            return true;
+        }
+
+        //
+        // Find the single vertex of A and B (ie the vertex on the opposite side
+        // of the plane from the other two) and reorder the edges so we can compute
+        // the signed edge/edge distances.
+        //
+        // if ( (V0 >= 0 && V1 < 0 && V2 < 0) ||
+        //      (V0 > 0 && V1 <= 0 && V2 <= 0) ||
+        //      (V0 <= 0 && V1 > 0 && V2 > 0) ||
+        //      (V0 < 0 && V1 >= 0 && V2 >= 0) ) then V0 is singular;
+        //
+        // If our singular vertex is not on the positive side of the plane we reverse
+        // the triangle winding so that the overlap comparisons will compare the
+        // correct edges with the correct signs.
+        //
+        XMVECTOR ADistIsLessEqual = XMVectorOrInt(ADistIsLess, ADistIsZero);
+        XMVECTOR ADistIsGreaterEqual = XMVectorOrInt(ADistIsGreater, ADistIsZero);
+
+        XMVECTOR AA0, AA1, AA2;
+        bool bPositiveA;
+
+        if (DirectX::Internal::XMVector3AllTrue(XMVectorSelect(ADistIsGreaterEqual, ADistIsLess, Select0111)) ||
+            DirectX::Internal::XMVector3AllTrue(XMVectorSelect(ADistIsGreater, ADistIsLessEqual, Select0111)))
+        {
+            // A0 is singular, crossing from positive to negative.
+            AA0 = A0; AA1 = A1; AA2 = A2;
+            bPositiveA = true;
+        }
+        else if (DirectX::Internal::XMVector3AllTrue(XMVectorSelect(ADistIsLessEqual, ADistIsGreater, Select0111)) ||
+            DirectX::Internal::XMVector3AllTrue(XMVectorSelect(ADistIsLess, ADistIsGreaterEqual, Select0111)))
+        {
+            // A0 is singular, crossing from negative to positive.
+            AA0 = A0; AA1 = A2; AA2 = A1;
+            bPositiveA = false;
+        }
+        else if (DirectX::Internal::XMVector3AllTrue(XMVectorSelect(ADistIsGreaterEqual, ADistIsLess, Select1011)) ||
+            DirectX::Internal::XMVector3AllTrue(XMVectorSelect(ADistIsGreater, ADistIsLessEqual, Select1011)))
+        {
+            // A1 is singular, crossing from positive to negative.
+            AA0 = A1; AA1 = A2; AA2 = A0;
+            bPositiveA = true;
+        }
+        else if (DirectX::Internal::XMVector3AllTrue(XMVectorSelect(ADistIsLessEqual, ADistIsGreater, Select1011)) ||
+            DirectX::Internal::XMVector3AllTrue(XMVectorSelect(ADistIsLess, ADistIsGreaterEqual, Select1011)))
+        {
+            // A1 is singular, crossing from negative to positive.
+            AA0 = A1; AA1 = A0; AA2 = A2;
+            bPositiveA = false;
+        }
+        else if (DirectX::Internal::XMVector3AllTrue(XMVectorSelect(ADistIsGreaterEqual, ADistIsLess, Select1101)) ||
+            DirectX::Internal::XMVector3AllTrue(XMVectorSelect(ADistIsGreater, ADistIsLessEqual, Select1101)))
+        {
+            // A2 is singular, crossing from positive to negative.
+            AA0 = A2; AA1 = A0; AA2 = A1;
+            bPositiveA = true;
+        }
+        else if (DirectX::Internal::XMVector3AllTrue(XMVectorSelect(ADistIsLessEqual, ADistIsGreater, Select1101)) ||
+            DirectX::Internal::XMVector3AllTrue(XMVectorSelect(ADistIsLess, ADistIsGreaterEqual, Select1101)))
+        {
+            // A2 is singular, crossing from negative to positive.
+            AA0 = A2; AA1 = A1; AA2 = A0;
+            bPositiveA = false;
+        }
+        else
+        {
+            assert(false);
+            return false;
+        }
+
+        XMVECTOR BDistIsLessEqual = XMVectorOrInt(BDistIsLess, BDistIsZero);
+        XMVECTOR BDistIsGreaterEqual = XMVectorOrInt(BDistIsGreater, BDistIsZero);
+
+        XMVECTOR BB0, BB1, BB2;
+        bool bPositiveB;
+
+        if (DirectX::Internal::XMVector3AllTrue(XMVectorSelect(BDistIsGreaterEqual, BDistIsLess, Select0111)) ||
+            DirectX::Internal::XMVector3AllTrue(XMVectorSelect(BDistIsGreater, BDistIsLessEqual, Select0111)))
+        {
+            // B0 is singular, crossing from positive to negative.
+            BB0 = B0; BB1 = B1; BB2 = B2;
+            bPositiveB = true;
+        }
+        else if (DirectX::Internal::XMVector3AllTrue(XMVectorSelect(BDistIsLessEqual, BDistIsGreater, Select0111)) ||
+            DirectX::Internal::XMVector3AllTrue(XMVectorSelect(BDistIsLess, BDistIsGreaterEqual, Select0111)))
+        {
+            // B0 is singular, crossing from negative to positive.
+            BB0 = B0; BB1 = B2; BB2 = B1;
+            bPositiveB = false;
+        }
+        else if (DirectX::Internal::XMVector3AllTrue(XMVectorSelect(BDistIsGreaterEqual, BDistIsLess, Select1011)) ||
+            DirectX::Internal::XMVector3AllTrue(XMVectorSelect(BDistIsGreater, BDistIsLessEqual, Select1011)))
+        {
+            // B1 is singular, crossing from positive to negative.
+            BB0 = B1; BB1 = B2; BB2 = B0;
+            bPositiveB = true;
+        }
+        else if (DirectX::Internal::XMVector3AllTrue(XMVectorSelect(BDistIsLessEqual, BDistIsGreater, Select1011)) ||
+            DirectX::Internal::XMVector3AllTrue(XMVectorSelect(BDistIsLess, BDistIsGreaterEqual, Select1011)))
+        {
+            // B1 is singular, crossing from negative to positive.
+            BB0 = B1; BB1 = B0; BB2 = B2;
+            bPositiveB = false;
+        }
+        else if (DirectX::Internal::XMVector3AllTrue(XMVectorSelect(BDistIsGreaterEqual, BDistIsLess, Select1101)) ||
+            DirectX::Internal::XMVector3AllTrue(XMVectorSelect(BDistIsGreater, BDistIsLessEqual, Select1101)))
+        {
+            // B2 is singular, crossing from positive to negative.
+            BB0 = B2; BB1 = B0; BB2 = B1;
+            bPositiveB = true;
+        }
+        else if (DirectX::Internal::XMVector3AllTrue(XMVectorSelect(BDistIsLessEqual, BDistIsGreater, Select1101)) ||
+            DirectX::Internal::XMVector3AllTrue(XMVectorSelect(BDistIsLess, BDistIsGreaterEqual, Select1101)))
+        {
+            // B2 is singular, crossing from negative to positive.
+            BB0 = B2; BB1 = B1; BB2 = B0;
+            bPositiveB = false;
+        }
+        else
+        {
+            assert(false);
+            return false;
+        }
+
+        XMVECTOR Delta0, Delta1;
+
+        // Reverse the direction of the test depending on whether the singular vertices are
+        // the same sign or different signs.
+        if (bPositiveA ^ bPositiveB)
+        {
+            Delta0 = XMVectorSubtract(BB0, AA0);
+            Delta1 = XMVectorSubtract(AA0, BB0);
+        }
+        else
+        {
+            Delta0 = XMVectorSubtract(AA0, BB0);
+            Delta1 = XMVectorSubtract(BB0, AA0);
+        }
+
+        // Check if the triangles overlap on the line of intersection between the
+        // planes of the two triangles by finding the signed line distances.
+        XMVECTOR Dist0 = XMVector3Dot(Delta0, XMVector3Cross(XMVectorSubtract(BB2, BB0), XMVectorSubtract(AA2, AA0)));
+        if (XMVector4Greater(Dist0, Zero))
+            return false;
+
+        XMVECTOR Dist1 = XMVector3Dot(Delta1, XMVector3Cross(XMVectorSubtract(BB1, BB0), XMVectorSubtract(AA1, AA0)));
+        if (XMVector4Greater(Dist1, Zero))
+            return false;
+
+        return true;
+    }
+
+
+    //-----------------------------------------------------------------------------
+    // Plane-triangle test
+    //-----------------------------------------------------------------------------
+    _Use_decl_annotations_
+    inline PlaneIntersectionType XM_CALLCONV Intersects(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2, GXMVECTOR Plane) noexcept
+    {
+        XMVECTOR One = XMVectorSplatOne();
+
+        assert(DirectX::Internal::XMPlaneIsUnit(Plane));
+
+        // Set w of the points to one so we can dot4 with a plane.
+        XMVECTOR TV0 = XMVectorInsert<0, 0, 0, 0, 1>(V0, One);
+        XMVECTOR TV1 = XMVectorInsert<0, 0, 0, 0, 1>(V1, One);
+        XMVECTOR TV2 = XMVectorInsert<0, 0, 0, 0, 1>(V2, One);
+
+        XMVECTOR Outside, Inside;
+        DirectX::Internal::FastIntersectTrianglePlane(TV0, TV1, TV2, Plane, Outside, Inside);
+
+        // If the triangle is outside any plane it is outside.
+        if (XMVector4EqualInt(Outside, XMVectorTrueInt()))
+            return FRONT;
+
+        // If the triangle is inside all planes it is inside.
+        if (XMVector4EqualInt(Inside, XMVectorTrueInt()))
+            return BACK;
+
+        // The triangle is not inside all planes or outside a plane it intersects.
+        return INTERSECTING;
+    }
+
+
+    //-----------------------------------------------------------------------------
+    // Test a triangle vs 6 planes (typically forming a frustum).
+ //----------------------------------------------------------------------------- + _Use_decl_annotations_ + inline ContainmentType XM_CALLCONV ContainedBy( + FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2, + GXMVECTOR Plane0, + HXMVECTOR Plane1, HXMVECTOR Plane2, + CXMVECTOR Plane3, CXMVECTOR Plane4, CXMVECTOR Plane5) noexcept + { + XMVECTOR One = XMVectorSplatOne(); + + // Set w of the points to one so we can dot4 with a plane. + XMVECTOR TV0 = XMVectorInsert<0, 0, 0, 0, 1>(V0, One); + XMVECTOR TV1 = XMVectorInsert<0, 0, 0, 0, 1>(V1, One); + XMVECTOR TV2 = XMVectorInsert<0, 0, 0, 0, 1>(V2, One); + + XMVECTOR Outside, Inside; + + // Test against each plane. + DirectX::Internal::FastIntersectTrianglePlane(TV0, TV1, TV2, Plane0, Outside, Inside); + + XMVECTOR AnyOutside = Outside; + XMVECTOR AllInside = Inside; + + DirectX::Internal::FastIntersectTrianglePlane(TV0, TV1, TV2, Plane1, Outside, Inside); + AnyOutside = XMVectorOrInt(AnyOutside, Outside); + AllInside = XMVectorAndInt(AllInside, Inside); + + DirectX::Internal::FastIntersectTrianglePlane(TV0, TV1, TV2, Plane2, Outside, Inside); + AnyOutside = XMVectorOrInt(AnyOutside, Outside); + AllInside = XMVectorAndInt(AllInside, Inside); + + DirectX::Internal::FastIntersectTrianglePlane(TV0, TV1, TV2, Plane3, Outside, Inside); + AnyOutside = XMVectorOrInt(AnyOutside, Outside); + AllInside = XMVectorAndInt(AllInside, Inside); + + DirectX::Internal::FastIntersectTrianglePlane(TV0, TV1, TV2, Plane4, Outside, Inside); + AnyOutside = XMVectorOrInt(AnyOutside, Outside); + AllInside = XMVectorAndInt(AllInside, Inside); + + DirectX::Internal::FastIntersectTrianglePlane(TV0, TV1, TV2, Plane5, Outside, Inside); + AnyOutside = XMVectorOrInt(AnyOutside, Outside); + AllInside = XMVectorAndInt(AllInside, Inside); + + // If the triangle is outside any plane it is outside. + if (XMVector4EqualInt(AnyOutside, XMVectorTrueInt())) + return DISJOINT; + + // If the triangle is inside all planes it is inside. 
+ if (XMVector4EqualInt(AllInside, XMVectorTrueInt())) + return CONTAINS; + + // The triangle is not inside all planes or outside a plane, it may intersect. + return INTERSECTS; + } + +} // namespace TriangleTests + diff --git a/Sdk/External/DirectXMath/Inc/DirectXColors.h b/Sdk/External/DirectXMath/Inc/DirectXColors.h new file mode 100644 index 0000000..21eac36 --- /dev/null +++ b/Sdk/External/DirectXMath/Inc/DirectXColors.h @@ -0,0 +1,165 @@ +//------------------------------------------------------------------------------------- +// DirectXColors.h -- C++ Color Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +#include "DirectXMath.h" + +namespace DirectX +{ + + namespace Colors + { + // Standard colors (Red/Green/Blue/Alpha) + XMGLOBALCONST XMVECTORF32 AliceBlue = { { { 0.941176534f, 0.972549081f, 1.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 AntiqueWhite = { { { 0.980392218f, 0.921568692f, 0.843137324f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Aqua = { { { 0.000000000f, 1.000000000f, 1.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Aquamarine = { { { 0.498039246f, 1.000000000f, 0.831372619f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Azure = { { { 0.941176534f, 1.000000000f, 1.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Beige = { { { 0.960784376f, 0.960784376f, 0.862745166f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Bisque = { { { 1.000000000f, 0.894117713f, 0.768627524f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Black = { { { 0.000000000f, 0.000000000f, 0.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 BlanchedAlmond = { { { 1.000000000f, 0.921568692f, 0.803921640f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Blue = { { { 0.000000000f, 0.000000000f, 
1.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 BlueViolet = { { { 0.541176498f, 0.168627456f, 0.886274576f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Brown = { { { 0.647058845f, 0.164705887f, 0.164705887f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 BurlyWood = { { { 0.870588303f, 0.721568644f, 0.529411793f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 CadetBlue = { { { 0.372549027f, 0.619607866f, 0.627451003f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Chartreuse = { { { 0.498039246f, 1.000000000f, 0.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Chocolate = { { { 0.823529482f, 0.411764741f, 0.117647067f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Coral = { { { 1.000000000f, 0.498039246f, 0.313725501f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 CornflowerBlue = { { { 0.392156899f, 0.584313750f, 0.929411829f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Cornsilk = { { { 1.000000000f, 0.972549081f, 0.862745166f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Crimson = { { { 0.862745166f, 0.078431375f, 0.235294133f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Cyan = { { { 0.000000000f, 1.000000000f, 1.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkBlue = { { { 0.000000000f, 0.000000000f, 0.545098066f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkCyan = { { { 0.000000000f, 0.545098066f, 0.545098066f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkGoldenrod = { { { 0.721568644f, 0.525490224f, 0.043137256f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkGray = { { { 0.662745118f, 0.662745118f, 0.662745118f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkGreen = { { { 0.000000000f, 0.392156899f, 0.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkKhaki = { { { 0.741176486f, 0.717647076f, 0.419607878f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkMagenta = { { { 0.545098066f, 0.000000000f, 0.545098066f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 
DarkOliveGreen = { { { 0.333333343f, 0.419607878f, 0.184313729f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkOrange = { { { 1.000000000f, 0.549019635f, 0.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkOrchid = { { { 0.600000024f, 0.196078449f, 0.800000072f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkRed = { { { 0.545098066f, 0.000000000f, 0.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkSalmon = { { { 0.913725555f, 0.588235319f, 0.478431404f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkSeaGreen = { { { 0.560784340f, 0.737254918f, 0.545098066f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkSlateBlue = { { { 0.282352954f, 0.239215702f, 0.545098066f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkSlateGray = { { { 0.184313729f, 0.309803933f, 0.309803933f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkTurquoise = { { { 0.000000000f, 0.807843208f, 0.819607913f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DarkViolet = { { { 0.580392182f, 0.000000000f, 0.827451050f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DeepPink = { { { 1.000000000f, 0.078431375f, 0.576470613f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DeepSkyBlue = { { { 0.000000000f, 0.749019623f, 1.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DimGray = { { { 0.411764741f, 0.411764741f, 0.411764741f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 DodgerBlue = { { { 0.117647067f, 0.564705908f, 1.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Firebrick = { { { 0.698039234f, 0.133333340f, 0.133333340f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 FloralWhite = { { { 1.000000000f, 0.980392218f, 0.941176534f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 ForestGreen = { { { 0.133333340f, 0.545098066f, 0.133333340f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Fuchsia = { { { 1.000000000f, 0.000000000f, 1.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Gainsboro = { { { 0.862745166f, 
0.862745166f, 0.862745166f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 GhostWhite = { { { 0.972549081f, 0.972549081f, 1.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Gold = { { { 1.000000000f, 0.843137324f, 0.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Goldenrod = { { { 0.854902029f, 0.647058845f, 0.125490203f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Gray = { { { 0.501960814f, 0.501960814f, 0.501960814f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Green = { { { 0.000000000f, 0.501960814f, 0.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 GreenYellow = { { { 0.678431392f, 1.000000000f, 0.184313729f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Honeydew = { { { 0.941176534f, 1.000000000f, 0.941176534f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 HotPink = { { { 1.000000000f, 0.411764741f, 0.705882370f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 IndianRed = { { { 0.803921640f, 0.360784322f, 0.360784322f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Indigo = { { { 0.294117659f, 0.000000000f, 0.509803951f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Ivory = { { { 1.000000000f, 1.000000000f, 0.941176534f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Khaki = { { { 0.941176534f, 0.901960850f, 0.549019635f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Lavender = { { { 0.901960850f, 0.901960850f, 0.980392218f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LavenderBlush = { { { 1.000000000f, 0.941176534f, 0.960784376f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LawnGreen = { { { 0.486274540f, 0.988235354f, 0.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LemonChiffon = { { { 1.000000000f, 0.980392218f, 0.803921640f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LightBlue = { { { 0.678431392f, 0.847058892f, 0.901960850f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LightCoral = { { { 0.941176534f, 0.501960814f, 0.501960814f, 1.000000000f } } }; + XMGLOBALCONST 
XMVECTORF32 LightCyan = { { { 0.878431439f, 1.000000000f, 1.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LightGoldenrodYellow = { { { 0.980392218f, 0.980392218f, 0.823529482f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LightGreen = { { { 0.564705908f, 0.933333397f, 0.564705908f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LightGray = { { { 0.827451050f, 0.827451050f, 0.827451050f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LightPink = { { { 1.000000000f, 0.713725507f, 0.756862819f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LightSalmon = { { { 1.000000000f, 0.627451003f, 0.478431404f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LightSeaGreen = { { { 0.125490203f, 0.698039234f, 0.666666687f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LightSkyBlue = { { { 0.529411793f, 0.807843208f, 0.980392218f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LightSlateGray = { { { 0.466666698f, 0.533333361f, 0.600000024f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LightSteelBlue = { { { 0.690196097f, 0.768627524f, 0.870588303f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LightYellow = { { { 1.000000000f, 1.000000000f, 0.878431439f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Lime = { { { 0.000000000f, 1.000000000f, 0.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 LimeGreen = { { { 0.196078449f, 0.803921640f, 0.196078449f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Linen = { { { 0.980392218f, 0.941176534f, 0.901960850f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Magenta = { { { 1.000000000f, 0.000000000f, 1.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Maroon = { { { 0.501960814f, 0.000000000f, 0.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 MediumAquamarine = { { { 0.400000036f, 0.803921640f, 0.666666687f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 MediumBlue = { { { 0.000000000f, 0.000000000f, 0.803921640f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 MediumOrchid = { 
{ { 0.729411781f, 0.333333343f, 0.827451050f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 MediumPurple = { { { 0.576470613f, 0.439215720f, 0.858823597f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 MediumSeaGreen = { { { 0.235294133f, 0.701960802f, 0.443137288f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 MediumSlateBlue = { { { 0.482352972f, 0.407843173f, 0.933333397f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 MediumSpringGreen = { { { 0.000000000f, 0.980392218f, 0.603921592f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 MediumTurquoise = { { { 0.282352954f, 0.819607913f, 0.800000072f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 MediumVioletRed = { { { 0.780392230f, 0.082352944f, 0.521568656f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 MidnightBlue = { { { 0.098039225f, 0.098039225f, 0.439215720f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 MintCream = { { { 0.960784376f, 1.000000000f, 0.980392218f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 MistyRose = { { { 1.000000000f, 0.894117713f, 0.882353008f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Moccasin = { { { 1.000000000f, 0.894117713f, 0.709803939f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 NavajoWhite = { { { 1.000000000f, 0.870588303f, 0.678431392f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Navy = { { { 0.000000000f, 0.000000000f, 0.501960814f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 OldLace = { { { 0.992156923f, 0.960784376f, 0.901960850f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Olive = { { { 0.501960814f, 0.501960814f, 0.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 OliveDrab = { { { 0.419607878f, 0.556862772f, 0.137254909f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Orange = { { { 1.000000000f, 0.647058845f, 0.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 OrangeRed = { { { 1.000000000f, 0.270588249f, 0.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Orchid = { { { 0.854902029f, 0.439215720f, 
0.839215755f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 PaleGoldenrod = { { { 0.933333397f, 0.909803987f, 0.666666687f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 PaleGreen = { { { 0.596078455f, 0.984313786f, 0.596078455f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 PaleTurquoise = { { { 0.686274529f, 0.933333397f, 0.933333397f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 PaleVioletRed = { { { 0.858823597f, 0.439215720f, 0.576470613f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 PapayaWhip = { { { 1.000000000f, 0.937254965f, 0.835294187f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 PeachPuff = { { { 1.000000000f, 0.854902029f, 0.725490212f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Peru = { { { 0.803921640f, 0.521568656f, 0.247058839f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Pink = { { { 1.000000000f, 0.752941251f, 0.796078503f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Plum = { { { 0.866666734f, 0.627451003f, 0.866666734f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 PowderBlue = { { { 0.690196097f, 0.878431439f, 0.901960850f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Purple = { { { 0.501960814f, 0.000000000f, 0.501960814f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Red = { { { 1.000000000f, 0.000000000f, 0.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 RosyBrown = { { { 0.737254918f, 0.560784340f, 0.560784340f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 RoyalBlue = { { { 0.254901975f, 0.411764741f, 0.882353008f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 SaddleBrown = { { { 0.545098066f, 0.270588249f, 0.074509807f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Salmon = { { { 0.980392218f, 0.501960814f, 0.447058856f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 SandyBrown = { { { 0.956862807f, 0.643137276f, 0.376470625f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 SeaGreen = { { { 0.180392161f, 0.545098066f, 0.341176480f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 
SeaShell = { { { 1.000000000f, 0.960784376f, 0.933333397f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Sienna = { { { 0.627451003f, 0.321568638f, 0.176470593f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Silver = { { { 0.752941251f, 0.752941251f, 0.752941251f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 SkyBlue = { { { 0.529411793f, 0.807843208f, 0.921568692f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 SlateBlue = { { { 0.415686309f, 0.352941185f, 0.803921640f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 SlateGray = { { { 0.439215720f, 0.501960814f, 0.564705908f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Snow = { { { 1.000000000f, 0.980392218f, 0.980392218f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 SpringGreen = { { { 0.000000000f, 1.000000000f, 0.498039246f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 SteelBlue = { { { 0.274509817f, 0.509803951f, 0.705882370f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Tan = { { { 0.823529482f, 0.705882370f, 0.549019635f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Teal = { { { 0.000000000f, 0.501960814f, 0.501960814f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Thistle = { { { 0.847058892f, 0.749019623f, 0.847058892f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Tomato = { { { 1.000000000f, 0.388235331f, 0.278431386f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Transparent = { { { 0.000000000f, 0.000000000f, 0.000000000f, 0.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Turquoise = { { { 0.250980407f, 0.878431439f, 0.815686345f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Violet = { { { 0.933333397f, 0.509803951f, 0.933333397f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 Wheat = { { { 0.960784376f, 0.870588303f, 0.701960802f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 White = { { { 1.000000000f, 1.000000000f, 1.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 WhiteSmoke = { { { 0.960784376f, 0.960784376f, 0.960784376f, 1.000000000f } } }; + 
XMGLOBALCONST XMVECTORF32 Yellow = { { { 1.000000000f, 1.000000000f, 0.000000000f, 1.000000000f } } }; + XMGLOBALCONST XMVECTORF32 YellowGreen = { { { 0.603921592f, 0.803921640f, 0.196078449f, 1.000000000f } } }; + + } // namespace Colors + +} // namespace DirectX + diff --git a/Sdk/External/DirectXMath/Inc/DirectXMath.h b/Sdk/External/DirectXMath/Inc/DirectXMath.h new file mode 100644 index 0000000..9f9d791 --- /dev/null +++ b/Sdk/External/DirectXMath/Inc/DirectXMath.h @@ -0,0 +1,2242 @@ +//------------------------------------------------------------------------------------- +// DirectXMath.h -- SIMD C++ Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +#ifndef __cplusplus +#error DirectX Math requires C++ +#endif + +#define DIRECTX_MATH_VERSION 316 + +#if defined(_MSC_VER) && (_MSC_VER < 1910) +#error DirectX Math requires Visual C++ 2017 or later. 
+#endif + +#if defined(_MSC_VER) && !defined(_M_ARM) && !defined(_M_ARM64) && !defined(_M_HYBRID_X86_ARM64) && (!_MANAGED) && (!_M_CEE) && (!defined(_M_IX86_FP) || (_M_IX86_FP > 1)) && !defined(_XM_NO_INTRINSICS_) && !defined(_XM_VECTORCALL_) +#define _XM_VECTORCALL_ 1 +#endif + +#if _XM_VECTORCALL_ +#define XM_CALLCONV __vectorcall +#elif defined(__GNUC__) +#define XM_CALLCONV +#else +#define XM_CALLCONV __fastcall +#endif + +#ifndef XM_DEPRECATED +#ifdef __GNUC__ +#define XM_DEPRECATED __attribute__ ((deprecated)) +#else +#define XM_DEPRECATED __declspec(deprecated("This is deprecated and will be removed in a future version.")) +#endif +#endif + +#if !defined(_XM_AVX2_INTRINSICS_) && defined(__AVX2__) && !defined(_XM_NO_INTRINSICS_) +#define _XM_AVX2_INTRINSICS_ +#endif + +#if !defined(_XM_FMA3_INTRINSICS_) && defined(_XM_AVX2_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) +#define _XM_FMA3_INTRINSICS_ +#endif + +#if !defined(_XM_F16C_INTRINSICS_) && defined(_XM_AVX2_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) +#define _XM_F16C_INTRINSICS_ +#endif + +#if !defined(_XM_F16C_INTRINSICS_) && defined(__F16C__) && !defined(_XM_NO_INTRINSICS_) +#define _XM_F16C_INTRINSICS_ +#endif + +#if defined(_XM_FMA3_INTRINSICS_) && !defined(_XM_AVX_INTRINSICS_) +#define _XM_AVX_INTRINSICS_ +#endif + +#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_AVX_INTRINSICS_) +#define _XM_AVX_INTRINSICS_ +#endif + +#if !defined(_XM_AVX_INTRINSICS_) && defined(__AVX__) && !defined(_XM_NO_INTRINSICS_) +#define _XM_AVX_INTRINSICS_ +#endif + +#if defined(_XM_AVX_INTRINSICS_) && !defined(_XM_SSE4_INTRINSICS_) +#define _XM_SSE4_INTRINSICS_ +#endif + +#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_SSE3_INTRINSICS_) +#define _XM_SSE3_INTRINSICS_ +#endif + +#if defined(_XM_SSE3_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_) +#define _XM_SSE_INTRINSICS_ +#endif + +#if !defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) +#if (defined(_M_IX86) || 
defined(_M_X64) || __i386__ || __x86_64__) && !defined(_M_HYBRID_X86_ARM64) +#define _XM_SSE_INTRINSICS_ +#elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__ +#define _XM_ARM_NEON_INTRINSICS_ +#elif !defined(_XM_NO_INTRINSICS_) +#error DirectX Math does not support this target +#endif +#endif // !_XM_ARM_NEON_INTRINSICS_ && !_XM_SSE_INTRINSICS_ && !_XM_NO_INTRINSICS_ + +#if defined(_XM_SSE_INTRINSICS_) && defined(_MSC_VER) && (_MSC_VER >= 1920) && !defined(__clang__) && !defined(_XM_SVML_INTRINSICS_) && !defined(_XM_DISABLE_INTEL_SVML_) +#define _XM_SVML_INTRINSICS_ +#endif + +#if !defined(_XM_NO_XMVECTOR_OVERLOADS_) && (defined(__clang__) || defined(__GNUC__)) +#define _XM_NO_XMVECTOR_OVERLOADS_ +#endif + +#pragma warning(push) +#pragma warning(disable:4514 4820) +// C4514/4820: Off by default noise +#include <math.h> +#include <float.h> +#pragma warning(pop) + +#ifndef _XM_NO_INTRINSICS_ + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4987) +// C4987: Off by default noise +#include <intrin.h> +#pragma warning(pop) +#endif + +#if (defined(__clang__) || defined(__GNUC__)) && (__x86_64__ || __i386__) +#include <cpuid.h> +#endif + +#ifdef _XM_SSE_INTRINSICS_ +#include <xmmintrin.h> +#include <emmintrin.h> + +#ifdef _XM_SSE3_INTRINSICS_ +#include <pmmintrin.h> +#endif + +#ifdef _XM_SSE4_INTRINSICS_ +#include <smmintrin.h> +#endif + +#ifdef _XM_AVX_INTRINSICS_ +#include <immintrin.h> +#endif + +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64)) +#include <arm64_neon.h> +#else +#include <arm_neon.h> +#endif +#endif +#endif // !_XM_NO_INTRINSICS_ + +#include "sal.h" +#include <assert.h> + +#pragma warning(push) +#pragma warning(disable : 4005 4668) +// C4005/4668: Old header issue +#include <stdint.h> +#pragma warning(pop) + +#ifdef __GNUC__ +#define XM_ALIGNED_DATA(x) __attribute__ ((aligned(x))) +#define XM_ALIGNED_STRUCT(x) struct __attribute__ ((aligned(x))) +#else +#define XM_ALIGNED_DATA(x) __declspec(align(x)) +#define XM_ALIGNED_STRUCT(x) __declspec(align(x)) struct +#endif +
+/**************************************************************************** + * + * Conditional intrinsics + * + ****************************************************************************/ + +#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + +#if defined(_XM_NO_MOVNT_) +#define XM_STREAM_PS( p, a ) _mm_store_ps((p), (a)) +#define XM256_STREAM_PS( p, a ) _mm256_store_ps((p), (a)) +#define XM_SFENCE() +#else +#define XM_STREAM_PS( p, a ) _mm_stream_ps((p), (a)) +#define XM256_STREAM_PS( p, a ) _mm256_stream_ps((p), (a)) +#define XM_SFENCE() _mm_sfence() +#endif + +#if defined(_XM_FMA3_INTRINSICS_) +#define XM_FMADD_PS( a, b, c ) _mm_fmadd_ps((a), (b), (c)) +#define XM_FNMADD_PS( a, b, c ) _mm_fnmadd_ps((a), (b), (c)) +#else +#define XM_FMADD_PS( a, b, c ) _mm_add_ps(_mm_mul_ps((a), (b)), (c)) +#define XM_FNMADD_PS( a, b, c ) _mm_sub_ps((c), _mm_mul_ps((a), (b))) +#endif + +#if defined(_XM_AVX_INTRINSICS_) && defined(_XM_FAVOR_INTEL_) +#define XM_PERMUTE_PS( v, c ) _mm_permute_ps((v), c ) +#else +#define XM_PERMUTE_PS( v, c ) _mm_shuffle_ps((v), (v), c ) +#endif + +#endif // _XM_SSE_INTRINSICS_ && !_XM_NO_INTRINSICS_ + +#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + +#if defined(__clang__) || defined(__GNUC__) +#define XM_PREFETCH( a ) __builtin_prefetch(a) +#elif defined(_MSC_VER) +#define XM_PREFETCH( a ) __prefetch(a) +#else +#define XM_PREFETCH( a ) +#endif + +#endif // _XM_ARM_NEON_INTRINSICS_ && !_XM_NO_INTRINSICS_ + +namespace DirectX +{ + + /**************************************************************************** + * + * Constant definitions + * + ****************************************************************************/ + +#if defined(__XNAMATH_H__) && defined(XM_PI) +#undef XM_PI +#undef XM_2PI +#undef XM_1DIVPI +#undef XM_1DIV2PI +#undef XM_PIDIV2 +#undef XM_PIDIV4 +#undef XM_SELECT_0 +#undef XM_SELECT_1 +#undef XM_PERMUTE_0X +#undef XM_PERMUTE_0Y +#undef XM_PERMUTE_0Z +#undef XM_PERMUTE_0W +#undef 
XM_PERMUTE_1X +#undef XM_PERMUTE_1Y +#undef XM_PERMUTE_1Z +#undef XM_PERMUTE_1W +#undef XM_CRMASK_CR6 +#undef XM_CRMASK_CR6TRUE +#undef XM_CRMASK_CR6FALSE +#undef XM_CRMASK_CR6BOUNDS +#undef XM_CACHE_LINE_SIZE +#endif + + constexpr float XM_PI = 3.141592654f; + constexpr float XM_2PI = 6.283185307f; + constexpr float XM_1DIVPI = 0.318309886f; + constexpr float XM_1DIV2PI = 0.159154943f; + constexpr float XM_PIDIV2 = 1.570796327f; + constexpr float XM_PIDIV4 = 0.785398163f; + + constexpr uint32_t XM_SELECT_0 = 0x00000000; + constexpr uint32_t XM_SELECT_1 = 0xFFFFFFFF; + + constexpr uint32_t XM_PERMUTE_0X = 0; + constexpr uint32_t XM_PERMUTE_0Y = 1; + constexpr uint32_t XM_PERMUTE_0Z = 2; + constexpr uint32_t XM_PERMUTE_0W = 3; + constexpr uint32_t XM_PERMUTE_1X = 4; + constexpr uint32_t XM_PERMUTE_1Y = 5; + constexpr uint32_t XM_PERMUTE_1Z = 6; + constexpr uint32_t XM_PERMUTE_1W = 7; + + constexpr uint32_t XM_SWIZZLE_X = 0; + constexpr uint32_t XM_SWIZZLE_Y = 1; + constexpr uint32_t XM_SWIZZLE_Z = 2; + constexpr uint32_t XM_SWIZZLE_W = 3; + + constexpr uint32_t XM_CRMASK_CR6 = 0x000000F0; + constexpr uint32_t XM_CRMASK_CR6TRUE = 0x00000080; + constexpr uint32_t XM_CRMASK_CR6FALSE = 0x00000020; + constexpr uint32_t XM_CRMASK_CR6BOUNDS = XM_CRMASK_CR6FALSE; + + constexpr size_t XM_CACHE_LINE_SIZE = 64; + + + /**************************************************************************** + * + * Macros + * + ****************************************************************************/ + +#if defined(__XNAMATH_H__) && defined(XMComparisonAllTrue) +#undef XMComparisonAllTrue +#undef XMComparisonAnyTrue +#undef XMComparisonAllFalse +#undef XMComparisonAnyFalse +#undef XMComparisonMixed +#undef XMComparisonAllInBounds +#undef XMComparisonAnyOutOfBounds +#endif + + // Unit conversion + + inline constexpr float XMConvertToRadians(float fDegrees) noexcept { return fDegrees * (XM_PI / 180.0f); } + inline constexpr float XMConvertToDegrees(float fRadians) noexcept { return 
fRadians * (180.0f / XM_PI); } + + // Condition register evaluation following a recording (R) comparison + + inline constexpr bool XMComparisonAllTrue(uint32_t CR) noexcept { return (((CR)&XM_CRMASK_CR6TRUE) == XM_CRMASK_CR6TRUE); } + inline constexpr bool XMComparisonAnyTrue(uint32_t CR) noexcept { return (((CR)&XM_CRMASK_CR6FALSE) != XM_CRMASK_CR6FALSE); } + inline constexpr bool XMComparisonAllFalse(uint32_t CR) noexcept { return (((CR)&XM_CRMASK_CR6FALSE) == XM_CRMASK_CR6FALSE); } + inline constexpr bool XMComparisonAnyFalse(uint32_t CR) noexcept { return (((CR)&XM_CRMASK_CR6TRUE) != XM_CRMASK_CR6TRUE); } + inline constexpr bool XMComparisonMixed(uint32_t CR) noexcept { return (((CR)&XM_CRMASK_CR6) == 0); } + inline constexpr bool XMComparisonAllInBounds(uint32_t CR) noexcept { return (((CR)&XM_CRMASK_CR6BOUNDS) == XM_CRMASK_CR6BOUNDS); } + inline constexpr bool XMComparisonAnyOutOfBounds(uint32_t CR) noexcept { return (((CR)&XM_CRMASK_CR6BOUNDS) != XM_CRMASK_CR6BOUNDS); } + + + /**************************************************************************** + * + * Data types + * + ****************************************************************************/ + +#pragma warning(push) +#pragma warning(disable:4068 4201 4365 4324 4820) + // C4068: ignore unknown pragmas + // C4201: nonstandard extension used : nameless struct/union + // C4365: Off by default noise + // C4324/4820: padding warnings + +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes") +#endif + +//------------------------------------------------------------------------------ +#if defined(_XM_NO_INTRINSICS_) + struct __vector4 + { + union + { + float vector4_f32[4]; + uint32_t vector4_u32[4]; + }; + }; +#endif // _XM_NO_INTRINSICS_ + + //------------------------------------------------------------------------------ + // Vector intrinsic: Four 32 bit floating point components aligned on a 16 byte + // boundary and mapped to hardware vector registers +#if
defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + using XMVECTOR = __m128; +#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + using XMVECTOR = float32x4_t; +#else + using XMVECTOR = __vector4; +#endif + + // Fix-up for (1st-3rd) XMVECTOR parameters that are pass-in-register for x86, ARM, ARM64, and vector call; by reference otherwise +#if ( defined(_M_IX86) || defined(_M_ARM) || defined(_M_ARM64) || _XM_VECTORCALL_ || __i386__ || __arm__ || __aarch64__ ) && !defined(_XM_NO_INTRINSICS_) + typedef const XMVECTOR FXMVECTOR; +#else + typedef const XMVECTOR& FXMVECTOR; +#endif + + // Fix-up for (4th) XMVECTOR parameter to pass in-register for ARM, ARM64, and vector call; by reference otherwise +#if ( defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || _XM_VECTORCALL_ || __arm__ || __aarch64__ ) && !defined(_XM_NO_INTRINSICS_) + typedef const XMVECTOR GXMVECTOR; +#else + typedef const XMVECTOR& GXMVECTOR; +#endif + + // Fix-up for (5th & 6th) XMVECTOR parameter to pass in-register for ARM64 and vector call; by reference otherwise +#if ( defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || _XM_VECTORCALL_ || __aarch64__ ) && !defined(_XM_NO_INTRINSICS_) + typedef const XMVECTOR HXMVECTOR; +#else + typedef const XMVECTOR& HXMVECTOR; +#endif + + // Fix-up for (7th+) XMVECTOR parameters to pass by reference + typedef const XMVECTOR& CXMVECTOR; + + //------------------------------------------------------------------------------ + // Conversion types for constants + XM_ALIGNED_STRUCT(16) XMVECTORF32 + { + union + { + float f[4]; + XMVECTOR v; + }; + + inline operator XMVECTOR() const noexcept { return v; } + inline operator const float* () const noexcept { return f; } +#ifdef _XM_NO_INTRINSICS_ +#elif defined(_XM_SSE_INTRINSICS_) + inline operator __m128i() const noexcept { return _mm_castps_si128(v); } + inline operator __m128d() const noexcept { return _mm_castps_pd(v); } +#elif defined(_XM_ARM_NEON_INTRINSICS_) && 
defined(__GNUC__) + inline operator int32x4_t() const noexcept { return vreinterpretq_s32_f32(v); } + inline operator uint32x4_t() const noexcept { return vreinterpretq_u32_f32(v); } +#endif + }; + + XM_ALIGNED_STRUCT(16) XMVECTORI32 + { + union + { + int32_t i[4]; + XMVECTOR v; + }; + + inline operator XMVECTOR() const noexcept { return v; } +#ifdef _XM_NO_INTRINSICS_ +#elif defined(_XM_SSE_INTRINSICS_) + inline operator __m128i() const noexcept { return _mm_castps_si128(v); } + inline operator __m128d() const noexcept { return _mm_castps_pd(v); } +#elif defined(_XM_ARM_NEON_INTRINSICS_) && defined(__GNUC__) + inline operator int32x4_t() const noexcept { return vreinterpretq_s32_f32(v); } + inline operator uint32x4_t() const noexcept { return vreinterpretq_u32_f32(v); } +#endif + }; + + XM_ALIGNED_STRUCT(16) XMVECTORU8 + { + union + { + uint8_t u[16]; + XMVECTOR v; + }; + + inline operator XMVECTOR() const noexcept { return v; } +#ifdef _XM_NO_INTRINSICS_ +#elif defined(_XM_SSE_INTRINSICS_) + inline operator __m128i() const noexcept { return _mm_castps_si128(v); } + inline operator __m128d() const noexcept { return _mm_castps_pd(v); } +#elif defined(_XM_ARM_NEON_INTRINSICS_) && defined(__GNUC__) + inline operator int32x4_t() const noexcept { return vreinterpretq_s32_f32(v); } + inline operator uint32x4_t() const noexcept { return vreinterpretq_u32_f32(v); } +#endif + }; + + XM_ALIGNED_STRUCT(16) XMVECTORU32 + { + union + { + uint32_t u[4]; + XMVECTOR v; + }; + + inline operator XMVECTOR() const noexcept { return v; } +#ifdef _XM_NO_INTRINSICS_ +#elif defined(_XM_SSE_INTRINSICS_) + inline operator __m128i() const noexcept { return _mm_castps_si128(v); } + inline operator __m128d() const noexcept { return _mm_castps_pd(v); } +#elif defined(_XM_ARM_NEON_INTRINSICS_) && defined(__GNUC__) + inline operator int32x4_t() const noexcept { return vreinterpretq_s32_f32(v); } + inline operator uint32x4_t() const noexcept { return vreinterpretq_u32_f32(v); } +#endif + }; + + 
//------------------------------------------------------------------------------ + // Vector operators + +#ifndef _XM_NO_XMVECTOR_OVERLOADS_ + XMVECTOR XM_CALLCONV operator+ (FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV operator- (FXMVECTOR V) noexcept; + + XMVECTOR& XM_CALLCONV operator+= (XMVECTOR& V1, FXMVECTOR V2) noexcept; + XMVECTOR& XM_CALLCONV operator-= (XMVECTOR& V1, FXMVECTOR V2) noexcept; + XMVECTOR& XM_CALLCONV operator*= (XMVECTOR& V1, FXMVECTOR V2) noexcept; + XMVECTOR& XM_CALLCONV operator/= (XMVECTOR& V1, FXMVECTOR V2) noexcept; + + XMVECTOR& operator*= (XMVECTOR& V, float S) noexcept; + XMVECTOR& operator/= (XMVECTOR& V, float S) noexcept; + + XMVECTOR XM_CALLCONV operator+ (FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV operator- (FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV operator* (FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV operator/ (FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV operator* (FXMVECTOR V, float S) noexcept; + XMVECTOR XM_CALLCONV operator* (float S, FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV operator/ (FXMVECTOR V, float S) noexcept; +#endif /* !_XM_NO_XMVECTOR_OVERLOADS_ */ + + //------------------------------------------------------------------------------ + // Matrix type: Sixteen 32 bit floating point components aligned on a + // 16 byte boundary and mapped to four hardware vector registers + + struct XMMATRIX; + + // Fix-up for (1st) XMMATRIX parameter to pass in-register for ARM64 and vector call; by reference otherwise +#if ( defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || _XM_VECTORCALL_ || __aarch64__ ) && !defined(_XM_NO_INTRINSICS_) + typedef const XMMATRIX FXMMATRIX; +#else + typedef const XMMATRIX& FXMMATRIX; +#endif + + // Fix-up for (2nd+) XMMATRIX parameters to pass by reference + typedef const XMMATRIX& CXMMATRIX; + +#ifdef _XM_NO_INTRINSICS_ + struct XMMATRIX +#else + XM_ALIGNED_STRUCT(16) XMMATRIX +#endif + { +#ifdef _XM_NO_INTRINSICS_ + 
union + { + XMVECTOR r[4]; + struct + { + float _11, _12, _13, _14; + float _21, _22, _23, _24; + float _31, _32, _33, _34; + float _41, _42, _43, _44; + }; + float m[4][4]; + }; +#else + XMVECTOR r[4]; +#endif + + XMMATRIX() = default; + + XMMATRIX(const XMMATRIX&) = default; + +#if defined(_MSC_VER) && (_MSC_FULL_VER < 191426431) + XMMATRIX& operator= (const XMMATRIX& M) noexcept { r[0] = M.r[0]; r[1] = M.r[1]; r[2] = M.r[2]; r[3] = M.r[3]; return *this; } +#else + XMMATRIX& operator=(const XMMATRIX&) = default; + + XMMATRIX(XMMATRIX&&) = default; + XMMATRIX& operator=(XMMATRIX&&) = default; +#endif + + constexpr XMMATRIX(FXMVECTOR R0, FXMVECTOR R1, FXMVECTOR R2, CXMVECTOR R3) noexcept : r{ R0,R1,R2,R3 } {} + XMMATRIX(float m00, float m01, float m02, float m03, + float m10, float m11, float m12, float m13, + float m20, float m21, float m22, float m23, + float m30, float m31, float m32, float m33) noexcept; + explicit XMMATRIX(_In_reads_(16) const float* pArray) noexcept; + +#ifdef _XM_NO_INTRINSICS_ + float operator() (size_t Row, size_t Column) const noexcept { return m[Row][Column]; } + float& operator() (size_t Row, size_t Column) noexcept { return m[Row][Column]; } +#endif + + XMMATRIX operator+ () const noexcept { return *this; } + XMMATRIX operator- () const noexcept; + + XMMATRIX& XM_CALLCONV operator+= (FXMMATRIX M) noexcept; + XMMATRIX& XM_CALLCONV operator-= (FXMMATRIX M) noexcept; + XMMATRIX& XM_CALLCONV operator*= (FXMMATRIX M) noexcept; + XMMATRIX& operator*= (float S) noexcept; + XMMATRIX& operator/= (float S) noexcept; + + XMMATRIX XM_CALLCONV operator+ (FXMMATRIX M) const noexcept; + XMMATRIX XM_CALLCONV operator- (FXMMATRIX M) const noexcept; + XMMATRIX XM_CALLCONV operator* (FXMMATRIX M) const noexcept; + XMMATRIX operator* (float S) const noexcept; + XMMATRIX operator/ (float S) const noexcept; + + friend XMMATRIX XM_CALLCONV operator* (float S, FXMMATRIX M) noexcept; + }; + + 
//------------------------------------------------------------------------------ + // 2D Vector; 32 bit floating point components + struct XMFLOAT2 + { + float x; + float y; + + XMFLOAT2() = default; + + XMFLOAT2(const XMFLOAT2&) = default; + XMFLOAT2& operator=(const XMFLOAT2&) = default; + + XMFLOAT2(XMFLOAT2&&) = default; + XMFLOAT2& operator=(XMFLOAT2&&) = default; + + constexpr XMFLOAT2(float _x, float _y) noexcept : x(_x), y(_y) {} + explicit XMFLOAT2(_In_reads_(2) const float* pArray) noexcept : x(pArray[0]), y(pArray[1]) {} + }; + + // 2D Vector; 32 bit floating point components aligned on a 16 byte boundary + XM_ALIGNED_STRUCT(16) XMFLOAT2A : public XMFLOAT2 + { + XMFLOAT2A() = default; + + XMFLOAT2A(const XMFLOAT2A&) = default; + XMFLOAT2A& operator=(const XMFLOAT2A&) = default; + + XMFLOAT2A(XMFLOAT2A&&) = default; + XMFLOAT2A& operator=(XMFLOAT2A&&) = default; + + constexpr XMFLOAT2A(float _x, float _y) noexcept : XMFLOAT2(_x, _y) {} + explicit XMFLOAT2A(_In_reads_(2) const float* pArray) noexcept : XMFLOAT2(pArray) {} + }; + + //------------------------------------------------------------------------------ + // 2D Vector; 32 bit signed integer components + struct XMINT2 + { + int32_t x; + int32_t y; + + XMINT2() = default; + + XMINT2(const XMINT2&) = default; + XMINT2& operator=(const XMINT2&) = default; + + XMINT2(XMINT2&&) = default; + XMINT2& operator=(XMINT2&&) = default; + + constexpr XMINT2(int32_t _x, int32_t _y) noexcept : x(_x), y(_y) {} + explicit XMINT2(_In_reads_(2) const int32_t* pArray) noexcept : x(pArray[0]), y(pArray[1]) {} + }; + + // 2D Vector; 32 bit unsigned integer components + struct XMUINT2 + { + uint32_t x; + uint32_t y; + + XMUINT2() = default; + + XMUINT2(const XMUINT2&) = default; + XMUINT2& operator=(const XMUINT2&) = default; + + XMUINT2(XMUINT2&&) = default; + XMUINT2& operator=(XMUINT2&&) = default; + + constexpr XMUINT2(uint32_t _x, uint32_t _y) noexcept : x(_x), y(_y) {} + explicit XMUINT2(_In_reads_(2) const uint32_t* 
pArray) noexcept : x(pArray[0]), y(pArray[1]) {} + }; + + //------------------------------------------------------------------------------ + // 3D Vector; 32 bit floating point components + struct XMFLOAT3 + { + float x; + float y; + float z; + + XMFLOAT3() = default; + + XMFLOAT3(const XMFLOAT3&) = default; + XMFLOAT3& operator=(const XMFLOAT3&) = default; + + XMFLOAT3(XMFLOAT3&&) = default; + XMFLOAT3& operator=(XMFLOAT3&&) = default; + + constexpr XMFLOAT3(float _x, float _y, float _z) noexcept : x(_x), y(_y), z(_z) {} + explicit XMFLOAT3(_In_reads_(3) const float* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]) {} + }; + + // 3D Vector; 32 bit floating point components aligned on a 16 byte boundary + XM_ALIGNED_STRUCT(16) XMFLOAT3A : public XMFLOAT3 + { + XMFLOAT3A() = default; + + XMFLOAT3A(const XMFLOAT3A&) = default; + XMFLOAT3A& operator=(const XMFLOAT3A&) = default; + + XMFLOAT3A(XMFLOAT3A&&) = default; + XMFLOAT3A& operator=(XMFLOAT3A&&) = default; + + constexpr XMFLOAT3A(float _x, float _y, float _z) noexcept : XMFLOAT3(_x, _y, _z) {} + explicit XMFLOAT3A(_In_reads_(3) const float* pArray) noexcept : XMFLOAT3(pArray) {} + }; + + //------------------------------------------------------------------------------ + // 3D Vector; 32 bit signed integer components + struct XMINT3 + { + int32_t x; + int32_t y; + int32_t z; + + XMINT3() = default; + + XMINT3(const XMINT3&) = default; + XMINT3& operator=(const XMINT3&) = default; + + XMINT3(XMINT3&&) = default; + XMINT3& operator=(XMINT3&&) = default; + + constexpr XMINT3(int32_t _x, int32_t _y, int32_t _z) noexcept : x(_x), y(_y), z(_z) {} + explicit XMINT3(_In_reads_(3) const int32_t* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]) {} + }; + + // 3D Vector; 32 bit unsigned integer components + struct XMUINT3 + { + uint32_t x; + uint32_t y; + uint32_t z; + + XMUINT3() = default; + + XMUINT3(const XMUINT3&) = default; + XMUINT3& operator=(const XMUINT3&) = default; + + XMUINT3(XMUINT3&&) = 
default; + XMUINT3& operator=(XMUINT3&&) = default; + + constexpr XMUINT3(uint32_t _x, uint32_t _y, uint32_t _z) noexcept : x(_x), y(_y), z(_z) {} + explicit XMUINT3(_In_reads_(3) const uint32_t* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]) {} + }; + + //------------------------------------------------------------------------------ + // 4D Vector; 32 bit floating point components + struct XMFLOAT4 + { + float x; + float y; + float z; + float w; + + XMFLOAT4() = default; + + XMFLOAT4(const XMFLOAT4&) = default; + XMFLOAT4& operator=(const XMFLOAT4&) = default; + + XMFLOAT4(XMFLOAT4&&) = default; + XMFLOAT4& operator=(XMFLOAT4&&) = default; + + constexpr XMFLOAT4(float _x, float _y, float _z, float _w) noexcept : x(_x), y(_y), z(_z), w(_w) {} + explicit XMFLOAT4(_In_reads_(4) const float* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} + }; + + // 4D Vector; 32 bit floating point components aligned on a 16 byte boundary + XM_ALIGNED_STRUCT(16) XMFLOAT4A : public XMFLOAT4 + { + XMFLOAT4A() = default; + + XMFLOAT4A(const XMFLOAT4A&) = default; + XMFLOAT4A& operator=(const XMFLOAT4A&) = default; + + XMFLOAT4A(XMFLOAT4A&&) = default; + XMFLOAT4A& operator=(XMFLOAT4A&&) = default; + + constexpr XMFLOAT4A(float _x, float _y, float _z, float _w) noexcept : XMFLOAT4(_x, _y, _z, _w) {} + explicit XMFLOAT4A(_In_reads_(4) const float* pArray) noexcept : XMFLOAT4(pArray) {} + }; + + //------------------------------------------------------------------------------ + // 4D Vector; 32 bit signed integer components + struct XMINT4 + { + int32_t x; + int32_t y; + int32_t z; + int32_t w; + + XMINT4() = default; + + XMINT4(const XMINT4&) = default; + XMINT4& operator=(const XMINT4&) = default; + + XMINT4(XMINT4&&) = default; + XMINT4& operator=(XMINT4&&) = default; + + constexpr XMINT4(int32_t _x, int32_t _y, int32_t _z, int32_t _w) noexcept : x(_x), y(_y), z(_z), w(_w) {} + explicit XMINT4(_In_reads_(4) const int32_t* pArray) noexcept : 
x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} + }; + + // 4D Vector; 32 bit unsigned integer components + struct XMUINT4 + { + uint32_t x; + uint32_t y; + uint32_t z; + uint32_t w; + + XMUINT4() = default; + + XMUINT4(const XMUINT4&) = default; + XMUINT4& operator=(const XMUINT4&) = default; + + XMUINT4(XMUINT4&&) = default; + XMUINT4& operator=(XMUINT4&&) = default; + + constexpr XMUINT4(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w) noexcept : x(_x), y(_y), z(_z), w(_w) {} + explicit XMUINT4(_In_reads_(4) const uint32_t* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} + }; + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wgnu-anonymous-struct" +#pragma clang diagnostic ignored "-Wnested-anon-types" +#endif + + //------------------------------------------------------------------------------ + // 3x3 Matrix: 32 bit floating point components + struct XMFLOAT3X3 + { + union + { + struct + { + float _11, _12, _13; + float _21, _22, _23; + float _31, _32, _33; + }; + float m[3][3]; + }; + + XMFLOAT3X3() = default; + + XMFLOAT3X3(const XMFLOAT3X3&) = default; + XMFLOAT3X3& operator=(const XMFLOAT3X3&) = default; + + XMFLOAT3X3(XMFLOAT3X3&&) = default; + XMFLOAT3X3& operator=(XMFLOAT3X3&&) = default; + + constexpr XMFLOAT3X3(float m00, float m01, float m02, + float m10, float m11, float m12, + float m20, float m21, float m22) noexcept + : _11(m00), _12(m01), _13(m02), + _21(m10), _22(m11), _23(m12), + _31(m20), _32(m21), _33(m22) {} + explicit XMFLOAT3X3(_In_reads_(9) const float* pArray) noexcept; + + float operator() (size_t Row, size_t Column) const noexcept { return m[Row][Column]; } + float& operator() (size_t Row, size_t Column) noexcept { return m[Row][Column]; } + }; + + //------------------------------------------------------------------------------ + // 4x3 Row-major Matrix: 32 bit floating point components + struct XMFLOAT4X3 + { + union + { + struct + { + float _11, _12, _13; 
+ float _21, _22, _23; + float _31, _32, _33; + float _41, _42, _43; + }; + float m[4][3]; + float f[12]; + }; + + XMFLOAT4X3() = default; + + XMFLOAT4X3(const XMFLOAT4X3&) = default; + XMFLOAT4X3& operator=(const XMFLOAT4X3&) = default; + + XMFLOAT4X3(XMFLOAT4X3&&) = default; + XMFLOAT4X3& operator=(XMFLOAT4X3&&) = default; + + constexpr XMFLOAT4X3(float m00, float m01, float m02, + float m10, float m11, float m12, + float m20, float m21, float m22, + float m30, float m31, float m32) noexcept + : _11(m00), _12(m01), _13(m02), + _21(m10), _22(m11), _23(m12), + _31(m20), _32(m21), _33(m22), + _41(m30), _42(m31), _43(m32) {} + explicit XMFLOAT4X3(_In_reads_(12) const float* pArray) noexcept; + + float operator() (size_t Row, size_t Column) const noexcept { return m[Row][Column]; } + float& operator() (size_t Row, size_t Column) noexcept { return m[Row][Column]; } + }; + + // 4x3 Row-major Matrix: 32 bit floating point components aligned on a 16 byte boundary + XM_ALIGNED_STRUCT(16) XMFLOAT4X3A : public XMFLOAT4X3 + { + XMFLOAT4X3A() = default; + + XMFLOAT4X3A(const XMFLOAT4X3A&) = default; + XMFLOAT4X3A& operator=(const XMFLOAT4X3A&) = default; + + XMFLOAT4X3A(XMFLOAT4X3A&&) = default; + XMFLOAT4X3A& operator=(XMFLOAT4X3A&&) = default; + + constexpr XMFLOAT4X3A(float m00, float m01, float m02, + float m10, float m11, float m12, + float m20, float m21, float m22, + float m30, float m31, float m32) noexcept : + XMFLOAT4X3(m00, m01, m02, m10, m11, m12, m20, m21, m22, m30, m31, m32) {} + explicit XMFLOAT4X3A(_In_reads_(12) const float* pArray) noexcept : XMFLOAT4X3(pArray) {} + }; + + //------------------------------------------------------------------------------ + // 3x4 Column-major Matrix: 32 bit floating point components + struct XMFLOAT3X4 + { + union + { + struct + { + float _11, _12, _13, _14; + float _21, _22, _23, _24; + float _31, _32, _33, _34; + }; + float m[3][4]; + float f[12]; + }; + + XMFLOAT3X4() = default; + + XMFLOAT3X4(const XMFLOAT3X4&) = default; + 
XMFLOAT3X4& operator=(const XMFLOAT3X4&) = default; + + XMFLOAT3X4(XMFLOAT3X4&&) = default; + XMFLOAT3X4& operator=(XMFLOAT3X4&&) = default; + + constexpr XMFLOAT3X4(float m00, float m01, float m02, float m03, + float m10, float m11, float m12, float m13, + float m20, float m21, float m22, float m23) noexcept + : _11(m00), _12(m01), _13(m02), _14(m03), + _21(m10), _22(m11), _23(m12), _24(m13), + _31(m20), _32(m21), _33(m22), _34(m23) {} + explicit XMFLOAT3X4(_In_reads_(12) const float* pArray) noexcept; + + float operator() (size_t Row, size_t Column) const noexcept { return m[Row][Column]; } + float& operator() (size_t Row, size_t Column) noexcept { return m[Row][Column]; } + }; + + // 3x4 Column-major Matrix: 32 bit floating point components aligned on a 16 byte boundary + XM_ALIGNED_STRUCT(16) XMFLOAT3X4A : public XMFLOAT3X4 + { + XMFLOAT3X4A() = default; + + XMFLOAT3X4A(const XMFLOAT3X4A&) = default; + XMFLOAT3X4A& operator=(const XMFLOAT3X4A&) = default; + + XMFLOAT3X4A(XMFLOAT3X4A&&) = default; + XMFLOAT3X4A& operator=(XMFLOAT3X4A&&) = default; + + constexpr XMFLOAT3X4A(float m00, float m01, float m02, float m03, + float m10, float m11, float m12, float m13, + float m20, float m21, float m22, float m23) noexcept : + XMFLOAT3X4(m00, m01, m02, m03, m10, m11, m12, m13, m20, m21, m22, m23) {} + explicit XMFLOAT3X4A(_In_reads_(12) const float* pArray) noexcept : XMFLOAT3X4(pArray) {} + }; + + //------------------------------------------------------------------------------ + // 4x4 Matrix: 32 bit floating point components + struct XMFLOAT4X4 + { + union + { + struct + { + float _11, _12, _13, _14; + float _21, _22, _23, _24; + float _31, _32, _33, _34; + float _41, _42, _43, _44; + }; + float m[4][4]; + }; + + XMFLOAT4X4() = default; + + XMFLOAT4X4(const XMFLOAT4X4&) = default; + XMFLOAT4X4& operator=(const XMFLOAT4X4&) = default; + + XMFLOAT4X4(XMFLOAT4X4&&) = default; + XMFLOAT4X4& operator=(XMFLOAT4X4&&) = default; + + constexpr XMFLOAT4X4(float m00, float m01, 
float m02, float m03, + float m10, float m11, float m12, float m13, + float m20, float m21, float m22, float m23, + float m30, float m31, float m32, float m33) noexcept + : _11(m00), _12(m01), _13(m02), _14(m03), + _21(m10), _22(m11), _23(m12), _24(m13), + _31(m20), _32(m21), _33(m22), _34(m23), + _41(m30), _42(m31), _43(m32), _44(m33) {} + explicit XMFLOAT4X4(_In_reads_(16) const float* pArray) noexcept; + + float operator() (size_t Row, size_t Column) const noexcept { return m[Row][Column]; } + float& operator() (size_t Row, size_t Column) noexcept { return m[Row][Column]; } + }; + + // 4x4 Matrix: 32 bit floating point components aligned on a 16 byte boundary + XM_ALIGNED_STRUCT(16) XMFLOAT4X4A : public XMFLOAT4X4 + { + XMFLOAT4X4A() = default; + + XMFLOAT4X4A(const XMFLOAT4X4A&) = default; + XMFLOAT4X4A& operator=(const XMFLOAT4X4A&) = default; + + XMFLOAT4X4A(XMFLOAT4X4A&&) = default; + XMFLOAT4X4A& operator=(XMFLOAT4X4A&&) = default; + + constexpr XMFLOAT4X4A(float m00, float m01, float m02, float m03, + float m10, float m11, float m12, float m13, + float m20, float m21, float m22, float m23, + float m30, float m31, float m32, float m33) noexcept + : XMFLOAT4X4(m00, m01, m02, m03, m10, m11, m12, m13, m20, m21, m22, m23, m30, m31, m32, m33) {} + explicit XMFLOAT4X4A(_In_reads_(16) const float* pArray) noexcept : XMFLOAT4X4(pArray) {} + }; + + //////////////////////////////////////////////////////////////////////////////// + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + +#pragma warning(pop) + +/**************************************************************************** + * + * Data conversion operations + * + ****************************************************************************/ + + XMVECTOR XM_CALLCONV XMConvertVectorIntToFloat(FXMVECTOR VInt, uint32_t DivExponent) noexcept; + XMVECTOR XM_CALLCONV XMConvertVectorFloatToInt(FXMVECTOR VFloat, uint32_t MulExponent) noexcept; + XMVECTOR 
XM_CALLCONV XMConvertVectorUIntToFloat(FXMVECTOR VUInt, uint32_t DivExponent) noexcept; + XMVECTOR XM_CALLCONV XMConvertVectorFloatToUInt(FXMVECTOR VFloat, uint32_t MulExponent) noexcept; + +#if defined(__XNAMATH_H__) && defined(XMVectorSetBinaryConstant) +#undef XMVectorSetBinaryConstant +#undef XMVectorSplatConstant +#undef XMVectorSplatConstantInt +#endif + + XMVECTOR XM_CALLCONV XMVectorSetBinaryConstant(uint32_t C0, uint32_t C1, uint32_t C2, uint32_t C3) noexcept; + XMVECTOR XM_CALLCONV XMVectorSplatConstant(int32_t IntConstant, uint32_t DivExponent) noexcept; + XMVECTOR XM_CALLCONV XMVectorSplatConstantInt(int32_t IntConstant) noexcept; + + /**************************************************************************** + * + * Load operations + * + ****************************************************************************/ + + XMVECTOR XM_CALLCONV XMLoadInt(_In_ const uint32_t* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadFloat(_In_ const float* pSource) noexcept; + + XMVECTOR XM_CALLCONV XMLoadInt2(_In_reads_(2) const uint32_t* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadInt2A(_In_reads_(2) const uint32_t* PSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadFloat2(_In_ const XMFLOAT2* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadFloat2A(_In_ const XMFLOAT2A* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadSInt2(_In_ const XMINT2* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadUInt2(_In_ const XMUINT2* pSource) noexcept; + + XMVECTOR XM_CALLCONV XMLoadInt3(_In_reads_(3) const uint32_t* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadInt3A(_In_reads_(3) const uint32_t* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadFloat3(_In_ const XMFLOAT3* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadFloat3A(_In_ const XMFLOAT3A* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadSInt3(_In_ const XMINT3* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadUInt3(_In_ const XMUINT3* pSource) noexcept; + + XMVECTOR XM_CALLCONV XMLoadInt4(_In_reads_(4) const uint32_t* 
pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadInt4A(_In_reads_(4) const uint32_t* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadFloat4(_In_ const XMFLOAT4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadFloat4A(_In_ const XMFLOAT4A* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadSInt4(_In_ const XMINT4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadUInt4(_In_ const XMUINT4* pSource) noexcept; + + XMMATRIX XM_CALLCONV XMLoadFloat3x3(_In_ const XMFLOAT3X3* pSource) noexcept; + XMMATRIX XM_CALLCONV XMLoadFloat4x3(_In_ const XMFLOAT4X3* pSource) noexcept; + XMMATRIX XM_CALLCONV XMLoadFloat4x3A(_In_ const XMFLOAT4X3A* pSource) noexcept; + XMMATRIX XM_CALLCONV XMLoadFloat3x4(_In_ const XMFLOAT3X4* pSource) noexcept; + XMMATRIX XM_CALLCONV XMLoadFloat3x4A(_In_ const XMFLOAT3X4A* pSource) noexcept; + XMMATRIX XM_CALLCONV XMLoadFloat4x4(_In_ const XMFLOAT4X4* pSource) noexcept; + XMMATRIX XM_CALLCONV XMLoadFloat4x4A(_In_ const XMFLOAT4X4A* pSource) noexcept; + + /**************************************************************************** + * + * Store operations + * + ****************************************************************************/ + + void XM_CALLCONV XMStoreInt(_Out_ uint32_t* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreFloat(_Out_ float* pDestination, _In_ FXMVECTOR V) noexcept; + + void XM_CALLCONV XMStoreInt2(_Out_writes_(2) uint32_t* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreInt2A(_Out_writes_(2) uint32_t* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreFloat2(_Out_ XMFLOAT2* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreFloat2A(_Out_ XMFLOAT2A* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreSInt2(_Out_ XMINT2* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreUInt2(_Out_ XMUINT2* pDestination, _In_ FXMVECTOR V) noexcept; + + void XM_CALLCONV XMStoreInt3(_Out_writes_(3) uint32_t* pDestination, _In_ FXMVECTOR V) 
noexcept; + void XM_CALLCONV XMStoreInt3A(_Out_writes_(3) uint32_t* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreFloat3(_Out_ XMFLOAT3* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreFloat3A(_Out_ XMFLOAT3A* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreSInt3(_Out_ XMINT3* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreUInt3(_Out_ XMUINT3* pDestination, _In_ FXMVECTOR V) noexcept; + + void XM_CALLCONV XMStoreInt4(_Out_writes_(4) uint32_t* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreInt4A(_Out_writes_(4) uint32_t* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreFloat4(_Out_ XMFLOAT4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreFloat4A(_Out_ XMFLOAT4A* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreSInt4(_Out_ XMINT4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreUInt4(_Out_ XMUINT4* pDestination, _In_ FXMVECTOR V) noexcept; + + void XM_CALLCONV XMStoreFloat3x3(_Out_ XMFLOAT3X3* pDestination, _In_ FXMMATRIX M) noexcept; + void XM_CALLCONV XMStoreFloat4x3(_Out_ XMFLOAT4X3* pDestination, _In_ FXMMATRIX M) noexcept; + void XM_CALLCONV XMStoreFloat4x3A(_Out_ XMFLOAT4X3A* pDestination, _In_ FXMMATRIX M) noexcept; + void XM_CALLCONV XMStoreFloat3x4(_Out_ XMFLOAT3X4* pDestination, _In_ FXMMATRIX M) noexcept; + void XM_CALLCONV XMStoreFloat3x4A(_Out_ XMFLOAT3X4A* pDestination, _In_ FXMMATRIX M) noexcept; + void XM_CALLCONV XMStoreFloat4x4(_Out_ XMFLOAT4X4* pDestination, _In_ FXMMATRIX M) noexcept; + void XM_CALLCONV XMStoreFloat4x4A(_Out_ XMFLOAT4X4A* pDestination, _In_ FXMMATRIX M) noexcept; + + /**************************************************************************** + * + * General vector operations + * + ****************************************************************************/ + + XMVECTOR XM_CALLCONV XMVectorZero() noexcept; + XMVECTOR XM_CALLCONV XMVectorSet(float x, 
float y, float z, float w) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetInt(uint32_t x, uint32_t y, uint32_t z, uint32_t w) noexcept; + XMVECTOR XM_CALLCONV XMVectorReplicate(float Value) noexcept; + XMVECTOR XM_CALLCONV XMVectorReplicatePtr(_In_ const float* pValue) noexcept; + XMVECTOR XM_CALLCONV XMVectorReplicateInt(uint32_t Value) noexcept; + XMVECTOR XM_CALLCONV XMVectorReplicateIntPtr(_In_ const uint32_t* pValue) noexcept; + XMVECTOR XM_CALLCONV XMVectorTrueInt() noexcept; + XMVECTOR XM_CALLCONV XMVectorFalseInt() noexcept; + XMVECTOR XM_CALLCONV XMVectorSplatX(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorSplatY(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorSplatZ(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorSplatW(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorSplatOne() noexcept; + XMVECTOR XM_CALLCONV XMVectorSplatInfinity() noexcept; + XMVECTOR XM_CALLCONV XMVectorSplatQNaN() noexcept; + XMVECTOR XM_CALLCONV XMVectorSplatEpsilon() noexcept; + XMVECTOR XM_CALLCONV XMVectorSplatSignMask() noexcept; + + float XM_CALLCONV XMVectorGetByIndex(FXMVECTOR V, size_t i) noexcept; + float XM_CALLCONV XMVectorGetX(FXMVECTOR V) noexcept; + float XM_CALLCONV XMVectorGetY(FXMVECTOR V) noexcept; + float XM_CALLCONV XMVectorGetZ(FXMVECTOR V) noexcept; + float XM_CALLCONV XMVectorGetW(FXMVECTOR V) noexcept; + + void XM_CALLCONV XMVectorGetByIndexPtr(_Out_ float* f, _In_ FXMVECTOR V, _In_ size_t i) noexcept; + void XM_CALLCONV XMVectorGetXPtr(_Out_ float* x, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMVectorGetYPtr(_Out_ float* y, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMVectorGetZPtr(_Out_ float* z, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMVectorGetWPtr(_Out_ float* w, _In_ FXMVECTOR V) noexcept; + + uint32_t XM_CALLCONV XMVectorGetIntByIndex(FXMVECTOR V, size_t i) noexcept; + uint32_t XM_CALLCONV XMVectorGetIntX(FXMVECTOR V) noexcept; + uint32_t XM_CALLCONV XMVectorGetIntY(FXMVECTOR V) noexcept; + uint32_t XM_CALLCONV 
XMVectorGetIntZ(FXMVECTOR V) noexcept; + uint32_t XM_CALLCONV XMVectorGetIntW(FXMVECTOR V) noexcept; + + void XM_CALLCONV XMVectorGetIntByIndexPtr(_Out_ uint32_t* x, _In_ FXMVECTOR V, _In_ size_t i) noexcept; + void XM_CALLCONV XMVectorGetIntXPtr(_Out_ uint32_t* x, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMVectorGetIntYPtr(_Out_ uint32_t* y, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMVectorGetIntZPtr(_Out_ uint32_t* z, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMVectorGetIntWPtr(_Out_ uint32_t* w, _In_ FXMVECTOR V) noexcept; + + XMVECTOR XM_CALLCONV XMVectorSetByIndex(FXMVECTOR V, float f, size_t i) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetX(FXMVECTOR V, float x) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetY(FXMVECTOR V, float y) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetZ(FXMVECTOR V, float z) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetW(FXMVECTOR V, float w) noexcept; + + XMVECTOR XM_CALLCONV XMVectorSetByIndexPtr(_In_ FXMVECTOR V, _In_ const float* f, _In_ size_t i) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetXPtr(_In_ FXMVECTOR V, _In_ const float* x) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetYPtr(_In_ FXMVECTOR V, _In_ const float* y) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetZPtr(_In_ FXMVECTOR V, _In_ const float* z) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetWPtr(_In_ FXMVECTOR V, _In_ const float* w) noexcept; + + XMVECTOR XM_CALLCONV XMVectorSetIntByIndex(FXMVECTOR V, uint32_t x, size_t i) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetIntX(FXMVECTOR V, uint32_t x) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetIntY(FXMVECTOR V, uint32_t y) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetIntZ(FXMVECTOR V, uint32_t z) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetIntW(FXMVECTOR V, uint32_t w) noexcept; + + XMVECTOR XM_CALLCONV XMVectorSetIntByIndexPtr(_In_ FXMVECTOR V, _In_ const uint32_t* x, _In_ size_t i) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetIntXPtr(_In_ FXMVECTOR V, _In_ const uint32_t* x) noexcept; + XMVECTOR XM_CALLCONV 
XMVectorSetIntYPtr(_In_ FXMVECTOR V, _In_ const uint32_t* y) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetIntZPtr(_In_ FXMVECTOR V, _In_ const uint32_t* z) noexcept; + XMVECTOR XM_CALLCONV XMVectorSetIntWPtr(_In_ FXMVECTOR V, _In_ const uint32_t* w) noexcept; + +#if defined(__XNAMATH_H__) && defined(XMVectorSwizzle) +#undef XMVectorSwizzle +#endif + + XMVECTOR XM_CALLCONV XMVectorSwizzle(FXMVECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3) noexcept; + XMVECTOR XM_CALLCONV XMVectorPermute(FXMVECTOR V1, FXMVECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW) noexcept; + XMVECTOR XM_CALLCONV XMVectorSelectControl(uint32_t VectorIndex0, uint32_t VectorIndex1, uint32_t VectorIndex2, uint32_t VectorIndex3) noexcept; + XMVECTOR XM_CALLCONV XMVectorSelect(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR Control) noexcept; + XMVECTOR XM_CALLCONV XMVectorMergeXY(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorMergeZW(FXMVECTOR V1, FXMVECTOR V2) noexcept; + +#if defined(__XNAMATH_H__) && defined(XMVectorShiftLeft) +#undef XMVectorShiftLeft +#undef XMVectorRotateLeft +#undef XMVectorRotateRight +#undef XMVectorInsert +#endif + + XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2, uint32_t Elements) noexcept; + XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V, uint32_t Elements) noexcept; + XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V, uint32_t Elements) noexcept; + XMVECTOR XM_CALLCONV XMVectorInsert(FXMVECTOR VD, FXMVECTOR VS, uint32_t VSLeftRotateElements, + uint32_t Select0, uint32_t Select1, uint32_t Select2, uint32_t Select3) noexcept; + + XMVECTOR XM_CALLCONV XMVectorEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorEqualR(_Out_ uint32_t* pCR, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorEqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorEqualIntR(_Out_ uint32_t* pCR, _In_ FXMVECTOR V, _In_ FXMVECTOR V2) 
noexcept; + XMVECTOR XM_CALLCONV XMVectorNearEqual(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR Epsilon) noexcept; + XMVECTOR XM_CALLCONV XMVectorNotEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorNotEqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorGreater(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorGreaterR(_Out_ uint32_t* pCR, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorGreaterOrEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorGreaterOrEqualR(_Out_ uint32_t* pCR, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorLess(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorLessOrEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorInBounds(FXMVECTOR V, FXMVECTOR Bounds) noexcept; + XMVECTOR XM_CALLCONV XMVectorInBoundsR(_Out_ uint32_t* pCR, _In_ FXMVECTOR V, _In_ FXMVECTOR Bounds) noexcept; + + XMVECTOR XM_CALLCONV XMVectorIsNaN(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorIsInfinite(FXMVECTOR V) noexcept; + + XMVECTOR XM_CALLCONV XMVectorMin(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorMax(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorRound(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorTruncate(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorFloor(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorCeiling(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorClamp(FXMVECTOR V, FXMVECTOR Min, FXMVECTOR Max) noexcept; + XMVECTOR XM_CALLCONV XMVectorSaturate(FXMVECTOR V) noexcept; + + XMVECTOR XM_CALLCONV XMVectorAndInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorAndCInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorOrInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorNorInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR 
XM_CALLCONV XMVectorXorInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; + + XMVECTOR XM_CALLCONV XMVectorNegate(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorAdd(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorSum(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorAddAngles(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorSubtract(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorSubtractAngles(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorMultiply(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorMultiplyAdd(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR V3) noexcept; + XMVECTOR XM_CALLCONV XMVectorDivide(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR V3) noexcept; + XMVECTOR XM_CALLCONV XMVectorScale(FXMVECTOR V, float ScaleFactor) noexcept; + XMVECTOR XM_CALLCONV XMVectorReciprocalEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorReciprocal(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorSqrtEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorSqrt(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorReciprocalSqrtEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorReciprocalSqrt(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorExp2(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorExp10(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorExpE(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorExp(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorLog2(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorLog10(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorLogE(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorLog(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorPow(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVectorAbs(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorMod(FXMVECTOR V1, FXMVECTOR V2) 
noexcept; + XMVECTOR XM_CALLCONV XMVectorModAngles(FXMVECTOR Angles) noexcept; + XMVECTOR XM_CALLCONV XMVectorSin(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorSinEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorCos(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorCosEst(FXMVECTOR V) noexcept; + void XM_CALLCONV XMVectorSinCos(_Out_ XMVECTOR* pSin, _Out_ XMVECTOR* pCos, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMVectorSinCosEst(_Out_ XMVECTOR* pSin, _Out_ XMVECTOR* pCos, _In_ FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorTan(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorTanEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorSinH(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorCosH(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorTanH(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorASin(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorASinEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorACos(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorACosEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorATan(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorATanEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVectorATan2(FXMVECTOR Y, FXMVECTOR X) noexcept; + XMVECTOR XM_CALLCONV XMVectorATan2Est(FXMVECTOR Y, FXMVECTOR X) noexcept; + XMVECTOR XM_CALLCONV XMVectorLerp(FXMVECTOR V0, FXMVECTOR V1, float t) noexcept; + XMVECTOR XM_CALLCONV XMVectorLerpV(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR T) noexcept; + XMVECTOR XM_CALLCONV XMVectorHermite(FXMVECTOR Position0, FXMVECTOR Tangent0, FXMVECTOR Position1, GXMVECTOR Tangent1, float t) noexcept; + XMVECTOR XM_CALLCONV XMVectorHermiteV(FXMVECTOR Position0, FXMVECTOR Tangent0, FXMVECTOR Position1, GXMVECTOR Tangent1, HXMVECTOR T) noexcept; + XMVECTOR XM_CALLCONV XMVectorCatmullRom(FXMVECTOR Position0, FXMVECTOR Position1, FXMVECTOR Position2, GXMVECTOR Position3, float t) noexcept; + XMVECTOR XM_CALLCONV XMVectorCatmullRomV(FXMVECTOR 
Position0, FXMVECTOR Position1, FXMVECTOR Position2, GXMVECTOR Position3, HXMVECTOR T) noexcept; + XMVECTOR XM_CALLCONV XMVectorBaryCentric(FXMVECTOR Position0, FXMVECTOR Position1, FXMVECTOR Position2, float f, float g) noexcept; + XMVECTOR XM_CALLCONV XMVectorBaryCentricV(FXMVECTOR Position0, FXMVECTOR Position1, FXMVECTOR Position2, GXMVECTOR F, HXMVECTOR G) noexcept; + + /**************************************************************************** + * + * 2D vector operations + * + ****************************************************************************/ + + bool XM_CALLCONV XMVector2Equal(FXMVECTOR V1, FXMVECTOR V2) noexcept; + uint32_t XM_CALLCONV XMVector2EqualR(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector2EqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; + uint32_t XM_CALLCONV XMVector2EqualIntR(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector2NearEqual(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR Epsilon) noexcept; + bool XM_CALLCONV XMVector2NotEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector2NotEqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector2Greater(FXMVECTOR V1, FXMVECTOR V2) noexcept; + uint32_t XM_CALLCONV XMVector2GreaterR(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector2GreaterOrEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; + uint32_t XM_CALLCONV XMVector2GreaterOrEqualR(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector2Less(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector2LessOrEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector2InBounds(FXMVECTOR V, FXMVECTOR Bounds) noexcept; + + bool XM_CALLCONV XMVector2IsNaN(FXMVECTOR V) noexcept; + bool XM_CALLCONV XMVector2IsInfinite(FXMVECTOR V) noexcept; + + XMVECTOR XM_CALLCONV XMVector2Dot(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVector2Cross(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVector2LengthSq(FXMVECTOR V) 
noexcept; + XMVECTOR XM_CALLCONV XMVector2ReciprocalLengthEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector2ReciprocalLength(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector2LengthEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector2Length(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector2NormalizeEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector2Normalize(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector2ClampLength(FXMVECTOR V, float LengthMin, float LengthMax) noexcept; + XMVECTOR XM_CALLCONV XMVector2ClampLengthV(FXMVECTOR V, FXMVECTOR LengthMin, FXMVECTOR LengthMax) noexcept; + XMVECTOR XM_CALLCONV XMVector2Reflect(FXMVECTOR Incident, FXMVECTOR Normal) noexcept; + XMVECTOR XM_CALLCONV XMVector2Refract(FXMVECTOR Incident, FXMVECTOR Normal, float RefractionIndex) noexcept; + XMVECTOR XM_CALLCONV XMVector2RefractV(FXMVECTOR Incident, FXMVECTOR Normal, FXMVECTOR RefractionIndex) noexcept; + XMVECTOR XM_CALLCONV XMVector2Orthogonal(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector2AngleBetweenNormalsEst(FXMVECTOR N1, FXMVECTOR N2) noexcept; + XMVECTOR XM_CALLCONV XMVector2AngleBetweenNormals(FXMVECTOR N1, FXMVECTOR N2) noexcept; + XMVECTOR XM_CALLCONV XMVector2AngleBetweenVectors(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVector2LinePointDistance(FXMVECTOR LinePoint1, FXMVECTOR LinePoint2, FXMVECTOR Point) noexcept; + XMVECTOR XM_CALLCONV XMVector2IntersectLine(FXMVECTOR Line1Point1, FXMVECTOR Line1Point2, FXMVECTOR Line2Point1, GXMVECTOR Line2Point2) noexcept; + XMVECTOR XM_CALLCONV XMVector2Transform(FXMVECTOR V, FXMMATRIX M) noexcept; + XMFLOAT4* XM_CALLCONV XMVector2TransformStream(_Out_writes_bytes_(sizeof(XMFLOAT4) + OutputStride * (VectorCount - 1)) XMFLOAT4* pOutputStream, + _In_ size_t OutputStride, + _In_reads_bytes_(sizeof(XMFLOAT2) + InputStride * (VectorCount - 1)) const XMFLOAT2* pInputStream, + _In_ size_t InputStride, _In_ size_t VectorCount, _In_ FXMMATRIX M) noexcept; + XMVECTOR 
XM_CALLCONV XMVector2TransformCoord(FXMVECTOR V, FXMMATRIX M) noexcept; + XMFLOAT2* XM_CALLCONV XMVector2TransformCoordStream(_Out_writes_bytes_(sizeof(XMFLOAT2) + OutputStride * (VectorCount - 1)) XMFLOAT2* pOutputStream, + _In_ size_t OutputStride, + _In_reads_bytes_(sizeof(XMFLOAT2) + InputStride * (VectorCount - 1)) const XMFLOAT2* pInputStream, + _In_ size_t InputStride, _In_ size_t VectorCount, _In_ FXMMATRIX M) noexcept; + XMVECTOR XM_CALLCONV XMVector2TransformNormal(FXMVECTOR V, FXMMATRIX M) noexcept; + XMFLOAT2* XM_CALLCONV XMVector2TransformNormalStream(_Out_writes_bytes_(sizeof(XMFLOAT2) + OutputStride * (VectorCount - 1)) XMFLOAT2* pOutputStream, + _In_ size_t OutputStride, + _In_reads_bytes_(sizeof(XMFLOAT2) + InputStride * (VectorCount - 1)) const XMFLOAT2* pInputStream, + _In_ size_t InputStride, _In_ size_t VectorCount, _In_ FXMMATRIX M) noexcept; + + /**************************************************************************** + * + * 3D vector operations + * + ****************************************************************************/ + + bool XM_CALLCONV XMVector3Equal(FXMVECTOR V1, FXMVECTOR V2) noexcept; + uint32_t XM_CALLCONV XMVector3EqualR(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector3EqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; + uint32_t XM_CALLCONV XMVector3EqualIntR(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector3NearEqual(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR Epsilon) noexcept; + bool XM_CALLCONV XMVector3NotEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector3NotEqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector3Greater(FXMVECTOR V1, FXMVECTOR V2) noexcept; + uint32_t XM_CALLCONV XMVector3GreaterR(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector3GreaterOrEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; + uint32_t XM_CALLCONV XMVector3GreaterOrEqualR(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector3Less(FXMVECTOR V1, 
FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector3LessOrEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector3InBounds(FXMVECTOR V, FXMVECTOR Bounds) noexcept; + + bool XM_CALLCONV XMVector3IsNaN(FXMVECTOR V) noexcept; + bool XM_CALLCONV XMVector3IsInfinite(FXMVECTOR V) noexcept; + + XMVECTOR XM_CALLCONV XMVector3Dot(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVector3Cross(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVector3LengthSq(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector3ReciprocalLengthEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector3ReciprocalLength(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector3LengthEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector3Length(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector3NormalizeEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector3Normalize(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector3ClampLength(FXMVECTOR V, float LengthMin, float LengthMax) noexcept; + XMVECTOR XM_CALLCONV XMVector3ClampLengthV(FXMVECTOR V, FXMVECTOR LengthMin, FXMVECTOR LengthMax) noexcept; + XMVECTOR XM_CALLCONV XMVector3Reflect(FXMVECTOR Incident, FXMVECTOR Normal) noexcept; + XMVECTOR XM_CALLCONV XMVector3Refract(FXMVECTOR Incident, FXMVECTOR Normal, float RefractionIndex) noexcept; + XMVECTOR XM_CALLCONV XMVector3RefractV(FXMVECTOR Incident, FXMVECTOR Normal, FXMVECTOR RefractionIndex) noexcept; + XMVECTOR XM_CALLCONV XMVector3Orthogonal(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector3AngleBetweenNormalsEst(FXMVECTOR N1, FXMVECTOR N2) noexcept; + XMVECTOR XM_CALLCONV XMVector3AngleBetweenNormals(FXMVECTOR N1, FXMVECTOR N2) noexcept; + XMVECTOR XM_CALLCONV XMVector3AngleBetweenVectors(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVector3LinePointDistance(FXMVECTOR LinePoint1, FXMVECTOR LinePoint2, FXMVECTOR Point) noexcept; + void XM_CALLCONV XMVector3ComponentsFromNormal(_Out_ XMVECTOR* pParallel, _Out_ 
XMVECTOR* pPerpendicular, _In_ FXMVECTOR V, _In_ FXMVECTOR Normal) noexcept; + XMVECTOR XM_CALLCONV XMVector3Rotate(FXMVECTOR V, FXMVECTOR RotationQuaternion) noexcept; + XMVECTOR XM_CALLCONV XMVector3InverseRotate(FXMVECTOR V, FXMVECTOR RotationQuaternion) noexcept; + XMVECTOR XM_CALLCONV XMVector3Transform(FXMVECTOR V, FXMMATRIX M) noexcept; + XMFLOAT4* XM_CALLCONV XMVector3TransformStream(_Out_writes_bytes_(sizeof(XMFLOAT4) + OutputStride * (VectorCount - 1)) XMFLOAT4* pOutputStream, + _In_ size_t OutputStride, + _In_reads_bytes_(sizeof(XMFLOAT3) + InputStride * (VectorCount - 1)) const XMFLOAT3* pInputStream, + _In_ size_t InputStride, _In_ size_t VectorCount, _In_ FXMMATRIX M) noexcept; + XMVECTOR XM_CALLCONV XMVector3TransformCoord(FXMVECTOR V, FXMMATRIX M) noexcept; + XMFLOAT3* XM_CALLCONV XMVector3TransformCoordStream(_Out_writes_bytes_(sizeof(XMFLOAT3) + OutputStride * (VectorCount - 1)) XMFLOAT3* pOutputStream, + _In_ size_t OutputStride, + _In_reads_bytes_(sizeof(XMFLOAT3) + InputStride * (VectorCount - 1)) const XMFLOAT3* pInputStream, + _In_ size_t InputStride, _In_ size_t VectorCount, _In_ FXMMATRIX M) noexcept; + XMVECTOR XM_CALLCONV XMVector3TransformNormal(FXMVECTOR V, FXMMATRIX M) noexcept; + XMFLOAT3* XM_CALLCONV XMVector3TransformNormalStream(_Out_writes_bytes_(sizeof(XMFLOAT3) + OutputStride * (VectorCount - 1)) XMFLOAT3* pOutputStream, + _In_ size_t OutputStride, + _In_reads_bytes_(sizeof(XMFLOAT3) + InputStride * (VectorCount - 1)) const XMFLOAT3* pInputStream, + _In_ size_t InputStride, _In_ size_t VectorCount, _In_ FXMMATRIX M) noexcept; + XMVECTOR XM_CALLCONV XMVector3Project(FXMVECTOR V, float ViewportX, float ViewportY, float ViewportWidth, float ViewportHeight, float ViewportMinZ, float ViewportMaxZ, + FXMMATRIX Projection, CXMMATRIX View, CXMMATRIX World) noexcept; + XMFLOAT3* XM_CALLCONV XMVector3ProjectStream(_Out_writes_bytes_(sizeof(XMFLOAT3) + OutputStride * (VectorCount - 1)) XMFLOAT3* pOutputStream, + _In_ size_t OutputStride, + 
_In_reads_bytes_(sizeof(XMFLOAT3) + InputStride * (VectorCount - 1)) const XMFLOAT3* pInputStream, + _In_ size_t InputStride, _In_ size_t VectorCount, + _In_ float ViewportX, _In_ float ViewportY, _In_ float ViewportWidth, _In_ float ViewportHeight, _In_ float ViewportMinZ, _In_ float ViewportMaxZ, + _In_ FXMMATRIX Projection, _In_ CXMMATRIX View, _In_ CXMMATRIX World) noexcept; + XMVECTOR XM_CALLCONV XMVector3Unproject(FXMVECTOR V, float ViewportX, float ViewportY, float ViewportWidth, float ViewportHeight, float ViewportMinZ, float ViewportMaxZ, + FXMMATRIX Projection, CXMMATRIX View, CXMMATRIX World) noexcept; + XMFLOAT3* XM_CALLCONV XMVector3UnprojectStream(_Out_writes_bytes_(sizeof(XMFLOAT3) + OutputStride * (VectorCount - 1)) XMFLOAT3* pOutputStream, + _In_ size_t OutputStride, + _In_reads_bytes_(sizeof(XMFLOAT3) + InputStride * (VectorCount - 1)) const XMFLOAT3* pInputStream, + _In_ size_t InputStride, _In_ size_t VectorCount, + _In_ float ViewportX, _In_ float ViewportY, _In_ float ViewportWidth, _In_ float ViewportHeight, _In_ float ViewportMinZ, _In_ float ViewportMaxZ, + _In_ FXMMATRIX Projection, _In_ CXMMATRIX View, _In_ CXMMATRIX World) noexcept; + + /**************************************************************************** + * + * 4D vector operations + * + ****************************************************************************/ + + bool XM_CALLCONV XMVector4Equal(FXMVECTOR V1, FXMVECTOR V2) noexcept; + uint32_t XM_CALLCONV XMVector4EqualR(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector4EqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; + uint32_t XM_CALLCONV XMVector4EqualIntR(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector4NearEqual(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR Epsilon) noexcept; + bool XM_CALLCONV XMVector4NotEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector4NotEqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector4Greater(FXMVECTOR V1, FXMVECTOR V2) 
noexcept; + uint32_t XM_CALLCONV XMVector4GreaterR(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector4GreaterOrEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; + uint32_t XM_CALLCONV XMVector4GreaterOrEqualR(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector4Less(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector4LessOrEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; + bool XM_CALLCONV XMVector4InBounds(FXMVECTOR V, FXMVECTOR Bounds) noexcept; + + bool XM_CALLCONV XMVector4IsNaN(FXMVECTOR V) noexcept; + bool XM_CALLCONV XMVector4IsInfinite(FXMVECTOR V) noexcept; + + XMVECTOR XM_CALLCONV XMVector4Dot(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVector4Cross(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR V3) noexcept; + XMVECTOR XM_CALLCONV XMVector4LengthSq(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector4ReciprocalLengthEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector4ReciprocalLength(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector4LengthEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector4Length(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector4NormalizeEst(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector4Normalize(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector4ClampLength(FXMVECTOR V, float LengthMin, float LengthMax) noexcept; + XMVECTOR XM_CALLCONV XMVector4ClampLengthV(FXMVECTOR V, FXMVECTOR LengthMin, FXMVECTOR LengthMax) noexcept; + XMVECTOR XM_CALLCONV XMVector4Reflect(FXMVECTOR Incident, FXMVECTOR Normal) noexcept; + XMVECTOR XM_CALLCONV XMVector4Refract(FXMVECTOR Incident, FXMVECTOR Normal, float RefractionIndex) noexcept; + XMVECTOR XM_CALLCONV XMVector4RefractV(FXMVECTOR Incident, FXMVECTOR Normal, FXMVECTOR RefractionIndex) noexcept; + XMVECTOR XM_CALLCONV XMVector4Orthogonal(FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMVector4AngleBetweenNormalsEst(FXMVECTOR N1, FXMVECTOR N2) noexcept; + XMVECTOR XM_CALLCONV XMVector4AngleBetweenNormals(FXMVECTOR N1, 
FXMVECTOR N2) noexcept; + XMVECTOR XM_CALLCONV XMVector4AngleBetweenVectors(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMVector4Transform(FXMVECTOR V, FXMMATRIX M) noexcept; + XMFLOAT4* XM_CALLCONV XMVector4TransformStream(_Out_writes_bytes_(sizeof(XMFLOAT4) + OutputStride * (VectorCount - 1)) XMFLOAT4* pOutputStream, + _In_ size_t OutputStride, + _In_reads_bytes_(sizeof(XMFLOAT4) + InputStride * (VectorCount - 1)) const XMFLOAT4* pInputStream, + _In_ size_t InputStride, _In_ size_t VectorCount, _In_ FXMMATRIX M) noexcept; + + /**************************************************************************** + * + * Matrix operations + * + ****************************************************************************/ + + bool XM_CALLCONV XMMatrixIsNaN(FXMMATRIX M) noexcept; + bool XM_CALLCONV XMMatrixIsInfinite(FXMMATRIX M) noexcept; + bool XM_CALLCONV XMMatrixIsIdentity(FXMMATRIX M) noexcept; + + XMMATRIX XM_CALLCONV XMMatrixMultiply(FXMMATRIX M1, CXMMATRIX M2) noexcept; + XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose(FXMMATRIX M1, CXMMATRIX M2) noexcept; + XMMATRIX XM_CALLCONV XMMatrixTranspose(FXMMATRIX M) noexcept; + XMMATRIX XM_CALLCONV XMMatrixInverse(_Out_opt_ XMVECTOR* pDeterminant, _In_ FXMMATRIX M) noexcept; + XMMATRIX XM_CALLCONV XMMatrixVectorTensorProduct(FXMVECTOR V1, FXMVECTOR V2) noexcept; + XMVECTOR XM_CALLCONV XMMatrixDeterminant(FXMMATRIX M) noexcept; + + _Success_(return) + bool XM_CALLCONV XMMatrixDecompose(_Out_ XMVECTOR* outScale, _Out_ XMVECTOR* outRotQuat, _Out_ XMVECTOR* outTrans, _In_ FXMMATRIX M) noexcept; + + XMMATRIX XM_CALLCONV XMMatrixIdentity() noexcept; + XMMATRIX XM_CALLCONV XMMatrixSet(float m00, float m01, float m02, float m03, + float m10, float m11, float m12, float m13, + float m20, float m21, float m22, float m23, + float m30, float m31, float m32, float m33) noexcept; + XMMATRIX XM_CALLCONV XMMatrixTranslation(float OffsetX, float OffsetY, float OffsetZ) noexcept; + XMMATRIX XM_CALLCONV 
XMMatrixTranslationFromVector(FXMVECTOR Offset) noexcept; + XMMATRIX XM_CALLCONV XMMatrixScaling(float ScaleX, float ScaleY, float ScaleZ) noexcept; + XMMATRIX XM_CALLCONV XMMatrixScalingFromVector(FXMVECTOR Scale) noexcept; + XMMATRIX XM_CALLCONV XMMatrixRotationX(float Angle) noexcept; + XMMATRIX XM_CALLCONV XMMatrixRotationY(float Angle) noexcept; + XMMATRIX XM_CALLCONV XMMatrixRotationZ(float Angle) noexcept; + XMMATRIX XM_CALLCONV XMMatrixRotationRollPitchYaw(float Pitch, float Yaw, float Roll) noexcept; + XMMATRIX XM_CALLCONV XMMatrixRotationRollPitchYawFromVector(FXMVECTOR Angles) noexcept; + XMMATRIX XM_CALLCONV XMMatrixRotationNormal(FXMVECTOR NormalAxis, float Angle) noexcept; + XMMATRIX XM_CALLCONV XMMatrixRotationAxis(FXMVECTOR Axis, float Angle) noexcept; + XMMATRIX XM_CALLCONV XMMatrixRotationQuaternion(FXMVECTOR Quaternion) noexcept; + XMMATRIX XM_CALLCONV XMMatrixTransformation2D(FXMVECTOR ScalingOrigin, float ScalingOrientation, FXMVECTOR Scaling, + FXMVECTOR RotationOrigin, float Rotation, GXMVECTOR Translation) noexcept; + XMMATRIX XM_CALLCONV XMMatrixTransformation(FXMVECTOR ScalingOrigin, FXMVECTOR ScalingOrientationQuaternion, FXMVECTOR Scaling, + GXMVECTOR RotationOrigin, HXMVECTOR RotationQuaternion, HXMVECTOR Translation) noexcept; + XMMATRIX XM_CALLCONV XMMatrixAffineTransformation2D(FXMVECTOR Scaling, FXMVECTOR RotationOrigin, float Rotation, FXMVECTOR Translation) noexcept; + XMMATRIX XM_CALLCONV XMMatrixAffineTransformation(FXMVECTOR Scaling, FXMVECTOR RotationOrigin, FXMVECTOR RotationQuaternion, GXMVECTOR Translation) noexcept; + XMMATRIX XM_CALLCONV XMMatrixReflect(FXMVECTOR ReflectionPlane) noexcept; + XMMATRIX XM_CALLCONV XMMatrixShadow(FXMVECTOR ShadowPlane, FXMVECTOR LightPosition) noexcept; + + XMMATRIX XM_CALLCONV XMMatrixLookAtLH(FXMVECTOR EyePosition, FXMVECTOR FocusPosition, FXMVECTOR UpDirection) noexcept; + XMMATRIX XM_CALLCONV XMMatrixLookAtRH(FXMVECTOR EyePosition, FXMVECTOR FocusPosition, FXMVECTOR UpDirection) 
noexcept; + XMMATRIX XM_CALLCONV XMMatrixLookToLH(FXMVECTOR EyePosition, FXMVECTOR EyeDirection, FXMVECTOR UpDirection) noexcept; + XMMATRIX XM_CALLCONV XMMatrixLookToRH(FXMVECTOR EyePosition, FXMVECTOR EyeDirection, FXMVECTOR UpDirection) noexcept; + XMMATRIX XM_CALLCONV XMMatrixPerspectiveLH(float ViewWidth, float ViewHeight, float NearZ, float FarZ) noexcept; + XMMATRIX XM_CALLCONV XMMatrixPerspectiveRH(float ViewWidth, float ViewHeight, float NearZ, float FarZ) noexcept; + XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovLH(float FovAngleY, float AspectRatio, float NearZ, float FarZ) noexcept; + XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovRH(float FovAngleY, float AspectRatio, float NearZ, float FarZ) noexcept; + XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterLH(float ViewLeft, float ViewRight, float ViewBottom, float ViewTop, float NearZ, float FarZ) noexcept; + XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterRH(float ViewLeft, float ViewRight, float ViewBottom, float ViewTop, float NearZ, float FarZ) noexcept; + XMMATRIX XM_CALLCONV XMMatrixOrthographicLH(float ViewWidth, float ViewHeight, float NearZ, float FarZ) noexcept; + XMMATRIX XM_CALLCONV XMMatrixOrthographicRH(float ViewWidth, float ViewHeight, float NearZ, float FarZ) noexcept; + XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterLH(float ViewLeft, float ViewRight, float ViewBottom, float ViewTop, float NearZ, float FarZ) noexcept; + XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterRH(float ViewLeft, float ViewRight, float ViewBottom, float ViewTop, float NearZ, float FarZ) noexcept; + + + /**************************************************************************** + * + * Quaternion operations + * + ****************************************************************************/ + + bool XM_CALLCONV XMQuaternionEqual(FXMVECTOR Q1, FXMVECTOR Q2) noexcept; + bool XM_CALLCONV XMQuaternionNotEqual(FXMVECTOR Q1, FXMVECTOR Q2) noexcept; + + bool XM_CALLCONV XMQuaternionIsNaN(FXMVECTOR Q) noexcept; + bool 
XM_CALLCONV XMQuaternionIsInfinite(FXMVECTOR Q) noexcept; + bool XM_CALLCONV XMQuaternionIsIdentity(FXMVECTOR Q) noexcept; + + XMVECTOR XM_CALLCONV XMQuaternionDot(FXMVECTOR Q1, FXMVECTOR Q2) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionMultiply(FXMVECTOR Q1, FXMVECTOR Q2) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionLengthSq(FXMVECTOR Q) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionReciprocalLength(FXMVECTOR Q) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionLength(FXMVECTOR Q) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionNormalizeEst(FXMVECTOR Q) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionNormalize(FXMVECTOR Q) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionConjugate(FXMVECTOR Q) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionInverse(FXMVECTOR Q) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionLn(FXMVECTOR Q) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionExp(FXMVECTOR Q) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionSlerp(FXMVECTOR Q0, FXMVECTOR Q1, float t) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionSlerpV(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR T) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionSquad(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, GXMVECTOR Q3, float t) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionSquadV(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, GXMVECTOR Q3, HXMVECTOR T) noexcept; + void XM_CALLCONV XMQuaternionSquadSetup(_Out_ XMVECTOR* pA, _Out_ XMVECTOR* pB, _Out_ XMVECTOR* pC, _In_ FXMVECTOR Q0, _In_ FXMVECTOR Q1, _In_ FXMVECTOR Q2, _In_ GXMVECTOR Q3) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionBaryCentric(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, float f, float g) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionBaryCentricV(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, GXMVECTOR F, HXMVECTOR G) noexcept; + + XMVECTOR XM_CALLCONV XMQuaternionIdentity() noexcept; + XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYaw(float Pitch, float Yaw, float Roll) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYawFromVector(FXMVECTOR Angles) noexcept; + 
XMVECTOR XM_CALLCONV XMQuaternionRotationNormal(FXMVECTOR NormalAxis, float Angle) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionRotationAxis(FXMVECTOR Axis, float Angle) noexcept; + XMVECTOR XM_CALLCONV XMQuaternionRotationMatrix(FXMMATRIX M) noexcept; + + void XM_CALLCONV XMQuaternionToAxisAngle(_Out_ XMVECTOR* pAxis, _Out_ float* pAngle, _In_ FXMVECTOR Q) noexcept; + + /**************************************************************************** + * + * Plane operations + * + ****************************************************************************/ + + bool XM_CALLCONV XMPlaneEqual(FXMVECTOR P1, FXMVECTOR P2) noexcept; + bool XM_CALLCONV XMPlaneNearEqual(FXMVECTOR P1, FXMVECTOR P2, FXMVECTOR Epsilon) noexcept; + bool XM_CALLCONV XMPlaneNotEqual(FXMVECTOR P1, FXMVECTOR P2) noexcept; + + bool XM_CALLCONV XMPlaneIsNaN(FXMVECTOR P) noexcept; + bool XM_CALLCONV XMPlaneIsInfinite(FXMVECTOR P) noexcept; + + XMVECTOR XM_CALLCONV XMPlaneDot(FXMVECTOR P, FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMPlaneDotCoord(FXMVECTOR P, FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMPlaneDotNormal(FXMVECTOR P, FXMVECTOR V) noexcept; + XMVECTOR XM_CALLCONV XMPlaneNormalizeEst(FXMVECTOR P) noexcept; + XMVECTOR XM_CALLCONV XMPlaneNormalize(FXMVECTOR P) noexcept; + XMVECTOR XM_CALLCONV XMPlaneIntersectLine(FXMVECTOR P, FXMVECTOR LinePoint1, FXMVECTOR LinePoint2) noexcept; + void XM_CALLCONV XMPlaneIntersectPlane(_Out_ XMVECTOR* pLinePoint1, _Out_ XMVECTOR* pLinePoint2, _In_ FXMVECTOR P1, _In_ FXMVECTOR P2) noexcept; + XMVECTOR XM_CALLCONV XMPlaneTransform(FXMVECTOR P, FXMMATRIX M) noexcept; + XMFLOAT4* XM_CALLCONV XMPlaneTransformStream(_Out_writes_bytes_(sizeof(XMFLOAT4) + OutputStride * (PlaneCount - 1)) XMFLOAT4* pOutputStream, + _In_ size_t OutputStride, + _In_reads_bytes_(sizeof(XMFLOAT4) + InputStride * (PlaneCount - 1)) const XMFLOAT4* pInputStream, + _In_ size_t InputStride, _In_ size_t PlaneCount, _In_ FXMMATRIX M) noexcept; + + XMVECTOR XM_CALLCONV 
XMPlaneFromPointNormal(FXMVECTOR Point, FXMVECTOR Normal) noexcept; + XMVECTOR XM_CALLCONV XMPlaneFromPoints(FXMVECTOR Point1, FXMVECTOR Point2, FXMVECTOR Point3) noexcept; + + /**************************************************************************** + * + * Color operations + * + ****************************************************************************/ + + bool XM_CALLCONV XMColorEqual(FXMVECTOR C1, FXMVECTOR C2) noexcept; + bool XM_CALLCONV XMColorNotEqual(FXMVECTOR C1, FXMVECTOR C2) noexcept; + bool XM_CALLCONV XMColorGreater(FXMVECTOR C1, FXMVECTOR C2) noexcept; + bool XM_CALLCONV XMColorGreaterOrEqual(FXMVECTOR C1, FXMVECTOR C2) noexcept; + bool XM_CALLCONV XMColorLess(FXMVECTOR C1, FXMVECTOR C2) noexcept; + bool XM_CALLCONV XMColorLessOrEqual(FXMVECTOR C1, FXMVECTOR C2) noexcept; + + bool XM_CALLCONV XMColorIsNaN(FXMVECTOR C) noexcept; + bool XM_CALLCONV XMColorIsInfinite(FXMVECTOR C) noexcept; + + XMVECTOR XM_CALLCONV XMColorNegative(FXMVECTOR C) noexcept; + XMVECTOR XM_CALLCONV XMColorModulate(FXMVECTOR C1, FXMVECTOR C2) noexcept; + XMVECTOR XM_CALLCONV XMColorAdjustSaturation(FXMVECTOR C, float Saturation) noexcept; + XMVECTOR XM_CALLCONV XMColorAdjustContrast(FXMVECTOR C, float Contrast) noexcept; + + XMVECTOR XM_CALLCONV XMColorRGBToHSL(FXMVECTOR rgb) noexcept; + XMVECTOR XM_CALLCONV XMColorHSLToRGB(FXMVECTOR hsl) noexcept; + + XMVECTOR XM_CALLCONV XMColorRGBToHSV(FXMVECTOR rgb) noexcept; + XMVECTOR XM_CALLCONV XMColorHSVToRGB(FXMVECTOR hsv) noexcept; + + XMVECTOR XM_CALLCONV XMColorRGBToYUV(FXMVECTOR rgb) noexcept; + XMVECTOR XM_CALLCONV XMColorYUVToRGB(FXMVECTOR yuv) noexcept; + + XMVECTOR XM_CALLCONV XMColorRGBToYUV_HD(FXMVECTOR rgb) noexcept; + XMVECTOR XM_CALLCONV XMColorYUVToRGB_HD(FXMVECTOR yuv) noexcept; + + XMVECTOR XM_CALLCONV XMColorRGBToYUV_UHD(FXMVECTOR rgb) noexcept; + XMVECTOR XM_CALLCONV XMColorYUVToRGB_UHD(FXMVECTOR yuv) noexcept; + + XMVECTOR XM_CALLCONV XMColorRGBToXYZ(FXMVECTOR rgb) noexcept; + XMVECTOR XM_CALLCONV 
XMColorXYZToRGB(FXMVECTOR xyz) noexcept; + + XMVECTOR XM_CALLCONV XMColorXYZToSRGB(FXMVECTOR xyz) noexcept; + XMVECTOR XM_CALLCONV XMColorSRGBToXYZ(FXMVECTOR srgb) noexcept; + + XMVECTOR XM_CALLCONV XMColorRGBToSRGB(FXMVECTOR rgb) noexcept; + XMVECTOR XM_CALLCONV XMColorSRGBToRGB(FXMVECTOR srgb) noexcept; + + + /**************************************************************************** + * + * Miscellaneous operations + * + ****************************************************************************/ + + bool XMVerifyCPUSupport() noexcept; + + XMVECTOR XM_CALLCONV XMFresnelTerm(FXMVECTOR CosIncidentAngle, FXMVECTOR RefractionIndex) noexcept; + + bool XMScalarNearEqual(float S1, float S2, float Epsilon) noexcept; + float XMScalarModAngle(float Value) noexcept; + + float XMScalarSin(float Value) noexcept; + float XMScalarSinEst(float Value) noexcept; + + float XMScalarCos(float Value) noexcept; + float XMScalarCosEst(float Value) noexcept; + + void XMScalarSinCos(_Out_ float* pSin, _Out_ float* pCos, float Value) noexcept; + void XMScalarSinCosEst(_Out_ float* pSin, _Out_ float* pCos, float Value) noexcept; + + float XMScalarASin(float Value) noexcept; + float XMScalarASinEst(float Value) noexcept; + + float XMScalarACos(float Value) noexcept; + float XMScalarACosEst(float Value) noexcept; + + /**************************************************************************** + * + * Templates + * + ****************************************************************************/ + +#if defined(__XNAMATH_H__) && defined(XMMin) +#undef XMMin +#undef XMMax +#endif + + // Generic min/max helpers: XMMin compares with operator<, XMMax with operator>; both return by value. + template<class T> inline T XMMin(T a, T b) noexcept { return (a < b) ? a : b; } + template<class T> inline T XMMax(T a, T b) noexcept { return (a > b) ? 
a : b; } + + //------------------------------------------------------------------------------ + +#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + +// PermuteHelper internal template (SSE only) + namespace Internal + { + // Slow path fallback for permutes that do not map to a single SSE shuffle opcode. + // Shuffle is the per-lane source index immediate; each Which* flag is true when that lane comes from v2. + template<uint32_t Shuffle, bool WhichX, bool WhichY, bool WhichZ, bool WhichW> struct PermuteHelper + { + static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) noexcept + { + // Lanes whose Which* flag is set take the shuffled v2 value; the others take the shuffled v1 value. + static const XMVECTORU32 selectMask = + { { { + WhichX ? 0xFFFFFFFF : 0, + WhichY ? 0xFFFFFFFF : 0, + WhichZ ? 0xFFFFFFFF : 0, + WhichW ? 0xFFFFFFFF : 0, + } } }; + + XMVECTOR shuffled1 = XM_PERMUTE_PS(v1, Shuffle); + XMVECTOR shuffled2 = XM_PERMUTE_PS(v2, Shuffle); + + XMVECTOR masked1 = _mm_andnot_ps(selectMask, shuffled1); + XMVECTOR masked2 = _mm_and_ps(selectMask, shuffled2); + + return _mm_or_ps(masked1, masked2); + } + }; + + // Fast path for permutes that only read from the first vector. + template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, false, false> + { + static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR) noexcept { return XM_PERMUTE_PS(v1, Shuffle); } + }; + + // Fast path for permutes that only read from the second vector. + template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, true, true> + { + static XMVECTOR XM_CALLCONV Permute(FXMVECTOR, FXMVECTOR v2) noexcept { return XM_PERMUTE_PS(v2, Shuffle); } + }; + + // Fast path for permutes that read XY from the first vector, ZW from the second. + template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, true, true> + { + static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) noexcept { return _mm_shuffle_ps(v1, v2, Shuffle); } + }; + + // Fast path for permutes that read XY from the second vector, ZW from the first. 
+ template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, false, false> + { + static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) noexcept { return _mm_shuffle_ps(v2, v1, Shuffle); } + }; + } + +#endif // _XM_SSE_INTRINSICS_ && !_XM_NO_INTRINSICS_ + + // General permute template: indices 0-3 select X/Y/Z/W of V1, indices 4-7 select X/Y/Z/W of V2. + template<uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW> + inline XMVECTOR XM_CALLCONV XMVectorPermute(FXMVECTOR V1, FXMVECTOR V2) noexcept + { + static_assert(PermuteX <= 7, "PermuteX template parameter out of range"); + static_assert(PermuteY <= 7, "PermuteY template parameter out of range"); + static_assert(PermuteZ <= 7, "PermuteZ template parameter out of range"); + static_assert(PermuteW <= 7, "PermuteW template parameter out of range"); + +#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + constexpr uint32_t Shuffle = _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3); + + constexpr bool WhichX = PermuteX > 3; + constexpr bool WhichY = PermuteY > 3; + constexpr bool WhichZ = PermuteZ > 3; + constexpr bool WhichW = PermuteW > 3; + + return Internal::PermuteHelper<Shuffle, WhichX, WhichY, WhichZ, WhichW>::Permute(V1, V2); +#else + + return XMVectorPermute(V1, V2, PermuteX, PermuteY, PermuteZ, PermuteW); + +#endif + } + + // Special-case permute templates + template<> inline constexpr XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 2, 3>(FXMVECTOR V1, FXMVECTOR) noexcept { return V1; } + template<> inline constexpr XMVECTOR XM_CALLCONV XMVectorPermute<4, 5, 6, 7>(FXMVECTOR, FXMVECTOR V2) noexcept { return V2; } + +#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 4, 5>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_movelh_ps(V1, V2); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<6, 7, 2, 3>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_movehl_ps(V1, V2); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 4, 1, 5>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_unpacklo_ps(V1, V2); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<2, 6, 3, 7>(FXMVECTOR
V1, FXMVECTOR V2) noexcept { return _mm_unpackhi_ps(V1, V2); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<2, 3, 6, 7>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_castpd_ps(_mm_unpackhi_pd(_mm_castps_pd(V1), _mm_castps_pd(V2))); } +#endif + +#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4, 1, 2, 3>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x1); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 5, 2, 3>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x2); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4, 5, 2, 3>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x3); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 6, 3>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x4); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4, 1, 6, 3>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x5); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 5, 6, 3>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x6); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4, 5, 6, 3>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x7); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 2, 7>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x8); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4, 1, 2, 7>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x9); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 5, 2, 7>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0xA); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4, 5, 2, 7>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0xB); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 6, 7>(FXMVECTOR V1, 
FXMVECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0xC); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4, 1, 6, 7>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0xD); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 5, 6, 7>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0xE); } +#endif + +#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + + // If the indices are all in the range 0-3 or 4-7, then use XMVectorSwizzle instead + // The mirror cases are not spelled out here as the programmer can always swap the arguments + // (i.e. prefer permutes where the X element comes from the V1 vector instead of the V2 vector) + + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 4, 5>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vcombine_f32(vget_low_f32(V1), vget_low_f32(V2)); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<1, 0, 4, 5>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vcombine_f32(vrev64_f32(vget_low_f32(V1)), vget_low_f32(V2)); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 5, 4>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vcombine_f32(vget_low_f32(V1), vrev64_f32(vget_low_f32(V2))); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<1, 0, 5, 4>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vcombine_f32(vrev64_f32(vget_low_f32(V1)), vrev64_f32(vget_low_f32(V2))); } + + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<2, 3, 6, 7>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vcombine_f32(vget_high_f32(V1), vget_high_f32(V2)); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<3, 2, 6, 7>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vcombine_f32(vrev64_f32(vget_high_f32(V1)), vget_high_f32(V2)); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<2, 3, 7, 6>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vcombine_f32(vget_high_f32(V1), vrev64_f32(vget_high_f32(V2))); } + template<> inline XMVECTOR 
XM_CALLCONV XMVectorPermute<3, 2, 7, 6>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vcombine_f32(vrev64_f32(vget_high_f32(V1)), vrev64_f32(vget_high_f32(V2))); } + + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 6, 7>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vcombine_f32(vget_low_f32(V1), vget_high_f32(V2)); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<1, 0, 6, 7>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vcombine_f32(vrev64_f32(vget_low_f32(V1)), vget_high_f32(V2)); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 7, 6>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vcombine_f32(vget_low_f32(V1), vrev64_f32(vget_high_f32(V2))); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<1, 0, 7, 6>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vcombine_f32(vrev64_f32(vget_low_f32(V1)), vrev64_f32(vget_high_f32(V2))); } + + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<3, 2, 4, 5>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vcombine_f32(vrev64_f32(vget_high_f32(V1)), vget_low_f32(V2)); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<2, 3, 5, 4>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vcombine_f32(vget_high_f32(V1), vrev64_f32(vget_low_f32(V2))); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<3, 2, 5, 4>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vcombine_f32(vrev64_f32(vget_high_f32(V1)), vrev64_f32(vget_low_f32(V2))); } + + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 4, 2, 6>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vtrnq_f32(V1, V2).val[0]; } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<1, 5, 3, 7>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vtrnq_f32(V1, V2).val[1]; } + + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 4, 1, 5>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vzipq_f32(V1, V2).val[0]; } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<2, 6, 3, 7>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return 
vzipq_f32(V1, V2).val[1]; } + + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 2, 4, 6>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vuzpq_f32(V1, V2).val[0]; } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<1, 3, 5, 7>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vuzpq_f32(V1, V2).val[1]; } + + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<1, 2, 3, 4>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vextq_f32(V1, V2, 1); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<2, 3, 4, 5>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vextq_f32(V1, V2, 2); } + template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<3, 4, 5, 6>(FXMVECTOR V1, FXMVECTOR V2) noexcept { return vextq_f32(V1, V2, 3); } + +#endif // _XM_ARM_NEON_INTRINSICS_ && !_XM_NO_INTRINSICS_ + + //------------------------------------------------------------------------------ + + // General swizzle template + template + inline XMVECTOR XM_CALLCONV XMVectorSwizzle(FXMVECTOR V) noexcept + { + static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range"); + static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range"); + static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range"); + static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range"); + +#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + return XM_PERMUTE_PS(V, _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX)); +#else + + return XMVectorSwizzle(V, SwizzleX, SwizzleY, SwizzleZ, SwizzleW); + +#endif + } + + // Specialized swizzles + template<> inline constexpr XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 1, 2, 3>(FXMVECTOR V) noexcept { return V; } + +#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 1, 0, 1>(FXMVECTOR V) noexcept { return _mm_movelh_ps(V, V); } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<2, 3, 2, 3>(FXMVECTOR V) noexcept { return _mm_movehl_ps(V, V); } + 
template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 0, 1, 1>(FXMVECTOR V) noexcept { return _mm_unpacklo_ps(V, V); } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<2, 2, 3, 3>(FXMVECTOR V) noexcept { return _mm_unpackhi_ps(V, V); } +#endif + +#if defined(_XM_SSE3_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 0, 2, 2>(FXMVECTOR V) noexcept { return _mm_moveldup_ps(V); } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 1, 3, 3>(FXMVECTOR V) noexcept { return _mm_movehdup_ps(V); } +#endif + +#if defined(_XM_AVX2_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) && defined(_XM_FAVOR_INTEL_) + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 0, 0, 0>(FXMVECTOR V) noexcept { return _mm_broadcastss_ps(V); } +#endif + +#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 0, 0, 0>(FXMVECTOR V) noexcept { return vdupq_lane_f32(vget_low_f32(V), 0); } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 1, 1, 1>(FXMVECTOR V) noexcept { return vdupq_lane_f32(vget_low_f32(V), 1); } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<2, 2, 2, 2>(FXMVECTOR V) noexcept { return vdupq_lane_f32(vget_high_f32(V), 0); } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<3, 3, 3, 3>(FXMVECTOR V) noexcept { return vdupq_lane_f32(vget_high_f32(V), 1); } + + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 0, 3, 2>(FXMVECTOR V) noexcept { return vrev64q_f32(V); } + + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 1, 0, 1>(FXMVECTOR V) noexcept { float32x2_t vt = vget_low_f32(V); return vcombine_f32(vt, vt); } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<2, 3, 2, 3>(FXMVECTOR V) noexcept { float32x2_t vt = vget_high_f32(V); return vcombine_f32(vt, vt); } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 0, 1, 0>(FXMVECTOR V) noexcept { float32x2_t vt = 
vrev64_f32(vget_low_f32(V)); return vcombine_f32(vt, vt); } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<3, 2, 3, 2>(FXMVECTOR V) noexcept { float32x2_t vt = vrev64_f32(vget_high_f32(V)); return vcombine_f32(vt, vt); } + + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 1, 3, 2>(FXMVECTOR V) noexcept { return vcombine_f32(vget_low_f32(V), vrev64_f32(vget_high_f32(V))); } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 0, 2, 3>(FXMVECTOR V) noexcept { return vcombine_f32(vrev64_f32(vget_low_f32(V)), vget_high_f32(V)); } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<2, 3, 1, 0>(FXMVECTOR V) noexcept { return vcombine_f32(vget_high_f32(V), vrev64_f32(vget_low_f32(V))); } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<3, 2, 0, 1>(FXMVECTOR V) noexcept { return vcombine_f32(vrev64_f32(vget_high_f32(V)), vget_low_f32(V)); } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<3, 2, 1, 0>(FXMVECTOR V) noexcept { return vcombine_f32(vrev64_f32(vget_high_f32(V)), vrev64_f32(vget_low_f32(V))); } + + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 0, 2, 2>(FXMVECTOR V) noexcept { return vtrnq_f32(V, V).val[0]; } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 1, 3, 3>(FXMVECTOR V) noexcept { return vtrnq_f32(V, V).val[1]; } + + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 0, 1, 1>(FXMVECTOR V) noexcept { return vzipq_f32(V, V).val[0]; } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<2, 2, 3, 3>(FXMVECTOR V) noexcept { return vzipq_f32(V, V).val[1]; } + + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 2, 0, 2>(FXMVECTOR V) noexcept { return vuzpq_f32(V, V).val[0]; } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 3, 1, 3>(FXMVECTOR V) noexcept { return vuzpq_f32(V, V).val[1]; } + + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 2, 3, 0>(FXMVECTOR V) noexcept { return vextq_f32(V, V, 1); } + template<> inline XMVECTOR XM_CALLCONV 
XMVectorSwizzle<2, 3, 0, 1>(FXMVECTOR V) noexcept { return vextq_f32(V, V, 2); } + template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<3, 0, 1, 2>(FXMVECTOR V) noexcept { return vextq_f32(V, V, 3); } + +#endif // _XM_ARM_NEON_INTRINSICS_ && !_XM_NO_INTRINSICS_ + + //------------------------------------------------------------------------------ + + template + inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2) noexcept + { + static_assert(Elements < 4, "Elements template parameter out of range"); + return XMVectorPermute(V1, V2); + } + + template + inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V) noexcept + { + static_assert(Elements < 4, "Elements template parameter out of range"); + return XMVectorSwizzle(V); + } + + template + inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V) noexcept + { + static_assert(Elements < 4, "Elements template parameter out of range"); + return XMVectorSwizzle<(4 - Elements) & 3, (5 - Elements) & 3, (6 - Elements) & 3, (7 - Elements) & 3>(V); + } + + template + inline XMVECTOR XM_CALLCONV XMVectorInsert(FXMVECTOR VD, FXMVECTOR VS) noexcept + { + XMVECTOR Control = XMVectorSelectControl(Select0 & 1, Select1 & 1, Select2 & 1, Select3 & 1); + return XMVectorSelect(VD, XMVectorRotateLeft(VS), Control); + } + + /**************************************************************************** + * + * Globals + * + ****************************************************************************/ + + // The purpose of the following global constants is to prevent redundant + // reloading of the constants when they are referenced by more than one + // separate inline math routine called within the same function. Declaring + // a constant locally within a routine is sufficient to prevent redundant + // reloads of that constant when that single routine is called multiple + // times in a function, but if the constant is used (and declared) in a + // separate math routine it would be reloaded. 
+ +#ifndef XMGLOBALCONST +#if defined(__GNUC__) && !defined(__MINGW32__) +#define XMGLOBALCONST extern const __attribute__((weak)) +#else +#define XMGLOBALCONST extern const __declspec(selectany) +#endif +#endif + + XMGLOBALCONST XMVECTORF32 g_XMSinCoefficients0 = { { { -0.16666667f, +0.0083333310f, -0.00019840874f, +2.7525562e-06f } } }; + XMGLOBALCONST XMVECTORF32 g_XMSinCoefficients1 = { { { -2.3889859e-08f, -0.16665852f /*Est1*/, +0.0083139502f /*Est2*/, -0.00018524670f /*Est3*/ } } }; + XMGLOBALCONST XMVECTORF32 g_XMCosCoefficients0 = { { { -0.5f, +0.041666638f, -0.0013888378f, +2.4760495e-05f } } }; + XMGLOBALCONST XMVECTORF32 g_XMCosCoefficients1 = { { { -2.6051615e-07f, -0.49992746f /*Est1*/, +0.041493919f /*Est2*/, -0.0012712436f /*Est3*/ } } }; + XMGLOBALCONST XMVECTORF32 g_XMTanCoefficients0 = { { { 1.0f, 0.333333333f, 0.133333333f, 5.396825397e-2f } } }; + XMGLOBALCONST XMVECTORF32 g_XMTanCoefficients1 = { { { 2.186948854e-2f, 8.863235530e-3f, 3.592128167e-3f, 1.455834485e-3f } } }; + XMGLOBALCONST XMVECTORF32 g_XMTanCoefficients2 = { { { 5.900274264e-4f, 2.391290764e-4f, 9.691537707e-5f, 3.927832950e-5f } } }; + XMGLOBALCONST XMVECTORF32 g_XMArcCoefficients0 = { { { +1.5707963050f, -0.2145988016f, +0.0889789874f, -0.0501743046f } } }; + XMGLOBALCONST XMVECTORF32 g_XMArcCoefficients1 = { { { +0.0308918810f, -0.0170881256f, +0.0066700901f, -0.0012624911f } } }; + XMGLOBALCONST XMVECTORF32 g_XMATanCoefficients0 = { { { -0.3333314528f, +0.1999355085f, -0.1420889944f, +0.1065626393f } } }; + XMGLOBALCONST XMVECTORF32 g_XMATanCoefficients1 = { { { -0.0752896400f, +0.0429096138f, -0.0161657367f, +0.0028662257f } } }; + XMGLOBALCONST XMVECTORF32 g_XMATanEstCoefficients0 = { { { +0.999866f, +0.999866f, +0.999866f, +0.999866f } } }; + XMGLOBALCONST XMVECTORF32 g_XMATanEstCoefficients1 = { { { -0.3302995f, +0.180141f, -0.085133f, +0.0208351f } } }; + XMGLOBALCONST XMVECTORF32 g_XMTanEstCoefficients = { { { 2.484f, -1.954923183e-1f, 2.467401101f, XM_1DIVPI } } }; 
+ XMGLOBALCONST XMVECTORF32 g_XMArcEstCoefficients = { { { +1.5707288f, -0.2121144f, +0.0742610f, -0.0187293f } } }; + XMGLOBALCONST XMVECTORF32 g_XMPiConstants0 = { { { XM_PI, XM_2PI, XM_1DIVPI, XM_1DIV2PI } } }; + XMGLOBALCONST XMVECTORF32 g_XMIdentityR0 = { { { 1.0f, 0.0f, 0.0f, 0.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMIdentityR1 = { { { 0.0f, 1.0f, 0.0f, 0.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMIdentityR2 = { { { 0.0f, 0.0f, 1.0f, 0.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMIdentityR3 = { { { 0.0f, 0.0f, 0.0f, 1.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR0 = { { { -1.0f, 0.0f, 0.0f, 0.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR1 = { { { 0.0f, -1.0f, 0.0f, 0.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR2 = { { { 0.0f, 0.0f, -1.0f, 0.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR3 = { { { 0.0f, 0.0f, 0.0f, -1.0f } } }; + XMGLOBALCONST XMVECTORU32 g_XMNegativeZero = { { { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } } }; + XMGLOBALCONST XMVECTORU32 g_XMNegate3 = { { { 0x80000000, 0x80000000, 0x80000000, 0x00000000 } } }; + XMGLOBALCONST XMVECTORU32 g_XMMaskXY = { { { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 } } }; + XMGLOBALCONST XMVECTORU32 g_XMMask3 = { { { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 } } }; + XMGLOBALCONST XMVECTORU32 g_XMMaskX = { { { 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 } } }; + XMGLOBALCONST XMVECTORU32 g_XMMaskY = { { { 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 } } }; + XMGLOBALCONST XMVECTORU32 g_XMMaskZ = { { { 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 } } }; + XMGLOBALCONST XMVECTORU32 g_XMMaskW = { { { 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF } } }; + XMGLOBALCONST XMVECTORF32 g_XMOne = { { { 1.0f, 1.0f, 1.0f, 1.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMOne3 = { { { 1.0f, 1.0f, 1.0f, 0.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMZero = { { { 0.0f, 0.0f, 0.0f, 0.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMTwo = { { { 2.f, 2.f, 2.f, 2.f } } }; + XMGLOBALCONST 
XMVECTORF32 g_XMFour = { { { 4.f, 4.f, 4.f, 4.f } } }; + XMGLOBALCONST XMVECTORF32 g_XMSix = { { { 6.f, 6.f, 6.f, 6.f } } }; + XMGLOBALCONST XMVECTORF32 g_XMNegativeOne = { { { -1.0f, -1.0f, -1.0f, -1.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMOneHalf = { { { 0.5f, 0.5f, 0.5f, 0.5f } } }; + XMGLOBALCONST XMVECTORF32 g_XMNegativeOneHalf = { { { -0.5f, -0.5f, -0.5f, -0.5f } } }; + XMGLOBALCONST XMVECTORF32 g_XMNegativeTwoPi = { { { -XM_2PI, -XM_2PI, -XM_2PI, -XM_2PI } } }; + XMGLOBALCONST XMVECTORF32 g_XMNegativePi = { { { -XM_PI, -XM_PI, -XM_PI, -XM_PI } } }; + XMGLOBALCONST XMVECTORF32 g_XMHalfPi = { { { XM_PIDIV2, XM_PIDIV2, XM_PIDIV2, XM_PIDIV2 } } }; + XMGLOBALCONST XMVECTORF32 g_XMPi = { { { XM_PI, XM_PI, XM_PI, XM_PI } } }; + XMGLOBALCONST XMVECTORF32 g_XMReciprocalPi = { { { XM_1DIVPI, XM_1DIVPI, XM_1DIVPI, XM_1DIVPI } } }; + XMGLOBALCONST XMVECTORF32 g_XMTwoPi = { { { XM_2PI, XM_2PI, XM_2PI, XM_2PI } } }; + XMGLOBALCONST XMVECTORF32 g_XMReciprocalTwoPi = { { { XM_1DIV2PI, XM_1DIV2PI, XM_1DIV2PI, XM_1DIV2PI } } }; + XMGLOBALCONST XMVECTORF32 g_XMEpsilon = { { { 1.192092896e-7f, 1.192092896e-7f, 1.192092896e-7f, 1.192092896e-7f } } }; + XMGLOBALCONST XMVECTORI32 g_XMInfinity = { { { 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000 } } }; + XMGLOBALCONST XMVECTORI32 g_XMQNaN = { { { 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000 } } }; + XMGLOBALCONST XMVECTORI32 g_XMQNaNTest = { { { 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF } } }; + XMGLOBALCONST XMVECTORI32 g_XMAbsMask = { { { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF } } }; + XMGLOBALCONST XMVECTORI32 g_XMFltMin = { { { 0x00800000, 0x00800000, 0x00800000, 0x00800000 } } }; + XMGLOBALCONST XMVECTORI32 g_XMFltMax = { { { 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF } } }; + XMGLOBALCONST XMVECTORU32 g_XMNegOneMask = { { { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF } } }; + XMGLOBALCONST XMVECTORU32 g_XMMaskA8R8G8B8 = { { { 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000 } } }; + XMGLOBALCONST 
XMVECTORU32 g_XMFlipA8R8G8B8 = { { { 0x00000000, 0x00000000, 0x00000000, 0x80000000 } } }; + XMGLOBALCONST XMVECTORF32 g_XMFixAA8R8G8B8 = { { { 0.0f, 0.0f, 0.0f, float(0x80000000U) } } }; + XMGLOBALCONST XMVECTORF32 g_XMNormalizeA8R8G8B8 = { { { 1.0f / (255.0f * float(0x10000)), 1.0f / (255.0f * float(0x100)), 1.0f / 255.0f, 1.0f / (255.0f * float(0x1000000)) } } }; + XMGLOBALCONST XMVECTORU32 g_XMMaskA2B10G10R10 = { { { 0x000003FF, 0x000FFC00, 0x3FF00000, 0xC0000000 } } }; + XMGLOBALCONST XMVECTORU32 g_XMFlipA2B10G10R10 = { { { 0x00000200, 0x00080000, 0x20000000, 0x80000000 } } }; + XMGLOBALCONST XMVECTORF32 g_XMFixAA2B10G10R10 = { { { -512.0f, -512.0f * float(0x400), -512.0f * float(0x100000), float(0x80000000U) } } }; + XMGLOBALCONST XMVECTORF32 g_XMNormalizeA2B10G10R10 = { { { 1.0f / 511.0f, 1.0f / (511.0f * float(0x400)), 1.0f / (511.0f * float(0x100000)), 1.0f / (3.0f * float(0x40000000)) } } }; + XMGLOBALCONST XMVECTORU32 g_XMMaskX16Y16 = { { { 0x0000FFFF, 0xFFFF0000, 0x00000000, 0x00000000 } } }; + XMGLOBALCONST XMVECTORI32 g_XMFlipX16Y16 = { { { 0x00008000, 0x00000000, 0x00000000, 0x00000000 } } }; + XMGLOBALCONST XMVECTORF32 g_XMFixX16Y16 = { { { -32768.0f, 0.0f, 0.0f, 0.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMNormalizeX16Y16 = { { { 1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f } } }; + XMGLOBALCONST XMVECTORU32 g_XMMaskX16Y16Z16W16 = { { { 0x0000FFFF, 0x0000FFFF, 0xFFFF0000, 0xFFFF0000 } } }; + XMGLOBALCONST XMVECTORI32 g_XMFlipX16Y16Z16W16 = { { { 0x00008000, 0x00008000, 0x00000000, 0x00000000 } } }; + XMGLOBALCONST XMVECTORF32 g_XMFixX16Y16Z16W16 = { { { -32768.0f, -32768.0f, 0.0f, 0.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMNormalizeX16Y16Z16W16 = { { { 1.0f / 32767.0f, 1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 1.0f / (32767.0f * 65536.0f) } } }; + XMGLOBALCONST XMVECTORF32 g_XMNoFraction = { { { 8388608.0f, 8388608.0f, 8388608.0f, 8388608.0f } } }; + XMGLOBALCONST XMVECTORI32 g_XMMaskByte = { { { 0x000000FF, 0x000000FF, 
0x000000FF, 0x000000FF } } }; + XMGLOBALCONST XMVECTORF32 g_XMNegateX = { { { -1.0f, 1.0f, 1.0f, 1.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMNegateY = { { { 1.0f, -1.0f, 1.0f, 1.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMNegateZ = { { { 1.0f, 1.0f, -1.0f, 1.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMNegateW = { { { 1.0f, 1.0f, 1.0f, -1.0f } } }; + XMGLOBALCONST XMVECTORU32 g_XMSelect0101 = { { { XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_1 } } }; + XMGLOBALCONST XMVECTORU32 g_XMSelect1010 = { { { XM_SELECT_1, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0 } } }; + XMGLOBALCONST XMVECTORI32 g_XMOneHalfMinusEpsilon = { { { 0x3EFFFFFD, 0x3EFFFFFD, 0x3EFFFFFD, 0x3EFFFFFD } } }; + XMGLOBALCONST XMVECTORU32 g_XMSelect1000 = { { { XM_SELECT_1, XM_SELECT_0, XM_SELECT_0, XM_SELECT_0 } } }; + XMGLOBALCONST XMVECTORU32 g_XMSelect1100 = { { { XM_SELECT_1, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0 } } }; + XMGLOBALCONST XMVECTORU32 g_XMSelect1110 = { { { XM_SELECT_1, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0 } } }; + XMGLOBALCONST XMVECTORU32 g_XMSelect1011 = { { { XM_SELECT_1, XM_SELECT_0, XM_SELECT_1, XM_SELECT_1 } } }; + XMGLOBALCONST XMVECTORF32 g_XMFixupY16 = { { { 1.0f, 1.0f / 65536.0f, 0.0f, 0.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMFixupY16W16 = { { { 1.0f, 1.0f, 1.0f / 65536.0f, 1.0f / 65536.0f } } }; + XMGLOBALCONST XMVECTORU32 g_XMFlipY = { { { 0, 0x80000000, 0, 0 } } }; + XMGLOBALCONST XMVECTORU32 g_XMFlipZ = { { { 0, 0, 0x80000000, 0 } } }; + XMGLOBALCONST XMVECTORU32 g_XMFlipW = { { { 0, 0, 0, 0x80000000 } } }; + XMGLOBALCONST XMVECTORU32 g_XMFlipYZ = { { { 0, 0x80000000, 0x80000000, 0 } } }; + XMGLOBALCONST XMVECTORU32 g_XMFlipZW = { { { 0, 0, 0x80000000, 0x80000000 } } }; + XMGLOBALCONST XMVECTORU32 g_XMFlipYW = { { { 0, 0x80000000, 0, 0x80000000 } } }; + XMGLOBALCONST XMVECTORI32 g_XMMaskDec4 = { { { 0x3FF, 0x3FF << 10, 0x3FF << 20, static_cast(0xC0000000) } } }; + XMGLOBALCONST XMVECTORI32 g_XMXorDec4 = { { { 0x200, 0x200 << 10, 0x200 << 20, 0 } } }; + XMGLOBALCONST 
XMVECTORF32 g_XMAddUDec4 = { { { 0, 0, 0, 32768.0f * 65536.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMAddDec4 = { { { -512.0f, -512.0f * 1024.0f, -512.0f * 1024.0f * 1024.0f, 0 } } }; + XMGLOBALCONST XMVECTORF32 g_XMMulDec4 = { { { 1.0f, 1.0f / 1024.0f, 1.0f / (1024.0f * 1024.0f), 1.0f / (1024.0f * 1024.0f * 1024.0f) } } }; + XMGLOBALCONST XMVECTORU32 g_XMMaskByte4 = { { { 0xFF, 0xFF00, 0xFF0000, 0xFF000000 } } }; + XMGLOBALCONST XMVECTORI32 g_XMXorByte4 = { { { 0x80, 0x8000, 0x800000, 0x00000000 } } }; + XMGLOBALCONST XMVECTORF32 g_XMAddByte4 = { { { -128.0f, -128.0f * 256.0f, -128.0f * 65536.0f, 0 } } }; + XMGLOBALCONST XMVECTORF32 g_XMFixUnsigned = { { { 32768.0f * 65536.0f, 32768.0f * 65536.0f, 32768.0f * 65536.0f, 32768.0f * 65536.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMMaxInt = { { { 65536.0f * 32768.0f - 128.0f, 65536.0f * 32768.0f - 128.0f, 65536.0f * 32768.0f - 128.0f, 65536.0f * 32768.0f - 128.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMMaxUInt = { { { 65536.0f * 65536.0f - 256.0f, 65536.0f * 65536.0f - 256.0f, 65536.0f * 65536.0f - 256.0f, 65536.0f * 65536.0f - 256.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMUnsignedFix = { { { 32768.0f * 65536.0f, 32768.0f * 65536.0f, 32768.0f * 65536.0f, 32768.0f * 65536.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMsrgbScale = { { { 12.92f, 12.92f, 12.92f, 1.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMsrgbA = { { { 0.055f, 0.055f, 0.055f, 0.0f } } }; + XMGLOBALCONST XMVECTORF32 g_XMsrgbA1 = { { { 1.055f, 1.055f, 1.055f, 1.0f } } }; + XMGLOBALCONST XMVECTORI32 g_XMExponentBias = { { { 127, 127, 127, 127 } } }; + XMGLOBALCONST XMVECTORI32 g_XMSubnormalExponent = { { { -126, -126, -126, -126 } } }; + XMGLOBALCONST XMVECTORI32 g_XMNumTrailing = { { { 23, 23, 23, 23 } } }; + XMGLOBALCONST XMVECTORI32 g_XMMinNormal = { { { 0x00800000, 0x00800000, 0x00800000, 0x00800000 } } }; + XMGLOBALCONST XMVECTORU32 g_XMNegInfinity = { { { 0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000 } } }; + XMGLOBALCONST XMVECTORU32 g_XMNegQNaN = { { { 
0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000 } } }; + XMGLOBALCONST XMVECTORI32 g_XMBin128 = { { { 0x43000000, 0x43000000, 0x43000000, 0x43000000 } } }; + XMGLOBALCONST XMVECTORU32 g_XMBinNeg150 = { { { 0xC3160000, 0xC3160000, 0xC3160000, 0xC3160000 } } }; + XMGLOBALCONST XMVECTORI32 g_XM253 = { { { 253, 253, 253, 253 } } }; + XMGLOBALCONST XMVECTORF32 g_XMExpEst1 = { { { -6.93147182e-1f, -6.93147182e-1f, -6.93147182e-1f, -6.93147182e-1f } } }; + XMGLOBALCONST XMVECTORF32 g_XMExpEst2 = { { { +2.40226462e-1f, +2.40226462e-1f, +2.40226462e-1f, +2.40226462e-1f } } }; + XMGLOBALCONST XMVECTORF32 g_XMExpEst3 = { { { -5.55036440e-2f, -5.55036440e-2f, -5.55036440e-2f, -5.55036440e-2f } } }; + XMGLOBALCONST XMVECTORF32 g_XMExpEst4 = { { { +9.61597636e-3f, +9.61597636e-3f, +9.61597636e-3f, +9.61597636e-3f } } }; + XMGLOBALCONST XMVECTORF32 g_XMExpEst5 = { { { -1.32823968e-3f, -1.32823968e-3f, -1.32823968e-3f, -1.32823968e-3f } } }; + XMGLOBALCONST XMVECTORF32 g_XMExpEst6 = { { { +1.47491097e-4f, +1.47491097e-4f, +1.47491097e-4f, +1.47491097e-4f } } }; + XMGLOBALCONST XMVECTORF32 g_XMExpEst7 = { { { -1.08635004e-5f, -1.08635004e-5f, -1.08635004e-5f, -1.08635004e-5f } } }; + XMGLOBALCONST XMVECTORF32 g_XMLogEst0 = { { { +1.442693f, +1.442693f, +1.442693f, +1.442693f } } }; + XMGLOBALCONST XMVECTORF32 g_XMLogEst1 = { { { -0.721242f, -0.721242f, -0.721242f, -0.721242f } } }; + XMGLOBALCONST XMVECTORF32 g_XMLogEst2 = { { { +0.479384f, +0.479384f, +0.479384f, +0.479384f } } }; + XMGLOBALCONST XMVECTORF32 g_XMLogEst3 = { { { -0.350295f, -0.350295f, -0.350295f, -0.350295f } } }; + XMGLOBALCONST XMVECTORF32 g_XMLogEst4 = { { { +0.248590f, +0.248590f, +0.248590f, +0.248590f } } }; + XMGLOBALCONST XMVECTORF32 g_XMLogEst5 = { { { -0.145700f, -0.145700f, -0.145700f, -0.145700f } } }; + XMGLOBALCONST XMVECTORF32 g_XMLogEst6 = { { { +0.057148f, +0.057148f, +0.057148f, +0.057148f } } }; + XMGLOBALCONST XMVECTORF32 g_XMLogEst7 = { { { -0.010578f, -0.010578f, -0.010578f, -0.010578f } } }; 
+ XMGLOBALCONST XMVECTORF32 g_XMLgE = { { { +1.442695f, +1.442695f, +1.442695f, +1.442695f } } }; + XMGLOBALCONST XMVECTORF32 g_XMInvLgE = { { { +6.93147182e-1f, +6.93147182e-1f, +6.93147182e-1f, +6.93147182e-1f } } }; + XMGLOBALCONST XMVECTORF32 g_XMLg10 = { { { +3.321928f, +3.321928f, +3.321928f, +3.321928f } } }; + XMGLOBALCONST XMVECTORF32 g_XMInvLg10 = { { { +3.010299956e-1f, +3.010299956e-1f, +3.010299956e-1f, +3.010299956e-1f } } }; + XMGLOBALCONST XMVECTORF32 g_UByteMax = { { { 255.0f, 255.0f, 255.0f, 255.0f } } }; + XMGLOBALCONST XMVECTORF32 g_ByteMin = { { { -127.0f, -127.0f, -127.0f, -127.0f } } }; + XMGLOBALCONST XMVECTORF32 g_ByteMax = { { { 127.0f, 127.0f, 127.0f, 127.0f } } }; + XMGLOBALCONST XMVECTORF32 g_ShortMin = { { { -32767.0f, -32767.0f, -32767.0f, -32767.0f } } }; + XMGLOBALCONST XMVECTORF32 g_ShortMax = { { { 32767.0f, 32767.0f, 32767.0f, 32767.0f } } }; + XMGLOBALCONST XMVECTORF32 g_UShortMax = { { { 65535.0f, 65535.0f, 65535.0f, 65535.0f } } }; + + /**************************************************************************** + * + * Implementation + * + ****************************************************************************/ + +#pragma warning(push) +#pragma warning(disable:4068 4214 4204 4365 4616 4640 6001 6101) + // C4068/4616: ignore unknown pragmas + // C4214/4204: nonstandard extension used + // C4365/4640: Off by default noise + // C6001/6101: False positives + +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes") +#pragma prefast(disable : 26495, "Union initialization confuses /analyze") +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wundefined-reinterpret-cast" +#endif + +//------------------------------------------------------------------------------ + + inline XMVECTOR XM_CALLCONV XMVectorSetBinaryConstant(uint32_t C0, uint32_t C1, uint32_t C2, uint32_t C3) noexcept + { +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 vResult; + 
vResult.u[0] = (0 - (C0 & 1)) & 0x3F800000; + vResult.u[1] = (0 - (C1 & 1)) & 0x3F800000; + vResult.u[2] = (0 - (C2 & 1)) & 0x3F800000; + vResult.u[3] = (0 - (C3 & 1)) & 0x3F800000; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTORU32 vResult; + vResult.u[0] = (0 - (C0 & 1)) & 0x3F800000; + vResult.u[1] = (0 - (C1 & 1)) & 0x3F800000; + vResult.u[2] = (0 - (C2 & 1)) & 0x3F800000; + vResult.u[3] = (0 - (C3 & 1)) & 0x3F800000; + return vResult.v; +#else // XM_SSE_INTRINSICS_ + static const XMVECTORU32 g_vMask1 = { { { 1, 1, 1, 1 } } }; + // Move the parms to a vector + __m128i vTemp = _mm_set_epi32(static_cast(C3), static_cast(C2), static_cast(C1), static_cast(C0)); + // Mask off the low bits + vTemp = _mm_and_si128(vTemp, g_vMask1); + // 0xFFFFFFFF on true bits + vTemp = _mm_cmpeq_epi32(vTemp, g_vMask1); + // 0xFFFFFFFF -> 1.0f, 0x00000000 -> 0.0f + vTemp = _mm_and_si128(vTemp, g_XMOne); + return _mm_castsi128_ps(vTemp); +#endif + } + + //------------------------------------------------------------------------------ + + inline XMVECTOR XM_CALLCONV XMVectorSplatConstant(int32_t IntConstant, uint32_t DivExponent) noexcept + { + assert(IntConstant >= -16 && IntConstant <= 15); + assert(DivExponent < 32); +#if defined(_XM_NO_INTRINSICS_) + + using DirectX::XMConvertVectorIntToFloat; + + XMVECTORI32 V = { { { IntConstant, IntConstant, IntConstant, IntConstant } } }; + return XMConvertVectorIntToFloat(V.v, DivExponent); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Splat the int + int32x4_t vScale = vdupq_n_s32(IntConstant); + // Convert to a float + XMVECTOR vResult = vcvtq_f32_s32(vScale); + // Convert DivExponent into 1.0f/(1<(&vScale)[0]); + return vResult; +#else // XM_SSE_INTRINSICS_ + // Splat the int + __m128i vScale = _mm_set1_epi32(IntConstant); + // Convert to a float + XMVECTOR vResult = _mm_cvtepi32_ps(vScale); + // Convert DivExponent into 1.0f/(1<(uScale)); + // Multiply by the reciprocal (Perform a right shift by DivExponent) + 
vResult = _mm_mul_ps(vResult, _mm_castsi128_ps(vScale)); + return vResult; +#endif + } + + //------------------------------------------------------------------------------ + + inline XMVECTOR XM_CALLCONV XMVectorSplatConstantInt(int32_t IntConstant) noexcept + { + assert(IntConstant >= -16 && IntConstant <= 15); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORI32 V = { { { IntConstant, IntConstant, IntConstant, IntConstant } } }; + return V.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + int32x4_t V = vdupq_n_s32(IntConstant); + return reinterpret_cast(&V)[0]; +#else // XM_SSE_INTRINSICS_ + __m128i V = _mm_set1_epi32(IntConstant); + return _mm_castsi128_ps(V); +#endif + } + +#include "DirectXMathConvert.inl" +#include "DirectXMathVector.inl" +#include "DirectXMathMatrix.inl" +#include "DirectXMathMisc.inl" + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + +#pragma warning(pop) + +} // namespace DirectX + diff --git a/Sdk/External/DirectXMath/Inc/DirectXMathConvert.inl b/Sdk/External/DirectXMath/Inc/DirectXMathConvert.inl new file mode 100644 index 0000000..8097f20 --- /dev/null +++ b/Sdk/External/DirectXMath/Inc/DirectXMathConvert.inl @@ -0,0 +1,2187 @@ +//------------------------------------------------------------------------------------- +// DirectXMathConvert.inl -- SIMD C++ Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +/**************************************************************************** + * + * Data conversion + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +#pragma warning(push) +#pragma warning(disable:4701) +// C4701: false positives + +inline XMVECTOR XM_CALLCONV XMConvertVectorIntToFloat +( + FXMVECTOR VInt, + uint32_t DivExponent +) noexcept +{ + assert(DivExponent < 32); +#if defined(_XM_NO_INTRINSICS_) + float fScale = 1.0f / static_cast(1U << DivExponent); + uint32_t ElementIndex = 0; + XMVECTOR Result; + do { + auto iTemp = static_cast(VInt.vector4_u32[ElementIndex]); + Result.vector4_f32[ElementIndex] = static_cast(iTemp)* fScale; + } while (++ElementIndex < 4); + return Result; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float fScale = 1.0f / (float)(1U << DivExponent); + float32x4_t vResult = vcvtq_f32_s32(vreinterpretq_s32_f32(VInt)); + return vmulq_n_f32(vResult, fScale); +#else // _XM_SSE_INTRINSICS_ + // Convert to floats + XMVECTOR vResult = _mm_cvtepi32_ps(_mm_castps_si128(VInt)); + // Convert DivExponent into 1.0f/(1<(uScale)); + vResult = _mm_mul_ps(vResult, _mm_castsi128_ps(vScale)); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMConvertVectorFloatToInt +( + FXMVECTOR VFloat, + uint32_t MulExponent +) noexcept +{ + assert(MulExponent < 32); +#if defined(_XM_NO_INTRINSICS_) + // Get the scalar factor. 
+ auto fScale = static_cast(1U << MulExponent); + uint32_t ElementIndex = 0; + XMVECTOR Result; + do { + int32_t iResult; + float fTemp = VFloat.vector4_f32[ElementIndex] * fScale; + if (fTemp <= -(65536.0f * 32768.0f)) + { + iResult = (-0x7FFFFFFF) - 1; + } + else if (fTemp > (65536.0f * 32768.0f) - 128.0f) + { + iResult = 0x7FFFFFFF; + } + else { + iResult = static_cast(fTemp); + } + Result.vector4_u32[ElementIndex] = static_cast(iResult); + } while (++ElementIndex < 4); + return Result; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t vResult = vmulq_n_f32(VFloat, (float)(1U << MulExponent)); + // In case of positive overflow, detect it + uint32x4_t vOverflow = vcgtq_f32(vResult, g_XMMaxInt); + // Float to int conversion + int32x4_t vResulti = vcvtq_s32_f32(vResult); + // If there was positive overflow, set to 0x7FFFFFFF + vResult = vreinterpretq_f32_u32(vandq_u32(vOverflow, g_XMAbsMask)); + vOverflow = vbicq_u32(vreinterpretq_u32_s32(vResulti), vOverflow); + vOverflow = vorrq_u32(vOverflow, vreinterpretq_u32_f32(vResult)); + return vreinterpretq_f32_u32(vOverflow); +#else // _XM_SSE_INTRINSICS_ + XMVECTOR vResult = _mm_set_ps1(static_cast(1U << MulExponent)); + vResult = _mm_mul_ps(vResult, VFloat); + // In case of positive overflow, detect it + XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxInt); + // Float to int conversion + __m128i vResulti = _mm_cvttps_epi32(vResult); + // If there was positive overflow, set to 0x7FFFFFFF + vResult = _mm_and_ps(vOverflow, g_XMAbsMask); + vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti)); + vOverflow = _mm_or_ps(vOverflow, vResult); + return vOverflow; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMConvertVectorUIntToFloat +( + FXMVECTOR VUInt, + uint32_t DivExponent +) noexcept +{ + assert(DivExponent < 32); +#if defined(_XM_NO_INTRINSICS_) + float fScale = 1.0f / static_cast(1U << DivExponent); + uint32_t ElementIndex 
= 0; + XMVECTOR Result; + do { + Result.vector4_f32[ElementIndex] = static_cast(VUInt.vector4_u32[ElementIndex])* fScale; + } while (++ElementIndex < 4); + return Result; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float fScale = 1.0f / (float)(1U << DivExponent); + float32x4_t vResult = vcvtq_f32_u32(vreinterpretq_u32_f32(VUInt)); + return vmulq_n_f32(vResult, fScale); +#else // _XM_SSE_INTRINSICS_ + // For the values that are higher than 0x7FFFFFFF, a fixup is needed + // Determine which ones need the fix. + XMVECTOR vMask = _mm_and_ps(VUInt, g_XMNegativeZero); + // Force all values positive + XMVECTOR vResult = _mm_xor_ps(VUInt, vMask); + // Convert to floats + vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); + // Convert 0x80000000 -> 0xFFFFFFFF + __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31); + // For only the ones that are too big, add the fixup + vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned); + vResult = _mm_add_ps(vResult, vMask); + // Convert DivExponent into 1.0f/(1<(uScale)); + vResult = _mm_mul_ps(vResult, _mm_castsi128_ps(iMask)); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMConvertVectorFloatToUInt +( + FXMVECTOR VFloat, + uint32_t MulExponent +) noexcept +{ + assert(MulExponent < 32); +#if defined(_XM_NO_INTRINSICS_) + // Get the scalar factor. 
+ auto fScale = static_cast(1U << MulExponent); + uint32_t ElementIndex = 0; + XMVECTOR Result; + do { + uint32_t uResult; + float fTemp = VFloat.vector4_f32[ElementIndex] * fScale; + if (fTemp <= 0.0f) + { + uResult = 0; + } + else if (fTemp >= (65536.0f * 65536.0f)) + { + uResult = 0xFFFFFFFFU; + } + else { + uResult = static_cast(fTemp); + } + Result.vector4_u32[ElementIndex] = uResult; + } while (++ElementIndex < 4); + return Result; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t vResult = vmulq_n_f32(VFloat, (float)(1U << MulExponent)); + // In case of overflow, detect it + uint32x4_t vOverflow = vcgtq_f32(vResult, g_XMMaxUInt); + // Float to int conversion + uint32x4_t vResulti = vcvtq_u32_f32(vResult); + // If there was overflow, set to 0xFFFFFFFFU + vResult = vreinterpretq_f32_u32(vbicq_u32(vResulti, vOverflow)); + vOverflow = vorrq_u32(vOverflow, vreinterpretq_u32_f32(vResult)); + return vreinterpretq_f32_u32(vOverflow); +#else // _XM_SSE_INTRINSICS_ + XMVECTOR vResult = _mm_set_ps1(static_cast(1U << MulExponent)); + vResult = _mm_mul_ps(vResult, VFloat); + // Clamp to >=0 + vResult = _mm_max_ps(vResult, g_XMZero); + // Any numbers that are too big, set to 0xFFFFFFFFU + XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt); + XMVECTOR vValue = g_XMUnsignedFix; + // Too large for a signed integer? 
+ XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue); + // Zero for number's lower than 0x80000000, 32768.0f*65536.0f otherwise + vValue = _mm_and_ps(vValue, vMask); + // Perform fixup only on numbers too large (Keeps low bit precision) + vResult = _mm_sub_ps(vResult, vValue); + __m128i vResulti = _mm_cvttps_epi32(vResult); + // Convert from signed to unsigned pnly if greater than 0x80000000 + vMask = _mm_and_ps(vMask, g_XMNegativeZero); + vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask); + // On those that are too large, set to 0xFFFFFFFF + vResult = _mm_or_ps(vResult, vOverflow); + return vResult; +#endif +} + +#pragma warning(pop) + +/**************************************************************************** + * + * Vector and matrix load operations + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadInt(const uint32_t* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_u32[0] = *pSource; + V.vector4_u32[1] = 0; + V.vector4_u32[2] = 0; + V.vector4_u32[3] = 0; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t zero = vdupq_n_u32(0); + return vreinterpretq_f32_u32(vld1q_lane_u32(pSource, zero, 0)); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_load_ss(reinterpret_cast(pSource)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadFloat(const float* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_f32[0] = *pSource; + V.vector4_f32[1] = 0.f; + V.vector4_f32[2] = 0.f; + V.vector4_f32[3] = 0.f; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t zero = vdupq_n_f32(0); + return vld1q_lane_f32(pSource, zero, 0); +#elif defined(_XM_SSE_INTRINSICS_) + return 
_mm_load_ss(pSource); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadInt2(const uint32_t* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_u32[0] = pSource[0]; + V.vector4_u32[1] = pSource[1]; + V.vector4_u32[2] = 0; + V.vector4_u32[3] = 0; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t x = vld1_u32(pSource); + uint32x2_t zero = vdup_n_u32(0); + return vreinterpretq_f32_u32(vcombine_u32(x, zero)); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadInt2A(const uint32_t* pSource) noexcept +{ + assert(pSource); + assert((reinterpret_cast(pSource) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_u32[0] = pSource[0]; + V.vector4_u32[1] = pSource[1]; + V.vector4_u32[2] = 0; + V.vector4_u32[3] = 0; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#ifdef _MSC_VER + uint32x2_t x = vld1_u32_ex(pSource, 64); +#else + uint32x2_t x = vld1_u32(pSource); +#endif + uint32x2_t zero = vdup_n_u32(0); + return vreinterpretq_f32_u32(vcombine_u32(x, zero)); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadFloat2(const XMFLOAT2* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_f32[0] = pSource->x; + V.vector4_f32[1] = pSource->y; + V.vector4_f32[2] = 0.f; + V.vector4_f32[3] = 0.f; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t x = vld1_f32(reinterpret_cast(pSource)); + float32x2_t zero = vdup_n_f32(0); + return 
vcombine_f32(x, zero); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadFloat2A(const XMFLOAT2A* pSource) noexcept +{ + assert(pSource); + assert((reinterpret_cast(pSource) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_f32[0] = pSource->x; + V.vector4_f32[1] = pSource->y; + V.vector4_f32[2] = 0.f; + V.vector4_f32[3] = 0.f; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#ifdef _MSC_VER + float32x2_t x = vld1_f32_ex(reinterpret_cast(pSource), 64); +#else + float32x2_t x = vld1_f32(reinterpret_cast(pSource)); +#endif + float32x2_t zero = vdup_n_f32(0); + return vcombine_f32(x, zero); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadSInt2(const XMINT2* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_f32[0] = static_cast(pSource->x); + V.vector4_f32[1] = static_cast(pSource->y); + V.vector4_f32[2] = 0.f; + V.vector4_f32[3] = 0.f; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + int32x2_t x = vld1_s32(reinterpret_cast(pSource)); + float32x2_t v = vcvt_f32_s32(x); + float32x2_t zero = vdup_n_f32(0); + return vcombine_f32(v, zero); +#elif defined(_XM_SSE_INTRINSICS_) + __m128 V = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); + return _mm_cvtepi32_ps(_mm_castps_si128(V)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadUInt2(const XMUINT2* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_f32[0] = static_cast(pSource->x); 
+ V.vector4_f32[1] = static_cast(pSource->y); + V.vector4_f32[2] = 0.f; + V.vector4_f32[3] = 0.f; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t x = vld1_u32(reinterpret_cast(pSource)); + float32x2_t v = vcvt_f32_u32(x); + float32x2_t zero = vdup_n_f32(0); + return vcombine_f32(v, zero); +#elif defined(_XM_SSE_INTRINSICS_) + __m128 V = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); + // For the values that are higher than 0x7FFFFFFF, a fixup is needed + // Determine which ones need the fix. + XMVECTOR vMask = _mm_and_ps(V, g_XMNegativeZero); + // Force all values positive + XMVECTOR vResult = _mm_xor_ps(V, vMask); + // Convert to floats + vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); + // Convert 0x80000000 -> 0xFFFFFFFF + __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31); + // For only the ones that are too big, add the fixup + vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned); + vResult = _mm_add_ps(vResult, vMask); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadInt3(const uint32_t* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_u32[0] = pSource[0]; + V.vector4_u32[1] = pSource[1]; + V.vector4_u32[2] = pSource[2]; + V.vector4_u32[3] = 0; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t x = vld1_u32(pSource); + uint32x2_t zero = vdup_n_u32(0); + uint32x2_t y = vld1_lane_u32(pSource + 2, zero, 0); + return vreinterpretq_f32_u32(vcombine_u32(x, y)); +#elif defined(_XM_SSE4_INTRINSICS_) + __m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); + __m128 z = _mm_load_ss(reinterpret_cast(pSource + 2)); + return _mm_insert_ps(xy, z, 0x20); +#elif defined(_XM_SSE_INTRINSICS_) + __m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); + __m128 z = _mm_load_ss(reinterpret_cast(pSource + 2)); + return 
_mm_movelh_ps(xy, z); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadInt3A(const uint32_t* pSource) noexcept +{ + assert(pSource); + assert((reinterpret_cast(pSource) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_u32[0] = pSource[0]; + V.vector4_u32[1] = pSource[1]; + V.vector4_u32[2] = pSource[2]; + V.vector4_u32[3] = 0; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Reads an extra integer which is zero'd +#ifdef _MSC_VER + uint32x4_t V = vld1q_u32_ex(pSource, 128); +#else + uint32x4_t V = vld1q_u32(pSource); +#endif + return vreinterpretq_f32_u32(vsetq_lane_u32(0, V, 3)); +#elif defined(_XM_SSE4_INTRINSICS_) + __m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); + __m128 z = _mm_load_ss(reinterpret_cast(pSource + 2)); + return _mm_insert_ps(xy, z, 0x20); +#elif defined(_XM_SSE_INTRINSICS_) + __m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); + __m128 z = _mm_load_ss(reinterpret_cast(pSource + 2)); + return _mm_movelh_ps(xy, z); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadFloat3(const XMFLOAT3* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_f32[0] = pSource->x; + V.vector4_f32[1] = pSource->y; + V.vector4_f32[2] = pSource->z; + V.vector4_f32[3] = 0.f; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t x = vld1_f32(reinterpret_cast(pSource)); + float32x2_t zero = vdup_n_f32(0); + float32x2_t y = vld1_lane_f32(reinterpret_cast(pSource) + 2, zero, 0); + return vcombine_f32(x, y); +#elif defined(_XM_SSE4_INTRINSICS_) + __m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); + __m128 z = _mm_load_ss(&pSource->z); + return _mm_insert_ps(xy, z, 0x20); +#elif defined(_XM_SSE_INTRINSICS_) + __m128 xy = 
_mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); + __m128 z = _mm_load_ss(&pSource->z); + return _mm_movelh_ps(xy, z); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadFloat3A(const XMFLOAT3A* pSource) noexcept +{ + assert(pSource); + assert((reinterpret_cast(pSource) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_f32[0] = pSource->x; + V.vector4_f32[1] = pSource->y; + V.vector4_f32[2] = pSource->z; + V.vector4_f32[3] = 0.f; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Reads an extra float which is zero'd +#ifdef _MSC_VER + float32x4_t V = vld1q_f32_ex(reinterpret_cast(pSource), 128); +#else + float32x4_t V = vld1q_f32(reinterpret_cast(pSource)); +#endif + return vsetq_lane_f32(0, V, 3); +#elif defined(_XM_SSE_INTRINSICS_) + // Reads an extra float which is zero'd + __m128 V = _mm_load_ps(&pSource->x); + return _mm_and_ps(V, g_XMMask3); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadSInt3(const XMINT3* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR V; + V.vector4_f32[0] = static_cast(pSource->x); + V.vector4_f32[1] = static_cast(pSource->y); + V.vector4_f32[2] = static_cast(pSource->z); + V.vector4_f32[3] = 0.f; + return V; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + int32x2_t x = vld1_s32(reinterpret_cast(pSource)); + int32x2_t zero = vdup_n_s32(0); + int32x2_t y = vld1_lane_s32(reinterpret_cast(pSource) + 2, zero, 0); + int32x4_t v = vcombine_s32(x, y); + return vcvtq_f32_s32(v); +#elif defined(_XM_SSE_INTRINSICS_) + __m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); + __m128 z = _mm_load_ss(reinterpret_cast(&pSource->z)); + __m128 V = _mm_movelh_ps(xy, z); + return _mm_cvtepi32_ps(_mm_castps_si128(V)); +#endif +} + 
+//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadUInt3(const XMUINT3* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_f32[0] = static_cast(pSource->x); + V.vector4_f32[1] = static_cast(pSource->y); + V.vector4_f32[2] = static_cast(pSource->z); + V.vector4_f32[3] = 0.f; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t x = vld1_u32(reinterpret_cast(pSource)); + uint32x2_t zero = vdup_n_u32(0); + uint32x2_t y = vld1_lane_u32(reinterpret_cast(pSource) + 2, zero, 0); + uint32x4_t v = vcombine_u32(x, y); + return vcvtq_f32_u32(v); +#elif defined(_XM_SSE_INTRINSICS_) + __m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); + __m128 z = _mm_load_ss(reinterpret_cast(&pSource->z)); + __m128 V = _mm_movelh_ps(xy, z); + // For the values that are higher than 0x7FFFFFFF, a fixup is needed + // Determine which ones need the fix. + XMVECTOR vMask = _mm_and_ps(V, g_XMNegativeZero); + // Force all values positive + XMVECTOR vResult = _mm_xor_ps(V, vMask); + // Convert to floats + vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); + // Convert 0x80000000 -> 0xFFFFFFFF + __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31); + // For only the ones that are too big, add the fixup + vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned); + vResult = _mm_add_ps(vResult, vMask); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadInt4(const uint32_t* pSource) noexcept +{ + assert(pSource); + +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_u32[0] = pSource[0]; + V.vector4_u32[1] = pSource[1]; + V.vector4_u32[2] = pSource[2]; + V.vector4_u32[3] = pSource[3]; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vld1q_u32(pSource)); +#elif 
defined(_XM_SSE_INTRINSICS_) + __m128i V = _mm_loadu_si128(reinterpret_cast(pSource)); + return _mm_castsi128_ps(V); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadInt4A(const uint32_t* pSource) noexcept +{ + assert(pSource); + assert((reinterpret_cast(pSource) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_u32[0] = pSource[0]; + V.vector4_u32[1] = pSource[1]; + V.vector4_u32[2] = pSource[2]; + V.vector4_u32[3] = pSource[3]; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#ifdef _MSC_VER + return vld1q_u32_ex(pSource, 128); +#else + return vreinterpretq_f32_u32(vld1q_u32(pSource)); +#endif +#elif defined(_XM_SSE_INTRINSICS_) + __m128i V = _mm_load_si128(reinterpret_cast(pSource)); + return _mm_castsi128_ps(V); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadFloat4(const XMFLOAT4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_f32[0] = pSource->x; + V.vector4_f32[1] = pSource->y; + V.vector4_f32[2] = pSource->z; + V.vector4_f32[3] = pSource->w; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vld1q_f32(reinterpret_cast(pSource)); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_loadu_ps(&pSource->x); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadFloat4A(const XMFLOAT4A* pSource) noexcept +{ + assert(pSource); + assert((reinterpret_cast(pSource) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_f32[0] = pSource->x; + V.vector4_f32[1] = pSource->y; + V.vector4_f32[2] = pSource->z; + V.vector4_f32[3] = pSource->w; + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#ifdef _MSC_VER + return vld1q_f32_ex(reinterpret_cast(pSource), 128); 
+#else + return vld1q_f32(reinterpret_cast(pSource)); +#endif +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_load_ps(&pSource->x); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadSInt4(const XMINT4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR V; + V.vector4_f32[0] = static_cast(pSource->x); + V.vector4_f32[1] = static_cast(pSource->y); + V.vector4_f32[2] = static_cast(pSource->z); + V.vector4_f32[3] = static_cast(pSource->w); + return V; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + int32x4_t v = vld1q_s32(reinterpret_cast(pSource)); + return vcvtq_f32_s32(v); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i V = _mm_loadu_si128(reinterpret_cast(pSource)); + return _mm_cvtepi32_ps(V); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadUInt4(const XMUINT4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR V; + V.vector4_f32[0] = static_cast(pSource->x); + V.vector4_f32[1] = static_cast(pSource->y); + V.vector4_f32[2] = static_cast(pSource->z); + V.vector4_f32[3] = static_cast(pSource->w); + return V; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t v = vld1q_u32(reinterpret_cast(pSource)); + return vcvtq_f32_u32(v); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i V = _mm_loadu_si128(reinterpret_cast(pSource)); + // For the values that are higher than 0x7FFFFFFF, a fixup is needed + // Determine which ones need the fix. 
+ XMVECTOR vMask = _mm_and_ps(_mm_castsi128_ps(V), g_XMNegativeZero); + // Force all values positive + XMVECTOR vResult = _mm_xor_ps(_mm_castsi128_ps(V), vMask); + // Convert to floats + vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); + // Convert 0x80000000 -> 0xFFFFFFFF + __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31); + // For only the ones that are too big, add the fixup + vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned); + vResult = _mm_add_ps(vResult, vMask); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMMATRIX XM_CALLCONV XMLoadFloat3x3(const XMFLOAT3X3* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + + XMMATRIX M; + M.r[0].vector4_f32[0] = pSource->m[0][0]; + M.r[0].vector4_f32[1] = pSource->m[0][1]; + M.r[0].vector4_f32[2] = pSource->m[0][2]; + M.r[0].vector4_f32[3] = 0.0f; + + M.r[1].vector4_f32[0] = pSource->m[1][0]; + M.r[1].vector4_f32[1] = pSource->m[1][1]; + M.r[1].vector4_f32[2] = pSource->m[1][2]; + M.r[1].vector4_f32[3] = 0.0f; + + M.r[2].vector4_f32[0] = pSource->m[2][0]; + M.r[2].vector4_f32[1] = pSource->m[2][1]; + M.r[2].vector4_f32[2] = pSource->m[2][2]; + M.r[2].vector4_f32[3] = 0.0f; + M.r[3].vector4_f32[0] = 0.0f; + M.r[3].vector4_f32[1] = 0.0f; + M.r[3].vector4_f32[2] = 0.0f; + M.r[3].vector4_f32[3] = 1.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t v0 = vld1q_f32(&pSource->m[0][0]); + float32x4_t v1 = vld1q_f32(&pSource->m[1][1]); + float32x2_t v2 = vcreate_f32(static_cast(*reinterpret_cast(&pSource->m[2][2]))); + float32x4_t T = vextq_f32(v0, v1, 3); + + XMMATRIX M; + M.r[0] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(v0), g_XMMask3)); + M.r[1] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T), g_XMMask3)); + M.r[2] = vcombine_f32(vget_high_f32(v1), v2); + M.r[3] = g_XMIdentityR3; + return M; +#elif defined(_XM_SSE_INTRINSICS_) + 
__m128 Z = _mm_setzero_ps(); + + __m128 V1 = _mm_loadu_ps(&pSource->m[0][0]); + __m128 V2 = _mm_loadu_ps(&pSource->m[1][1]); + __m128 V3 = _mm_load_ss(&pSource->m[2][2]); + + __m128 T1 = _mm_unpackhi_ps(V1, Z); + __m128 T2 = _mm_unpacklo_ps(V2, Z); + __m128 T3 = _mm_shuffle_ps(V3, T2, _MM_SHUFFLE(0, 1, 0, 0)); + __m128 T4 = _mm_movehl_ps(T2, T3); + __m128 T5 = _mm_movehl_ps(Z, T1); + + XMMATRIX M; + M.r[0] = _mm_movelh_ps(V1, T1); + M.r[1] = _mm_add_ps(T4, T5); + M.r[2] = _mm_shuffle_ps(V2, V3, _MM_SHUFFLE(1, 0, 3, 2)); + M.r[3] = g_XMIdentityR3; + return M; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMMATRIX XM_CALLCONV XMLoadFloat4x3(const XMFLOAT4X3* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + + XMMATRIX M; + M.r[0].vector4_f32[0] = pSource->m[0][0]; + M.r[0].vector4_f32[1] = pSource->m[0][1]; + M.r[0].vector4_f32[2] = pSource->m[0][2]; + M.r[0].vector4_f32[3] = 0.0f; + + M.r[1].vector4_f32[0] = pSource->m[1][0]; + M.r[1].vector4_f32[1] = pSource->m[1][1]; + M.r[1].vector4_f32[2] = pSource->m[1][2]; + M.r[1].vector4_f32[3] = 0.0f; + + M.r[2].vector4_f32[0] = pSource->m[2][0]; + M.r[2].vector4_f32[1] = pSource->m[2][1]; + M.r[2].vector4_f32[2] = pSource->m[2][2]; + M.r[2].vector4_f32[3] = 0.0f; + + M.r[3].vector4_f32[0] = pSource->m[3][0]; + M.r[3].vector4_f32[1] = pSource->m[3][1]; + M.r[3].vector4_f32[2] = pSource->m[3][2]; + M.r[3].vector4_f32[3] = 1.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t v0 = vld1q_f32(&pSource->m[0][0]); + float32x4_t v1 = vld1q_f32(&pSource->m[1][1]); + float32x4_t v2 = vld1q_f32(&pSource->m[2][2]); + + float32x4_t T1 = vextq_f32(v0, v1, 3); + float32x4_t T2 = vcombine_f32(vget_high_f32(v1), vget_low_f32(v2)); + float32x4_t T3 = vextq_f32(v2, v2, 1); + + XMMATRIX M; + M.r[0] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(v0), g_XMMask3)); + M.r[1] = 
vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T1), g_XMMask3)); + M.r[2] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T2), g_XMMask3)); + M.r[3] = vsetq_lane_f32(1.f, T3, 3); + return M; +#elif defined(_XM_SSE_INTRINSICS_) + // Use unaligned load instructions to + // load the 12 floats + // vTemp1 = x1,y1,z1,x2 + XMVECTOR vTemp1 = _mm_loadu_ps(&pSource->m[0][0]); + // vTemp2 = y2,z2,x3,y3 + XMVECTOR vTemp2 = _mm_loadu_ps(&pSource->m[1][1]); + // vTemp4 = z3,x4,y4,z4 + XMVECTOR vTemp4 = _mm_loadu_ps(&pSource->m[2][2]); + // vTemp3 = x3,y3,z3,z3 + XMVECTOR vTemp3 = _mm_shuffle_ps(vTemp2, vTemp4, _MM_SHUFFLE(0, 0, 3, 2)); + // vTemp2 = y2,z2,x2,x2 + vTemp2 = _mm_shuffle_ps(vTemp2, vTemp1, _MM_SHUFFLE(3, 3, 1, 0)); + // vTemp2 = x2,y2,z2,z2 + vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(1, 1, 0, 2)); + // vTemp1 = x1,y1,z1,0 + vTemp1 = _mm_and_ps(vTemp1, g_XMMask3); + // vTemp2 = x2,y2,z2,0 + vTemp2 = _mm_and_ps(vTemp2, g_XMMask3); + // vTemp3 = x3,y3,z3,0 + vTemp3 = _mm_and_ps(vTemp3, g_XMMask3); + // vTemp4i = x4,y4,z4,0 + __m128i vTemp4i = _mm_srli_si128(_mm_castps_si128(vTemp4), 32 / 8); + // vTemp4i = x4,y4,z4,1.0f + vTemp4i = _mm_or_si128(vTemp4i, g_XMIdentityR3); + XMMATRIX M(vTemp1, + vTemp2, + vTemp3, + _mm_castsi128_ps(vTemp4i)); + return M; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMMATRIX XM_CALLCONV XMLoadFloat4x3A(const XMFLOAT4X3A* pSource) noexcept +{ + assert(pSource); + assert((reinterpret_cast(pSource) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + + XMMATRIX M; + M.r[0].vector4_f32[0] = pSource->m[0][0]; + M.r[0].vector4_f32[1] = pSource->m[0][1]; + M.r[0].vector4_f32[2] = pSource->m[0][2]; + M.r[0].vector4_f32[3] = 0.0f; + + M.r[1].vector4_f32[0] = pSource->m[1][0]; + M.r[1].vector4_f32[1] = pSource->m[1][1]; + M.r[1].vector4_f32[2] = pSource->m[1][2]; + M.r[1].vector4_f32[3] = 0.0f; + + M.r[2].vector4_f32[0] = pSource->m[2][0]; + 
M.r[2].vector4_f32[1] = pSource->m[2][1]; + M.r[2].vector4_f32[2] = pSource->m[2][2]; + M.r[2].vector4_f32[3] = 0.0f; + + M.r[3].vector4_f32[0] = pSource->m[3][0]; + M.r[3].vector4_f32[1] = pSource->m[3][1]; + M.r[3].vector4_f32[2] = pSource->m[3][2]; + M.r[3].vector4_f32[3] = 1.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#ifdef _MSC_VER + float32x4_t v0 = vld1q_f32_ex(&pSource->m[0][0], 128); + float32x4_t v1 = vld1q_f32_ex(&pSource->m[1][1], 128); + float32x4_t v2 = vld1q_f32_ex(&pSource->m[2][2], 128); +#else + float32x4_t v0 = vld1q_f32(&pSource->m[0][0]); + float32x4_t v1 = vld1q_f32(&pSource->m[1][1]); + float32x4_t v2 = vld1q_f32(&pSource->m[2][2]); +#endif + + float32x4_t T1 = vextq_f32(v0, v1, 3); + float32x4_t T2 = vcombine_f32(vget_high_f32(v1), vget_low_f32(v2)); + float32x4_t T3 = vextq_f32(v2, v2, 1); + + XMMATRIX M; + M.r[0] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(v0), g_XMMask3)); + M.r[1] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T1), g_XMMask3)); + M.r[2] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T2), g_XMMask3)); + M.r[3] = vsetq_lane_f32(1.f, T3, 3); + return M; +#elif defined(_XM_SSE_INTRINSICS_) + // Use aligned load instructions to + // load the 12 floats + // vTemp1 = x1,y1,z1,x2 + XMVECTOR vTemp1 = _mm_load_ps(&pSource->m[0][0]); + // vTemp2 = y2,z2,x3,y3 + XMVECTOR vTemp2 = _mm_load_ps(&pSource->m[1][1]); + // vTemp4 = z3,x4,y4,z4 + XMVECTOR vTemp4 = _mm_load_ps(&pSource->m[2][2]); + // vTemp3 = x3,y3,z3,z3 + XMVECTOR vTemp3 = _mm_shuffle_ps(vTemp2, vTemp4, _MM_SHUFFLE(0, 0, 3, 2)); + // vTemp2 = y2,z2,x2,x2 + vTemp2 = _mm_shuffle_ps(vTemp2, vTemp1, _MM_SHUFFLE(3, 3, 1, 0)); + // vTemp2 = x2,y2,z2,z2 + vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(1, 1, 0, 2)); + // vTemp1 = x1,y1,z1,0 + vTemp1 = _mm_and_ps(vTemp1, g_XMMask3); + // vTemp2 = x2,y2,z2,0 + vTemp2 = _mm_and_ps(vTemp2, g_XMMask3); + // vTemp3 = x3,y3,z3,0 + vTemp3 = _mm_and_ps(vTemp3, g_XMMask3); + // vTemp4i = x4,y4,z4,0 
+ __m128i vTemp4i = _mm_srli_si128(_mm_castps_si128(vTemp4), 32 / 8); + // vTemp4i = x4,y4,z4,1.0f + vTemp4i = _mm_or_si128(vTemp4i, g_XMIdentityR3); + XMMATRIX M(vTemp1, + vTemp2, + vTemp3, + _mm_castsi128_ps(vTemp4i)); + return M; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMMATRIX XM_CALLCONV XMLoadFloat3x4(const XMFLOAT3X4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + + XMMATRIX M; + M.r[0].vector4_f32[0] = pSource->m[0][0]; + M.r[0].vector4_f32[1] = pSource->m[1][0]; + M.r[0].vector4_f32[2] = pSource->m[2][0]; + M.r[0].vector4_f32[3] = 0.0f; + + M.r[1].vector4_f32[0] = pSource->m[0][1]; + M.r[1].vector4_f32[1] = pSource->m[1][1]; + M.r[1].vector4_f32[2] = pSource->m[2][1]; + M.r[1].vector4_f32[3] = 0.0f; + + M.r[2].vector4_f32[0] = pSource->m[0][2]; + M.r[2].vector4_f32[1] = pSource->m[1][2]; + M.r[2].vector4_f32[2] = pSource->m[2][2]; + M.r[2].vector4_f32[3] = 0.0f; + + M.r[3].vector4_f32[0] = pSource->m[0][3]; + M.r[3].vector4_f32[1] = pSource->m[1][3]; + M.r[3].vector4_f32[2] = pSource->m[2][3]; + M.r[3].vector4_f32[3] = 1.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2x4_t vTemp0 = vld4_f32(&pSource->_11); + float32x4_t vTemp1 = vld1q_f32(&pSource->_31); + + float32x2_t l = vget_low_f32(vTemp1); + float32x4_t T0 = vcombine_f32(vTemp0.val[0], l); + float32x2_t rl = vrev64_f32(l); + float32x4_t T1 = vcombine_f32(vTemp0.val[1], rl); + + float32x2_t h = vget_high_f32(vTemp1); + float32x4_t T2 = vcombine_f32(vTemp0.val[2], h); + float32x2_t rh = vrev64_f32(h); + float32x4_t T3 = vcombine_f32(vTemp0.val[3], rh); + + XMMATRIX M = {}; + M.r[0] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T0), g_XMMask3)); + M.r[1] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T1), g_XMMask3)); + M.r[2] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T2), g_XMMask3)); + M.r[3] = vsetq_lane_f32(1.f, T3, 3); + return M; 
+#elif defined(_XM_SSE_INTRINSICS_) + XMMATRIX M; + M.r[0] = _mm_loadu_ps(&pSource->_11); + M.r[1] = _mm_loadu_ps(&pSource->_21); + M.r[2] = _mm_loadu_ps(&pSource->_31); + M.r[3] = g_XMIdentityR3; + + // x.x,x.y,y.x,y.y + XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0)); + // x.z,x.w,y.z,y.w + XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2)); + // z.x,z.y,w.x,w.y + XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0)); + // z.z,z.w,w.z,w.w + XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2)); + XMMATRIX mResult; + + // x.x,y.x,z.x,w.x + mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); + // x.y,y.y,z.y,w.y + mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); + // x.z,y.z,z.z,w.z + mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); + // x.w,y.w,z.w,w.w + mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1)); + return mResult; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMMATRIX XM_CALLCONV XMLoadFloat3x4A(const XMFLOAT3X4A* pSource) noexcept +{ + assert(pSource); + assert((reinterpret_cast(pSource) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + + XMMATRIX M; + M.r[0].vector4_f32[0] = pSource->m[0][0]; + M.r[0].vector4_f32[1] = pSource->m[1][0]; + M.r[0].vector4_f32[2] = pSource->m[2][0]; + M.r[0].vector4_f32[3] = 0.0f; + + M.r[1].vector4_f32[0] = pSource->m[0][1]; + M.r[1].vector4_f32[1] = pSource->m[1][1]; + M.r[1].vector4_f32[2] = pSource->m[2][1]; + M.r[1].vector4_f32[3] = 0.0f; + + M.r[2].vector4_f32[0] = pSource->m[0][2]; + M.r[2].vector4_f32[1] = pSource->m[1][2]; + M.r[2].vector4_f32[2] = pSource->m[2][2]; + M.r[2].vector4_f32[3] = 0.0f; + + M.r[3].vector4_f32[0] = pSource->m[0][3]; + M.r[3].vector4_f32[1] = pSource->m[1][3]; + M.r[3].vector4_f32[2] = pSource->m[2][3]; + M.r[3].vector4_f32[3] = 1.0f; + 
return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#ifdef _MSC_VER + float32x2x4_t vTemp0 = vld4_f32_ex(&pSource->_11, 128); + float32x4_t vTemp1 = vld1q_f32_ex(&pSource->_31, 128); +#else + float32x2x4_t vTemp0 = vld4_f32(&pSource->_11); + float32x4_t vTemp1 = vld1q_f32(&pSource->_31); +#endif + + float32x2_t l = vget_low_f32(vTemp1); + float32x4_t T0 = vcombine_f32(vTemp0.val[0], l); + float32x2_t rl = vrev64_f32(l); + float32x4_t T1 = vcombine_f32(vTemp0.val[1], rl); + + float32x2_t h = vget_high_f32(vTemp1); + float32x4_t T2 = vcombine_f32(vTemp0.val[2], h); + float32x2_t rh = vrev64_f32(h); + float32x4_t T3 = vcombine_f32(vTemp0.val[3], rh); + + XMMATRIX M = {}; + M.r[0] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T0), g_XMMask3)); + M.r[1] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T1), g_XMMask3)); + M.r[2] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T2), g_XMMask3)); + M.r[3] = vsetq_lane_f32(1.f, T3, 3); + return M; +#elif defined(_XM_SSE_INTRINSICS_) + XMMATRIX M; + M.r[0] = _mm_load_ps(&pSource->_11); + M.r[1] = _mm_load_ps(&pSource->_21); + M.r[2] = _mm_load_ps(&pSource->_31); + M.r[3] = g_XMIdentityR3; + + // x.x,x.y,y.x,y.y + XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0)); + // x.z,x.w,y.z,y.w + XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2)); + // z.x,z.y,w.x,w.y + XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0)); + // z.z,z.w,w.z,w.w + XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2)); + XMMATRIX mResult; + + // x.x,y.x,z.x,w.x + mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); + // x.y,y.y,z.y,w.y + mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); + // x.z,y.z,z.z,w.z + mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); + // x.w,y.w,z.w,w.w + mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1)); + return mResult; +#endif +} + 
+//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMMATRIX XM_CALLCONV XMLoadFloat4x4(const XMFLOAT4X4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + + XMMATRIX M; + M.r[0].vector4_f32[0] = pSource->m[0][0]; + M.r[0].vector4_f32[1] = pSource->m[0][1]; + M.r[0].vector4_f32[2] = pSource->m[0][2]; + M.r[0].vector4_f32[3] = pSource->m[0][3]; + + M.r[1].vector4_f32[0] = pSource->m[1][0]; + M.r[1].vector4_f32[1] = pSource->m[1][1]; + M.r[1].vector4_f32[2] = pSource->m[1][2]; + M.r[1].vector4_f32[3] = pSource->m[1][3]; + + M.r[2].vector4_f32[0] = pSource->m[2][0]; + M.r[2].vector4_f32[1] = pSource->m[2][1]; + M.r[2].vector4_f32[2] = pSource->m[2][2]; + M.r[2].vector4_f32[3] = pSource->m[2][3]; + + M.r[3].vector4_f32[0] = pSource->m[3][0]; + M.r[3].vector4_f32[1] = pSource->m[3][1]; + M.r[3].vector4_f32[2] = pSource->m[3][2]; + M.r[3].vector4_f32[3] = pSource->m[3][3]; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMMATRIX M; + M.r[0] = vld1q_f32(reinterpret_cast(&pSource->_11)); + M.r[1] = vld1q_f32(reinterpret_cast(&pSource->_21)); + M.r[2] = vld1q_f32(reinterpret_cast(&pSource->_31)); + M.r[3] = vld1q_f32(reinterpret_cast(&pSource->_41)); + return M; +#elif defined(_XM_SSE_INTRINSICS_) + XMMATRIX M; + M.r[0] = _mm_loadu_ps(&pSource->_11); + M.r[1] = _mm_loadu_ps(&pSource->_21); + M.r[2] = _mm_loadu_ps(&pSource->_31); + M.r[3] = _mm_loadu_ps(&pSource->_41); + return M; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMMATRIX XM_CALLCONV XMLoadFloat4x4A(const XMFLOAT4X4A* pSource) noexcept +{ + assert(pSource); + assert((reinterpret_cast(pSource) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + + XMMATRIX M; + M.r[0].vector4_f32[0] = pSource->m[0][0]; + M.r[0].vector4_f32[1] = pSource->m[0][1]; + M.r[0].vector4_f32[2] = pSource->m[0][2]; + M.r[0].vector4_f32[3] = pSource->m[0][3]; + + 
M.r[1].vector4_f32[0] = pSource->m[1][0]; + M.r[1].vector4_f32[1] = pSource->m[1][1]; + M.r[1].vector4_f32[2] = pSource->m[1][2]; + M.r[1].vector4_f32[3] = pSource->m[1][3]; + + M.r[2].vector4_f32[0] = pSource->m[2][0]; + M.r[2].vector4_f32[1] = pSource->m[2][1]; + M.r[2].vector4_f32[2] = pSource->m[2][2]; + M.r[2].vector4_f32[3] = pSource->m[2][3]; + + M.r[3].vector4_f32[0] = pSource->m[3][0]; + M.r[3].vector4_f32[1] = pSource->m[3][1]; + M.r[3].vector4_f32[2] = pSource->m[3][2]; + M.r[3].vector4_f32[3] = pSource->m[3][3]; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMMATRIX M; +#ifdef _MSC_VER + M.r[0] = vld1q_f32_ex(reinterpret_cast(&pSource->_11), 128); + M.r[1] = vld1q_f32_ex(reinterpret_cast(&pSource->_21), 128); + M.r[2] = vld1q_f32_ex(reinterpret_cast(&pSource->_31), 128); + M.r[3] = vld1q_f32_ex(reinterpret_cast(&pSource->_41), 128); +#else + M.r[0] = vld1q_f32(reinterpret_cast(&pSource->_11)); + M.r[1] = vld1q_f32(reinterpret_cast(&pSource->_21)); + M.r[2] = vld1q_f32(reinterpret_cast(&pSource->_31)); + M.r[3] = vld1q_f32(reinterpret_cast(&pSource->_41)); +#endif + return M; +#elif defined(_XM_SSE_INTRINSICS_) + XMMATRIX M; + M.r[0] = _mm_load_ps(&pSource->_11); + M.r[1] = _mm_load_ps(&pSource->_21); + M.r[2] = _mm_load_ps(&pSource->_31); + M.r[3] = _mm_load_ps(&pSource->_41); + return M; +#endif +} + +/**************************************************************************** + * + * Vector and matrix store operations + * + ****************************************************************************/ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreInt +( + uint32_t* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + *pDestination = XMVectorGetIntX(V); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + vst1q_lane_u32(pDestination, *reinterpret_cast(&V), 0); +#elif defined(_XM_SSE_INTRINSICS_) + _mm_store_ss(reinterpret_cast(pDestination), V); +#endif +} + 
+//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat +( + float* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + *pDestination = XMVectorGetX(V); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + vst1q_lane_f32(pDestination, V, 0); +#elif defined(_XM_SSE_INTRINSICS_) + _mm_store_ss(pDestination, V); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreInt2 +( + uint32_t* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + pDestination[0] = V.vector4_u32[0]; + pDestination[1] = V.vector4_u32[1]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V)); + vst1_u32(pDestination, VL); +#elif defined(_XM_SSE_INTRINSICS_) + _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreInt2A +( + uint32_t* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + assert((reinterpret_cast(pDestination) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + pDestination[0] = V.vector4_u32[0]; + pDestination[1] = V.vector4_u32[1]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V)); +#ifdef _MSC_VER + vst1_u32_ex(pDestination, VL, 64); +#else + vst1_u32(pDestination, VL); +#endif +#elif defined(_XM_SSE_INTRINSICS_) + _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat2 +( + XMFLOAT2* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + 
pDestination->x = V.vector4_f32[0]; + pDestination->y = V.vector4_f32[1]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); + vst1_f32(reinterpret_cast(pDestination), VL); +#elif defined(_XM_SSE_INTRINSICS_) + _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat2A +( + XMFLOAT2A* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + assert((reinterpret_cast(pDestination) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + pDestination->x = V.vector4_f32[0]; + pDestination->y = V.vector4_f32[1]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); +#ifdef _MSC_VER + vst1_f32_ex(reinterpret_cast(pDestination), VL, 64); +#else + vst1_f32(reinterpret_cast(pDestination), VL); +#endif +#elif defined(_XM_SSE_INTRINSICS_) + _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreSInt2 +( + XMINT2* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + pDestination->x = static_cast(V.vector4_f32[0]); + pDestination->y = static_cast(V.vector4_f32[1]); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t v = vget_low_f32(V); + int32x2_t iv = vcvt_s32_f32(v); + vst1_s32(reinterpret_cast(pDestination), iv); +#elif defined(_XM_SSE_INTRINSICS_) + // In case of positive overflow, detect it + XMVECTOR vOverflow = _mm_cmpgt_ps(V, g_XMMaxInt); + // Float to int conversion + __m128i vResulti = _mm_cvttps_epi32(V); + // If there was positive overflow, set to 0x7FFFFFFF + XMVECTOR vResult = _mm_and_ps(vOverflow, g_XMAbsMask); + vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti)); + vOverflow = _mm_or_ps(vOverflow, vResult); + // Write two ints 
+ _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(vOverflow)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreUInt2 +( + XMUINT2* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + pDestination->x = static_cast(V.vector4_f32[0]); + pDestination->y = static_cast(V.vector4_f32[1]); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t v = vget_low_f32(V); + uint32x2_t iv = vcvt_u32_f32(v); + vst1_u32(reinterpret_cast(pDestination), iv); +#elif defined(_XM_SSE_INTRINSICS_) + // Clamp to >=0 + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + // Any numbers that are too big, set to 0xFFFFFFFFU + XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt); + XMVECTOR vValue = g_XMUnsignedFix; + // Too large for a signed integer? + XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue); + // Zero for number's lower than 0x80000000, 32768.0f*65536.0f otherwise + vValue = _mm_and_ps(vValue, vMask); + // Perform fixup only on numbers too large (Keeps low bit precision) + vResult = _mm_sub_ps(vResult, vValue); + __m128i vResulti = _mm_cvttps_epi32(vResult); + // Convert from signed to unsigned pnly if greater than 0x80000000 + vMask = _mm_and_ps(vMask, g_XMNegativeZero); + vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask); + // On those that are too large, set to 0xFFFFFFFF + vResult = _mm_or_ps(vResult, vOverflow); + // Write two uints + _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(vResult)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreInt3 +( + uint32_t* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + pDestination[0] = V.vector4_u32[0]; + pDestination[1] = V.vector4_u32[1]; + pDestination[2] = V.vector4_u32[2]; +#elif 
defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V)); + vst1_u32(pDestination, VL); + vst1q_lane_u32(pDestination + 2, *reinterpret_cast(&V), 2); +#elif defined(_XM_SSE_INTRINSICS_) + _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); + __m128 z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + _mm_store_ss(reinterpret_cast(&pDestination[2]), z); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreInt3A +( + uint32_t* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + assert((reinterpret_cast(pDestination) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + pDestination[0] = V.vector4_u32[0]; + pDestination[1] = V.vector4_u32[1]; + pDestination[2] = V.vector4_u32[2]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V)); +#ifdef _MSC_VER + vst1_u32_ex(pDestination, VL, 64); +#else + vst1_u32(pDestination, VL); +#endif + vst1q_lane_u32(pDestination + 2, *reinterpret_cast(&V), 2); +#elif defined(_XM_SSE_INTRINSICS_) + _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); + __m128 z = _mm_movehl_ps(V, V); + _mm_store_ss(reinterpret_cast(&pDestination[2]), z); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat3 +( + XMFLOAT3* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + pDestination->x = V.vector4_f32[0]; + pDestination->y = V.vector4_f32[1]; + pDestination->z = V.vector4_f32[2]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); + vst1_f32(reinterpret_cast(pDestination), VL); + vst1q_lane_f32(reinterpret_cast(pDestination) + 2, V, 2); +#elif defined(_XM_SSE4_INTRINSICS_) + * reinterpret_cast(&pDestination->x) = _mm_extract_ps(V, 0); + *reinterpret_cast(&pDestination->y) 
= _mm_extract_ps(V, 1); + *reinterpret_cast(&pDestination->z) = _mm_extract_ps(V, 2); +#elif defined(_XM_SSE_INTRINSICS_) + _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); + __m128 z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + _mm_store_ss(&pDestination->z, z); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat3A +( + XMFLOAT3A* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + assert((reinterpret_cast(pDestination) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + pDestination->x = V.vector4_f32[0]; + pDestination->y = V.vector4_f32[1]; + pDestination->z = V.vector4_f32[2]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); +#ifdef _MSC_VER + vst1_f32_ex(reinterpret_cast(pDestination), VL, 64); +#else + vst1_f32(reinterpret_cast(pDestination), VL); +#endif + vst1q_lane_f32(reinterpret_cast(pDestination) + 2, V, 2); +#elif defined(_XM_SSE4_INTRINSICS_) + _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); + *reinterpret_cast(&pDestination->z) = _mm_extract_ps(V, 2); +#elif defined(_XM_SSE_INTRINSICS_) + _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); + __m128 z = _mm_movehl_ps(V, V); + _mm_store_ss(&pDestination->z, z); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreSInt3 +( + XMINT3* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + pDestination->x = static_cast(V.vector4_f32[0]); + pDestination->y = static_cast(V.vector4_f32[1]); + pDestination->z = static_cast(V.vector4_f32[2]); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + int32x4_t v = vcvtq_s32_f32(V); + int32x2_t vL = vget_low_s32(v); + vst1_s32(reinterpret_cast(pDestination), vL); + vst1q_lane_s32(reinterpret_cast(pDestination) + 2, v, 2); +#elif 
defined(_XM_SSE_INTRINSICS_) + // In case of positive overflow, detect it + XMVECTOR vOverflow = _mm_cmpgt_ps(V, g_XMMaxInt); + // Float to int conversion + __m128i vResulti = _mm_cvttps_epi32(V); + // If there was positive overflow, set to 0x7FFFFFFF + XMVECTOR vResult = _mm_and_ps(vOverflow, g_XMAbsMask); + vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti)); + vOverflow = _mm_or_ps(vOverflow, vResult); + // Write 3 uints + _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(vOverflow)); + __m128 z = XM_PERMUTE_PS(vOverflow, _MM_SHUFFLE(2, 2, 2, 2)); + _mm_store_ss(reinterpret_cast(&pDestination->z), z); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreUInt3 +( + XMUINT3* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + pDestination->x = static_cast(V.vector4_f32[0]); + pDestination->y = static_cast(V.vector4_f32[1]); + pDestination->z = static_cast(V.vector4_f32[2]); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t v = vcvtq_u32_f32(V); + uint32x2_t vL = vget_low_u32(v); + vst1_u32(reinterpret_cast(pDestination), vL); + vst1q_lane_u32(reinterpret_cast(pDestination) + 2, v, 2); +#elif defined(_XM_SSE_INTRINSICS_) + // Clamp to >=0 + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + // Any numbers that are too big, set to 0xFFFFFFFFU + XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt); + XMVECTOR vValue = g_XMUnsignedFix; + // Too large for a signed integer? 
+ XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue); + // Zero for number's lower than 0x80000000, 32768.0f*65536.0f otherwise + vValue = _mm_and_ps(vValue, vMask); + // Perform fixup only on numbers too large (Keeps low bit precision) + vResult = _mm_sub_ps(vResult, vValue); + __m128i vResulti = _mm_cvttps_epi32(vResult); + // Convert from signed to unsigned pnly if greater than 0x80000000 + vMask = _mm_and_ps(vMask, g_XMNegativeZero); + vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask); + // On those that are too large, set to 0xFFFFFFFF + vResult = _mm_or_ps(vResult, vOverflow); + // Write 3 uints + _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(vResult)); + __m128 z = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(2, 2, 2, 2)); + _mm_store_ss(reinterpret_cast(&pDestination->z), z); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreInt4 +( + uint32_t* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + pDestination[0] = V.vector4_u32[0]; + pDestination[1] = V.vector4_u32[1]; + pDestination[2] = V.vector4_u32[2]; + pDestination[3] = V.vector4_u32[3]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + vst1q_u32(pDestination, vreinterpretq_u32_f32(V)); +#elif defined(_XM_SSE_INTRINSICS_) + _mm_storeu_si128(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreInt4A +( + uint32_t* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + assert((reinterpret_cast(pDestination) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + pDestination[0] = V.vector4_u32[0]; + pDestination[1] = V.vector4_u32[1]; + pDestination[2] = V.vector4_u32[2]; + pDestination[3] = V.vector4_u32[3]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#ifdef _MSC_VER + 
vst1q_u32_ex(pDestination, V, 128); +#else + vst1q_u32(pDestination, vreinterpretq_u32_f32(V)); +#endif +#elif defined(_XM_SSE_INTRINSICS_) + _mm_store_si128(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat4 +( + XMFLOAT4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + pDestination->x = V.vector4_f32[0]; + pDestination->y = V.vector4_f32[1]; + pDestination->z = V.vector4_f32[2]; + pDestination->w = V.vector4_f32[3]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + vst1q_f32(reinterpret_cast(pDestination), V); +#elif defined(_XM_SSE_INTRINSICS_) + _mm_storeu_ps(&pDestination->x, V); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat4A +( + XMFLOAT4A* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + assert((reinterpret_cast(pDestination) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + pDestination->x = V.vector4_f32[0]; + pDestination->y = V.vector4_f32[1]; + pDestination->z = V.vector4_f32[2]; + pDestination->w = V.vector4_f32[3]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#ifdef _MSC_VER + vst1q_f32_ex(reinterpret_cast(pDestination), V, 128); +#else + vst1q_f32(reinterpret_cast(pDestination), V); +#endif +#elif defined(_XM_SSE_INTRINSICS_) + _mm_store_ps(&pDestination->x, V); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreSInt4 +( + XMINT4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + pDestination->x = static_cast(V.vector4_f32[0]); + pDestination->y = static_cast(V.vector4_f32[1]); + pDestination->z = static_cast(V.vector4_f32[2]); + pDestination->w = 
static_cast(V.vector4_f32[3]); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + int32x4_t v = vcvtq_s32_f32(V); + vst1q_s32(reinterpret_cast(pDestination), v); +#elif defined(_XM_SSE_INTRINSICS_) + // In case of positive overflow, detect it + XMVECTOR vOverflow = _mm_cmpgt_ps(V, g_XMMaxInt); + // Float to int conversion + __m128i vResulti = _mm_cvttps_epi32(V); + // If there was positive overflow, set to 0x7FFFFFFF + XMVECTOR vResult = _mm_and_ps(vOverflow, g_XMAbsMask); + vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti)); + vOverflow = _mm_or_ps(vOverflow, vResult); + _mm_storeu_si128(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(vOverflow)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreUInt4 +( + XMUINT4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + pDestination->x = static_cast(V.vector4_f32[0]); + pDestination->y = static_cast(V.vector4_f32[1]); + pDestination->z = static_cast(V.vector4_f32[2]); + pDestination->w = static_cast(V.vector4_f32[3]); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t v = vcvtq_u32_f32(V); + vst1q_u32(reinterpret_cast(pDestination), v); +#elif defined(_XM_SSE_INTRINSICS_) + // Clamp to >=0 + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + // Any numbers that are too big, set to 0xFFFFFFFFU + XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt); + XMVECTOR vValue = g_XMUnsignedFix; + // Too large for a signed integer? 
+ XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue); + // Zero for number's lower than 0x80000000, 32768.0f*65536.0f otherwise + vValue = _mm_and_ps(vValue, vMask); + // Perform fixup only on numbers too large (Keeps low bit precision) + vResult = _mm_sub_ps(vResult, vValue); + __m128i vResulti = _mm_cvttps_epi32(vResult); + // Convert from signed to unsigned pnly if greater than 0x80000000 + vMask = _mm_and_ps(vMask, g_XMNegativeZero); + vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask); + // On those that are too large, set to 0xFFFFFFFF + vResult = _mm_or_ps(vResult, vOverflow); + _mm_storeu_si128(reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(vResult)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat3x3 +( + XMFLOAT3X3* pDestination, + FXMMATRIX M +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + pDestination->m[0][0] = M.r[0].vector4_f32[0]; + pDestination->m[0][1] = M.r[0].vector4_f32[1]; + pDestination->m[0][2] = M.r[0].vector4_f32[2]; + + pDestination->m[1][0] = M.r[1].vector4_f32[0]; + pDestination->m[1][1] = M.r[1].vector4_f32[1]; + pDestination->m[1][2] = M.r[1].vector4_f32[2]; + + pDestination->m[2][0] = M.r[2].vector4_f32[0]; + pDestination->m[2][1] = M.r[2].vector4_f32[1]; + pDestination->m[2][2] = M.r[2].vector4_f32[2]; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t T1 = vextq_f32(M.r[0], M.r[1], 1); + float32x4_t T2 = vbslq_f32(g_XMMask3, M.r[0], T1); + vst1q_f32(&pDestination->m[0][0], T2); + + T1 = vextq_f32(M.r[1], M.r[1], 1); + T2 = vcombine_f32(vget_low_f32(T1), vget_low_f32(M.r[2])); + vst1q_f32(&pDestination->m[1][1], T2); + + vst1q_lane_f32(&pDestination->m[2][2], M.r[2], 2); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp1 = M.r[0]; + XMVECTOR vTemp2 = M.r[1]; + XMVECTOR vTemp3 = M.r[2]; + XMVECTOR vWork = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(0, 0, 2, 2)); + vTemp1 = 
_mm_shuffle_ps(vTemp1, vWork, _MM_SHUFFLE(2, 0, 1, 0)); + _mm_storeu_ps(&pDestination->m[0][0], vTemp1); + vTemp2 = _mm_shuffle_ps(vTemp2, vTemp3, _MM_SHUFFLE(1, 0, 2, 1)); + _mm_storeu_ps(&pDestination->m[1][1], vTemp2); + vTemp3 = XM_PERMUTE_PS(vTemp3, _MM_SHUFFLE(2, 2, 2, 2)); + _mm_store_ss(&pDestination->m[2][2], vTemp3); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat4x3 +( + XMFLOAT4X3* pDestination, + FXMMATRIX M +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + pDestination->m[0][0] = M.r[0].vector4_f32[0]; + pDestination->m[0][1] = M.r[0].vector4_f32[1]; + pDestination->m[0][2] = M.r[0].vector4_f32[2]; + + pDestination->m[1][0] = M.r[1].vector4_f32[0]; + pDestination->m[1][1] = M.r[1].vector4_f32[1]; + pDestination->m[1][2] = M.r[1].vector4_f32[2]; + + pDestination->m[2][0] = M.r[2].vector4_f32[0]; + pDestination->m[2][1] = M.r[2].vector4_f32[1]; + pDestination->m[2][2] = M.r[2].vector4_f32[2]; + + pDestination->m[3][0] = M.r[3].vector4_f32[0]; + pDestination->m[3][1] = M.r[3].vector4_f32[1]; + pDestination->m[3][2] = M.r[3].vector4_f32[2]; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t T1 = vextq_f32(M.r[0], M.r[1], 1); + float32x4_t T2 = vbslq_f32(g_XMMask3, M.r[0], T1); + vst1q_f32(&pDestination->m[0][0], T2); + + T1 = vextq_f32(M.r[1], M.r[1], 1); + T2 = vcombine_f32(vget_low_f32(T1), vget_low_f32(M.r[2])); + vst1q_f32(&pDestination->m[1][1], T2); + + T1 = vdupq_lane_f32(vget_high_f32(M.r[2]), 0); + T2 = vextq_f32(T1, M.r[3], 3); + vst1q_f32(&pDestination->m[2][2], T2); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp1 = M.r[0]; + XMVECTOR vTemp2 = M.r[1]; + XMVECTOR vTemp3 = M.r[2]; + XMVECTOR vTemp4 = M.r[3]; + XMVECTOR vTemp2x = _mm_shuffle_ps(vTemp2, vTemp3, _MM_SHUFFLE(1, 0, 2, 1)); + vTemp2 = _mm_shuffle_ps(vTemp2, vTemp1, _MM_SHUFFLE(2, 2, 0, 0)); + vTemp1 = _mm_shuffle_ps(vTemp1, vTemp2, 
_MM_SHUFFLE(0, 2, 1, 0)); + vTemp3 = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(0, 0, 2, 2)); + vTemp3 = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 1, 2, 0)); + _mm_storeu_ps(&pDestination->m[0][0], vTemp1); + _mm_storeu_ps(&pDestination->m[1][1], vTemp2x); + _mm_storeu_ps(&pDestination->m[2][2], vTemp3); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat4x3A +( + XMFLOAT4X3A* pDestination, + FXMMATRIX M +) noexcept +{ + assert(pDestination); + assert((reinterpret_cast(pDestination) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + + pDestination->m[0][0] = M.r[0].vector4_f32[0]; + pDestination->m[0][1] = M.r[0].vector4_f32[1]; + pDestination->m[0][2] = M.r[0].vector4_f32[2]; + + pDestination->m[1][0] = M.r[1].vector4_f32[0]; + pDestination->m[1][1] = M.r[1].vector4_f32[1]; + pDestination->m[1][2] = M.r[1].vector4_f32[2]; + + pDestination->m[2][0] = M.r[2].vector4_f32[0]; + pDestination->m[2][1] = M.r[2].vector4_f32[1]; + pDestination->m[2][2] = M.r[2].vector4_f32[2]; + + pDestination->m[3][0] = M.r[3].vector4_f32[0]; + pDestination->m[3][1] = M.r[3].vector4_f32[1]; + pDestination->m[3][2] = M.r[3].vector4_f32[2]; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#ifdef _MSC_VER + float32x4_t T1 = vextq_f32(M.r[0], M.r[1], 1); + float32x4_t T2 = vbslq_f32(g_XMMask3, M.r[0], T1); + vst1q_f32_ex(&pDestination->m[0][0], T2, 128); + + T1 = vextq_f32(M.r[1], M.r[1], 1); + T2 = vcombine_f32(vget_low_f32(T1), vget_low_f32(M.r[2])); + vst1q_f32_ex(&pDestination->m[1][1], T2, 128); + + T1 = vdupq_lane_f32(vget_high_f32(M.r[2]), 0); + T2 = vextq_f32(T1, M.r[3], 3); + vst1q_f32_ex(&pDestination->m[2][2], T2, 128); +#else + float32x4_t T1 = vextq_f32(M.r[0], M.r[1], 1); + float32x4_t T2 = vbslq_f32(g_XMMask3, M.r[0], T1); + vst1q_f32(&pDestination->m[0][0], T2); + + T1 = vextq_f32(M.r[1], M.r[1], 1); + T2 = vcombine_f32(vget_low_f32(T1), vget_low_f32(M.r[2])); + 
vst1q_f32(&pDestination->m[1][1], T2); + + T1 = vdupq_lane_f32(vget_high_f32(M.r[2]), 0); + T2 = vextq_f32(T1, M.r[3], 3); + vst1q_f32(&pDestination->m[2][2], T2); +#endif +#elif defined(_XM_SSE_INTRINSICS_) + // x1,y1,z1,w1 + XMVECTOR vTemp1 = M.r[0]; + // x2,y2,z2,w2 + XMVECTOR vTemp2 = M.r[1]; + // x3,y3,z3,w3 + XMVECTOR vTemp3 = M.r[2]; + // x4,y4,z4,w4 + XMVECTOR vTemp4 = M.r[3]; + // z1,z1,x2,y2 + XMVECTOR vTemp = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(1, 0, 2, 2)); + // y2,z2,x3,y3 (Final) + vTemp2 = _mm_shuffle_ps(vTemp2, vTemp3, _MM_SHUFFLE(1, 0, 2, 1)); + // x1,y1,z1,x2 (Final) + vTemp1 = _mm_shuffle_ps(vTemp1, vTemp, _MM_SHUFFLE(2, 0, 1, 0)); + // z3,z3,x4,x4 + vTemp3 = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(0, 0, 2, 2)); + // z3,x4,y4,z4 (Final) + vTemp3 = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 1, 2, 0)); + // Store in 3 operations + _mm_store_ps(&pDestination->m[0][0], vTemp1); + _mm_store_ps(&pDestination->m[1][1], vTemp2); + _mm_store_ps(&pDestination->m[2][2], vTemp3); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat3x4 +( + XMFLOAT3X4* pDestination, + FXMMATRIX M +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + pDestination->m[0][0] = M.r[0].vector4_f32[0]; + pDestination->m[0][1] = M.r[1].vector4_f32[0]; + pDestination->m[0][2] = M.r[2].vector4_f32[0]; + pDestination->m[0][3] = M.r[3].vector4_f32[0]; + + pDestination->m[1][0] = M.r[0].vector4_f32[1]; + pDestination->m[1][1] = M.r[1].vector4_f32[1]; + pDestination->m[1][2] = M.r[2].vector4_f32[1]; + pDestination->m[1][3] = M.r[3].vector4_f32[1]; + + pDestination->m[2][0] = M.r[0].vector4_f32[2]; + pDestination->m[2][1] = M.r[1].vector4_f32[2]; + pDestination->m[2][2] = M.r[2].vector4_f32[2]; + pDestination->m[2][3] = M.r[3].vector4_f32[2]; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4x2_t P0 = vzipq_f32(M.r[0], M.r[2]); + float32x4x2_t P1 = 
vzipq_f32(M.r[1], M.r[3]); + + float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]); + float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]); + + vst1q_f32(&pDestination->m[0][0], T0.val[0]); + vst1q_f32(&pDestination->m[1][0], T0.val[1]); + vst1q_f32(&pDestination->m[2][0], T1.val[0]); +#elif defined(_XM_SSE_INTRINSICS_) + // x.x,x.y,y.x,y.y + XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0)); + // x.z,x.w,y.z,y.w + XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2)); + // z.x,z.y,w.x,w.y + XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0)); + // z.z,z.w,w.z,w.w + XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2)); + + // x.x,y.x,z.x,w.x + XMVECTOR r0 = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); + // x.y,y.y,z.y,w.y + XMVECTOR r1 = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); + // x.z,y.z,z.z,w.z + XMVECTOR r2 = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); + + _mm_storeu_ps(&pDestination->m[0][0], r0); + _mm_storeu_ps(&pDestination->m[1][0], r1); + _mm_storeu_ps(&pDestination->m[2][0], r2); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat3x4A +( + XMFLOAT3X4A* pDestination, + FXMMATRIX M +) noexcept +{ + assert(pDestination); + assert((reinterpret_cast(pDestination) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + + pDestination->m[0][0] = M.r[0].vector4_f32[0]; + pDestination->m[0][1] = M.r[1].vector4_f32[0]; + pDestination->m[0][2] = M.r[2].vector4_f32[0]; + pDestination->m[0][3] = M.r[3].vector4_f32[0]; + + pDestination->m[1][0] = M.r[0].vector4_f32[1]; + pDestination->m[1][1] = M.r[1].vector4_f32[1]; + pDestination->m[1][2] = M.r[2].vector4_f32[1]; + pDestination->m[1][3] = M.r[3].vector4_f32[1]; + + pDestination->m[2][0] = M.r[0].vector4_f32[2]; + pDestination->m[2][1] = M.r[1].vector4_f32[2]; + pDestination->m[2][2] = 
M.r[2].vector4_f32[2]; + pDestination->m[2][3] = M.r[3].vector4_f32[2]; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4x2_t P0 = vzipq_f32(M.r[0], M.r[2]); + float32x4x2_t P1 = vzipq_f32(M.r[1], M.r[3]); + + float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]); + float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]); + +#ifdef _MSC_VER + vst1q_f32_ex(&pDestination->m[0][0], T0.val[0], 128); + vst1q_f32_ex(&pDestination->m[1][0], T0.val[1], 128); + vst1q_f32_ex(&pDestination->m[2][0], T1.val[0], 128); +#else + vst1q_f32(&pDestination->m[0][0], T0.val[0]); + vst1q_f32(&pDestination->m[1][0], T0.val[1]); + vst1q_f32(&pDestination->m[2][0], T1.val[0]); +#endif +#elif defined(_XM_SSE_INTRINSICS_) + // x.x,x.y,y.x,y.y + XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0)); + // x.z,x.w,y.z,y.w + XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2)); + // z.x,z.y,w.x,w.y + XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0)); + // z.z,z.w,w.z,w.w + XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2)); + + // x.x,y.x,z.x,w.x + XMVECTOR r0 = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); + // x.y,y.y,z.y,w.y + XMVECTOR r1 = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); + // x.z,y.z,z.z,w.z + XMVECTOR r2 = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); + + _mm_store_ps(&pDestination->m[0][0], r0); + _mm_store_ps(&pDestination->m[1][0], r1); + _mm_store_ps(&pDestination->m[2][0], r2); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat4x4 +( + XMFLOAT4X4* pDestination, + FXMMATRIX M +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + pDestination->m[0][0] = M.r[0].vector4_f32[0]; + pDestination->m[0][1] = M.r[0].vector4_f32[1]; + pDestination->m[0][2] = M.r[0].vector4_f32[2]; + pDestination->m[0][3] = M.r[0].vector4_f32[3]; + + 
pDestination->m[1][0] = M.r[1].vector4_f32[0]; + pDestination->m[1][1] = M.r[1].vector4_f32[1]; + pDestination->m[1][2] = M.r[1].vector4_f32[2]; + pDestination->m[1][3] = M.r[1].vector4_f32[3]; + + pDestination->m[2][0] = M.r[2].vector4_f32[0]; + pDestination->m[2][1] = M.r[2].vector4_f32[1]; + pDestination->m[2][2] = M.r[2].vector4_f32[2]; + pDestination->m[2][3] = M.r[2].vector4_f32[3]; + + pDestination->m[3][0] = M.r[3].vector4_f32[0]; + pDestination->m[3][1] = M.r[3].vector4_f32[1]; + pDestination->m[3][2] = M.r[3].vector4_f32[2]; + pDestination->m[3][3] = M.r[3].vector4_f32[3]; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + vst1q_f32(reinterpret_cast(&pDestination->_11), M.r[0]); + vst1q_f32(reinterpret_cast(&pDestination->_21), M.r[1]); + vst1q_f32(reinterpret_cast(&pDestination->_31), M.r[2]); + vst1q_f32(reinterpret_cast(&pDestination->_41), M.r[3]); +#elif defined(_XM_SSE_INTRINSICS_) + _mm_storeu_ps(&pDestination->_11, M.r[0]); + _mm_storeu_ps(&pDestination->_21, M.r[1]); + _mm_storeu_ps(&pDestination->_31, M.r[2]); + _mm_storeu_ps(&pDestination->_41, M.r[3]); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat4x4A +( + XMFLOAT4X4A* pDestination, + FXMMATRIX M +) noexcept +{ + assert(pDestination); + assert((reinterpret_cast(pDestination) & 0xF) == 0); +#if defined(_XM_NO_INTRINSICS_) + + pDestination->m[0][0] = M.r[0].vector4_f32[0]; + pDestination->m[0][1] = M.r[0].vector4_f32[1]; + pDestination->m[0][2] = M.r[0].vector4_f32[2]; + pDestination->m[0][3] = M.r[0].vector4_f32[3]; + + pDestination->m[1][0] = M.r[1].vector4_f32[0]; + pDestination->m[1][1] = M.r[1].vector4_f32[1]; + pDestination->m[1][2] = M.r[1].vector4_f32[2]; + pDestination->m[1][3] = M.r[1].vector4_f32[3]; + + pDestination->m[2][0] = M.r[2].vector4_f32[0]; + pDestination->m[2][1] = M.r[2].vector4_f32[1]; + pDestination->m[2][2] = M.r[2].vector4_f32[2]; + pDestination->m[2][3] = 
M.r[2].vector4_f32[3]; + + pDestination->m[3][0] = M.r[3].vector4_f32[0]; + pDestination->m[3][1] = M.r[3].vector4_f32[1]; + pDestination->m[3][2] = M.r[3].vector4_f32[2]; + pDestination->m[3][3] = M.r[3].vector4_f32[3]; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#ifdef _MSC_VER + vst1q_f32_ex(reinterpret_cast(&pDestination->_11), M.r[0], 128); + vst1q_f32_ex(reinterpret_cast(&pDestination->_21), M.r[1], 128); + vst1q_f32_ex(reinterpret_cast(&pDestination->_31), M.r[2], 128); + vst1q_f32_ex(reinterpret_cast(&pDestination->_41), M.r[3], 128); +#else + vst1q_f32(reinterpret_cast(&pDestination->_11), M.r[0]); + vst1q_f32(reinterpret_cast(&pDestination->_21), M.r[1]); + vst1q_f32(reinterpret_cast(&pDestination->_31), M.r[2]); + vst1q_f32(reinterpret_cast(&pDestination->_41), M.r[3]); +#endif +#elif defined(_XM_SSE_INTRINSICS_) + _mm_store_ps(&pDestination->_11, M.r[0]); + _mm_store_ps(&pDestination->_21, M.r[1]); + _mm_store_ps(&pDestination->_31, M.r[2]); + _mm_store_ps(&pDestination->_41, M.r[3]); +#endif +} + diff --git a/Sdk/External/DirectXMath/Inc/DirectXMathMatrix.inl b/Sdk/External/DirectXMath/Inc/DirectXMathMatrix.inl new file mode 100644 index 0000000..5337457 --- /dev/null +++ b/Sdk/External/DirectXMath/Inc/DirectXMathMatrix.inl @@ -0,0 +1,3422 @@ +//------------------------------------------------------------------------------------- +// DirectXMathMatrix.inl -- SIMD C++ Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +/**************************************************************************** + * + * Matrix + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + // Comparison operations + //------------------------------------------------------------------------------ + + //------------------------------------------------------------------------------ + +#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER) +#pragma float_control(push) +#pragma float_control(precise, on) +#endif + +// Return true if any entry in the matrix is NaN +inline bool XM_CALLCONV XMMatrixIsNaN(FXMMATRIX M) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + size_t i = 16; + auto pWork = reinterpret_cast(&M.m[0][0]); + do { + // Fetch value into integer unit + uint32_t uTest = pWork[0]; + // Remove sign + uTest &= 0x7FFFFFFFU; + // NaN is 0x7F800001 through 0x7FFFFFFF inclusive + uTest -= 0x7F800001U; + if (uTest < 0x007FFFFFU) + { + break; // NaN found + } + ++pWork; // Next entry + } while (--i); + return (i != 0); // i == 0 if nothing matched +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Load in registers + float32x4_t vX = M.r[0]; + float32x4_t vY = M.r[1]; + float32x4_t vZ = M.r[2]; + float32x4_t vW = M.r[3]; + // Test themselves to check for NaN + uint32x4_t xmask = vmvnq_u32(vceqq_f32(vX, vX)); + uint32x4_t ymask = vmvnq_u32(vceqq_f32(vY, vY)); + uint32x4_t zmask = vmvnq_u32(vceqq_f32(vZ, vZ)); + uint32x4_t wmask = vmvnq_u32(vceqq_f32(vW, vW)); + // Or all the results + xmask = vorrq_u32(xmask, zmask); + ymask = vorrq_u32(ymask, wmask); + xmask = vorrq_u32(xmask, ymask); + // If any tested true, return true + uint8x8x2_t vTemp = vzip_u8( + 
vget_low_u8(vreinterpretq_u8_u32(xmask)), + vget_high_u8(vreinterpretq_u8_u32(xmask))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); + return (r != 0); +#elif defined(_XM_SSE_INTRINSICS_) + // Load in registers + XMVECTOR vX = M.r[0]; + XMVECTOR vY = M.r[1]; + XMVECTOR vZ = M.r[2]; + XMVECTOR vW = M.r[3]; + // Test themselves to check for NaN + vX = _mm_cmpneq_ps(vX, vX); + vY = _mm_cmpneq_ps(vY, vY); + vZ = _mm_cmpneq_ps(vZ, vZ); + vW = _mm_cmpneq_ps(vW, vW); + // Or all the results + vX = _mm_or_ps(vX, vZ); + vY = _mm_or_ps(vY, vW); + vX = _mm_or_ps(vX, vY); + // If any tested true, return true + return (_mm_movemask_ps(vX) != 0); +#else +#endif +} + +#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER) +#pragma float_control(pop) +#endif + +//------------------------------------------------------------------------------ + +// Return true if any entry in the matrix is +/-INF +inline bool XM_CALLCONV XMMatrixIsInfinite(FXMMATRIX M) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + size_t i = 16; + auto pWork = reinterpret_cast(&M.m[0][0]); + do { + // Fetch value into integer unit + uint32_t uTest = pWork[0]; + // Remove sign + uTest &= 0x7FFFFFFFU; + // INF is 0x7F800000 + if (uTest == 0x7F800000U) + { + break; // INF found + } + ++pWork; // Next entry + } while (--i); + return (i != 0); // i == 0 if nothing matched +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Load in registers + float32x4_t vX = M.r[0]; + float32x4_t vY = M.r[1]; + float32x4_t vZ = M.r[2]; + float32x4_t vW = M.r[3]; + // Mask off the sign bits + vX = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(vX), g_XMAbsMask)); + vY = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(vY), g_XMAbsMask)); + vZ = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(vZ), g_XMAbsMask)); + vW = 
vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(vW), g_XMAbsMask)); + // Compare to infinity + uint32x4_t xmask = vceqq_f32(vX, g_XMInfinity); + uint32x4_t ymask = vceqq_f32(vY, g_XMInfinity); + uint32x4_t zmask = vceqq_f32(vZ, g_XMInfinity); + uint32x4_t wmask = vceqq_f32(vW, g_XMInfinity); + // Or the answers together + xmask = vorrq_u32(xmask, zmask); + ymask = vorrq_u32(ymask, wmask); + xmask = vorrq_u32(xmask, ymask); + // If any tested true, return true + uint8x8x2_t vTemp = vzip_u8( + vget_low_u8(vreinterpretq_u8_u32(xmask)), + vget_high_u8(vreinterpretq_u8_u32(xmask))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); + return (r != 0); +#elif defined(_XM_SSE_INTRINSICS_) + // Mask off the sign bits + XMVECTOR vTemp1 = _mm_and_ps(M.r[0], g_XMAbsMask); + XMVECTOR vTemp2 = _mm_and_ps(M.r[1], g_XMAbsMask); + XMVECTOR vTemp3 = _mm_and_ps(M.r[2], g_XMAbsMask); + XMVECTOR vTemp4 = _mm_and_ps(M.r[3], g_XMAbsMask); + // Compare to infinity + vTemp1 = _mm_cmpeq_ps(vTemp1, g_XMInfinity); + vTemp2 = _mm_cmpeq_ps(vTemp2, g_XMInfinity); + vTemp3 = _mm_cmpeq_ps(vTemp3, g_XMInfinity); + vTemp4 = _mm_cmpeq_ps(vTemp4, g_XMInfinity); + // Or the answers together + vTemp1 = _mm_or_ps(vTemp1, vTemp2); + vTemp3 = _mm_or_ps(vTemp3, vTemp4); + vTemp1 = _mm_or_ps(vTemp1, vTemp3); + // If any are infinity, the signs are true. 
+ return (_mm_movemask_ps(vTemp1) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +// Return true if the XMMatrix is equal to identity +inline bool XM_CALLCONV XMMatrixIsIdentity(FXMMATRIX M) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + // Use the integer pipeline to reduce branching to a minimum + auto pWork = reinterpret_cast(&M.m[0][0]); + // Convert 1.0f to zero and or them together + uint32_t uOne = pWork[0] ^ 0x3F800000U; + // Or all the 0.0f entries together + uint32_t uZero = pWork[1]; + uZero |= pWork[2]; + uZero |= pWork[3]; + // 2nd row + uZero |= pWork[4]; + uOne |= pWork[5] ^ 0x3F800000U; + uZero |= pWork[6]; + uZero |= pWork[7]; + // 3rd row + uZero |= pWork[8]; + uZero |= pWork[9]; + uOne |= pWork[10] ^ 0x3F800000U; + uZero |= pWork[11]; + // 4th row + uZero |= pWork[12]; + uZero |= pWork[13]; + uZero |= pWork[14]; + uOne |= pWork[15] ^ 0x3F800000U; + // If all zero entries are zero, the uZero==0 + uZero &= 0x7FFFFFFF; // Allow -0.0f + // If all 1.0f entries are 1.0f, then uOne==0 + uOne |= uZero; + return (uOne == 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t xmask = vceqq_f32(M.r[0], g_XMIdentityR0); + uint32x4_t ymask = vceqq_f32(M.r[1], g_XMIdentityR1); + uint32x4_t zmask = vceqq_f32(M.r[2], g_XMIdentityR2); + uint32x4_t wmask = vceqq_f32(M.r[3], g_XMIdentityR3); + xmask = vandq_u32(xmask, zmask); + ymask = vandq_u32(ymask, wmask); + xmask = vandq_u32(xmask, ymask); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(xmask)), vget_high_u8(vreinterpretq_u8_u32(xmask))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); + return (r == 0xFFFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp1 = _mm_cmpeq_ps(M.r[0], g_XMIdentityR0); + XMVECTOR vTemp2 = _mm_cmpeq_ps(M.r[1], g_XMIdentityR1); + XMVECTOR vTemp3 = _mm_cmpeq_ps(M.r[2], 
g_XMIdentityR2); + XMVECTOR vTemp4 = _mm_cmpeq_ps(M.r[3], g_XMIdentityR3); + vTemp1 = _mm_and_ps(vTemp1, vTemp2); + vTemp3 = _mm_and_ps(vTemp3, vTemp4); + vTemp1 = _mm_and_ps(vTemp1, vTemp3); + return (_mm_movemask_ps(vTemp1) == 0x0f); +#endif +} + +//------------------------------------------------------------------------------ +// Computation operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ +// Perform a 4x4 matrix multiply by a 4x4 matrix +inline XMMATRIX XM_CALLCONV XMMatrixMultiply +( + FXMMATRIX M1, + CXMMATRIX M2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMMATRIX mResult; + // Cache the invariants in registers + float x = M1.m[0][0]; + float y = M1.m[0][1]; + float z = M1.m[0][2]; + float w = M1.m[0][3]; + // Perform the operation on the first row + mResult.m[0][0] = (M2.m[0][0] * x) + (M2.m[1][0] * y) + (M2.m[2][0] * z) + (M2.m[3][0] * w); + mResult.m[0][1] = (M2.m[0][1] * x) + (M2.m[1][1] * y) + (M2.m[2][1] * z) + (M2.m[3][1] * w); + mResult.m[0][2] = (M2.m[0][2] * x) + (M2.m[1][2] * y) + (M2.m[2][2] * z) + (M2.m[3][2] * w); + mResult.m[0][3] = (M2.m[0][3] * x) + (M2.m[1][3] * y) + (M2.m[2][3] * z) + (M2.m[3][3] * w); + // Repeat for all the other rows + x = M1.m[1][0]; + y = M1.m[1][1]; + z = M1.m[1][2]; + w = M1.m[1][3]; + mResult.m[1][0] = (M2.m[0][0] * x) + (M2.m[1][0] * y) + (M2.m[2][0] * z) + (M2.m[3][0] * w); + mResult.m[1][1] = (M2.m[0][1] * x) + (M2.m[1][1] * y) + (M2.m[2][1] * z) + (M2.m[3][1] * w); + mResult.m[1][2] = (M2.m[0][2] * x) + (M2.m[1][2] * y) + (M2.m[2][2] * z) + (M2.m[3][2] * w); + mResult.m[1][3] = (M2.m[0][3] * x) + (M2.m[1][3] * y) + (M2.m[2][3] * z) + (M2.m[3][3] * w); + x = M1.m[2][0]; + y = M1.m[2][1]; + z = M1.m[2][2]; + w = M1.m[2][3]; + mResult.m[2][0] = (M2.m[0][0] * x) + (M2.m[1][0] * y) + (M2.m[2][0] * z) + (M2.m[3][0] * w); + mResult.m[2][1] = (M2.m[0][1] * x) + (M2.m[1][1] * y) + 
(M2.m[2][1] * z) + (M2.m[3][1] * w); + mResult.m[2][2] = (M2.m[0][2] * x) + (M2.m[1][2] * y) + (M2.m[2][2] * z) + (M2.m[3][2] * w); + mResult.m[2][3] = (M2.m[0][3] * x) + (M2.m[1][3] * y) + (M2.m[2][3] * z) + (M2.m[3][3] * w); + x = M1.m[3][0]; + y = M1.m[3][1]; + z = M1.m[3][2]; + w = M1.m[3][3]; + mResult.m[3][0] = (M2.m[0][0] * x) + (M2.m[1][0] * y) + (M2.m[2][0] * z) + (M2.m[3][0] * w); + mResult.m[3][1] = (M2.m[0][1] * x) + (M2.m[1][1] * y) + (M2.m[2][1] * z) + (M2.m[3][1] * w); + mResult.m[3][2] = (M2.m[0][2] * x) + (M2.m[1][2] * y) + (M2.m[2][2] * z) + (M2.m[3][2] * w); + mResult.m[3][3] = (M2.m[0][3] * x) + (M2.m[1][3] * y) + (M2.m[2][3] * z) + (M2.m[3][3] * w); + return mResult; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMMATRIX mResult; + float32x2_t VL = vget_low_f32(M1.r[0]); + float32x2_t VH = vget_high_f32(M1.r[0]); + // Perform the operation on the first row + float32x4_t vX = vmulq_lane_f32(M2.r[0], VL, 0); + float32x4_t vY = vmulq_lane_f32(M2.r[1], VL, 1); + float32x4_t vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); + float32x4_t vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); + mResult.r[0] = vaddq_f32(vZ, vW); + // Repeat for the other 3 rows + VL = vget_low_f32(M1.r[1]); + VH = vget_high_f32(M1.r[1]); + vX = vmulq_lane_f32(M2.r[0], VL, 0); + vY = vmulq_lane_f32(M2.r[1], VL, 1); + vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); + vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); + mResult.r[1] = vaddq_f32(vZ, vW); + VL = vget_low_f32(M1.r[2]); + VH = vget_high_f32(M1.r[2]); + vX = vmulq_lane_f32(M2.r[0], VL, 0); + vY = vmulq_lane_f32(M2.r[1], VL, 1); + vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); + vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); + mResult.r[2] = vaddq_f32(vZ, vW); + VL = vget_low_f32(M1.r[3]); + VH = vget_high_f32(M1.r[3]); + vX = vmulq_lane_f32(M2.r[0], VL, 0); + vY = vmulq_lane_f32(M2.r[1], VL, 1); + vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); + vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); + mResult.r[3] = vaddq_f32(vZ, vW); + return mResult; +#elif 
defined(_XM_AVX2_INTRINSICS_) + __m256 t0 = _mm256_castps128_ps256(M1.r[0]); + t0 = _mm256_insertf128_ps(t0, M1.r[1], 1); + __m256 t1 = _mm256_castps128_ps256(M1.r[2]); + t1 = _mm256_insertf128_ps(t1, M1.r[3], 1); + + __m256 u0 = _mm256_castps128_ps256(M2.r[0]); + u0 = _mm256_insertf128_ps(u0, M2.r[1], 1); + __m256 u1 = _mm256_castps128_ps256(M2.r[2]); + u1 = _mm256_insertf128_ps(u1, M2.r[3], 1); + + __m256 a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(0, 0, 0, 0)); + __m256 a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(0, 0, 0, 0)); + __m256 b0 = _mm256_permute2f128_ps(u0, u0, 0x00); + __m256 c0 = _mm256_mul_ps(a0, b0); + __m256 c1 = _mm256_mul_ps(a1, b0); + + a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(1, 1, 1, 1)); + a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 1, 1, 1)); + b0 = _mm256_permute2f128_ps(u0, u0, 0x11); + __m256 c2 = _mm256_fmadd_ps(a0, b0, c0); + __m256 c3 = _mm256_fmadd_ps(a1, b0, c1); + + a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 2, 2)); + a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 2, 2, 2)); + __m256 b1 = _mm256_permute2f128_ps(u1, u1, 0x00); + __m256 c4 = _mm256_mul_ps(a0, b1); + __m256 c5 = _mm256_mul_ps(a1, b1); + + a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 3, 3)); + a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(3, 3, 3, 3)); + b1 = _mm256_permute2f128_ps(u1, u1, 0x11); + __m256 c6 = _mm256_fmadd_ps(a0, b1, c4); + __m256 c7 = _mm256_fmadd_ps(a1, b1, c5); + + t0 = _mm256_add_ps(c2, c6); + t1 = _mm256_add_ps(c3, c7); + + XMMATRIX mResult; + mResult.r[0] = _mm256_castps256_ps128(t0); + mResult.r[1] = _mm256_extractf128_ps(t0, 1); + mResult.r[2] = _mm256_castps256_ps128(t1); + mResult.r[3] = _mm256_extractf128_ps(t1, 1); + return mResult; +#elif defined(_XM_SSE_INTRINSICS_) + XMMATRIX mResult; + // Splat the component X,Y,Z then W +#if defined(_XM_AVX_INTRINSICS_) + XMVECTOR vX = _mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 0); + XMVECTOR vY = _mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 1); + XMVECTOR vZ = 
_mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 2); + XMVECTOR vW = _mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 3); +#else + // Use vW to hold the original row + XMVECTOR vW = M1.r[0]; + XMVECTOR vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); + XMVECTOR vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); + vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); +#endif + // Perform the operation on the first row + vX = _mm_mul_ps(vX, M2.r[0]); + vY = _mm_mul_ps(vY, M2.r[1]); + vZ = _mm_mul_ps(vZ, M2.r[2]); + vW = _mm_mul_ps(vW, M2.r[3]); + // Perform a binary add to reduce cumulative errors + vX = _mm_add_ps(vX, vZ); + vY = _mm_add_ps(vY, vW); + vX = _mm_add_ps(vX, vY); + mResult.r[0] = vX; + // Repeat for the other 3 rows +#if defined(_XM_AVX_INTRINSICS_) + vX = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 0); + vY = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 1); + vZ = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 2); + vW = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 3); +#else + vW = M1.r[1]; + vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); + vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); + vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); + vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); +#endif + vX = _mm_mul_ps(vX, M2.r[0]); + vY = _mm_mul_ps(vY, M2.r[1]); + vZ = _mm_mul_ps(vZ, M2.r[2]); + vW = _mm_mul_ps(vW, M2.r[3]); + vX = _mm_add_ps(vX, vZ); + vY = _mm_add_ps(vY, vW); + vX = _mm_add_ps(vX, vY); + mResult.r[1] = vX; +#if defined(_XM_AVX_INTRINSICS_) + vX = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 0); + vY = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 1); + vZ = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 2); + vW = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 3); +#else + vW = M1.r[2]; + vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); + vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); + vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); + vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 
3)); +#endif + vX = _mm_mul_ps(vX, M2.r[0]); + vY = _mm_mul_ps(vY, M2.r[1]); + vZ = _mm_mul_ps(vZ, M2.r[2]); + vW = _mm_mul_ps(vW, M2.r[3]); + vX = _mm_add_ps(vX, vZ); + vY = _mm_add_ps(vY, vW); + vX = _mm_add_ps(vX, vY); + mResult.r[2] = vX; +#if defined(_XM_AVX_INTRINSICS_) + vX = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 0); + vY = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 1); + vZ = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 2); + vW = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 3); +#else + vW = M1.r[3]; + vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); + vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); + vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); + vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); +#endif + vX = _mm_mul_ps(vX, M2.r[0]); + vY = _mm_mul_ps(vY, M2.r[1]); + vZ = _mm_mul_ps(vZ, M2.r[2]); + vW = _mm_mul_ps(vW, M2.r[3]); + vX = _mm_add_ps(vX, vZ); + vY = _mm_add_ps(vY, vW); + vX = _mm_add_ps(vX, vY); + mResult.r[3] = vX; + return mResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose +( + FXMMATRIX M1, + CXMMATRIX M2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMMATRIX mResult; + // Cache the invariants in registers + float x = M2.m[0][0]; + float y = M2.m[1][0]; + float z = M2.m[2][0]; + float w = M2.m[3][0]; + // Perform the operation on the first row + mResult.m[0][0] = (M1.m[0][0] * x) + (M1.m[0][1] * y) + (M1.m[0][2] * z) + (M1.m[0][3] * w); + mResult.m[0][1] = (M1.m[1][0] * x) + (M1.m[1][1] * y) + (M1.m[1][2] * z) + (M1.m[1][3] * w); + mResult.m[0][2] = (M1.m[2][0] * x) + (M1.m[2][1] * y) + (M1.m[2][2] * z) + (M1.m[2][3] * w); + mResult.m[0][3] = (M1.m[3][0] * x) + (M1.m[3][1] * y) + (M1.m[3][2] * z) + (M1.m[3][3] * w); + // Repeat for all the other rows + x = M2.m[0][1]; + y = M2.m[1][1]; + z = M2.m[2][1]; + w = M2.m[3][1]; + mResult.m[1][0] = (M1.m[0][0] * x) + (M1.m[0][1] * y) + (M1.m[0][2] * z) + (M1.m[0][3] * 
w); + mResult.m[1][1] = (M1.m[1][0] * x) + (M1.m[1][1] * y) + (M1.m[1][2] * z) + (M1.m[1][3] * w); + mResult.m[1][2] = (M1.m[2][0] * x) + (M1.m[2][1] * y) + (M1.m[2][2] * z) + (M1.m[2][3] * w); + mResult.m[1][3] = (M1.m[3][0] * x) + (M1.m[3][1] * y) + (M1.m[3][2] * z) + (M1.m[3][3] * w); + x = M2.m[0][2]; + y = M2.m[1][2]; + z = M2.m[2][2]; + w = M2.m[3][2]; + mResult.m[2][0] = (M1.m[0][0] * x) + (M1.m[0][1] * y) + (M1.m[0][2] * z) + (M1.m[0][3] * w); + mResult.m[2][1] = (M1.m[1][0] * x) + (M1.m[1][1] * y) + (M1.m[1][2] * z) + (M1.m[1][3] * w); + mResult.m[2][2] = (M1.m[2][0] * x) + (M1.m[2][1] * y) + (M1.m[2][2] * z) + (M1.m[2][3] * w); + mResult.m[2][3] = (M1.m[3][0] * x) + (M1.m[3][1] * y) + (M1.m[3][2] * z) + (M1.m[3][3] * w); + x = M2.m[0][3]; + y = M2.m[1][3]; + z = M2.m[2][3]; + w = M2.m[3][3]; + mResult.m[3][0] = (M1.m[0][0] * x) + (M1.m[0][1] * y) + (M1.m[0][2] * z) + (M1.m[0][3] * w); + mResult.m[3][1] = (M1.m[1][0] * x) + (M1.m[1][1] * y) + (M1.m[1][2] * z) + (M1.m[1][3] * w); + mResult.m[3][2] = (M1.m[2][0] * x) + (M1.m[2][1] * y) + (M1.m[2][2] * z) + (M1.m[2][3] * w); + mResult.m[3][3] = (M1.m[3][0] * x) + (M1.m[3][1] * y) + (M1.m[3][2] * z) + (M1.m[3][3] * w); + return mResult; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(M1.r[0]); + float32x2_t VH = vget_high_f32(M1.r[0]); + // Perform the operation on the first row + float32x4_t vX = vmulq_lane_f32(M2.r[0], VL, 0); + float32x4_t vY = vmulq_lane_f32(M2.r[1], VL, 1); + float32x4_t vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); + float32x4_t vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); + float32x4_t r0 = vaddq_f32(vZ, vW); + // Repeat for the other 3 rows + VL = vget_low_f32(M1.r[1]); + VH = vget_high_f32(M1.r[1]); + vX = vmulq_lane_f32(M2.r[0], VL, 0); + vY = vmulq_lane_f32(M2.r[1], VL, 1); + vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); + vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); + float32x4_t r1 = vaddq_f32(vZ, vW); + VL = vget_low_f32(M1.r[2]); + VH = vget_high_f32(M1.r[2]); + vX = 
vmulq_lane_f32(M2.r[0], VL, 0); + vY = vmulq_lane_f32(M2.r[1], VL, 1); + vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); + vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); + float32x4_t r2 = vaddq_f32(vZ, vW); + VL = vget_low_f32(M1.r[3]); + VH = vget_high_f32(M1.r[3]); + vX = vmulq_lane_f32(M2.r[0], VL, 0); + vY = vmulq_lane_f32(M2.r[1], VL, 1); + vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); + vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); + float32x4_t r3 = vaddq_f32(vZ, vW); + + // Transpose result + float32x4x2_t P0 = vzipq_f32(r0, r2); + float32x4x2_t P1 = vzipq_f32(r1, r3); + + float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]); + float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]); + + XMMATRIX mResult; + mResult.r[0] = T0.val[0]; + mResult.r[1] = T0.val[1]; + mResult.r[2] = T1.val[0]; + mResult.r[3] = T1.val[1]; + return mResult; +#elif defined(_XM_AVX2_INTRINSICS_) + __m256 t0 = _mm256_castps128_ps256(M1.r[0]); + t0 = _mm256_insertf128_ps(t0, M1.r[1], 1); + __m256 t1 = _mm256_castps128_ps256(M1.r[2]); + t1 = _mm256_insertf128_ps(t1, M1.r[3], 1); + + __m256 u0 = _mm256_castps128_ps256(M2.r[0]); + u0 = _mm256_insertf128_ps(u0, M2.r[1], 1); + __m256 u1 = _mm256_castps128_ps256(M2.r[2]); + u1 = _mm256_insertf128_ps(u1, M2.r[3], 1); + + __m256 a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(0, 0, 0, 0)); + __m256 a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(0, 0, 0, 0)); + __m256 b0 = _mm256_permute2f128_ps(u0, u0, 0x00); + __m256 c0 = _mm256_mul_ps(a0, b0); + __m256 c1 = _mm256_mul_ps(a1, b0); + + a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(1, 1, 1, 1)); + a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 1, 1, 1)); + b0 = _mm256_permute2f128_ps(u0, u0, 0x11); + __m256 c2 = _mm256_fmadd_ps(a0, b0, c0); + __m256 c3 = _mm256_fmadd_ps(a1, b0, c1); + + a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 2, 2)); + a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 2, 2, 2)); + __m256 b1 = _mm256_permute2f128_ps(u1, u1, 0x00); + __m256 c4 = _mm256_mul_ps(a0, b1); + __m256 c5 = _mm256_mul_ps(a1, b1); + + a0 = 
_mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 3, 3)); + a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(3, 3, 3, 3)); + b1 = _mm256_permute2f128_ps(u1, u1, 0x11); + __m256 c6 = _mm256_fmadd_ps(a0, b1, c4); + __m256 c7 = _mm256_fmadd_ps(a1, b1, c5); + + t0 = _mm256_add_ps(c2, c6); + t1 = _mm256_add_ps(c3, c7); + + // Transpose result + __m256 vTemp = _mm256_unpacklo_ps(t0, t1); + __m256 vTemp2 = _mm256_unpackhi_ps(t0, t1); + __m256 vTemp3 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x20); + __m256 vTemp4 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x31); + vTemp = _mm256_unpacklo_ps(vTemp3, vTemp4); + vTemp2 = _mm256_unpackhi_ps(vTemp3, vTemp4); + t0 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x20); + t1 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x31); + + XMMATRIX mResult; + mResult.r[0] = _mm256_castps256_ps128(t0); + mResult.r[1] = _mm256_extractf128_ps(t0, 1); + mResult.r[2] = _mm256_castps256_ps128(t1); + mResult.r[3] = _mm256_extractf128_ps(t1, 1); + return mResult; +#elif defined(_XM_SSE_INTRINSICS_) + // Splat the component X,Y,Z then W +#if defined(_XM_AVX_INTRINSICS_) + XMVECTOR vX = _mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 0); + XMVECTOR vY = _mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 1); + XMVECTOR vZ = _mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 2); + XMVECTOR vW = _mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 3); +#else + // Use vW to hold the original row + XMVECTOR vW = M1.r[0]; + XMVECTOR vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); + XMVECTOR vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); + vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); +#endif + // Perform the operation on the first row + vX = _mm_mul_ps(vX, M2.r[0]); + vY = _mm_mul_ps(vY, M2.r[1]); + vZ = _mm_mul_ps(vZ, M2.r[2]); + vW = _mm_mul_ps(vW, M2.r[3]); + // Perform a binary add to reduce cumulative errors + vX = _mm_add_ps(vX, vZ); + vY = _mm_add_ps(vY, vW); + vX = _mm_add_ps(vX, vY); + XMVECTOR r0 = vX; + // Repeat for the other 
3 rows +#if defined(_XM_AVX_INTRINSICS_) + vX = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 0); + vY = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 1); + vZ = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 2); + vW = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 3); +#else + vW = M1.r[1]; + vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); + vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); + vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); + vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); +#endif + vX = _mm_mul_ps(vX, M2.r[0]); + vY = _mm_mul_ps(vY, M2.r[1]); + vZ = _mm_mul_ps(vZ, M2.r[2]); + vW = _mm_mul_ps(vW, M2.r[3]); + vX = _mm_add_ps(vX, vZ); + vY = _mm_add_ps(vY, vW); + vX = _mm_add_ps(vX, vY); + XMVECTOR r1 = vX; +#if defined(_XM_AVX_INTRINSICS_) + vX = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 0); + vY = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 1); + vZ = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 2); + vW = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 3); +#else + vW = M1.r[2]; + vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); + vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); + vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); + vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); +#endif + vX = _mm_mul_ps(vX, M2.r[0]); + vY = _mm_mul_ps(vY, M2.r[1]); + vZ = _mm_mul_ps(vZ, M2.r[2]); + vW = _mm_mul_ps(vW, M2.r[3]); + vX = _mm_add_ps(vX, vZ); + vY = _mm_add_ps(vY, vW); + vX = _mm_add_ps(vX, vY); + XMVECTOR r2 = vX; +#if defined(_XM_AVX_INTRINSICS_) + vX = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 0); + vY = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 1); + vZ = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 2); + vW = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 3); +#else + vW = M1.r[3]; + vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); + vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); + vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); + vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); +#endif + vX = _mm_mul_ps(vX, M2.r[0]); + vY = 
_mm_mul_ps(vY, M2.r[1]); + vZ = _mm_mul_ps(vZ, M2.r[2]); + vW = _mm_mul_ps(vW, M2.r[3]); + vX = _mm_add_ps(vX, vZ); + vY = _mm_add_ps(vY, vW); + vX = _mm_add_ps(vX, vY); + XMVECTOR r3 = vX; + + // Transpose result + // x.x,x.y,y.x,y.y + XMVECTOR vTemp1 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(1, 0, 1, 0)); + // x.z,x.w,y.z,y.w + XMVECTOR vTemp3 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(3, 2, 3, 2)); + // z.x,z.y,w.x,w.y + XMVECTOR vTemp2 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(1, 0, 1, 0)); + // z.z,z.w,w.z,w.w + XMVECTOR vTemp4 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(3, 2, 3, 2)); + + XMMATRIX mResult; + // x.x,y.x,z.x,w.x + mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); + // x.y,y.y,z.y,w.y + mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); + // x.z,y.z,z.z,w.z + mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); + // x.w,y.w,z.w,w.w + mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1)); + return mResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixTranspose(FXMMATRIX M) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + // Original matrix: + // + // m00m01m02m03 + // m10m11m12m13 + // m20m21m22m23 + // m30m31m32m33 + + XMMATRIX P; + P.r[0] = XMVectorMergeXY(M.r[0], M.r[2]); // m00m20m01m21 + P.r[1] = XMVectorMergeXY(M.r[1], M.r[3]); // m10m30m11m31 + P.r[2] = XMVectorMergeZW(M.r[0], M.r[2]); // m02m22m03m23 + P.r[3] = XMVectorMergeZW(M.r[1], M.r[3]); // m12m32m13m33 + + XMMATRIX MT; + MT.r[0] = XMVectorMergeXY(P.r[0], P.r[1]); // m00m10m20m30 + MT.r[1] = XMVectorMergeZW(P.r[0], P.r[1]); // m01m11m21m31 + MT.r[2] = XMVectorMergeXY(P.r[2], P.r[3]); // m02m12m22m32 + MT.r[3] = XMVectorMergeZW(P.r[2], P.r[3]); // m03m13m23m33 + return MT; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4x2_t P0 = vzipq_f32(M.r[0], M.r[2]); + float32x4x2_t P1 = vzipq_f32(M.r[1], M.r[3]); + + float32x4x2_t T0 = 
vzipq_f32(P0.val[0], P1.val[0]); + float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]); + + XMMATRIX mResult; + mResult.r[0] = T0.val[0]; + mResult.r[1] = T0.val[1]; + mResult.r[2] = T1.val[0]; + mResult.r[3] = T1.val[1]; + return mResult; +#elif defined(_XM_AVX2_INTRINSICS_) + __m256 t0 = _mm256_castps128_ps256(M.r[0]); + t0 = _mm256_insertf128_ps(t0, M.r[1], 1); + __m256 t1 = _mm256_castps128_ps256(M.r[2]); + t1 = _mm256_insertf128_ps(t1, M.r[3], 1); + + __m256 vTemp = _mm256_unpacklo_ps(t0, t1); + __m256 vTemp2 = _mm256_unpackhi_ps(t0, t1); + __m256 vTemp3 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x20); + __m256 vTemp4 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x31); + vTemp = _mm256_unpacklo_ps(vTemp3, vTemp4); + vTemp2 = _mm256_unpackhi_ps(vTemp3, vTemp4); + t0 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x20); + t1 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x31); + + XMMATRIX mResult; + mResult.r[0] = _mm256_castps256_ps128(t0); + mResult.r[1] = _mm256_extractf128_ps(t0, 1); + mResult.r[2] = _mm256_castps256_ps128(t1); + mResult.r[3] = _mm256_extractf128_ps(t1, 1); + return mResult; +#elif defined(_XM_SSE_INTRINSICS_) + // x.x,x.y,y.x,y.y + XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0)); + // x.z,x.w,y.z,y.w + XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2)); + // z.x,z.y,w.x,w.y + XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0)); + // z.z,z.w,w.z,w.w + XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2)); + + XMMATRIX mResult; + // x.x,y.x,z.x,w.x + mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); + // x.y,y.y,z.y,w.y + mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); + // x.z,y.z,z.z,w.z + mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); + // x.w,y.w,z.w,w.w + mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1)); + return mResult; +#endif +} + 
+//------------------------------------------------------------------------------ +// Return the inverse and the determinant of a 4x4 matrix +_Use_decl_annotations_ +inline XMMATRIX XM_CALLCONV XMMatrixInverse +( + XMVECTOR* pDeterminant, + FXMMATRIX M +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + XMMATRIX MT = XMMatrixTranspose(M); + + XMVECTOR V0[4], V1[4]; + V0[0] = XMVectorSwizzle(MT.r[2]); + V1[0] = XMVectorSwizzle(MT.r[3]); + V0[1] = XMVectorSwizzle(MT.r[0]); + V1[1] = XMVectorSwizzle(MT.r[1]); + V0[2] = XMVectorPermute(MT.r[2], MT.r[0]); + V1[2] = XMVectorPermute(MT.r[3], MT.r[1]); + + XMVECTOR D0 = XMVectorMultiply(V0[0], V1[0]); + XMVECTOR D1 = XMVectorMultiply(V0[1], V1[1]); + XMVECTOR D2 = XMVectorMultiply(V0[2], V1[2]); + + V0[0] = XMVectorSwizzle(MT.r[2]); + V1[0] = XMVectorSwizzle(MT.r[3]); + V0[1] = XMVectorSwizzle(MT.r[0]); + V1[1] = XMVectorSwizzle(MT.r[1]); + V0[2] = XMVectorPermute(MT.r[2], MT.r[0]); + V1[2] = XMVectorPermute(MT.r[3], MT.r[1]); + + D0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], D0); + D1 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], D1); + D2 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], D2); + + V0[0] = XMVectorSwizzle(MT.r[1]); + V1[0] = XMVectorPermute(D0, D2); + V0[1] = XMVectorSwizzle(MT.r[0]); + V1[1] = XMVectorPermute(D0, D2); + V0[2] = XMVectorSwizzle(MT.r[3]); + V1[2] = XMVectorPermute(D1, D2); + V0[3] = XMVectorSwizzle(MT.r[2]); + V1[3] = XMVectorPermute(D1, D2); + + XMVECTOR C0 = XMVectorMultiply(V0[0], V1[0]); + XMVECTOR C2 = XMVectorMultiply(V0[1], V1[1]); + XMVECTOR C4 = XMVectorMultiply(V0[2], V1[2]); + XMVECTOR C6 = XMVectorMultiply(V0[3], V1[3]); + + V0[0] = XMVectorSwizzle(MT.r[1]); + V1[0] = XMVectorPermute(D0, D2); + V0[1] = XMVectorSwizzle(MT.r[0]); + V1[1] = XMVectorPermute(D0, D2); + V0[2] = XMVectorSwizzle(MT.r[3]); + V1[2] = XMVectorPermute(D1, D2); + V0[3] = XMVectorSwizzle(MT.r[2]); + V1[3] = XMVectorPermute(D1, D2); + + C0 = 
XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0); + C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2); + C4 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4); + C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6); + + V0[0] = XMVectorSwizzle(MT.r[1]); + V1[0] = XMVectorPermute(D0, D2); + V0[1] = XMVectorSwizzle(MT.r[0]); + V1[1] = XMVectorPermute(D0, D2); + V0[2] = XMVectorSwizzle(MT.r[3]); + V1[2] = XMVectorPermute(D1, D2); + V0[3] = XMVectorSwizzle(MT.r[2]); + V1[3] = XMVectorPermute(D1, D2); + + XMVECTOR C1 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0); + C0 = XMVectorMultiplyAdd(V0[0], V1[0], C0); + XMVECTOR C3 = XMVectorMultiplyAdd(V0[1], V1[1], C2); + C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2); + XMVECTOR C5 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4); + C4 = XMVectorMultiplyAdd(V0[2], V1[2], C4); + XMVECTOR C7 = XMVectorMultiplyAdd(V0[3], V1[3], C6); + C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6); + + XMMATRIX R; + R.r[0] = XMVectorSelect(C0, C1, g_XMSelect0101.v); + R.r[1] = XMVectorSelect(C2, C3, g_XMSelect0101.v); + R.r[2] = XMVectorSelect(C4, C5, g_XMSelect0101.v); + R.r[3] = XMVectorSelect(C6, C7, g_XMSelect0101.v); + + XMVECTOR Determinant = XMVector4Dot(R.r[0], MT.r[0]); + + if (pDeterminant != nullptr) + *pDeterminant = Determinant; + + XMVECTOR Reciprocal = XMVectorReciprocal(Determinant); + + XMMATRIX Result; + Result.r[0] = XMVectorMultiply(R.r[0], Reciprocal); + Result.r[1] = XMVectorMultiply(R.r[1], Reciprocal); + Result.r[2] = XMVectorMultiply(R.r[2], Reciprocal); + Result.r[3] = XMVectorMultiply(R.r[3], Reciprocal); + return Result; + +#elif defined(_XM_SSE_INTRINSICS_) + // Transpose matrix + XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0)); + XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2)); + XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0)); + XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 
2)); + + XMMATRIX MT; + MT.r[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); + MT.r[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); + MT.r[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); + MT.r[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1)); + + XMVECTOR V00 = XM_PERMUTE_PS(MT.r[2], _MM_SHUFFLE(1, 1, 0, 0)); + XMVECTOR V10 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(3, 2, 3, 2)); + XMVECTOR V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(1, 1, 0, 0)); + XMVECTOR V11 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(3, 2, 3, 2)); + XMVECTOR V02 = _mm_shuffle_ps(MT.r[2], MT.r[0], _MM_SHUFFLE(2, 0, 2, 0)); + XMVECTOR V12 = _mm_shuffle_ps(MT.r[3], MT.r[1], _MM_SHUFFLE(3, 1, 3, 1)); + + XMVECTOR D0 = _mm_mul_ps(V00, V10); + XMVECTOR D1 = _mm_mul_ps(V01, V11); + XMVECTOR D2 = _mm_mul_ps(V02, V12); + + V00 = XM_PERMUTE_PS(MT.r[2], _MM_SHUFFLE(3, 2, 3, 2)); + V10 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(1, 1, 0, 0)); + V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(3, 2, 3, 2)); + V11 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(1, 1, 0, 0)); + V02 = _mm_shuffle_ps(MT.r[2], MT.r[0], _MM_SHUFFLE(3, 1, 3, 1)); + V12 = _mm_shuffle_ps(MT.r[3], MT.r[1], _MM_SHUFFLE(2, 0, 2, 0)); + + D0 = XM_FNMADD_PS(V00, V10, D0); + D1 = XM_FNMADD_PS(V01, V11, D1); + D2 = XM_FNMADD_PS(V02, V12, D2); + // V11 = D0Y,D0W,D2Y,D2Y + V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 1, 3, 1)); + V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(1, 0, 2, 1)); + V10 = _mm_shuffle_ps(V11, D0, _MM_SHUFFLE(0, 3, 0, 2)); + V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(0, 1, 0, 2)); + V11 = _mm_shuffle_ps(V11, D0, _MM_SHUFFLE(2, 1, 2, 1)); + // V13 = D1Y,D1W,D2W,D2W + XMVECTOR V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 3, 3, 1)); + V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(1, 0, 2, 1)); + V12 = _mm_shuffle_ps(V13, D1, _MM_SHUFFLE(0, 3, 0, 2)); + XMVECTOR V03 = XM_PERMUTE_PS(MT.r[2], _MM_SHUFFLE(0, 1, 0, 2)); + V13 = _mm_shuffle_ps(V13, D1, _MM_SHUFFLE(2, 1, 2, 1)); + + XMVECTOR C0 = _mm_mul_ps(V00, V10); + XMVECTOR 
C2 = _mm_mul_ps(V01, V11); + XMVECTOR C4 = _mm_mul_ps(V02, V12); + XMVECTOR C6 = _mm_mul_ps(V03, V13); + + // V11 = D0X,D0Y,D2X,D2X + V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(0, 0, 1, 0)); + V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(2, 1, 3, 2)); + V10 = _mm_shuffle_ps(D0, V11, _MM_SHUFFLE(2, 1, 0, 3)); + V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(1, 3, 2, 3)); + V11 = _mm_shuffle_ps(D0, V11, _MM_SHUFFLE(0, 2, 1, 2)); + // V13 = D1X,D1Y,D2Z,D2Z + V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(2, 2, 1, 0)); + V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(2, 1, 3, 2)); + V12 = _mm_shuffle_ps(D1, V13, _MM_SHUFFLE(2, 1, 0, 3)); + V03 = XM_PERMUTE_PS(MT.r[2], _MM_SHUFFLE(1, 3, 2, 3)); + V13 = _mm_shuffle_ps(D1, V13, _MM_SHUFFLE(0, 2, 1, 2)); + + C0 = XM_FNMADD_PS(V00, V10, C0); + C2 = XM_FNMADD_PS(V01, V11, C2); + C4 = XM_FNMADD_PS(V02, V12, C4); + C6 = XM_FNMADD_PS(V03, V13, C6); + + V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(0, 3, 0, 3)); + // V10 = D0Z,D0Z,D2X,D2Y + V10 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 0, 2, 2)); + V10 = XM_PERMUTE_PS(V10, _MM_SHUFFLE(0, 2, 3, 0)); + V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(2, 0, 3, 1)); + // V11 = D0X,D0W,D2X,D2Y + V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 0, 3, 0)); + V11 = XM_PERMUTE_PS(V11, _MM_SHUFFLE(2, 1, 0, 3)); + V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(0, 3, 0, 3)); + // V12 = D1Z,D1Z,D2Z,D2W + V12 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 2, 2, 2)); + V12 = XM_PERMUTE_PS(V12, _MM_SHUFFLE(0, 2, 3, 0)); + V03 = XM_PERMUTE_PS(MT.r[2], _MM_SHUFFLE(2, 0, 3, 1)); + // V13 = D1X,D1W,D2Z,D2W + V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 2, 3, 0)); + V13 = XM_PERMUTE_PS(V13, _MM_SHUFFLE(2, 1, 0, 3)); + + V00 = _mm_mul_ps(V00, V10); + V01 = _mm_mul_ps(V01, V11); + V02 = _mm_mul_ps(V02, V12); + V03 = _mm_mul_ps(V03, V13); + XMVECTOR C1 = _mm_sub_ps(C0, V00); + C0 = _mm_add_ps(C0, V00); + XMVECTOR C3 = _mm_add_ps(C2, V01); + C2 = _mm_sub_ps(C2, V01); + XMVECTOR C5 = _mm_sub_ps(C4, V02); + C4 = _mm_add_ps(C4, V02); + XMVECTOR C7 = 
_mm_add_ps(C6, V03); + C6 = _mm_sub_ps(C6, V03); + + C0 = _mm_shuffle_ps(C0, C1, _MM_SHUFFLE(3, 1, 2, 0)); + C2 = _mm_shuffle_ps(C2, C3, _MM_SHUFFLE(3, 1, 2, 0)); + C4 = _mm_shuffle_ps(C4, C5, _MM_SHUFFLE(3, 1, 2, 0)); + C6 = _mm_shuffle_ps(C6, C7, _MM_SHUFFLE(3, 1, 2, 0)); + C0 = XM_PERMUTE_PS(C0, _MM_SHUFFLE(3, 1, 2, 0)); + C2 = XM_PERMUTE_PS(C2, _MM_SHUFFLE(3, 1, 2, 0)); + C4 = XM_PERMUTE_PS(C4, _MM_SHUFFLE(3, 1, 2, 0)); + C6 = XM_PERMUTE_PS(C6, _MM_SHUFFLE(3, 1, 2, 0)); + // Get the determinant + XMVECTOR vTemp = XMVector4Dot(C0, MT.r[0]); + if (pDeterminant != nullptr) + *pDeterminant = vTemp; + vTemp = _mm_div_ps(g_XMOne, vTemp); + XMMATRIX mResult; + mResult.r[0] = _mm_mul_ps(C0, vTemp); + mResult.r[1] = _mm_mul_ps(C2, vTemp); + mResult.r[2] = _mm_mul_ps(C4, vTemp); + mResult.r[3] = _mm_mul_ps(C6, vTemp); + return mResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixVectorTensorProduct +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ + XMMATRIX mResult; + mResult.r[0] = XMVectorMultiply(XMVectorSwizzle<0, 0, 0, 0>(V1), V2); + mResult.r[1] = XMVectorMultiply(XMVectorSwizzle<1, 1, 1, 1>(V1), V2); + mResult.r[2] = XMVectorMultiply(XMVectorSwizzle<2, 2, 2, 2>(V1), V2); + mResult.r[3] = XMVectorMultiply(XMVectorSwizzle<3, 3, 3, 3>(V1), V2); + return mResult; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMMatrixDeterminant(FXMMATRIX M) noexcept +{ + static const XMVECTORF32 Sign = { { { 1.0f, -1.0f, 1.0f, -1.0f } } }; + + XMVECTOR V0 = XMVectorSwizzle(M.r[2]); + XMVECTOR V1 = XMVectorSwizzle(M.r[3]); + XMVECTOR V2 = XMVectorSwizzle(M.r[2]); + XMVECTOR V3 = XMVectorSwizzle(M.r[3]); + XMVECTOR V4 = XMVectorSwizzle(M.r[2]); + XMVECTOR V5 = XMVectorSwizzle(M.r[3]); + + XMVECTOR P0 = XMVectorMultiply(V0, V1); + XMVECTOR P1 = XMVectorMultiply(V2, V3); + XMVECTOR P2 = XMVectorMultiply(V4, V5); + + V0 = 
XMVectorSwizzle(M.r[2]); + V1 = XMVectorSwizzle(M.r[3]); + V2 = XMVectorSwizzle(M.r[2]); + V3 = XMVectorSwizzle(M.r[3]); + V4 = XMVectorSwizzle(M.r[2]); + V5 = XMVectorSwizzle(M.r[3]); + + P0 = XMVectorNegativeMultiplySubtract(V0, V1, P0); + P1 = XMVectorNegativeMultiplySubtract(V2, V3, P1); + P2 = XMVectorNegativeMultiplySubtract(V4, V5, P2); + + V0 = XMVectorSwizzle(M.r[1]); + V1 = XMVectorSwizzle(M.r[1]); + V2 = XMVectorSwizzle(M.r[1]); + + XMVECTOR S = XMVectorMultiply(M.r[0], Sign.v); + XMVECTOR R = XMVectorMultiply(V0, P0); + R = XMVectorNegativeMultiplySubtract(V1, P1, R); + R = XMVectorMultiplyAdd(V2, P2, R); + + return XMVector4Dot(S, R); +} + +#define XM3RANKDECOMPOSE(a, b, c, x, y, z) \ + if((x) < (y)) \ + { \ + if((y) < (z)) \ + { \ + (a) = 2; \ + (b) = 1; \ + (c) = 0; \ + } \ + else \ + { \ + (a) = 1; \ + \ + if((x) < (z)) \ + { \ + (b) = 2; \ + (c) = 0; \ + } \ + else \ + { \ + (b) = 0; \ + (c) = 2; \ + } \ + } \ + } \ + else \ + { \ + if((x) < (z)) \ + { \ + (a) = 2; \ + (b) = 0; \ + (c) = 1; \ + } \ + else \ + { \ + (a) = 0; \ + \ + if((y) < (z)) \ + { \ + (b) = 2; \ + (c) = 1; \ + } \ + else \ + { \ + (b) = 1; \ + (c) = 2; \ + } \ + } \ + } + +#define XM3_DECOMP_EPSILON 0.0001f + +_Use_decl_annotations_ +inline bool XM_CALLCONV XMMatrixDecompose +( + XMVECTOR* outScale, + XMVECTOR* outRotQuat, + XMVECTOR* outTrans, + FXMMATRIX M +) noexcept +{ + static const XMVECTOR* pvCanonicalBasis[3] = { + &g_XMIdentityR0.v, + &g_XMIdentityR1.v, + &g_XMIdentityR2.v + }; + + assert(outScale != nullptr); + assert(outRotQuat != nullptr); + assert(outTrans != nullptr); + + // Get the translation + outTrans[0] = M.r[3]; + + XMVECTOR* ppvBasis[3]; + XMMATRIX matTemp; + ppvBasis[0] = &matTemp.r[0]; + ppvBasis[1] = &matTemp.r[1]; + ppvBasis[2] = &matTemp.r[2]; + + matTemp.r[0] = M.r[0]; + matTemp.r[1] = M.r[1]; + matTemp.r[2] = M.r[2]; + matTemp.r[3] = g_XMIdentityR3.v; + + auto pfScales = reinterpret_cast(outScale); + + size_t a, b, c; + XMVectorGetXPtr(&pfScales[0], 
XMVector3Length(ppvBasis[0][0])); + XMVectorGetXPtr(&pfScales[1], XMVector3Length(ppvBasis[1][0])); + XMVectorGetXPtr(&pfScales[2], XMVector3Length(ppvBasis[2][0])); + pfScales[3] = 0.f; + + XM3RANKDECOMPOSE(a, b, c, pfScales[0], pfScales[1], pfScales[2]) + + if (pfScales[a] < XM3_DECOMP_EPSILON) + { + ppvBasis[a][0] = pvCanonicalBasis[a][0]; + } + ppvBasis[a][0] = XMVector3Normalize(ppvBasis[a][0]); + + if (pfScales[b] < XM3_DECOMP_EPSILON) + { + size_t aa, bb, cc; + float fAbsX, fAbsY, fAbsZ; + + fAbsX = fabsf(XMVectorGetX(ppvBasis[a][0])); + fAbsY = fabsf(XMVectorGetY(ppvBasis[a][0])); + fAbsZ = fabsf(XMVectorGetZ(ppvBasis[a][0])); + + XM3RANKDECOMPOSE(aa, bb, cc, fAbsX, fAbsY, fAbsZ) + + ppvBasis[b][0] = XMVector3Cross(ppvBasis[a][0], pvCanonicalBasis[cc][0]); + } + + ppvBasis[b][0] = XMVector3Normalize(ppvBasis[b][0]); + + if (pfScales[c] < XM3_DECOMP_EPSILON) + { + ppvBasis[c][0] = XMVector3Cross(ppvBasis[a][0], ppvBasis[b][0]); + } + + ppvBasis[c][0] = XMVector3Normalize(ppvBasis[c][0]); + + float fDet = XMVectorGetX(XMMatrixDeterminant(matTemp)); + + // use the sign of the determinant (Cramer's rule) to check for handedness of coordinate system + if (fDet < 0.0f) + { + // switch coordinate system by negating the scale and inverting the basis vector on axis a (the largest-scale axis) + pfScales[a] = -pfScales[a]; + ppvBasis[a][0] = XMVectorNegate(ppvBasis[a][0]); + + fDet = -fDet; + } + + fDet -= 1.0f; + fDet *= fDet; + + if (XM3_DECOMP_EPSILON < fDet) + { + // Non-SRT matrix encountered + return false; + } + + // generate the quaternion from the matrix + outRotQuat[0] = XMQuaternionRotationMatrix(matTemp); + return true; +} + +#undef XM3_DECOMP_EPSILON +#undef XM3RANKDECOMPOSE + +//------------------------------------------------------------------------------ +// Transformation operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixIdentity() 
noexcept +{ + XMMATRIX M; + M.r[0] = g_XMIdentityR0.v; + M.r[1] = g_XMIdentityR1.v; + M.r[2] = g_XMIdentityR2.v; + M.r[3] = g_XMIdentityR3.v; + return M; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixSet +( + float m00, float m01, float m02, float m03, + float m10, float m11, float m12, float m13, + float m20, float m21, float m22, float m23, + float m30, float m31, float m32, float m33 +) noexcept +{ + XMMATRIX M; +#if defined(_XM_NO_INTRINSICS_) + M.m[0][0] = m00; M.m[0][1] = m01; M.m[0][2] = m02; M.m[0][3] = m03; + M.m[1][0] = m10; M.m[1][1] = m11; M.m[1][2] = m12; M.m[1][3] = m13; + M.m[2][0] = m20; M.m[2][1] = m21; M.m[2][2] = m22; M.m[2][3] = m23; + M.m[3][0] = m30; M.m[3][1] = m31; M.m[3][2] = m32; M.m[3][3] = m33; +#else + M.r[0] = XMVectorSet(m00, m01, m02, m03); + M.r[1] = XMVectorSet(m10, m11, m12, m13); + M.r[2] = XMVectorSet(m20, m21, m22, m23); + M.r[3] = XMVectorSet(m30, m31, m32, m33); +#endif + return M; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixTranslation +( + float OffsetX, + float OffsetY, + float OffsetZ +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMMATRIX M; + M.m[0][0] = 1.0f; + M.m[0][1] = 0.0f; + M.m[0][2] = 0.0f; + M.m[0][3] = 0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = 1.0f; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = 0.0f; + M.m[2][1] = 0.0f; + M.m[2][2] = 1.0f; + M.m[2][3] = 0.0f; + + M.m[3][0] = OffsetX; + M.m[3][1] = OffsetY; + M.m[3][2] = OffsetZ; + M.m[3][3] = 1.0f; + return M; + +#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + XMMATRIX M; + M.r[0] = g_XMIdentityR0.v; + M.r[1] = g_XMIdentityR1.v; + M.r[2] = g_XMIdentityR2.v; + M.r[3] = XMVectorSet(OffsetX, OffsetY, OffsetZ, 1.f); + return M; +#endif +} + + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV 
XMMatrixTranslationFromVector(FXMVECTOR Offset) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMMATRIX M; + M.m[0][0] = 1.0f; + M.m[0][1] = 0.0f; + M.m[0][2] = 0.0f; + M.m[0][3] = 0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = 1.0f; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = 0.0f; + M.m[2][1] = 0.0f; + M.m[2][2] = 1.0f; + M.m[2][3] = 0.0f; + + M.m[3][0] = Offset.vector4_f32[0]; + M.m[3][1] = Offset.vector4_f32[1]; + M.m[3][2] = Offset.vector4_f32[2]; + M.m[3][3] = 1.0f; + return M; + +#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + XMMATRIX M; + M.r[0] = g_XMIdentityR0.v; + M.r[1] = g_XMIdentityR1.v; + M.r[2] = g_XMIdentityR2.v; + M.r[3] = XMVectorSelect(g_XMIdentityR3.v, Offset, g_XMSelect1110.v); + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixScaling +( + float ScaleX, + float ScaleY, + float ScaleZ +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMMATRIX M; + M.m[0][0] = ScaleX; + M.m[0][1] = 0.0f; + M.m[0][2] = 0.0f; + M.m[0][3] = 0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = ScaleY; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = 0.0f; + M.m[2][1] = 0.0f; + M.m[2][2] = ScaleZ; + M.m[2][3] = 0.0f; + + M.m[3][0] = 0.0f; + M.m[3][1] = 0.0f; + M.m[3][2] = 0.0f; + M.m[3][3] = 1.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + const XMVECTOR Zero = vdupq_n_f32(0); + XMMATRIX M; + M.r[0] = vsetq_lane_f32(ScaleX, Zero, 0); + M.r[1] = vsetq_lane_f32(ScaleY, Zero, 1); + M.r[2] = vsetq_lane_f32(ScaleZ, Zero, 2); + M.r[3] = g_XMIdentityR3.v; + return M; +#elif defined(_XM_SSE_INTRINSICS_) + XMMATRIX M; + M.r[0] = _mm_set_ps(0, 0, 0, ScaleX); + M.r[1] = _mm_set_ps(0, 0, ScaleY, 0); + M.r[2] = _mm_set_ps(0, ScaleZ, 0, 0); + M.r[3] = g_XMIdentityR3.v; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixScalingFromVector(FXMVECTOR 
Scale) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMMATRIX M; + M.m[0][0] = Scale.vector4_f32[0]; + M.m[0][1] = 0.0f; + M.m[0][2] = 0.0f; + M.m[0][3] = 0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = Scale.vector4_f32[1]; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = 0.0f; + M.m[2][1] = 0.0f; + M.m[2][2] = Scale.vector4_f32[2]; + M.m[2][3] = 0.0f; + + M.m[3][0] = 0.0f; + M.m[3][1] = 0.0f; + M.m[3][2] = 0.0f; + M.m[3][3] = 1.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMMATRIX M; + M.r[0] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(Scale), g_XMMaskX)); + M.r[1] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(Scale), g_XMMaskY)); + M.r[2] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(Scale), g_XMMaskZ)); + M.r[3] = g_XMIdentityR3.v; + return M; +#elif defined(_XM_SSE_INTRINSICS_) + XMMATRIX M; + M.r[0] = _mm_and_ps(Scale, g_XMMaskX); + M.r[1] = _mm_and_ps(Scale, g_XMMaskY); + M.r[2] = _mm_and_ps(Scale, g_XMMaskZ); + M.r[3] = g_XMIdentityR3.v; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixRotationX(float Angle) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + float fSinAngle; + float fCosAngle; + XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); + + XMMATRIX M; + M.m[0][0] = 1.0f; + M.m[0][1] = 0.0f; + M.m[0][2] = 0.0f; + M.m[0][3] = 0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = fCosAngle; + M.m[1][2] = fSinAngle; + M.m[1][3] = 0.0f; + + M.m[2][0] = 0.0f; + M.m[2][1] = -fSinAngle; + M.m[2][2] = fCosAngle; + M.m[2][3] = 0.0f; + + M.m[3][0] = 0.0f; + M.m[3][1] = 0.0f; + M.m[3][2] = 0.0f; + M.m[3][3] = 1.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float fSinAngle; + float fCosAngle; + XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); + + const float32x4_t Zero = vdupq_n_f32(0); + + float32x4_t T1 = vsetq_lane_f32(fCosAngle, Zero, 1); + T1 = vsetq_lane_f32(fSinAngle, T1, 2); + + float32x4_t T2 = 
vsetq_lane_f32(-fSinAngle, Zero, 1); + T2 = vsetq_lane_f32(fCosAngle, T2, 2); + + XMMATRIX M; + M.r[0] = g_XMIdentityR0.v; + M.r[1] = T1; + M.r[2] = T2; + M.r[3] = g_XMIdentityR3.v; + return M; +#elif defined(_XM_SSE_INTRINSICS_) + float SinAngle; + float CosAngle; + XMScalarSinCos(&SinAngle, &CosAngle, Angle); + + XMVECTOR vSin = _mm_set_ss(SinAngle); + XMVECTOR vCos = _mm_set_ss(CosAngle); + // x = 0,y = cos,z = sin, w = 0 + vCos = _mm_shuffle_ps(vCos, vSin, _MM_SHUFFLE(3, 0, 0, 3)); + XMMATRIX M; + M.r[0] = g_XMIdentityR0; + M.r[1] = vCos; + // x = 0,y = sin,z = cos, w = 0 + vCos = XM_PERMUTE_PS(vCos, _MM_SHUFFLE(3, 1, 2, 0)); + // x = 0,y = -sin,z = cos, w = 0 + vCos = _mm_mul_ps(vCos, g_XMNegateY); + M.r[2] = vCos; + M.r[3] = g_XMIdentityR3; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixRotationY(float Angle) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + float fSinAngle; + float fCosAngle; + XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); + + XMMATRIX M; + M.m[0][0] = fCosAngle; + M.m[0][1] = 0.0f; + M.m[0][2] = -fSinAngle; + M.m[0][3] = 0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = 1.0f; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = fSinAngle; + M.m[2][1] = 0.0f; + M.m[2][2] = fCosAngle; + M.m[2][3] = 0.0f; + + M.m[3][0] = 0.0f; + M.m[3][1] = 0.0f; + M.m[3][2] = 0.0f; + M.m[3][3] = 1.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float fSinAngle; + float fCosAngle; + XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); + + const float32x4_t Zero = vdupq_n_f32(0); + + float32x4_t T0 = vsetq_lane_f32(fCosAngle, Zero, 0); + T0 = vsetq_lane_f32(-fSinAngle, T0, 2); + + float32x4_t T2 = vsetq_lane_f32(fSinAngle, Zero, 0); + T2 = vsetq_lane_f32(fCosAngle, T2, 2); + + XMMATRIX M; + M.r[0] = T0; + M.r[1] = g_XMIdentityR1.v; + M.r[2] = T2; + M.r[3] = g_XMIdentityR3.v; + return M; +#elif defined(_XM_SSE_INTRINSICS_) + float SinAngle; + float CosAngle; + 
XMScalarSinCos(&SinAngle, &CosAngle, Angle); + + XMVECTOR vSin = _mm_set_ss(SinAngle); + XMVECTOR vCos = _mm_set_ss(CosAngle); + // x = sin,y = 0,z = cos, w = 0 + vSin = _mm_shuffle_ps(vSin, vCos, _MM_SHUFFLE(3, 0, 3, 0)); + XMMATRIX M; + M.r[2] = vSin; + M.r[1] = g_XMIdentityR1; + // x = cos,y = 0,z = sin, w = 0 + vSin = XM_PERMUTE_PS(vSin, _MM_SHUFFLE(3, 0, 1, 2)); + // x = cos,y = 0,z = -sin, w = 0 + vSin = _mm_mul_ps(vSin, g_XMNegateZ); + M.r[0] = vSin; + M.r[3] = g_XMIdentityR3; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixRotationZ(float Angle) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + float fSinAngle; + float fCosAngle; + XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); + + XMMATRIX M; + M.m[0][0] = fCosAngle; + M.m[0][1] = fSinAngle; + M.m[0][2] = 0.0f; + M.m[0][3] = 0.0f; + + M.m[1][0] = -fSinAngle; + M.m[1][1] = fCosAngle; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = 0.0f; + M.m[2][1] = 0.0f; + M.m[2][2] = 1.0f; + M.m[2][3] = 0.0f; + + M.m[3][0] = 0.0f; + M.m[3][1] = 0.0f; + M.m[3][2] = 0.0f; + M.m[3][3] = 1.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float fSinAngle; + float fCosAngle; + XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); + + const float32x4_t Zero = vdupq_n_f32(0); + + float32x4_t T0 = vsetq_lane_f32(fCosAngle, Zero, 0); + T0 = vsetq_lane_f32(fSinAngle, T0, 1); + + float32x4_t T1 = vsetq_lane_f32(-fSinAngle, Zero, 0); + T1 = vsetq_lane_f32(fCosAngle, T1, 1); + + XMMATRIX M; + M.r[0] = T0; + M.r[1] = T1; + M.r[2] = g_XMIdentityR2.v; + M.r[3] = g_XMIdentityR3.v; + return M; +#elif defined(_XM_SSE_INTRINSICS_) + float SinAngle; + float CosAngle; + XMScalarSinCos(&SinAngle, &CosAngle, Angle); + + XMVECTOR vSin = _mm_set_ss(SinAngle); + XMVECTOR vCos = _mm_set_ss(CosAngle); + // x = cos,y = sin,z = 0, w = 0 + vCos = _mm_unpacklo_ps(vCos, vSin); + XMMATRIX M; + M.r[0] = vCos; + // x = sin,y = cos,z = 0, w = 0 + vCos 
= XM_PERMUTE_PS(vCos, _MM_SHUFFLE(3, 2, 0, 1)); + // x = -sin,y = cos,z = 0, w = 0 + vCos = _mm_mul_ps(vCos, g_XMNegateX); + M.r[1] = vCos; + M.r[2] = g_XMIdentityR2; + M.r[3] = g_XMIdentityR3; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixRotationRollPitchYaw +( + float Pitch, + float Yaw, + float Roll +) noexcept +{ + XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f); + return XMMatrixRotationRollPitchYawFromVector(Angles); +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixRotationRollPitchYawFromVector +( + FXMVECTOR Angles // x = Pitch, y = Yaw, z = Roll +) noexcept +{ + XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles); + return XMMatrixRotationQuaternion(Q); +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixRotationNormal +( + FXMVECTOR NormalAxis, + float Angle +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + float fSinAngle; + float fCosAngle; + XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); + + XMVECTOR A = XMVectorSet(fSinAngle, fCosAngle, 1.0f - fCosAngle, 0.0f); + + XMVECTOR C2 = XMVectorSplatZ(A); + XMVECTOR C1 = XMVectorSplatY(A); + XMVECTOR C0 = XMVectorSplatX(A); + + XMVECTOR N0 = XMVectorSwizzle(NormalAxis); + XMVECTOR N1 = XMVectorSwizzle(NormalAxis); + + XMVECTOR V0 = XMVectorMultiply(C2, N0); + V0 = XMVectorMultiply(V0, N1); + + XMVECTOR R0 = XMVectorMultiply(C2, NormalAxis); + R0 = XMVectorMultiplyAdd(R0, NormalAxis, C1); + + XMVECTOR R1 = XMVectorMultiplyAdd(C0, NormalAxis, V0); + XMVECTOR R2 = XMVectorNegativeMultiplySubtract(C0, NormalAxis, V0); + + V0 = XMVectorSelect(A, R0, g_XMSelect1110.v); + XMVECTOR V1 = XMVectorPermute(R1, R2); + XMVECTOR V2 = XMVectorPermute(R1, R2); + + XMMATRIX M; + M.r[0] = XMVectorPermute(V0, V1); + M.r[1] = XMVectorPermute(V0, V1); + 
M.r[2] = XMVectorPermute(V0, V2); + M.r[3] = g_XMIdentityR3.v; + return M; + +#elif defined(_XM_SSE_INTRINSICS_) + float fSinAngle; + float fCosAngle; + XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); + + XMVECTOR C2 = _mm_set_ps1(1.0f - fCosAngle); + XMVECTOR C1 = _mm_set_ps1(fCosAngle); + XMVECTOR C0 = _mm_set_ps1(fSinAngle); + + XMVECTOR N0 = XM_PERMUTE_PS(NormalAxis, _MM_SHUFFLE(3, 0, 2, 1)); + XMVECTOR N1 = XM_PERMUTE_PS(NormalAxis, _MM_SHUFFLE(3, 1, 0, 2)); + + XMVECTOR V0 = _mm_mul_ps(C2, N0); + V0 = _mm_mul_ps(V0, N1); + + XMVECTOR R0 = _mm_mul_ps(C2, NormalAxis); + R0 = _mm_mul_ps(R0, NormalAxis); + R0 = _mm_add_ps(R0, C1); + + XMVECTOR R1 = _mm_mul_ps(C0, NormalAxis); + R1 = _mm_add_ps(R1, V0); + XMVECTOR R2 = _mm_mul_ps(C0, NormalAxis); + R2 = _mm_sub_ps(V0, R2); + + V0 = _mm_and_ps(R0, g_XMMask3); + XMVECTOR V1 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(2, 1, 2, 0)); + V1 = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 3, 2, 1)); + XMVECTOR V2 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(0, 0, 1, 1)); + V2 = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 0, 2, 0)); + + R2 = _mm_shuffle_ps(V0, V1, _MM_SHUFFLE(1, 0, 3, 0)); + R2 = XM_PERMUTE_PS(R2, _MM_SHUFFLE(1, 3, 2, 0)); + + XMMATRIX M; + M.r[0] = R2; + + R2 = _mm_shuffle_ps(V0, V1, _MM_SHUFFLE(3, 2, 3, 1)); + R2 = XM_PERMUTE_PS(R2, _MM_SHUFFLE(1, 3, 0, 2)); + M.r[1] = R2; + + V2 = _mm_shuffle_ps(V2, V0, _MM_SHUFFLE(3, 2, 1, 0)); + M.r[2] = V2; + M.r[3] = g_XMIdentityR3.v; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixRotationAxis +( + FXMVECTOR Axis, + float Angle +) noexcept +{ + assert(!XMVector3Equal(Axis, XMVectorZero())); + assert(!XMVector3IsInfinite(Axis)); + + XMVECTOR Normal = XMVector3Normalize(Axis); + return XMMatrixRotationNormal(Normal, Angle); +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixRotationQuaternion(FXMVECTOR Quaternion) noexcept +{ +#if 
defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + static const XMVECTORF32 Constant1110 = { { { 1.0f, 1.0f, 1.0f, 0.0f } } }; + + XMVECTOR Q0 = XMVectorAdd(Quaternion, Quaternion); + XMVECTOR Q1 = XMVectorMultiply(Quaternion, Q0); + + XMVECTOR V0 = XMVectorPermute(Q1, Constant1110.v); + XMVECTOR V1 = XMVectorPermute(Q1, Constant1110.v); + XMVECTOR R0 = XMVectorSubtract(Constant1110, V0); + R0 = XMVectorSubtract(R0, V1); + + V0 = XMVectorSwizzle(Quaternion); + V1 = XMVectorSwizzle(Q0); + V0 = XMVectorMultiply(V0, V1); + + V1 = XMVectorSplatW(Quaternion); + XMVECTOR V2 = XMVectorSwizzle(Q0); + V1 = XMVectorMultiply(V1, V2); + + XMVECTOR R1 = XMVectorAdd(V0, V1); + XMVECTOR R2 = XMVectorSubtract(V0, V1); + + V0 = XMVectorPermute(R1, R2); + V1 = XMVectorPermute(R1, R2); + + XMMATRIX M; + M.r[0] = XMVectorPermute(R0, V0); + M.r[1] = XMVectorPermute(R0, V0); + M.r[2] = XMVectorPermute(R0, V1); + M.r[3] = g_XMIdentityR3.v; + return M; + +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 Constant1110 = { { { 1.0f, 1.0f, 1.0f, 0.0f } } }; + + XMVECTOR Q0 = _mm_add_ps(Quaternion, Quaternion); + XMVECTOR Q1 = _mm_mul_ps(Quaternion, Q0); + + XMVECTOR V0 = XM_PERMUTE_PS(Q1, _MM_SHUFFLE(3, 0, 0, 1)); + V0 = _mm_and_ps(V0, g_XMMask3); + XMVECTOR V1 = XM_PERMUTE_PS(Q1, _MM_SHUFFLE(3, 1, 2, 2)); + V1 = _mm_and_ps(V1, g_XMMask3); + XMVECTOR R0 = _mm_sub_ps(Constant1110, V0); + R0 = _mm_sub_ps(R0, V1); + + V0 = XM_PERMUTE_PS(Quaternion, _MM_SHUFFLE(3, 1, 0, 0)); + V1 = XM_PERMUTE_PS(Q0, _MM_SHUFFLE(3, 2, 1, 2)); + V0 = _mm_mul_ps(V0, V1); + + V1 = XM_PERMUTE_PS(Quaternion, _MM_SHUFFLE(3, 3, 3, 3)); + XMVECTOR V2 = XM_PERMUTE_PS(Q0, _MM_SHUFFLE(3, 0, 2, 1)); + V1 = _mm_mul_ps(V1, V2); + + XMVECTOR R1 = _mm_add_ps(V0, V1); + XMVECTOR R2 = _mm_sub_ps(V0, V1); + + V0 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(1, 0, 2, 1)); + V0 = XM_PERMUTE_PS(V0, _MM_SHUFFLE(1, 3, 2, 0)); + V1 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(2, 2, 0, 0)); + V1 = XM_PERMUTE_PS(V1, 
_MM_SHUFFLE(2, 0, 2, 0)); + + Q1 = _mm_shuffle_ps(R0, V0, _MM_SHUFFLE(1, 0, 3, 0)); + Q1 = XM_PERMUTE_PS(Q1, _MM_SHUFFLE(1, 3, 2, 0)); + + XMMATRIX M; + M.r[0] = Q1; + + Q1 = _mm_shuffle_ps(R0, V0, _MM_SHUFFLE(3, 2, 3, 1)); + Q1 = XM_PERMUTE_PS(Q1, _MM_SHUFFLE(1, 3, 0, 2)); + M.r[1] = Q1; + + Q1 = _mm_shuffle_ps(V1, R0, _MM_SHUFFLE(3, 2, 1, 0)); + M.r[2] = Q1; + M.r[3] = g_XMIdentityR3; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixTransformation2D +( + FXMVECTOR ScalingOrigin, + float ScalingOrientation, + FXMVECTOR Scaling, + FXMVECTOR RotationOrigin, + float Rotation, + GXMVECTOR Translation +) noexcept +{ + // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation * + // MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation; + + XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1100.v, ScalingOrigin, g_XMSelect1100.v); + XMVECTOR NegScalingOrigin = XMVectorNegate(VScalingOrigin); + + XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin); + XMMATRIX MScalingOrientation = XMMatrixRotationZ(ScalingOrientation); + XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation); + XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v); + XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling); + XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v); + XMMATRIX MRotation = XMMatrixRotationZ(Rotation); + XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation, g_XMSelect1100.v); + + XMMATRIX M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT); + M = XMMatrixMultiply(M, MScaling); + M = XMMatrixMultiply(M, MScalingOrientation); + M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin); + M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin); + M = XMMatrixMultiply(M, MRotation); + M.r[3] = 
XMVectorAdd(M.r[3], VRotationOrigin); + M.r[3] = XMVectorAdd(M.r[3], VTranslation); + + return M; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixTransformation +( + FXMVECTOR ScalingOrigin, + FXMVECTOR ScalingOrientationQuaternion, + FXMVECTOR Scaling, + GXMVECTOR RotationOrigin, + HXMVECTOR RotationQuaternion, + HXMVECTOR Translation +) noexcept +{ + // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation * + // MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation; + + XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1110.v, ScalingOrigin, g_XMSelect1110.v); + XMVECTOR NegScalingOrigin = XMVectorNegate(ScalingOrigin); + + XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin); + XMMATRIX MScalingOrientation = XMMatrixRotationQuaternion(ScalingOrientationQuaternion); + XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation); + XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling); + XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin, g_XMSelect1110.v); + XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion); + XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation, g_XMSelect1110.v); + + XMMATRIX M; + M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT); + M = XMMatrixMultiply(M, MScaling); + M = XMMatrixMultiply(M, MScalingOrientation); + M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin); + M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin); + M = XMMatrixMultiply(M, MRotation); + M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin); + M.r[3] = XMVectorAdd(M.r[3], VTranslation); + return M; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixAffineTransformation2D +( + FXMVECTOR Scaling, + FXMVECTOR RotationOrigin, + float Rotation, + FXMVECTOR Translation 
+) noexcept +{ + // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation; + + XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v); + XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling); + XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v); + XMMATRIX MRotation = XMMatrixRotationZ(Rotation); + XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation, g_XMSelect1100.v); + + XMMATRIX M; + M = MScaling; + M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin); + M = XMMatrixMultiply(M, MRotation); + M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin); + M.r[3] = XMVectorAdd(M.r[3], VTranslation); + return M; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixAffineTransformation +( + FXMVECTOR Scaling, + FXMVECTOR RotationOrigin, + FXMVECTOR RotationQuaternion, + GXMVECTOR Translation +) noexcept +{ + // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation; + + XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling); + XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin, g_XMSelect1110.v); + XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion); + XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation, g_XMSelect1110.v); + + XMMATRIX M; + M = MScaling; + M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin); + M = XMMatrixMultiply(M, MRotation); + M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin); + M.r[3] = XMVectorAdd(M.r[3], VTranslation); + return M; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixReflect(FXMVECTOR ReflectionPlane) noexcept +{ + assert(!XMVector3Equal(ReflectionPlane, XMVectorZero())); + assert(!XMPlaneIsInfinite(ReflectionPlane)); + + static const XMVECTORF32 NegativeTwo = { { { -2.0f, -2.0f, -2.0f, 0.0f } } }; + + XMVECTOR 
P = XMPlaneNormalize(ReflectionPlane); + XMVECTOR S = XMVectorMultiply(P, NegativeTwo); + + XMVECTOR A = XMVectorSplatX(P); + XMVECTOR B = XMVectorSplatY(P); + XMVECTOR C = XMVectorSplatZ(P); + XMVECTOR D = XMVectorSplatW(P); + + XMMATRIX M; + M.r[0] = XMVectorMultiplyAdd(A, S, g_XMIdentityR0.v); + M.r[1] = XMVectorMultiplyAdd(B, S, g_XMIdentityR1.v); + M.r[2] = XMVectorMultiplyAdd(C, S, g_XMIdentityR2.v); + M.r[3] = XMVectorMultiplyAdd(D, S, g_XMIdentityR3.v); + return M; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixShadow +( + FXMVECTOR ShadowPlane, + FXMVECTOR LightPosition +) noexcept +{ + static const XMVECTORU32 Select0001 = { { { XM_SELECT_0, XM_SELECT_0, XM_SELECT_0, XM_SELECT_1 } } }; + + assert(!XMVector3Equal(ShadowPlane, XMVectorZero())); + assert(!XMPlaneIsInfinite(ShadowPlane)); + + XMVECTOR P = XMPlaneNormalize(ShadowPlane); + XMVECTOR Dot = XMPlaneDot(P, LightPosition); + P = XMVectorNegate(P); + XMVECTOR D = XMVectorSplatW(P); + XMVECTOR C = XMVectorSplatZ(P); + XMVECTOR B = XMVectorSplatY(P); + XMVECTOR A = XMVectorSplatX(P); + Dot = XMVectorSelect(Select0001.v, Dot, Select0001.v); + + XMMATRIX M; + M.r[3] = XMVectorMultiplyAdd(D, LightPosition, Dot); + Dot = XMVectorRotateLeft(Dot, 1); + M.r[2] = XMVectorMultiplyAdd(C, LightPosition, Dot); + Dot = XMVectorRotateLeft(Dot, 1); + M.r[1] = XMVectorMultiplyAdd(B, LightPosition, Dot); + Dot = XMVectorRotateLeft(Dot, 1); + M.r[0] = XMVectorMultiplyAdd(A, LightPosition, Dot); + return M; +} + +//------------------------------------------------------------------------------ +// View and projection initialization operations +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixLookAtLH +( + FXMVECTOR EyePosition, + FXMVECTOR FocusPosition, + FXMVECTOR UpDirection +) noexcept +{ + XMVECTOR EyeDirection = XMVectorSubtract(FocusPosition, EyePosition); + return 
XMMatrixLookToLH(EyePosition, EyeDirection, UpDirection); +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixLookAtRH +( + FXMVECTOR EyePosition, + FXMVECTOR FocusPosition, + FXMVECTOR UpDirection +) noexcept +{ + XMVECTOR NegEyeDirection = XMVectorSubtract(EyePosition, FocusPosition); + return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection); +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixLookToLH +( + FXMVECTOR EyePosition, + FXMVECTOR EyeDirection, + FXMVECTOR UpDirection +) noexcept +{ + assert(!XMVector3Equal(EyeDirection, XMVectorZero())); + assert(!XMVector3IsInfinite(EyeDirection)); + assert(!XMVector3Equal(UpDirection, XMVectorZero())); + assert(!XMVector3IsInfinite(UpDirection)); + + XMVECTOR R2 = XMVector3Normalize(EyeDirection); + + XMVECTOR R0 = XMVector3Cross(UpDirection, R2); + R0 = XMVector3Normalize(R0); + + XMVECTOR R1 = XMVector3Cross(R2, R0); + + XMVECTOR NegEyePosition = XMVectorNegate(EyePosition); + + XMVECTOR D0 = XMVector3Dot(R0, NegEyePosition); + XMVECTOR D1 = XMVector3Dot(R1, NegEyePosition); + XMVECTOR D2 = XMVector3Dot(R2, NegEyePosition); + + XMMATRIX M; + M.r[0] = XMVectorSelect(D0, R0, g_XMSelect1110.v); + M.r[1] = XMVectorSelect(D1, R1, g_XMSelect1110.v); + M.r[2] = XMVectorSelect(D2, R2, g_XMSelect1110.v); + M.r[3] = g_XMIdentityR3.v; + + M = XMMatrixTranspose(M); + + return M; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixLookToRH +( + FXMVECTOR EyePosition, + FXMVECTOR EyeDirection, + FXMVECTOR UpDirection +) noexcept +{ + XMVECTOR NegEyeDirection = XMVectorNegate(EyeDirection); + return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection); +} + +//------------------------------------------------------------------------------ + +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma 
prefast(disable:28931, "PREfast noise: Esp:1266") +#endif + +inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveLH +( + float ViewWidth, + float ViewHeight, + float NearZ, + float FarZ +) noexcept +{ + assert(NearZ > 0.f && FarZ > 0.f); + assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f)); + assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f)); + assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); + +#if defined(_XM_NO_INTRINSICS_) + + float TwoNearZ = NearZ + NearZ; + float fRange = FarZ / (FarZ - NearZ); + + XMMATRIX M; + M.m[0][0] = TwoNearZ / ViewWidth; + M.m[0][1] = 0.0f; + M.m[0][2] = 0.0f; + M.m[0][3] = 0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = TwoNearZ / ViewHeight; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = 0.0f; + M.m[2][1] = 0.0f; + M.m[2][2] = fRange; + M.m[2][3] = 1.0f; + + M.m[3][0] = 0.0f; + M.m[3][1] = 0.0f; + M.m[3][2] = -fRange * NearZ; + M.m[3][3] = 0.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float TwoNearZ = NearZ + NearZ; + float fRange = FarZ / (FarZ - NearZ); + const float32x4_t Zero = vdupq_n_f32(0); + XMMATRIX M; + M.r[0] = vsetq_lane_f32(TwoNearZ / ViewWidth, Zero, 0); + M.r[1] = vsetq_lane_f32(TwoNearZ / ViewHeight, Zero, 1); + M.r[2] = vsetq_lane_f32(fRange, g_XMIdentityR3.v, 2); + M.r[3] = vsetq_lane_f32(-fRange * NearZ, Zero, 2); + return M; +#elif defined(_XM_SSE_INTRINSICS_) + XMMATRIX M; + float TwoNearZ = NearZ + NearZ; + float fRange = FarZ / (FarZ - NearZ); + // Note: This is recorded on the stack + XMVECTOR rMem = { + TwoNearZ / ViewWidth, + TwoNearZ / ViewHeight, + fRange, + -fRange * NearZ + }; + // Copy from memory to SSE register + XMVECTOR vValues = rMem; + XMVECTOR vTemp = _mm_setzero_ps(); + // Copy x only + vTemp = _mm_move_ss(vTemp, vValues); + // TwoNearZ / ViewWidth,0,0,0 + M.r[0] = vTemp; + // 0,TwoNearZ / ViewHeight,0,0 + vTemp = vValues; + vTemp = _mm_and_ps(vTemp, g_XMMaskY); + M.r[1] = vTemp; + // x=fRange,y=-fRange * NearZ,0,1.0f + vValues = _mm_shuffle_ps(vValues, g_XMIdentityR3, 
_MM_SHUFFLE(3, 2, 3, 2)); + // 0,0,fRange,1.0f + vTemp = _mm_setzero_ps(); + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0)); + M.r[2] = vTemp; + // 0,0,-fRange * NearZ,0 + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0)); + M.r[3] = vTemp; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveRH +( + float ViewWidth, + float ViewHeight, + float NearZ, + float FarZ +) noexcept +{ + assert(NearZ > 0.f && FarZ > 0.f); + assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f)); + assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f)); + assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); + +#if defined(_XM_NO_INTRINSICS_) + + float TwoNearZ = NearZ + NearZ; + float fRange = FarZ / (NearZ - FarZ); + + XMMATRIX M; + M.m[0][0] = TwoNearZ / ViewWidth; + M.m[0][1] = 0.0f; + M.m[0][2] = 0.0f; + M.m[0][3] = 0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = TwoNearZ / ViewHeight; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = 0.0f; + M.m[2][1] = 0.0f; + M.m[2][2] = fRange; + M.m[2][3] = -1.0f; + + M.m[3][0] = 0.0f; + M.m[3][1] = 0.0f; + M.m[3][2] = fRange * NearZ; + M.m[3][3] = 0.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float TwoNearZ = NearZ + NearZ; + float fRange = FarZ / (NearZ - FarZ); + const float32x4_t Zero = vdupq_n_f32(0); + + XMMATRIX M; + M.r[0] = vsetq_lane_f32(TwoNearZ / ViewWidth, Zero, 0); + M.r[1] = vsetq_lane_f32(TwoNearZ / ViewHeight, Zero, 1); + M.r[2] = vsetq_lane_f32(fRange, g_XMNegIdentityR3.v, 2); + M.r[3] = vsetq_lane_f32(fRange * NearZ, Zero, 2); + return M; +#elif defined(_XM_SSE_INTRINSICS_) + XMMATRIX M; + float TwoNearZ = NearZ + NearZ; + float fRange = FarZ / (NearZ - FarZ); + // Note: This is recorded on the stack + XMVECTOR rMem = { + TwoNearZ / ViewWidth, + TwoNearZ / ViewHeight, + fRange, + fRange * NearZ + }; + // Copy from memory to SSE register + XMVECTOR vValues = rMem; + XMVECTOR vTemp = 
_mm_setzero_ps(); + // Copy x only + vTemp = _mm_move_ss(vTemp, vValues); + // TwoNearZ / ViewWidth,0,0,0 + M.r[0] = vTemp; + // 0,TwoNearZ / ViewHeight,0,0 + vTemp = vValues; + vTemp = _mm_and_ps(vTemp, g_XMMaskY); + M.r[1] = vTemp; + // x=fRange,y=-fRange * NearZ,0,-1.0f + vValues = _mm_shuffle_ps(vValues, g_XMNegIdentityR3, _MM_SHUFFLE(3, 2, 3, 2)); + // 0,0,fRange,-1.0f + vTemp = _mm_setzero_ps(); + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0)); + M.r[2] = vTemp; + // 0,0,-fRange * NearZ,0 + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0)); + M.r[3] = vTemp; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovLH +( + float FovAngleY, + float AspectRatio, + float NearZ, + float FarZ +) noexcept +{ + assert(NearZ > 0.f && FarZ > 0.f); + assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f)); + assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f)); + assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); + +#if defined(_XM_NO_INTRINSICS_) + + float SinFov; + float CosFov; + XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); + + float Height = CosFov / SinFov; + float Width = Height / AspectRatio; + float fRange = FarZ / (FarZ - NearZ); + + XMMATRIX M; + M.m[0][0] = Width; + M.m[0][1] = 0.0f; + M.m[0][2] = 0.0f; + M.m[0][3] = 0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = Height; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = 0.0f; + M.m[2][1] = 0.0f; + M.m[2][2] = fRange; + M.m[2][3] = 1.0f; + + M.m[3][0] = 0.0f; + M.m[3][1] = 0.0f; + M.m[3][2] = -fRange * NearZ; + M.m[3][3] = 0.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float SinFov; + float CosFov; + XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); + + float fRange = FarZ / (FarZ - NearZ); + float Height = CosFov / SinFov; + float Width = Height / AspectRatio; + const float32x4_t Zero = vdupq_n_f32(0); + + XMMATRIX M; + M.r[0] = 
vsetq_lane_f32(Width, Zero, 0); + M.r[1] = vsetq_lane_f32(Height, Zero, 1); + M.r[2] = vsetq_lane_f32(fRange, g_XMIdentityR3.v, 2); + M.r[3] = vsetq_lane_f32(-fRange * NearZ, Zero, 2); + return M; +#elif defined(_XM_SSE_INTRINSICS_) + float SinFov; + float CosFov; + XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); + + float fRange = FarZ / (FarZ - NearZ); + // Note: This is recorded on the stack + float Height = CosFov / SinFov; + XMVECTOR rMem = { + Height / AspectRatio, + Height, + fRange, + -fRange * NearZ + }; + // Copy from memory to SSE register + XMVECTOR vValues = rMem; + XMVECTOR vTemp = _mm_setzero_ps(); + // Copy x only + vTemp = _mm_move_ss(vTemp, vValues); + // CosFov / SinFov,0,0,0 + XMMATRIX M; + M.r[0] = vTemp; + // 0,Height / AspectRatio,0,0 + vTemp = vValues; + vTemp = _mm_and_ps(vTemp, g_XMMaskY); + M.r[1] = vTemp; + // x=fRange,y=-fRange * NearZ,0,1.0f + vTemp = _mm_setzero_ps(); + vValues = _mm_shuffle_ps(vValues, g_XMIdentityR3, _MM_SHUFFLE(3, 2, 3, 2)); + // 0,0,fRange,1.0f + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0)); + M.r[2] = vTemp; + // 0,0,-fRange * NearZ,0.0f + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0)); + M.r[3] = vTemp; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovRH +( + float FovAngleY, + float AspectRatio, + float NearZ, + float FarZ +) noexcept +{ + assert(NearZ > 0.f && FarZ > 0.f); + assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f)); + assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f)); + assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); + +#if defined(_XM_NO_INTRINSICS_) + + float SinFov; + float CosFov; + XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); + + float Height = CosFov / SinFov; + float Width = Height / AspectRatio; + float fRange = FarZ / (NearZ - FarZ); + + XMMATRIX M; + M.m[0][0] = Width; + M.m[0][1] = 0.0f; + M.m[0][2] = 0.0f; + 
M.m[0][3] = 0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = Height; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = 0.0f; + M.m[2][1] = 0.0f; + M.m[2][2] = fRange; + M.m[2][3] = -1.0f; + + M.m[3][0] = 0.0f; + M.m[3][1] = 0.0f; + M.m[3][2] = fRange * NearZ; + M.m[3][3] = 0.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float SinFov; + float CosFov; + XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); + float fRange = FarZ / (NearZ - FarZ); + float Height = CosFov / SinFov; + float Width = Height / AspectRatio; + const float32x4_t Zero = vdupq_n_f32(0); + + XMMATRIX M; + M.r[0] = vsetq_lane_f32(Width, Zero, 0); + M.r[1] = vsetq_lane_f32(Height, Zero, 1); + M.r[2] = vsetq_lane_f32(fRange, g_XMNegIdentityR3.v, 2); + M.r[3] = vsetq_lane_f32(fRange * NearZ, Zero, 2); + return M; +#elif defined(_XM_SSE_INTRINSICS_) + float SinFov; + float CosFov; + XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); + float fRange = FarZ / (NearZ - FarZ); + // Note: This is recorded on the stack + float Height = CosFov / SinFov; + XMVECTOR rMem = { + Height / AspectRatio, + Height, + fRange, + fRange * NearZ + }; + // Copy from memory to SSE register + XMVECTOR vValues = rMem; + XMVECTOR vTemp = _mm_setzero_ps(); + // Copy x only + vTemp = _mm_move_ss(vTemp, vValues); + // CosFov / SinFov,0,0,0 + XMMATRIX M; + M.r[0] = vTemp; + // 0,Height / AspectRatio,0,0 + vTemp = vValues; + vTemp = _mm_and_ps(vTemp, g_XMMaskY); + M.r[1] = vTemp; + // x=fRange,y=-fRange * NearZ,0,-1.0f + vTemp = _mm_setzero_ps(); + vValues = _mm_shuffle_ps(vValues, g_XMNegIdentityR3, _MM_SHUFFLE(3, 2, 3, 2)); + // 0,0,fRange,-1.0f + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0)); + M.r[2] = vTemp; + // 0,0,fRange * NearZ,0.0f + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0)); + M.r[3] = vTemp; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterLH +( + float 
ViewLeft, + float ViewRight, + float ViewBottom, + float ViewTop, + float NearZ, + float FarZ +) noexcept +{ + assert(NearZ > 0.f && FarZ > 0.f); + assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f)); + assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f)); + assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); + +#if defined(_XM_NO_INTRINSICS_) + + float TwoNearZ = NearZ + NearZ; + float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); + float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); + float fRange = FarZ / (FarZ - NearZ); + + XMMATRIX M; + M.m[0][0] = TwoNearZ * ReciprocalWidth; + M.m[0][1] = 0.0f; + M.m[0][2] = 0.0f; + M.m[0][3] = 0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = TwoNearZ * ReciprocalHeight; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = -(ViewLeft + ViewRight) * ReciprocalWidth; + M.m[2][1] = -(ViewTop + ViewBottom) * ReciprocalHeight; + M.m[2][2] = fRange; + M.m[2][3] = 1.0f; + + M.m[3][0] = 0.0f; + M.m[3][1] = 0.0f; + M.m[3][2] = -fRange * NearZ; + M.m[3][3] = 0.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float TwoNearZ = NearZ + NearZ; + float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); + float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); + float fRange = FarZ / (FarZ - NearZ); + const float32x4_t Zero = vdupq_n_f32(0); + + XMMATRIX M; + M.r[0] = vsetq_lane_f32(TwoNearZ * ReciprocalWidth, Zero, 0); + M.r[1] = vsetq_lane_f32(TwoNearZ * ReciprocalHeight, Zero, 1); + M.r[2] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth, + -(ViewTop + ViewBottom) * ReciprocalHeight, + fRange, + 1.0f); + M.r[3] = vsetq_lane_f32(-fRange * NearZ, Zero, 2); + return M; +#elif defined(_XM_SSE_INTRINSICS_) + XMMATRIX M; + float TwoNearZ = NearZ + NearZ; + float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); + float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); + float fRange = FarZ / (FarZ - NearZ); + // Note: This is recorded on the stack + XMVECTOR rMem = { + TwoNearZ * ReciprocalWidth, + TwoNearZ * 
ReciprocalHeight, + -fRange * NearZ, + 0 + }; + // Copy from memory to SSE register + XMVECTOR vValues = rMem; + XMVECTOR vTemp = _mm_setzero_ps(); + // Copy x only + vTemp = _mm_move_ss(vTemp, vValues); + // TwoNearZ*ReciprocalWidth,0,0,0 + M.r[0] = vTemp; + // 0,TwoNearZ*ReciprocalHeight,0,0 + vTemp = vValues; + vTemp = _mm_and_ps(vTemp, g_XMMaskY); + M.r[1] = vTemp; + // 0,0,fRange,1.0f + M.r[2] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth, + -(ViewTop + ViewBottom) * ReciprocalHeight, + fRange, + 1.0f); + // 0,0,-fRange * NearZ,0.0f + vValues = _mm_and_ps(vValues, g_XMMaskZ); + M.r[3] = vValues; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterRH +( + float ViewLeft, + float ViewRight, + float ViewBottom, + float ViewTop, + float NearZ, + float FarZ +) noexcept +{ + assert(NearZ > 0.f && FarZ > 0.f); + assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f)); + assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f)); + assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); + +#if defined(_XM_NO_INTRINSICS_) + + float TwoNearZ = NearZ + NearZ; + float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); + float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); + float fRange = FarZ / (NearZ - FarZ); + + XMMATRIX M; + M.m[0][0] = TwoNearZ * ReciprocalWidth; + M.m[0][1] = 0.0f; + M.m[0][2] = 0.0f; + M.m[0][3] = 0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = TwoNearZ * ReciprocalHeight; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = (ViewLeft + ViewRight) * ReciprocalWidth; + M.m[2][1] = (ViewTop + ViewBottom) * ReciprocalHeight; + M.m[2][2] = fRange; + M.m[2][3] = -1.0f; + + M.m[3][0] = 0.0f; + M.m[3][1] = 0.0f; + M.m[3][2] = fRange * NearZ; + M.m[3][3] = 0.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float TwoNearZ = NearZ + NearZ; + float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); + float ReciprocalHeight = 1.0f / 
(ViewTop - ViewBottom); + float fRange = FarZ / (NearZ - FarZ); + const float32x4_t Zero = vdupq_n_f32(0); + + XMMATRIX M; + M.r[0] = vsetq_lane_f32(TwoNearZ * ReciprocalWidth, Zero, 0); + M.r[1] = vsetq_lane_f32(TwoNearZ * ReciprocalHeight, Zero, 1); + M.r[2] = XMVectorSet((ViewLeft + ViewRight) * ReciprocalWidth, + (ViewTop + ViewBottom) * ReciprocalHeight, + fRange, + -1.0f); + M.r[3] = vsetq_lane_f32(fRange * NearZ, Zero, 2); + return M; +#elif defined(_XM_SSE_INTRINSICS_) + XMMATRIX M; + float TwoNearZ = NearZ + NearZ; + float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); + float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); + float fRange = FarZ / (NearZ - FarZ); + // Note: This is recorded on the stack + XMVECTOR rMem = { + TwoNearZ * ReciprocalWidth, + TwoNearZ * ReciprocalHeight, + fRange * NearZ, + 0 + }; + // Copy from memory to SSE register + XMVECTOR vValues = rMem; + XMVECTOR vTemp = _mm_setzero_ps(); + // Copy x only + vTemp = _mm_move_ss(vTemp, vValues); + // TwoNearZ*ReciprocalWidth,0,0,0 + M.r[0] = vTemp; + // 0,TwoNearZ*ReciprocalHeight,0,0 + vTemp = vValues; + vTemp = _mm_and_ps(vTemp, g_XMMaskY); + M.r[1] = vTemp; + // 0,0,fRange,1.0f + M.r[2] = XMVectorSet((ViewLeft + ViewRight) * ReciprocalWidth, + (ViewTop + ViewBottom) * ReciprocalHeight, + fRange, + -1.0f); + // 0,0,-fRange * NearZ,0.0f + vValues = _mm_and_ps(vValues, g_XMMaskZ); + M.r[3] = vValues; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixOrthographicLH +( + float ViewWidth, + float ViewHeight, + float NearZ, + float FarZ +) noexcept +{ + assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f)); + assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f)); + assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); + +#if defined(_XM_NO_INTRINSICS_) + + float fRange = 1.0f / (FarZ - NearZ); + + XMMATRIX M; + M.m[0][0] = 2.0f / ViewWidth; + M.m[0][1] = 0.0f; + M.m[0][2] = 0.0f; + M.m[0][3] = 
0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = 2.0f / ViewHeight; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = 0.0f; + M.m[2][1] = 0.0f; + M.m[2][2] = fRange; + M.m[2][3] = 0.0f; + + M.m[3][0] = 0.0f; + M.m[3][1] = 0.0f; + M.m[3][2] = -fRange * NearZ; + M.m[3][3] = 1.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float fRange = 1.0f / (FarZ - NearZ); + + const float32x4_t Zero = vdupq_n_f32(0); + XMMATRIX M; + M.r[0] = vsetq_lane_f32(2.0f / ViewWidth, Zero, 0); + M.r[1] = vsetq_lane_f32(2.0f / ViewHeight, Zero, 1); + M.r[2] = vsetq_lane_f32(fRange, Zero, 2); + M.r[3] = vsetq_lane_f32(-fRange * NearZ, g_XMIdentityR3.v, 2); + return M; +#elif defined(_XM_SSE_INTRINSICS_) + XMMATRIX M; + float fRange = 1.0f / (FarZ - NearZ); + // Note: This is recorded on the stack + XMVECTOR rMem = { + 2.0f / ViewWidth, + 2.0f / ViewHeight, + fRange, + -fRange * NearZ + }; + // Copy from memory to SSE register + XMVECTOR vValues = rMem; + XMVECTOR vTemp = _mm_setzero_ps(); + // Copy x only + vTemp = _mm_move_ss(vTemp, vValues); + // 2.0f / ViewWidth,0,0,0 + M.r[0] = vTemp; + // 0,2.0f / ViewHeight,0,0 + vTemp = vValues; + vTemp = _mm_and_ps(vTemp, g_XMMaskY); + M.r[1] = vTemp; + // x=fRange,y=-fRange * NearZ,0,1.0f + vTemp = _mm_setzero_ps(); + vValues = _mm_shuffle_ps(vValues, g_XMIdentityR3, _MM_SHUFFLE(3, 2, 3, 2)); + // 0,0,fRange,0.0f + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 0, 0, 0)); + M.r[2] = vTemp; + // 0,0,-fRange * NearZ,1.0f + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 1, 0, 0)); + M.r[3] = vTemp; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixOrthographicRH +( + float ViewWidth, + float ViewHeight, + float NearZ, + float FarZ +) noexcept +{ + assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f)); + assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f)); + assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); + +#if 
defined(_XM_NO_INTRINSICS_) + + float fRange = 1.0f / (NearZ - FarZ); + + XMMATRIX M; + M.m[0][0] = 2.0f / ViewWidth; + M.m[0][1] = 0.0f; + M.m[0][2] = 0.0f; + M.m[0][3] = 0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = 2.0f / ViewHeight; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = 0.0f; + M.m[2][1] = 0.0f; + M.m[2][2] = fRange; + M.m[2][3] = 0.0f; + + M.m[3][0] = 0.0f; + M.m[3][1] = 0.0f; + M.m[3][2] = fRange * NearZ; + M.m[3][3] = 1.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float fRange = 1.0f / (NearZ - FarZ); + + const float32x4_t Zero = vdupq_n_f32(0); + XMMATRIX M; + M.r[0] = vsetq_lane_f32(2.0f / ViewWidth, Zero, 0); + M.r[1] = vsetq_lane_f32(2.0f / ViewHeight, Zero, 1); + M.r[2] = vsetq_lane_f32(fRange, Zero, 2); + M.r[3] = vsetq_lane_f32(fRange * NearZ, g_XMIdentityR3.v, 2); + return M; +#elif defined(_XM_SSE_INTRINSICS_) + XMMATRIX M; + float fRange = 1.0f / (NearZ - FarZ); + // Note: This is recorded on the stack + XMVECTOR rMem = { + 2.0f / ViewWidth, + 2.0f / ViewHeight, + fRange, + fRange * NearZ + }; + // Copy from memory to SSE register + XMVECTOR vValues = rMem; + XMVECTOR vTemp = _mm_setzero_ps(); + // Copy x only + vTemp = _mm_move_ss(vTemp, vValues); + // 2.0f / ViewWidth,0,0,0 + M.r[0] = vTemp; + // 0,2.0f / ViewHeight,0,0 + vTemp = vValues; + vTemp = _mm_and_ps(vTemp, g_XMMaskY); + M.r[1] = vTemp; + // x=fRange,y=fRange * NearZ,0,1.0f + vTemp = _mm_setzero_ps(); + vValues = _mm_shuffle_ps(vValues, g_XMIdentityR3, _MM_SHUFFLE(3, 2, 3, 2)); + // 0,0,fRange,0.0f + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 0, 0, 0)); + M.r[2] = vTemp; + // 0,0,fRange * NearZ,1.0f + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 1, 0, 0)); + M.r[3] = vTemp; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterLH +( + float ViewLeft, + float ViewRight, + float ViewBottom, + float ViewTop, + float NearZ, + float 
FarZ +) noexcept +{ + assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f)); + assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f)); + assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); + +#if defined(_XM_NO_INTRINSICS_) + + float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); + float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); + float fRange = 1.0f / (FarZ - NearZ); + + XMMATRIX M; + M.m[0][0] = ReciprocalWidth + ReciprocalWidth; + M.m[0][1] = 0.0f; + M.m[0][2] = 0.0f; + M.m[0][3] = 0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = ReciprocalHeight + ReciprocalHeight; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = 0.0f; + M.m[2][1] = 0.0f; + M.m[2][2] = fRange; + M.m[2][3] = 0.0f; + + M.m[3][0] = -(ViewLeft + ViewRight) * ReciprocalWidth; + M.m[3][1] = -(ViewTop + ViewBottom) * ReciprocalHeight; + M.m[3][2] = -fRange * NearZ; + M.m[3][3] = 1.0f; + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); + float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); + float fRange = 1.0f / (FarZ - NearZ); + const float32x4_t Zero = vdupq_n_f32(0); + XMMATRIX M; + M.r[0] = vsetq_lane_f32(ReciprocalWidth + ReciprocalWidth, Zero, 0); + M.r[1] = vsetq_lane_f32(ReciprocalHeight + ReciprocalHeight, Zero, 1); + M.r[2] = vsetq_lane_f32(fRange, Zero, 2); + M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth, + -(ViewTop + ViewBottom) * ReciprocalHeight, + -fRange * NearZ, + 1.0f); + return M; +#elif defined(_XM_SSE_INTRINSICS_) + XMMATRIX M; + float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft); + float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom); + float fRange = 1.0f / (FarZ - NearZ); + // Note: This is recorded on the stack + XMVECTOR rMem = { + fReciprocalWidth, + fReciprocalHeight, + fRange, + 1.0f + }; + XMVECTOR rMem2 = { + -(ViewLeft + ViewRight), + -(ViewTop + ViewBottom), + -NearZ, + 1.0f + }; + // Copy from memory to SSE register + XMVECTOR vValues = rMem; + XMVECTOR vTemp = 
_mm_setzero_ps(); + // Copy x only + vTemp = _mm_move_ss(vTemp, vValues); + // fReciprocalWidth*2,0,0,0 + vTemp = _mm_add_ss(vTemp, vTemp); + M.r[0] = vTemp; + // 0,fReciprocalHeight*2,0,0 + vTemp = vValues; + vTemp = _mm_and_ps(vTemp, g_XMMaskY); + vTemp = _mm_add_ps(vTemp, vTemp); + M.r[1] = vTemp; + // 0,0,fRange,0.0f + vTemp = vValues; + vTemp = _mm_and_ps(vTemp, g_XMMaskZ); + M.r[2] = vTemp; + // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + ViewBottom)*fReciprocalHeight,fRange*-NearZ,1.0f + vValues = _mm_mul_ps(vValues, rMem2); + M.r[3] = vValues; + return M; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterRH +( + float ViewLeft, + float ViewRight, + float ViewBottom, + float ViewTop, + float NearZ, + float FarZ +) noexcept +{ + assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f)); + assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f)); + assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); + +#if defined(_XM_NO_INTRINSICS_) + + float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); + float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); + float fRange = 1.0f / (NearZ - FarZ); + + XMMATRIX M; + M.m[0][0] = ReciprocalWidth + ReciprocalWidth; + M.m[0][1] = 0.0f; + M.m[0][2] = 0.0f; + M.m[0][3] = 0.0f; + + M.m[1][0] = 0.0f; + M.m[1][1] = ReciprocalHeight + ReciprocalHeight; + M.m[1][2] = 0.0f; + M.m[1][3] = 0.0f; + + M.m[2][0] = 0.0f; + M.m[2][1] = 0.0f; + M.m[2][2] = fRange; + M.m[2][3] = 0.0f; + + M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth, + -(ViewTop + ViewBottom) * ReciprocalHeight, + fRange * NearZ, + 1.0f); + return M; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); + float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); + float fRange = 1.0f / (NearZ - FarZ); + const float32x4_t Zero = vdupq_n_f32(0); + XMMATRIX M; + M.r[0] = vsetq_lane_f32(ReciprocalWidth + 
ReciprocalWidth, Zero, 0); + M.r[1] = vsetq_lane_f32(ReciprocalHeight + ReciprocalHeight, Zero, 1); + M.r[2] = vsetq_lane_f32(fRange, Zero, 2); + M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth, + -(ViewTop + ViewBottom) * ReciprocalHeight, + fRange * NearZ, + 1.0f); + return M; +#elif defined(_XM_SSE_INTRINSICS_) + XMMATRIX M; + float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft); + float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom); + float fRange = 1.0f / (NearZ - FarZ); + // Note: This is recorded on the stack + XMVECTOR rMem = { + fReciprocalWidth, + fReciprocalHeight, + fRange, + 1.0f + }; + XMVECTOR rMem2 = { + -(ViewLeft + ViewRight), + -(ViewTop + ViewBottom), + NearZ, + 1.0f + }; + // Copy from memory to SSE register + XMVECTOR vValues = rMem; + XMVECTOR vTemp = _mm_setzero_ps(); + // Copy x only + vTemp = _mm_move_ss(vTemp, vValues); + // fReciprocalWidth*2,0,0,0 + vTemp = _mm_add_ss(vTemp, vTemp); + M.r[0] = vTemp; + // 0,fReciprocalHeight*2,0,0 + vTemp = vValues; + vTemp = _mm_and_ps(vTemp, g_XMMaskY); + vTemp = _mm_add_ps(vTemp, vTemp); + M.r[1] = vTemp; + // 0,0,fRange,0.0f + vTemp = vValues; + vTemp = _mm_and_ps(vTemp, g_XMMaskZ); + M.r[2] = vTemp; + // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + ViewBottom)*fReciprocalHeight,fRange*-NearZ,1.0f + vValues = _mm_mul_ps(vValues, rMem2); + M.r[3] = vValues; + return M; +#endif +} + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + +/**************************************************************************** + * + * XMMATRIX operators and methods + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMMATRIX::XMMATRIX +( + float m00, float m01, float m02, float m03, + float m10, float m11, float m12, float m13, + float m20, float m21, float m22, float m23, + float m30, float m31, float m32, float m33 +) noexcept +{ + r[0] = XMVectorSet(m00, m01, 
m02, m03); + r[1] = XMVectorSet(m10, m11, m12, m13); + r[2] = XMVectorSet(m20, m21, m22, m23); + r[3] = XMVectorSet(m30, m31, m32, m33); +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMMATRIX::XMMATRIX(const float* pArray) noexcept +{ + assert(pArray != nullptr); + r[0] = XMLoadFloat4(reinterpret_cast(pArray)); + r[1] = XMLoadFloat4(reinterpret_cast(pArray + 4)); + r[2] = XMLoadFloat4(reinterpret_cast(pArray + 8)); + r[3] = XMLoadFloat4(reinterpret_cast(pArray + 12)); +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XMMATRIX::operator- () const noexcept +{ + XMMATRIX R; + R.r[0] = XMVectorNegate(r[0]); + R.r[1] = XMVectorNegate(r[1]); + R.r[2] = XMVectorNegate(r[2]); + R.r[3] = XMVectorNegate(r[3]); + return R; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX& XM_CALLCONV XMMATRIX::operator+= (FXMMATRIX M) noexcept +{ + r[0] = XMVectorAdd(r[0], M.r[0]); + r[1] = XMVectorAdd(r[1], M.r[1]); + r[2] = XMVectorAdd(r[2], M.r[2]); + r[3] = XMVectorAdd(r[3], M.r[3]); + return *this; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX& XM_CALLCONV XMMATRIX::operator-= (FXMMATRIX M) noexcept +{ + r[0] = XMVectorSubtract(r[0], M.r[0]); + r[1] = XMVectorSubtract(r[1], M.r[1]); + r[2] = XMVectorSubtract(r[2], M.r[2]); + r[3] = XMVectorSubtract(r[3], M.r[3]); + return *this; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX& XM_CALLCONV XMMATRIX::operator*=(FXMMATRIX M) noexcept +{ + *this = XMMatrixMultiply(*this, M); + return *this; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX& XMMATRIX::operator*= (float S) noexcept +{ + r[0] = XMVectorScale(r[0], S); + r[1] = XMVectorScale(r[1], S); + r[2] = XMVectorScale(r[2], S); + r[3] = 
XMVectorScale(r[3], S); + return *this; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX& XMMATRIX::operator/= (float S) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR vS = XMVectorReplicate(S); + r[0] = XMVectorDivide(r[0], vS); + r[1] = XMVectorDivide(r[1], vS); + r[2] = XMVectorDivide(r[2], vS); + r[3] = XMVectorDivide(r[3], vS); + return *this; +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + float32x4_t vS = vdupq_n_f32(S); + r[0] = vdivq_f32(r[0], vS); + r[1] = vdivq_f32(r[1], vS); + r[2] = vdivq_f32(r[2], vS); + r[3] = vdivq_f32(r[3], vS); +#else + // 2 iterations of Newton-Raphson refinement of reciprocal + float32x2_t vS = vdup_n_f32(S); + float32x2_t R0 = vrecpe_f32(vS); + float32x2_t S0 = vrecps_f32(R0, vS); + R0 = vmul_f32(S0, R0); + S0 = vrecps_f32(R0, vS); + R0 = vmul_f32(S0, R0); + float32x4_t Reciprocal = vcombine_f32(R0, R0); + r[0] = vmulq_f32(r[0], Reciprocal); + r[1] = vmulq_f32(r[1], Reciprocal); + r[2] = vmulq_f32(r[2], Reciprocal); + r[3] = vmulq_f32(r[3], Reciprocal); +#endif + return *this; +#elif defined(_XM_SSE_INTRINSICS_) + __m128 vS = _mm_set_ps1(S); + r[0] = _mm_div_ps(r[0], vS); + r[1] = _mm_div_ps(r[1], vS); + r[2] = _mm_div_ps(r[2], vS); + r[3] = _mm_div_ps(r[3], vS); + return *this; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMATRIX::operator+ (FXMMATRIX M) const noexcept +{ + XMMATRIX R; + R.r[0] = XMVectorAdd(r[0], M.r[0]); + R.r[1] = XMVectorAdd(r[1], M.r[1]); + R.r[2] = XMVectorAdd(r[2], M.r[2]); + R.r[3] = XMVectorAdd(r[3], M.r[3]); + return R; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMATRIX::operator- (FXMMATRIX M) const noexcept +{ + XMMATRIX R; + R.r[0] = XMVectorSubtract(r[0], M.r[0]); + R.r[1] = XMVectorSubtract(r[1], M.r[1]); + 
R.r[2] = XMVectorSubtract(r[2], M.r[2]); + R.r[3] = XMVectorSubtract(r[3], M.r[3]); + return R; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV XMMATRIX::operator*(FXMMATRIX M) const noexcept +{ + return XMMatrixMultiply(*this, M); +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XMMATRIX::operator* (float S) const noexcept +{ + XMMATRIX R; + R.r[0] = XMVectorScale(r[0], S); + R.r[1] = XMVectorScale(r[1], S); + R.r[2] = XMVectorScale(r[2], S); + R.r[3] = XMVectorScale(r[3], S); + return R; +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XMMATRIX::operator/ (float S) const noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR vS = XMVectorReplicate(S); + XMMATRIX R; + R.r[0] = XMVectorDivide(r[0], vS); + R.r[1] = XMVectorDivide(r[1], vS); + R.r[2] = XMVectorDivide(r[2], vS); + R.r[3] = XMVectorDivide(r[3], vS); + return R; +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + float32x4_t vS = vdupq_n_f32(S); + XMMATRIX R; + R.r[0] = vdivq_f32(r[0], vS); + R.r[1] = vdivq_f32(r[1], vS); + R.r[2] = vdivq_f32(r[2], vS); + R.r[3] = vdivq_f32(r[3], vS); +#else + // 2 iterations of Newton-Raphson refinement of reciprocal + float32x2_t vS = vdup_n_f32(S); + float32x2_t R0 = vrecpe_f32(vS); + float32x2_t S0 = vrecps_f32(R0, vS); + R0 = vmul_f32(S0, R0); + S0 = vrecps_f32(R0, vS); + R0 = vmul_f32(S0, R0); + float32x4_t Reciprocal = vcombine_f32(R0, R0); + XMMATRIX R; + R.r[0] = vmulq_f32(r[0], Reciprocal); + R.r[1] = vmulq_f32(r[1], Reciprocal); + R.r[2] = vmulq_f32(r[2], Reciprocal); + R.r[3] = vmulq_f32(r[3], Reciprocal); +#endif + return R; +#elif defined(_XM_SSE_INTRINSICS_) + __m128 vS = _mm_set_ps1(S); + XMMATRIX R; + R.r[0] = _mm_div_ps(r[0], vS); + R.r[1] = _mm_div_ps(r[1], vS); + R.r[2] = _mm_div_ps(r[2], vS); + R.r[3] = 
_mm_div_ps(r[3], vS); + return R; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMMATRIX XM_CALLCONV operator* +( + float S, + FXMMATRIX M +) noexcept +{ + XMMATRIX R; + R.r[0] = XMVectorScale(M.r[0], S); + R.r[1] = XMVectorScale(M.r[1], S); + R.r[2] = XMVectorScale(M.r[2], S); + R.r[3] = XMVectorScale(M.r[3], S); + return R; +} + +/**************************************************************************** + * + * XMFLOAT3X3 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMFLOAT3X3::XMFLOAT3X3(const float* pArray) noexcept +{ + assert(pArray != nullptr); + for (size_t Row = 0; Row < 3; Row++) + { + for (size_t Column = 0; Column < 3; Column++) + { + m[Row][Column] = pArray[Row * 3 + Column]; + } + } +} + +/**************************************************************************** + * + * XMFLOAT4X3 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMFLOAT4X3::XMFLOAT4X3(const float* pArray) noexcept +{ + assert(pArray != nullptr); + + m[0][0] = pArray[0]; + m[0][1] = pArray[1]; + m[0][2] = pArray[2]; + + m[1][0] = pArray[3]; + m[1][1] = pArray[4]; + m[1][2] = pArray[5]; + + m[2][0] = pArray[6]; + m[2][1] = pArray[7]; + m[2][2] = pArray[8]; + + m[3][0] = pArray[9]; + m[3][1] = pArray[10]; + m[3][2] = pArray[11]; +} + +/**************************************************************************** +* +* XMFLOAT3X4 operators +* +****************************************************************************/ + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMFLOAT3X4::XMFLOAT3X4(const float* pArray) noexcept +{ + assert(pArray 
!= nullptr); + + m[0][0] = pArray[0]; + m[0][1] = pArray[1]; + m[0][2] = pArray[2]; + m[0][3] = pArray[3]; + + m[1][0] = pArray[4]; + m[1][1] = pArray[5]; + m[1][2] = pArray[6]; + m[1][3] = pArray[7]; + + m[2][0] = pArray[8]; + m[2][1] = pArray[9]; + m[2][2] = pArray[10]; + m[2][3] = pArray[11]; +} + +/**************************************************************************** + * + * XMFLOAT4X4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMFLOAT4X4::XMFLOAT4X4(const float* pArray) noexcept +{ + assert(pArray != nullptr); + + m[0][0] = pArray[0]; + m[0][1] = pArray[1]; + m[0][2] = pArray[2]; + m[0][3] = pArray[3]; + + m[1][0] = pArray[4]; + m[1][1] = pArray[5]; + m[1][2] = pArray[6]; + m[1][3] = pArray[7]; + + m[2][0] = pArray[8]; + m[2][1] = pArray[9]; + m[2][2] = pArray[10]; + m[2][3] = pArray[11]; + + m[3][0] = pArray[12]; + m[3][1] = pArray[13]; + m[3][2] = pArray[14]; + m[3][3] = pArray[15]; +} + diff --git a/Sdk/External/DirectXMath/Inc/DirectXMathMisc.inl b/Sdk/External/DirectXMath/Inc/DirectXMathMisc.inl new file mode 100644 index 0000000..4cfd042 --- /dev/null +++ b/Sdk/External/DirectXMath/Inc/DirectXMathMisc.inl @@ -0,0 +1,2452 @@ +//------------------------------------------------------------------------------------- +// DirectXMathMisc.inl -- SIMD C++ Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +/**************************************************************************** + * + * Quaternion + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + // Comparison operations + //------------------------------------------------------------------------------ + + //------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMQuaternionEqual +( + FXMVECTOR Q1, + FXMVECTOR Q2 +) noexcept +{ + return XMVector4Equal(Q1, Q2); +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMQuaternionNotEqual +( + FXMVECTOR Q1, + FXMVECTOR Q2 +) noexcept +{ + return XMVector4NotEqual(Q1, Q2); +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMQuaternionIsNaN(FXMVECTOR Q) noexcept +{ + return XMVector4IsNaN(Q); +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMQuaternionIsInfinite(FXMVECTOR Q) noexcept +{ + return XMVector4IsInfinite(Q); +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMQuaternionIsIdentity(FXMVECTOR Q) noexcept +{ + return XMVector4Equal(Q, g_XMIdentityR3.v); +} + +//------------------------------------------------------------------------------ +// Computation operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionDot +( + FXMVECTOR Q1, + FXMVECTOR Q2 +) noexcept +{ + return XMVector4Dot(Q1, Q2); +} + 
+//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionMultiply +( + FXMVECTOR Q1, + FXMVECTOR Q2 +) noexcept +{ + // Returns the product Q2*Q1 (which is the concatenation of a rotation Q1 followed by the rotation Q2) + + // [ (Q2.w * Q1.x) + (Q2.x * Q1.w) + (Q2.y * Q1.z) - (Q2.z * Q1.y), + // (Q2.w * Q1.y) - (Q2.x * Q1.z) + (Q2.y * Q1.w) + (Q2.z * Q1.x), + // (Q2.w * Q1.z) + (Q2.x * Q1.y) - (Q2.y * Q1.x) + (Q2.z * Q1.w), + // (Q2.w * Q1.w) - (Q2.x * Q1.x) - (Q2.y * Q1.y) - (Q2.z * Q1.z) ] + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + (Q2.vector4_f32[3] * Q1.vector4_f32[0]) + (Q2.vector4_f32[0] * Q1.vector4_f32[3]) + (Q2.vector4_f32[1] * Q1.vector4_f32[2]) - (Q2.vector4_f32[2] * Q1.vector4_f32[1]), + (Q2.vector4_f32[3] * Q1.vector4_f32[1]) - (Q2.vector4_f32[0] * Q1.vector4_f32[2]) + (Q2.vector4_f32[1] * Q1.vector4_f32[3]) + (Q2.vector4_f32[2] * Q1.vector4_f32[0]), + (Q2.vector4_f32[3] * Q1.vector4_f32[2]) + (Q2.vector4_f32[0] * Q1.vector4_f32[1]) - (Q2.vector4_f32[1] * Q1.vector4_f32[0]) + (Q2.vector4_f32[2] * Q1.vector4_f32[3]), + (Q2.vector4_f32[3] * Q1.vector4_f32[3]) - (Q2.vector4_f32[0] * Q1.vector4_f32[0]) - (Q2.vector4_f32[1] * Q1.vector4_f32[1]) - (Q2.vector4_f32[2] * Q1.vector4_f32[2]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 ControlWZYX = { { { 1.0f, -1.0f, 1.0f, -1.0f } } }; + static const XMVECTORF32 ControlZWXY = { { { 1.0f, 1.0f, -1.0f, -1.0f } } }; + static const XMVECTORF32 ControlYXWZ = { { { -1.0f, 1.0f, 1.0f, -1.0f } } }; + + float32x2_t Q2L = vget_low_f32(Q2); + float32x2_t Q2H = vget_high_f32(Q2); + + float32x4_t Q2X = vdupq_lane_f32(Q2L, 0); + float32x4_t Q2Y = vdupq_lane_f32(Q2L, 1); + float32x4_t Q2Z = vdupq_lane_f32(Q2H, 0); + XMVECTOR vResult = vmulq_lane_f32(Q1, Q2H, 1); + + // Mul by Q1WZYX + float32x4_t vTemp = vrev64q_f32(Q1); + vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp)); 
+ Q2X = vmulq_f32(Q2X, vTemp); + vResult = vmlaq_f32(vResult, Q2X, ControlWZYX); + + // Mul by Q1ZWXY + vTemp = vreinterpretq_f32_u32(vrev64q_u32(vreinterpretq_u32_f32(vTemp))); + Q2Y = vmulq_f32(Q2Y, vTemp); + vResult = vmlaq_f32(vResult, Q2Y, ControlZWXY); + + // Mul by Q1YXWZ + vTemp = vreinterpretq_f32_u32(vrev64q_u32(vreinterpretq_u32_f32(vTemp))); + vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp)); + Q2Z = vmulq_f32(Q2Z, vTemp); + vResult = vmlaq_f32(vResult, Q2Z, ControlYXWZ); + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 ControlWZYX = { { { 1.0f, -1.0f, 1.0f, -1.0f } } }; + static const XMVECTORF32 ControlZWXY = { { { 1.0f, 1.0f, -1.0f, -1.0f } } }; + static const XMVECTORF32 ControlYXWZ = { { { -1.0f, 1.0f, 1.0f, -1.0f } } }; + // Copy to SSE registers and use as few as possible for x86 + XMVECTOR Q2X = Q2; + XMVECTOR Q2Y = Q2; + XMVECTOR Q2Z = Q2; + XMVECTOR vResult = Q2; + // Splat with one instruction + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 3, 3, 3)); + Q2X = XM_PERMUTE_PS(Q2X, _MM_SHUFFLE(0, 0, 0, 0)); + Q2Y = XM_PERMUTE_PS(Q2Y, _MM_SHUFFLE(1, 1, 1, 1)); + Q2Z = XM_PERMUTE_PS(Q2Z, _MM_SHUFFLE(2, 2, 2, 2)); + // Retire Q1 and perform Q1*Q2W + vResult = _mm_mul_ps(vResult, Q1); + XMVECTOR Q1Shuffle = Q1; + // Shuffle the copies of Q1 + Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3)); + // Mul by Q1WZYX + Q2X = _mm_mul_ps(Q2X, Q1Shuffle); + Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle, _MM_SHUFFLE(2, 3, 0, 1)); + // Flip the signs on y and z + vResult = XM_FMADD_PS(Q2X, ControlWZYX, vResult); + // Mul by Q1ZWXY + Q2Y = _mm_mul_ps(Q2Y, Q1Shuffle); + Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3)); + // Flip the signs on z and w + Q2Y = _mm_mul_ps(Q2Y, ControlZWXY); + // Mul by Q1YXWZ + Q2Z = _mm_mul_ps(Q2Z, Q1Shuffle); + // Flip the signs on x and w + Q2Y = XM_FMADD_PS(Q2Z, ControlYXWZ, Q2Y); + vResult = _mm_add_ps(vResult, Q2Y); + return vResult; +#endif +} + 
+//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionLengthSq(FXMVECTOR Q) noexcept +{ + return XMVector4LengthSq(Q); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionReciprocalLength(FXMVECTOR Q) noexcept +{ + return XMVector4ReciprocalLength(Q); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionLength(FXMVECTOR Q) noexcept +{ + return XMVector4Length(Q); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionNormalizeEst(FXMVECTOR Q) noexcept +{ + return XMVector4NormalizeEst(Q); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionNormalize(FXMVECTOR Q) noexcept +{ + return XMVector4Normalize(Q); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionConjugate(FXMVECTOR Q) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + -Q.vector4_f32[0], + -Q.vector4_f32[1], + -Q.vector4_f32[2], + Q.vector4_f32[3] + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 NegativeOne3 = { { { -1.0f, -1.0f, -1.0f, 1.0f } } }; + return vmulq_f32(Q, NegativeOne3.v); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 NegativeOne3 = { { { -1.0f, -1.0f, -1.0f, 1.0f } } }; + return _mm_mul_ps(Q, NegativeOne3); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionInverse(FXMVECTOR Q) noexcept +{ + const XMVECTOR Zero = XMVectorZero(); + + XMVECTOR L = XMVector4LengthSq(Q); + XMVECTOR Conjugate = XMQuaternionConjugate(Q); + + XMVECTOR Control = XMVectorLessOrEqual(L, 
g_XMEpsilon.v); + + XMVECTOR Result = XMVectorDivide(Conjugate, L); + + Result = XMVectorSelect(Result, Zero, Control); + + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionLn(FXMVECTOR Q) noexcept +{ + static const XMVECTORF32 OneMinusEpsilon = { { { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f } } }; + + XMVECTOR QW = XMVectorSplatW(Q); + XMVECTOR Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v); + + XMVECTOR ControlW = XMVectorInBounds(QW, OneMinusEpsilon.v); + + XMVECTOR Theta = XMVectorACos(QW); + XMVECTOR SinTheta = XMVectorSin(Theta); + + XMVECTOR S = XMVectorDivide(Theta, SinTheta); + + XMVECTOR Result = XMVectorMultiply(Q0, S); + Result = XMVectorSelect(Q0, Result, ControlW); + + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionExp(FXMVECTOR Q) noexcept +{ + XMVECTOR Theta = XMVector3Length(Q); + + XMVECTOR SinTheta, CosTheta; + XMVectorSinCos(&SinTheta, &CosTheta, Theta); + + XMVECTOR S = XMVectorDivide(SinTheta, Theta); + + XMVECTOR Result = XMVectorMultiply(Q, S); + + const XMVECTOR Zero = XMVectorZero(); + XMVECTOR Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v); + Result = XMVectorSelect(Result, Q, Control); + + Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v); + + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionSlerp +( + FXMVECTOR Q0, + FXMVECTOR Q1, + float t +) noexcept +{ + XMVECTOR T = XMVectorReplicate(t); + return XMQuaternionSlerpV(Q0, Q1, T); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionSlerpV +( + FXMVECTOR Q0, + FXMVECTOR Q1, + FXMVECTOR T +) noexcept +{ + assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == 
XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T))); + + // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega) + +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + const XMVECTORF32 OneMinusEpsilon = { { { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f } } }; + + XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1); + + const XMVECTOR Zero = XMVectorZero(); + XMVECTOR Control = XMVectorLess(CosOmega, Zero); + XMVECTOR Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control); + + CosOmega = XMVectorMultiply(CosOmega, Sign); + + Control = XMVectorLess(CosOmega, OneMinusEpsilon); + + XMVECTOR SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v); + SinOmega = XMVectorSqrt(SinOmega); + + XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega); + + XMVECTOR SignMask = XMVectorSplatSignMask(); + XMVECTOR V01 = XMVectorShiftLeft(T, Zero, 2); + SignMask = XMVectorShiftLeft(SignMask, Zero, 3); + V01 = XMVectorXorInt(V01, SignMask); + V01 = XMVectorAdd(g_XMIdentityR0.v, V01); + + XMVECTOR InvSinOmega = XMVectorReciprocal(SinOmega); + + XMVECTOR S0 = XMVectorMultiply(V01, Omega); + S0 = XMVectorSin(S0); + S0 = XMVectorMultiply(S0, InvSinOmega); + + S0 = XMVectorSelect(V01, S0, Control); + + XMVECTOR S1 = XMVectorSplatY(S0); + S0 = XMVectorSplatX(S0); + + S1 = XMVectorMultiply(S1, Sign); + + XMVECTOR Result = XMVectorMultiply(Q0, S0); + Result = XMVectorMultiplyAdd(Q1, S1, Result); + + return Result; + +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 OneMinusEpsilon = { { { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f } } }; + static const XMVECTORU32 SignMask2 = { { { 0x80000000, 0x00000000, 0x00000000, 0x00000000 } } }; + + XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1); + + const XMVECTOR Zero = XMVectorZero(); + XMVECTOR Control = XMVectorLess(CosOmega, Zero); + XMVECTOR Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control); + + CosOmega = 
_mm_mul_ps(CosOmega, Sign); + + Control = XMVectorLess(CosOmega, OneMinusEpsilon); + + XMVECTOR SinOmega = _mm_mul_ps(CosOmega, CosOmega); + SinOmega = _mm_sub_ps(g_XMOne, SinOmega); + SinOmega = _mm_sqrt_ps(SinOmega); + + XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega); + + XMVECTOR V01 = XM_PERMUTE_PS(T, _MM_SHUFFLE(2, 3, 0, 1)); + V01 = _mm_and_ps(V01, g_XMMaskXY); + V01 = _mm_xor_ps(V01, SignMask2); + V01 = _mm_add_ps(g_XMIdentityR0, V01); + + XMVECTOR S0 = _mm_mul_ps(V01, Omega); + S0 = XMVectorSin(S0); + S0 = _mm_div_ps(S0, SinOmega); + + S0 = XMVectorSelect(V01, S0, Control); + + XMVECTOR S1 = XMVectorSplatY(S0); + S0 = XMVectorSplatX(S0); + + S1 = _mm_mul_ps(S1, Sign); + XMVECTOR Result = _mm_mul_ps(Q0, S0); + S1 = _mm_mul_ps(S1, Q1); + Result = _mm_add_ps(Result, S1); + return Result; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionSquad +( + FXMVECTOR Q0, + FXMVECTOR Q1, + FXMVECTOR Q2, + GXMVECTOR Q3, + float t +) noexcept +{ + XMVECTOR T = XMVectorReplicate(t); + return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionSquadV +( + FXMVECTOR Q0, + FXMVECTOR Q1, + FXMVECTOR Q2, + GXMVECTOR Q3, + HXMVECTOR T +) noexcept +{ + assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T))); + + XMVECTOR TP = T; + const XMVECTOR Two = XMVectorSplatConstant(2, 0); + + XMVECTOR Q03 = XMQuaternionSlerpV(Q0, Q3, T); + XMVECTOR Q12 = XMQuaternionSlerpV(Q1, Q2, T); + + TP = XMVectorNegativeMultiplySubtract(TP, TP, TP); + TP = XMVectorMultiply(TP, Two); + + XMVECTOR Result = XMQuaternionSlerpV(Q03, Q12, TP); + + return Result; +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMQuaternionSquadSetup +( + XMVECTOR* 
pA, + XMVECTOR* pB, + XMVECTOR* pC, + FXMVECTOR Q0, + FXMVECTOR Q1, + FXMVECTOR Q2, + GXMVECTOR Q3 +) noexcept +{ + assert(pA); + assert(pB); + assert(pC); + + XMVECTOR LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2)); + XMVECTOR LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2)); + XMVECTOR SQ2 = XMVectorNegate(Q2); + + XMVECTOR Control1 = XMVectorLess(LS12, LD12); + SQ2 = XMVectorSelect(Q2, SQ2, Control1); + + XMVECTOR LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1)); + XMVECTOR LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1)); + XMVECTOR SQ0 = XMVectorNegate(Q0); + + XMVECTOR LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3)); + XMVECTOR LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3)); + XMVECTOR SQ3 = XMVectorNegate(Q3); + + XMVECTOR Control0 = XMVectorLess(LS01, LD01); + XMVECTOR Control2 = XMVectorLess(LS23, LD23); + + SQ0 = XMVectorSelect(Q0, SQ0, Control0); + SQ3 = XMVectorSelect(Q3, SQ3, Control2); + + XMVECTOR InvQ1 = XMQuaternionInverse(Q1); + XMVECTOR InvQ2 = XMQuaternionInverse(SQ2); + + XMVECTOR LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0)); + XMVECTOR LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2)); + XMVECTOR LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1)); + XMVECTOR LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3)); + + const XMVECTOR NegativeOneQuarter = XMVectorSplatConstant(-1, 2); + + XMVECTOR ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter); + XMVECTOR ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter); + ExpQ02 = XMQuaternionExp(ExpQ02); + ExpQ13 = XMQuaternionExp(ExpQ13); + + *pA = XMQuaternionMultiply(Q1, ExpQ02); + *pB = XMQuaternionMultiply(SQ2, ExpQ13); + *pC = SQ2; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionBaryCentric +( + FXMVECTOR Q0, + FXMVECTOR Q1, + FXMVECTOR Q2, + float f, + float g +) noexcept +{ + float s = f + g; + + XMVECTOR Result; + if ((s < 0.00001f) 
&& (s > -0.00001f)) + { + Result = Q0; + } + else + { + XMVECTOR Q01 = XMQuaternionSlerp(Q0, Q1, s); + XMVECTOR Q02 = XMQuaternionSlerp(Q0, Q2, s); + + Result = XMQuaternionSlerp(Q01, Q02, g / s); + } + + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionBaryCentricV +( + FXMVECTOR Q0, + FXMVECTOR Q1, + FXMVECTOR Q2, + GXMVECTOR F, + HXMVECTOR G +) noexcept +{ + assert((XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F))); + assert((XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G))); + + const XMVECTOR Epsilon = XMVectorSplatConstant(1, 16); + + XMVECTOR S = XMVectorAdd(F, G); + + XMVECTOR Result; + if (XMVector4InBounds(S, Epsilon)) + { + Result = Q0; + } + else + { + XMVECTOR Q01 = XMQuaternionSlerpV(Q0, Q1, S); + XMVECTOR Q02 = XMQuaternionSlerpV(Q0, Q2, S); + XMVECTOR GS = XMVectorReciprocal(S); + GS = XMVectorMultiply(G, GS); + + Result = XMQuaternionSlerpV(Q01, Q02, GS); + } + + return Result; +} + +//------------------------------------------------------------------------------ +// Transformation operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionIdentity() noexcept +{ + return g_XMIdentityR3.v; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYaw +( + float Pitch, + float Yaw, + float Roll +) noexcept +{ + XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f); + XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles); + return Q; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV 
XMQuaternionRotationRollPitchYawFromVector +( + FXMVECTOR Angles // +) noexcept +{ + static const XMVECTORF32 Sign = { { { 1.0f, -1.0f, -1.0f, 1.0f } } }; + + XMVECTOR HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v); + + XMVECTOR SinAngles, CosAngles; + XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles); + + XMVECTOR P0 = XMVectorPermute(SinAngles, CosAngles); + XMVECTOR Y0 = XMVectorPermute(SinAngles, CosAngles); + XMVECTOR R0 = XMVectorPermute(SinAngles, CosAngles); + XMVECTOR P1 = XMVectorPermute(CosAngles, SinAngles); + XMVECTOR Y1 = XMVectorPermute(CosAngles, SinAngles); + XMVECTOR R1 = XMVectorPermute(CosAngles, SinAngles); + + XMVECTOR Q1 = XMVectorMultiply(P1, Sign.v); + XMVECTOR Q0 = XMVectorMultiply(P0, Y0); + Q1 = XMVectorMultiply(Q1, Y1); + Q0 = XMVectorMultiply(Q0, R0); + XMVECTOR Q = XMVectorMultiplyAdd(Q1, R1, Q0); + + return Q; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionRotationNormal +( + FXMVECTOR NormalAxis, + float Angle +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + XMVECTOR N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v); + + float SinV, CosV; + XMScalarSinCos(&SinV, &CosV, 0.5f * Angle); + + XMVECTOR Scale = XMVectorSet(SinV, SinV, SinV, CosV); + return XMVectorMultiply(N, Scale); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR N = _mm_and_ps(NormalAxis, g_XMMask3); + N = _mm_or_ps(N, g_XMIdentityR3); + XMVECTOR Scale = _mm_set_ps1(0.5f * Angle); + XMVECTOR vSine; + XMVECTOR vCosine; + XMVectorSinCos(&vSine, &vCosine, Scale); + Scale = _mm_and_ps(vSine, g_XMMask3); + vCosine = _mm_and_ps(vCosine, g_XMMaskW); + Scale = _mm_or_ps(Scale, vCosine); + N = _mm_mul_ps(N, Scale); + return N; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionRotationAxis +( + FXMVECTOR Axis, + float Angle +) noexcept +{ + 
assert(!XMVector3Equal(Axis, XMVectorZero())); + assert(!XMVector3IsInfinite(Axis)); + + XMVECTOR Normal = XMVector3Normalize(Axis); + XMVECTOR Q = XMQuaternionRotationNormal(Normal, Angle); + return Q; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMQuaternionRotationMatrix(FXMMATRIX M) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 q; + float r22 = M.m[2][2]; + if (r22 <= 0.f) // x^2 + y^2 >= z^2 + w^2 + { + float dif10 = M.m[1][1] - M.m[0][0]; + float omr22 = 1.f - r22; + if (dif10 <= 0.f) // x^2 >= y^2 + { + float fourXSqr = omr22 - dif10; + float inv4x = 0.5f / sqrtf(fourXSqr); + q.f[0] = fourXSqr * inv4x; + q.f[1] = (M.m[0][1] + M.m[1][0]) * inv4x; + q.f[2] = (M.m[0][2] + M.m[2][0]) * inv4x; + q.f[3] = (M.m[1][2] - M.m[2][1]) * inv4x; + } + else // y^2 >= x^2 + { + float fourYSqr = omr22 + dif10; + float inv4y = 0.5f / sqrtf(fourYSqr); + q.f[0] = (M.m[0][1] + M.m[1][0]) * inv4y; + q.f[1] = fourYSqr * inv4y; + q.f[2] = (M.m[1][2] + M.m[2][1]) * inv4y; + q.f[3] = (M.m[2][0] - M.m[0][2]) * inv4y; + } + } + else // z^2 + w^2 >= x^2 + y^2 + { + float sum10 = M.m[1][1] + M.m[0][0]; + float opr22 = 1.f + r22; + if (sum10 <= 0.f) // z^2 >= w^2 + { + float fourZSqr = opr22 - sum10; + float inv4z = 0.5f / sqrtf(fourZSqr); + q.f[0] = (M.m[0][2] + M.m[2][0]) * inv4z; + q.f[1] = (M.m[1][2] + M.m[2][1]) * inv4z; + q.f[2] = fourZSqr * inv4z; + q.f[3] = (M.m[0][1] - M.m[1][0]) * inv4z; + } + else // w^2 >= z^2 + { + float fourWSqr = opr22 + sum10; + float inv4w = 0.5f / sqrtf(fourWSqr); + q.f[0] = (M.m[1][2] - M.m[2][1]) * inv4w; + q.f[1] = (M.m[2][0] - M.m[0][2]) * inv4w; + q.f[2] = (M.m[0][1] - M.m[1][0]) * inv4w; + q.f[3] = fourWSqr * inv4w; + } + } + return q.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 XMPMMP = { { { +1.0f, -1.0f, -1.0f, +1.0f } } }; + static const XMVECTORF32 XMMPMP = { { { -1.0f, +1.0f, -1.0f, +1.0f } } }; + static const XMVECTORF32 XMMMPP 
= { { { -1.0f, -1.0f, +1.0f, +1.0f } } }; + static const XMVECTORU32 Select0110 = { { { XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0 } } }; + static const XMVECTORU32 Select0010 = { { { XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0 } } }; + + float32x4_t r0 = M.r[0]; + float32x4_t r1 = M.r[1]; + float32x4_t r2 = M.r[2]; + + float32x4_t r00 = vdupq_lane_f32(vget_low_f32(r0), 0); + float32x4_t r11 = vdupq_lane_f32(vget_low_f32(r1), 1); + float32x4_t r22 = vdupq_lane_f32(vget_high_f32(r2), 0); + + // x^2 >= y^2 equivalent to r11 - r00 <= 0 + float32x4_t r11mr00 = vsubq_f32(r11, r00); + uint32x4_t x2gey2 = vcleq_f32(r11mr00, g_XMZero); + + // z^2 >= w^2 equivalent to r11 + r00 <= 0 + float32x4_t r11pr00 = vaddq_f32(r11, r00); + uint32x4_t z2gew2 = vcleq_f32(r11pr00, g_XMZero); + + // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0 + uint32x4_t x2py2gez2pw2 = vcleq_f32(r22, g_XMZero); + + // (4*x^2, 4*y^2, 4*z^2, 4*w^2) + float32x4_t t0 = vmulq_f32(XMPMMP, r00); + float32x4_t x2y2z2w2 = vmlaq_f32(t0, XMMPMP, r11); + x2y2z2w2 = vmlaq_f32(x2y2z2w2, XMMMPP, r22); + x2y2z2w2 = vaddq_f32(x2y2z2w2, g_XMOne); + + // (r01, r02, r12, r11) + t0 = vextq_f32(r0, r0, 1); + float32x4_t t1 = vextq_f32(r1, r1, 1); + t0 = vcombine_f32(vget_low_f32(t0), vrev64_f32(vget_low_f32(t1))); + + // (r10, r20, r21, r10) + t1 = vextq_f32(r2, r2, 3); + float32x4_t r10 = vdupq_lane_f32(vget_low_f32(r1), 0); + t1 = vbslq_f32(Select0110, t1, r10); + + // (4*x*y, 4*x*z, 4*y*z, unused) + float32x4_t xyxzyz = vaddq_f32(t0, t1); + + // (r21, r20, r10, r10) + t0 = vcombine_f32(vrev64_f32(vget_low_f32(r2)), vget_low_f32(r10)); + + // (r12, r02, r01, r12) + float32x4_t t2 = vcombine_f32(vrev64_f32(vget_high_f32(r0)), vrev64_f32(vget_low_f32(r0))); + float32x4_t t3 = vdupq_lane_f32(vget_high_f32(r1), 0); + t1 = vbslq_f32(Select0110, t2, t3); + + // (4*x*w, 4*y*w, 4*z*w, unused) + float32x4_t xwywzw = vsubq_f32(t0, t1); + xwywzw = vmulq_f32(XMMPMP, xwywzw); + + // (4*x*x, 4*x*y, 4*x*z, 4*x*w) + t0 = 
vextq_f32(xyxzyz, xyxzyz, 3); + t1 = vbslq_f32(Select0110, t0, x2y2z2w2); + t2 = vdupq_lane_f32(vget_low_f32(xwywzw), 0); + float32x4_t tensor0 = vbslq_f32(g_XMSelect1110, t1, t2); + + // (4*y*x, 4*y*y, 4*y*z, 4*y*w) + t0 = vbslq_f32(g_XMSelect1011, xyxzyz, x2y2z2w2); + t1 = vdupq_lane_f32(vget_low_f32(xwywzw), 1); + float32x4_t tensor1 = vbslq_f32(g_XMSelect1110, t0, t1); + + // (4*z*x, 4*z*y, 4*z*z, 4*z*w) + t0 = vextq_f32(xyxzyz, xyxzyz, 1); + t1 = vcombine_f32(vget_low_f32(t0), vrev64_f32(vget_high_f32(xwywzw))); + float32x4_t tensor2 = vbslq_f32(Select0010, x2y2z2w2, t1); + + // (4*w*x, 4*w*y, 4*w*z, 4*w*w) + float32x4_t tensor3 = vbslq_f32(g_XMSelect1110, xwywzw, x2y2z2w2); + + // Select the row of the tensor-product matrix that has the largest + // magnitude. + t0 = vbslq_f32(x2gey2, tensor0, tensor1); + t1 = vbslq_f32(z2gew2, tensor2, tensor3); + t2 = vbslq_f32(x2py2gez2pw2, t0, t1); + + // Normalize the row. No division by zero is possible because the + // quaternion is unit-length (and the row is a nonzero multiple of + // the quaternion). 
+ t0 = XMVector4Length(t2); + return XMVectorDivide(t2, t0); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 XMPMMP = { { { +1.0f, -1.0f, -1.0f, +1.0f } } }; + static const XMVECTORF32 XMMPMP = { { { -1.0f, +1.0f, -1.0f, +1.0f } } }; + static const XMVECTORF32 XMMMPP = { { { -1.0f, -1.0f, +1.0f, +1.0f } } }; + + XMVECTOR r0 = M.r[0]; // (r00, r01, r02, 0) + XMVECTOR r1 = M.r[1]; // (r10, r11, r12, 0) + XMVECTOR r2 = M.r[2]; // (r20, r21, r22, 0) + + // (r00, r00, r00, r00) + XMVECTOR r00 = XM_PERMUTE_PS(r0, _MM_SHUFFLE(0, 0, 0, 0)); + // (r11, r11, r11, r11) + XMVECTOR r11 = XM_PERMUTE_PS(r1, _MM_SHUFFLE(1, 1, 1, 1)); + // (r22, r22, r22, r22) + XMVECTOR r22 = XM_PERMUTE_PS(r2, _MM_SHUFFLE(2, 2, 2, 2)); + + // x^2 >= y^2 equivalent to r11 - r00 <= 0 + // (r11 - r00, r11 - r00, r11 - r00, r11 - r00) + XMVECTOR r11mr00 = _mm_sub_ps(r11, r00); + XMVECTOR x2gey2 = _mm_cmple_ps(r11mr00, g_XMZero); + + // z^2 >= w^2 equivalent to r11 + r00 <= 0 + // (r11 + r00, r11 + r00, r11 + r00, r11 + r00) + XMVECTOR r11pr00 = _mm_add_ps(r11, r00); + XMVECTOR z2gew2 = _mm_cmple_ps(r11pr00, g_XMZero); + + // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0 + XMVECTOR x2py2gez2pw2 = _mm_cmple_ps(r22, g_XMZero); + + // (4*x^2, 4*y^2, 4*z^2, 4*w^2) + XMVECTOR t0 = XM_FMADD_PS(XMPMMP, r00, g_XMOne); + XMVECTOR t1 = _mm_mul_ps(XMMPMP, r11); + XMVECTOR t2 = XM_FMADD_PS(XMMMPP, r22, t0); + XMVECTOR x2y2z2w2 = _mm_add_ps(t1, t2); + + // (r01, r02, r12, r11) + t0 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(1, 2, 2, 1)); + // (r10, r10, r20, r21) + t1 = _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(1, 0, 0, 0)); + // (r10, r20, r21, r10) + t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1, 3, 2, 0)); + // (4*x*y, 4*x*z, 4*y*z, unused) + XMVECTOR xyxzyz = _mm_add_ps(t0, t1); + + // (r21, r20, r10, r10) + t0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0, 0, 0, 1)); + // (r12, r12, r02, r01) + t1 = _mm_shuffle_ps(r1, r0, _MM_SHUFFLE(1, 2, 2, 2)); + // (r12, r02, r01, r12) + t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1, 3, 2, 0)); 
+ // (4*x*w, 4*y*w, 4*z*w, unused) + XMVECTOR xwywzw = _mm_sub_ps(t0, t1); + xwywzw = _mm_mul_ps(XMMPMP, xwywzw); + + // (4*x^2, 4*y^2, 4*x*y, unused) + t0 = _mm_shuffle_ps(x2y2z2w2, xyxzyz, _MM_SHUFFLE(0, 0, 1, 0)); + // (4*z^2, 4*w^2, 4*z*w, unused) + t1 = _mm_shuffle_ps(x2y2z2w2, xwywzw, _MM_SHUFFLE(0, 2, 3, 2)); + // (4*x*z, 4*y*z, 4*x*w, 4*y*w) + t2 = _mm_shuffle_ps(xyxzyz, xwywzw, _MM_SHUFFLE(1, 0, 2, 1)); + + // (4*x*x, 4*x*y, 4*x*z, 4*x*w) + XMVECTOR tensor0 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2, 0, 2, 0)); + // (4*y*x, 4*y*y, 4*y*z, 4*y*w) + XMVECTOR tensor1 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 1, 1, 2)); + // (4*z*x, 4*z*y, 4*z*z, 4*z*w) + XMVECTOR tensor2 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2, 0, 1, 0)); + // (4*w*x, 4*w*y, 4*w*z, 4*w*w) + XMVECTOR tensor3 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(1, 2, 3, 2)); + + // Select the row of the tensor-product matrix that has the largest + // magnitude. + t0 = _mm_and_ps(x2gey2, tensor0); + t1 = _mm_andnot_ps(x2gey2, tensor1); + t0 = _mm_or_ps(t0, t1); + t1 = _mm_and_ps(z2gew2, tensor2); + t2 = _mm_andnot_ps(z2gew2, tensor3); + t1 = _mm_or_ps(t1, t2); + t0 = _mm_and_ps(x2py2gez2pw2, t0); + t1 = _mm_andnot_ps(x2py2gez2pw2, t1); + t2 = _mm_or_ps(t0, t1); + + // Normalize the row. No division by zero is possible because the + // quaternion is unit-length (and the row is a nonzero multiple of + // the quaternion). 
+ t0 = XMVector4Length(t2); + return _mm_div_ps(t2, t0); +#endif +} + +//------------------------------------------------------------------------------ +// Conversion operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMQuaternionToAxisAngle +( + XMVECTOR* pAxis, + float* pAngle, + FXMVECTOR Q +) noexcept +{ + assert(pAxis); + assert(pAngle); + + *pAxis = Q; + + *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q)); +} + +/**************************************************************************** + * + * Plane + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + // Comparison operations + //------------------------------------------------------------------------------ + + //------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMPlaneEqual +( + FXMVECTOR P1, + FXMVECTOR P2 +) noexcept +{ + return XMVector4Equal(P1, P2); +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMPlaneNearEqual +( + FXMVECTOR P1, + FXMVECTOR P2, + FXMVECTOR Epsilon +) noexcept +{ + XMVECTOR NP1 = XMPlaneNormalize(P1); + XMVECTOR NP2 = XMPlaneNormalize(P2); + return XMVector4NearEqual(NP1, NP2, Epsilon); +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMPlaneNotEqual +( + FXMVECTOR P1, + FXMVECTOR P2 +) noexcept +{ + return XMVector4NotEqual(P1, P2); +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMPlaneIsNaN(FXMVECTOR P) noexcept +{ + return XMVector4IsNaN(P); +} + +//------------------------------------------------------------------------------ + +inline bool 
XM_CALLCONV XMPlaneIsInfinite(FXMVECTOR P) noexcept +{ + return XMVector4IsInfinite(P); +} + +//------------------------------------------------------------------------------ +// Computation operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMPlaneDot +( + FXMVECTOR P, + FXMVECTOR V +) noexcept +{ + return XMVector4Dot(P, V); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMPlaneDotCoord +( + FXMVECTOR P, + FXMVECTOR V +) noexcept +{ + // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3] + + XMVECTOR V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v); + XMVECTOR Result = XMVector4Dot(P, V3); + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMPlaneDotNormal +( + FXMVECTOR P, + FXMVECTOR V +) noexcept +{ + return XMVector3Dot(P, V); +} + +//------------------------------------------------------------------------------ +// XMPlaneNormalizeEst uses a reciprocal estimate and +// returns QNaN on zero and infinite vectors. 
+ +inline XMVECTOR XM_CALLCONV XMPlaneNormalizeEst(FXMVECTOR P) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + XMVECTOR Result = XMVector3ReciprocalLengthEst(P); + return XMVectorMultiply(P, Result); + +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(P, P, 0x7f); + XMVECTOR vResult = _mm_rsqrt_ps(vTemp); + return _mm_mul_ps(vResult, P); +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product + XMVECTOR vDot = _mm_mul_ps(P, P); + // x=Dot.y, y=Dot.z + XMVECTOR vTemp = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(2, 1, 2, 1)); + // Result.x = x+y + vDot = _mm_add_ss(vDot, vTemp); + // x=Dot.z + vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); + // Result.x = (x+y)+z + vDot = _mm_add_ss(vDot, vTemp); + // Splat x + vDot = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(0, 0, 0, 0)); + // Get the reciprocal + vDot = _mm_rsqrt_ps(vDot); + // Get the reciprocal + vDot = _mm_mul_ps(vDot, P); + return vDot; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMPlaneNormalize(FXMVECTOR P) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + float fLengthSq = sqrtf((P.vector4_f32[0] * P.vector4_f32[0]) + (P.vector4_f32[1] * P.vector4_f32[1]) + (P.vector4_f32[2] * P.vector4_f32[2])); + // Prevent divide by zero + if (fLengthSq > 0) + { + fLengthSq = 1.0f / fLengthSq; + } + XMVECTORF32 vResult = { { { + P.vector4_f32[0] * fLengthSq, + P.vector4_f32[1] * fLengthSq, + P.vector4_f32[2] * fLengthSq, + P.vector4_f32[3] * fLengthSq + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTOR vLength = XMVector3ReciprocalLength(P); + return XMVectorMultiply(P, vLength); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vLengthSq = _mm_dp_ps(P, P, 0x7f); + // Prepare for the division + XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the elements to zero + vLengthSq = 
_mm_cmpneq_ps(vLengthSq, g_XMInfinity); + // Reciprocal mul to perform the normalization + vResult = _mm_div_ps(P, vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult, vLengthSq); + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x,y and z only + XMVECTOR vLengthSq = _mm_mul_ps(P, P); + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 1, 2, 1)); + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + // Prepare for the division + XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the elements to zero + vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); + // Reciprocal mul to perform the normalization + vResult = _mm_div_ps(P, vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult, vLengthSq); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMPlaneIntersectLine +( + FXMVECTOR P, + FXMVECTOR LinePoint1, + FXMVECTOR LinePoint2 +) noexcept +{ + XMVECTOR V1 = XMVector3Dot(P, LinePoint1); + XMVECTOR V2 = XMVector3Dot(P, LinePoint2); + XMVECTOR D = XMVectorSubtract(V1, V2); + + XMVECTOR VT = XMPlaneDotCoord(P, LinePoint1); + VT = XMVectorDivide(VT, D); + + XMVECTOR Point = XMVectorSubtract(LinePoint2, LinePoint1); + Point = XMVectorMultiplyAdd(Point, VT, LinePoint1); + + const XMVECTOR Zero = XMVectorZero(); + XMVECTOR Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v); + + return XMVectorSelect(Point, g_XMQNaN.v, Control); +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMPlaneIntersectPlane +( + XMVECTOR* pLinePoint1, + XMVECTOR* pLinePoint2, + FXMVECTOR P1, + 
FXMVECTOR P2 +) noexcept +{ + assert(pLinePoint1); + assert(pLinePoint2); + + XMVECTOR V1 = XMVector3Cross(P2, P1); + + XMVECTOR LengthSq = XMVector3LengthSq(V1); + + XMVECTOR V2 = XMVector3Cross(P2, V1); + + XMVECTOR P1W = XMVectorSplatW(P1); + XMVECTOR Point = XMVectorMultiply(V2, P1W); + + XMVECTOR V3 = XMVector3Cross(V1, P1); + + XMVECTOR P2W = XMVectorSplatW(P2); + Point = XMVectorMultiplyAdd(V3, P2W, Point); + + XMVECTOR LinePoint1 = XMVectorDivide(Point, LengthSq); + + XMVECTOR LinePoint2 = XMVectorAdd(LinePoint1, V1); + + XMVECTOR Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v); + *pLinePoint1 = XMVectorSelect(LinePoint1, g_XMQNaN.v, Control); + *pLinePoint2 = XMVectorSelect(LinePoint2, g_XMQNaN.v, Control); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMPlaneTransform +( + FXMVECTOR P, + FXMMATRIX M +) noexcept +{ + XMVECTOR W = XMVectorSplatW(P); + XMVECTOR Z = XMVectorSplatZ(P); + XMVECTOR Y = XMVectorSplatY(P); + XMVECTOR X = XMVectorSplatX(P); + + XMVECTOR Result = XMVectorMultiply(W, M.r[3]); + Result = XMVectorMultiplyAdd(Z, M.r[2], Result); + Result = XMVectorMultiplyAdd(Y, M.r[1], Result); + Result = XMVectorMultiplyAdd(X, M.r[0], Result); + return Result; +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMFLOAT4* XM_CALLCONV XMPlaneTransformStream +( + XMFLOAT4* pOutputStream, + size_t OutputStride, + const XMFLOAT4* pInputStream, + size_t InputStride, + size_t PlaneCount, + FXMMATRIX M +) noexcept +{ + return XMVector4TransformStream(pOutputStream, + OutputStride, + pInputStream, + InputStride, + PlaneCount, + M); +} + +//------------------------------------------------------------------------------ +// Conversion operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline 
XMVECTOR XM_CALLCONV XMPlaneFromPointNormal +( + FXMVECTOR Point, + FXMVECTOR Normal +) noexcept +{ + XMVECTOR W = XMVector3Dot(Point, Normal); + W = XMVectorNegate(W); + return XMVectorSelect(W, Normal, g_XMSelect1110.v); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMPlaneFromPoints +( + FXMVECTOR Point1, + FXMVECTOR Point2, + FXMVECTOR Point3 +) noexcept +{ + XMVECTOR V21 = XMVectorSubtract(Point1, Point2); + XMVECTOR V31 = XMVectorSubtract(Point1, Point3); + + XMVECTOR N = XMVector3Cross(V21, V31); + N = XMVector3Normalize(N); + + XMVECTOR D = XMPlaneDotNormal(N, Point1); + D = XMVectorNegate(D); + + XMVECTOR Result = XMVectorSelect(D, N, g_XMSelect1110.v); + + return Result; +} + +/**************************************************************************** + * + * Color + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + // Comparison operations + //------------------------------------------------------------------------------ + + //------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMColorEqual +( + FXMVECTOR C1, + FXMVECTOR C2 +) noexcept +{ + return XMVector4Equal(C1, C2); +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMColorNotEqual +( + FXMVECTOR C1, + FXMVECTOR C2 +) noexcept +{ + return XMVector4NotEqual(C1, C2); +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMColorGreater +( + FXMVECTOR C1, + FXMVECTOR C2 +) noexcept +{ + return XMVector4Greater(C1, C2); +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMColorGreaterOrEqual +( + FXMVECTOR C1, + FXMVECTOR C2 +) noexcept +{ + return XMVector4GreaterOrEqual(C1, 
C2); +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMColorLess +( + FXMVECTOR C1, + FXMVECTOR C2 +) noexcept +{ + return XMVector4Less(C1, C2); +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMColorLessOrEqual +( + FXMVECTOR C1, + FXMVECTOR C2 +) noexcept +{ + return XMVector4LessOrEqual(C1, C2); +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMColorIsNaN(FXMVECTOR C) noexcept +{ + return XMVector4IsNaN(C); +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMColorIsInfinite(FXMVECTOR C) noexcept +{ + return XMVector4IsInfinite(C); +} + +//------------------------------------------------------------------------------ +// Computation operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorNegative(FXMVECTOR vColor) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + 1.0f - vColor.vector4_f32[0], + 1.0f - vColor.vector4_f32[1], + 1.0f - vColor.vector4_f32[2], + vColor.vector4_f32[3] + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vTemp = veorq_u32(vreinterpretq_u32_f32(vColor), g_XMNegate3); + return vaddq_f32(vreinterpretq_f32_u32(vTemp), g_XMOne3); +#elif defined(_XM_SSE_INTRINSICS_) + // Negate only x,y and z. 
+ XMVECTOR vTemp = _mm_xor_ps(vColor, g_XMNegate3); + // Add 1,1,1,0 to -x,-y,-z,w + return _mm_add_ps(vTemp, g_XMOne3); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorModulate +( + FXMVECTOR C1, + FXMVECTOR C2 +) noexcept +{ + return XMVectorMultiply(C1, C2); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorAdjustSaturation +( + FXMVECTOR vColor, + float fSaturation +) noexcept +{ + // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2]; + // Result = (C - Luminance) * Saturation + Luminance; + + const XMVECTORF32 gvLuminance = { { { 0.2125f, 0.7154f, 0.0721f, 0.0f } } }; +#if defined(_XM_NO_INTRINSICS_) + float fLuminance = (vColor.vector4_f32[0] * gvLuminance.f[0]) + (vColor.vector4_f32[1] * gvLuminance.f[1]) + (vColor.vector4_f32[2] * gvLuminance.f[2]); + XMVECTOR vResult; + vResult.vector4_f32[0] = ((vColor.vector4_f32[0] - fLuminance) * fSaturation) + fLuminance; + vResult.vector4_f32[1] = ((vColor.vector4_f32[1] - fLuminance) * fSaturation) + fLuminance; + vResult.vector4_f32[2] = ((vColor.vector4_f32[2] - fLuminance) * fSaturation) + fLuminance; + vResult.vector4_f32[3] = vColor.vector4_f32[3]; + return vResult; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTOR vLuminance = XMVector3Dot(vColor, gvLuminance); + XMVECTOR vResult = vsubq_f32(vColor, vLuminance); + vResult = vmlaq_n_f32(vLuminance, vResult, fSaturation); + return vbslq_f32(g_XMSelect1110, vResult, vColor); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vLuminance = XMVector3Dot(vColor, gvLuminance); + // Splat fSaturation + XMVECTOR vSaturation = _mm_set_ps1(fSaturation); + // vResult = ((vColor-vLuminance)*vSaturation)+vLuminance; + XMVECTOR vResult = _mm_sub_ps(vColor, vLuminance); + vResult = XM_FMADD_PS(vResult, vSaturation, vLuminance); + // Retain w from the source color + vLuminance = _mm_shuffle_ps(vResult, vColor, 
_MM_SHUFFLE(3, 2, 2, 2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w + vResult = _mm_shuffle_ps(vResult, vLuminance, _MM_SHUFFLE(3, 0, 1, 0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorAdjustContrast +( + FXMVECTOR vColor, + float fContrast +) noexcept +{ + // Result = (vColor - 0.5f) * fContrast + 0.5f; + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + ((vColor.vector4_f32[0] - 0.5f) * fContrast) + 0.5f, + ((vColor.vector4_f32[1] - 0.5f) * fContrast) + 0.5f, + ((vColor.vector4_f32[2] - 0.5f) * fContrast) + 0.5f, + vColor.vector4_f32[3] // Leave W untouched + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTOR vResult = vsubq_f32(vColor, g_XMOneHalf.v); + vResult = vmlaq_n_f32(g_XMOneHalf.v, vResult, fContrast); + return vbslq_f32(g_XMSelect1110, vResult, vColor); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vScale = _mm_set_ps1(fContrast); // Splat the scale + XMVECTOR vResult = _mm_sub_ps(vColor, g_XMOneHalf); // Subtract 0.5f from the source (Saving source) + vResult = XM_FMADD_PS(vResult, vScale, g_XMOneHalf); +// Retain w from the source color + vScale = _mm_shuffle_ps(vResult, vColor, _MM_SHUFFLE(3, 2, 2, 2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w + vResult = _mm_shuffle_ps(vResult, vScale, _MM_SHUFFLE(3, 0, 1, 0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorRGBToHSL(FXMVECTOR rgb) noexcept +{ + XMVECTOR r = XMVectorSplatX(rgb); + XMVECTOR g = XMVectorSplatY(rgb); + XMVECTOR b = XMVectorSplatZ(rgb); + + XMVECTOR min = XMVectorMin(r, XMVectorMin(g, b)); + XMVECTOR max = XMVectorMax(r, XMVectorMax(g, b)); + + XMVECTOR l = XMVectorMultiply(XMVectorAdd(min, max), 
g_XMOneHalf); + + XMVECTOR d = XMVectorSubtract(max, min); + + XMVECTOR la = XMVectorSelect(rgb, l, g_XMSelect1110); + + if (XMVector3Less(d, g_XMEpsilon)) + { + // Achromatic, assume H and S of 0 + return XMVectorSelect(la, g_XMZero, g_XMSelect1100); + } + else + { + XMVECTOR s, h; + + XMVECTOR d2 = XMVectorAdd(min, max); + + if (XMVector3Greater(l, g_XMOneHalf)) + { + // d / (2-max-min) + s = XMVectorDivide(d, XMVectorSubtract(g_XMTwo, d2)); + } + else + { + // d / (max+min) + s = XMVectorDivide(d, d2); + } + + if (XMVector3Equal(r, max)) + { + // Red is max + h = XMVectorDivide(XMVectorSubtract(g, b), d); + } + else if (XMVector3Equal(g, max)) + { + // Green is max + h = XMVectorDivide(XMVectorSubtract(b, r), d); + h = XMVectorAdd(h, g_XMTwo); + } + else + { + // Blue is max + h = XMVectorDivide(XMVectorSubtract(r, g), d); + h = XMVectorAdd(h, g_XMFour); + } + + h = XMVectorDivide(h, g_XMSix); + + if (XMVector3Less(h, g_XMZero)) + h = XMVectorAdd(h, g_XMOne); + + XMVECTOR lha = XMVectorSelect(la, h, g_XMSelect1100); + return XMVectorSelect(s, lha, g_XMSelect1011); + } +} + +//------------------------------------------------------------------------------ + +namespace Internal +{ + + inline XMVECTOR XM_CALLCONV XMColorHue2Clr(FXMVECTOR p, FXMVECTOR q, FXMVECTOR h) noexcept + { + static const XMVECTORF32 oneSixth = { { { 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f } } }; + static const XMVECTORF32 twoThirds = { { { 2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f } } }; + + XMVECTOR t = h; + + if (XMVector3Less(t, g_XMZero)) + t = XMVectorAdd(t, g_XMOne); + + if (XMVector3Greater(t, g_XMOne)) + t = XMVectorSubtract(t, g_XMOne); + + if (XMVector3Less(t, oneSixth)) + { + // p + (q - p) * 6 * t + XMVECTOR t1 = XMVectorSubtract(q, p); + XMVECTOR t2 = XMVectorMultiply(g_XMSix, t); + return XMVectorMultiplyAdd(t1, t2, p); + } + + if (XMVector3Less(t, g_XMOneHalf)) + return q; + + if (XMVector3Less(t, twoThirds)) + { + // p + (q - p) * 6 * (2/3 - t) + XMVECTOR t1 
= XMVectorSubtract(q, p); + XMVECTOR t2 = XMVectorMultiply(g_XMSix, XMVectorSubtract(twoThirds, t)); + return XMVectorMultiplyAdd(t1, t2, p); + } + + return p; + } + +} // namespace Internal + +inline XMVECTOR XM_CALLCONV XMColorHSLToRGB(FXMVECTOR hsl) noexcept +{ + static const XMVECTORF32 oneThird = { { { 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f } } }; + + XMVECTOR s = XMVectorSplatY(hsl); + XMVECTOR l = XMVectorSplatZ(hsl); + + if (XMVector3NearEqual(s, g_XMZero, g_XMEpsilon)) + { + // Achromatic + return XMVectorSelect(hsl, l, g_XMSelect1110); + } + else + { + XMVECTOR h = XMVectorSplatX(hsl); + + XMVECTOR q; + if (XMVector3Less(l, g_XMOneHalf)) + { + q = XMVectorMultiply(l, XMVectorAdd(g_XMOne, s)); + } + else + { + q = XMVectorSubtract(XMVectorAdd(l, s), XMVectorMultiply(l, s)); + } + + XMVECTOR p = XMVectorSubtract(XMVectorMultiply(g_XMTwo, l), q); + + XMVECTOR r = DirectX::Internal::XMColorHue2Clr(p, q, XMVectorAdd(h, oneThird)); + XMVECTOR g = DirectX::Internal::XMColorHue2Clr(p, q, h); + XMVECTOR b = DirectX::Internal::XMColorHue2Clr(p, q, XMVectorSubtract(h, oneThird)); + + XMVECTOR rg = XMVectorSelect(g, r, g_XMSelect1000); + XMVECTOR ba = XMVectorSelect(hsl, b, g_XMSelect1110); + + return XMVectorSelect(ba, rg, g_XMSelect1100); + } +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorRGBToHSV(FXMVECTOR rgb) noexcept +{ + XMVECTOR r = XMVectorSplatX(rgb); + XMVECTOR g = XMVectorSplatY(rgb); + XMVECTOR b = XMVectorSplatZ(rgb); + + XMVECTOR min = XMVectorMin(r, XMVectorMin(g, b)); + XMVECTOR v = XMVectorMax(r, XMVectorMax(g, b)); + + XMVECTOR d = XMVectorSubtract(v, min); + + XMVECTOR s = (XMVector3NearEqual(v, g_XMZero, g_XMEpsilon)) ? 
g_XMZero : XMVectorDivide(d, v); + + if (XMVector3Less(d, g_XMEpsilon)) + { + // Achromatic, assume H of 0 + XMVECTOR hv = XMVectorSelect(v, g_XMZero, g_XMSelect1000); + XMVECTOR hva = XMVectorSelect(rgb, hv, g_XMSelect1110); + return XMVectorSelect(s, hva, g_XMSelect1011); + } + else + { + XMVECTOR h; + + if (XMVector3Equal(r, v)) + { + // Red is max + h = XMVectorDivide(XMVectorSubtract(g, b), d); + + if (XMVector3Less(g, b)) + h = XMVectorAdd(h, g_XMSix); + } + else if (XMVector3Equal(g, v)) + { + // Green is max + h = XMVectorDivide(XMVectorSubtract(b, r), d); + h = XMVectorAdd(h, g_XMTwo); + } + else + { + // Blue is max + h = XMVectorDivide(XMVectorSubtract(r, g), d); + h = XMVectorAdd(h, g_XMFour); + } + + h = XMVectorDivide(h, g_XMSix); + + XMVECTOR hv = XMVectorSelect(v, h, g_XMSelect1000); + XMVECTOR hva = XMVectorSelect(rgb, hv, g_XMSelect1110); + return XMVectorSelect(s, hva, g_XMSelect1011); + } +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorHSVToRGB(FXMVECTOR hsv) noexcept +{ + XMVECTOR h = XMVectorSplatX(hsv); + XMVECTOR s = XMVectorSplatY(hsv); + XMVECTOR v = XMVectorSplatZ(hsv); + + XMVECTOR h6 = XMVectorMultiply(h, g_XMSix); + + XMVECTOR i = XMVectorFloor(h6); + XMVECTOR f = XMVectorSubtract(h6, i); + + // p = v* (1-s) + XMVECTOR p = XMVectorMultiply(v, XMVectorSubtract(g_XMOne, s)); + + // q = v*(1-f*s) + XMVECTOR q = XMVectorMultiply(v, XMVectorSubtract(g_XMOne, XMVectorMultiply(f, s))); + + // t = v*(1 - (1-f)*s) + XMVECTOR t = XMVectorMultiply(v, XMVectorSubtract(g_XMOne, XMVectorMultiply(XMVectorSubtract(g_XMOne, f), s))); + + auto ii = static_cast(XMVectorGetX(XMVectorMod(i, g_XMSix))); + + XMVECTOR _rgb; + + switch (ii) + { + case 0: // rgb = vtp + { + XMVECTOR vt = XMVectorSelect(t, v, g_XMSelect1000); + _rgb = XMVectorSelect(p, vt, g_XMSelect1100); + } + break; + case 1: // rgb = qvp + { + XMVECTOR qv = XMVectorSelect(v, q, g_XMSelect1000); + _rgb = 
XMVectorSelect(p, qv, g_XMSelect1100); + } + break; + case 2: // rgb = pvt + { + XMVECTOR pv = XMVectorSelect(v, p, g_XMSelect1000); + _rgb = XMVectorSelect(t, pv, g_XMSelect1100); + } + break; + case 3: // rgb = pqv + { + XMVECTOR pq = XMVectorSelect(q, p, g_XMSelect1000); + _rgb = XMVectorSelect(v, pq, g_XMSelect1100); + } + break; + case 4: // rgb = tpv + { + XMVECTOR tp = XMVectorSelect(p, t, g_XMSelect1000); + _rgb = XMVectorSelect(v, tp, g_XMSelect1100); + } + break; + default: // rgb = vpq + { + XMVECTOR vp = XMVectorSelect(p, v, g_XMSelect1000); + _rgb = XMVectorSelect(q, vp, g_XMSelect1100); + } + break; + } + + return XMVectorSelect(hsv, _rgb, g_XMSelect1110); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorRGBToYUV(FXMVECTOR rgb) noexcept +{ + static const XMVECTORF32 Scale0 = { { { 0.299f, -0.147f, 0.615f, 0.0f } } }; + static const XMVECTORF32 Scale1 = { { { 0.587f, -0.289f, -0.515f, 0.0f } } }; + static const XMVECTORF32 Scale2 = { { { 0.114f, 0.436f, -0.100f, 0.0f } } }; + + XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); + XMVECTOR clr = XMVector3Transform(rgb, M); + + return XMVectorSelect(rgb, clr, g_XMSelect1110); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorYUVToRGB(FXMVECTOR yuv) noexcept +{ + static const XMVECTORF32 Scale1 = { { { 0.0f, -0.395f, 2.032f, 0.0f } } }; + static const XMVECTORF32 Scale2 = { { { 1.140f, -0.581f, 0.0f, 0.0f } } }; + + XMMATRIX M(g_XMOne, Scale1, Scale2, g_XMZero); + XMVECTOR clr = XMVector3Transform(yuv, M); + + return XMVectorSelect(yuv, clr, g_XMSelect1110); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorRGBToYUV_HD(FXMVECTOR rgb) noexcept +{ + static const XMVECTORF32 Scale0 = { { { 0.2126f, -0.0997f, 0.6150f, 0.0f } } }; + static const XMVECTORF32 Scale1 = { { { 0.7152f, -0.3354f, 
-0.5586f, 0.0f } } }; + static const XMVECTORF32 Scale2 = { { { 0.0722f, 0.4351f, -0.0564f, 0.0f } } }; + + XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); + XMVECTOR clr = XMVector3Transform(rgb, M); + + return XMVectorSelect(rgb, clr, g_XMSelect1110); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorYUVToRGB_HD(FXMVECTOR yuv) noexcept +{ + static const XMVECTORF32 Scale1 = { { { 0.0f, -0.2153f, 2.1324f, 0.0f } } }; + static const XMVECTORF32 Scale2 = { { { 1.2803f, -0.3806f, 0.0f, 0.0f } } }; + + XMMATRIX M(g_XMOne, Scale1, Scale2, g_XMZero); + XMVECTOR clr = XMVector3Transform(yuv, M); + + return XMVectorSelect(yuv, clr, g_XMSelect1110); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorRGBToYUV_UHD(FXMVECTOR rgb) noexcept +{ + static const XMVECTORF32 Scale0 = { { { 0.2627f, -0.1215f, 0.6150f, 0.0f } } }; + static const XMVECTORF32 Scale1 = { { { 0.6780f, -0.3136f, -0.5655f, 0.0f } } }; + static const XMVECTORF32 Scale2 = { { { 0.0593f, 0.4351f, -0.0495f, 0.0f } } }; + + XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); + XMVECTOR clr = XMVector3Transform(rgb, M); + + return XMVectorSelect(rgb, clr, g_XMSelect1110); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorYUVToRGB_UHD(FXMVECTOR yuv) noexcept +{ + static const XMVECTORF32 Scale1 = { { { 0.0f, -0.1891f, 2.1620f, 0.0f } } }; + static const XMVECTORF32 Scale2 = { { { 1.1989f, -0.4645f, 0.0f, 0.0f } } }; + + XMMATRIX M(g_XMOne, Scale1, Scale2, g_XMZero); + XMVECTOR clr = XMVector3Transform(yuv, M); + + return XMVectorSelect(yuv, clr, g_XMSelect1110); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorRGBToXYZ(FXMVECTOR rgb) noexcept +{ + static const XMVECTORF32 Scale0 = { { { 0.4887180f, 0.1762044f, 0.0000000f, 0.0f 
} } }; + static const XMVECTORF32 Scale1 = { { { 0.3106803f, 0.8129847f, 0.0102048f, 0.0f } } }; + static const XMVECTORF32 Scale2 = { { { 0.2006017f, 0.0108109f, 0.9897952f, 0.0f } } }; + static const XMVECTORF32 Scale = { { { 1.f / 0.17697f, 1.f / 0.17697f, 1.f / 0.17697f, 0.0f } } }; + + XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); + XMVECTOR clr = XMVectorMultiply(XMVector3Transform(rgb, M), Scale); + + return XMVectorSelect(rgb, clr, g_XMSelect1110); +} + +inline XMVECTOR XM_CALLCONV XMColorXYZToRGB(FXMVECTOR xyz) noexcept +{ + static const XMVECTORF32 Scale0 = { { { 2.3706743f, -0.5138850f, 0.0052982f, 0.0f } } }; + static const XMVECTORF32 Scale1 = { { { -0.9000405f, 1.4253036f, -0.0146949f, 0.0f } } }; + static const XMVECTORF32 Scale2 = { { { -0.4706338f, 0.0885814f, 1.0093968f, 0.0f } } }; + static const XMVECTORF32 Scale = { { { 0.17697f, 0.17697f, 0.17697f, 0.0f } } }; + + XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); + XMVECTOR clr = XMVector3Transform(XMVectorMultiply(xyz, Scale), M); + + return XMVectorSelect(xyz, clr, g_XMSelect1110); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorXYZToSRGB(FXMVECTOR xyz) noexcept +{ + static const XMVECTORF32 Scale0 = { { { 3.2406f, -0.9689f, 0.0557f, 0.0f } } }; + static const XMVECTORF32 Scale1 = { { { -1.5372f, 1.8758f, -0.2040f, 0.0f } } }; + static const XMVECTORF32 Scale2 = { { { -0.4986f, 0.0415f, 1.0570f, 0.0f } } }; + static const XMVECTORF32 Cutoff = { { { 0.0031308f, 0.0031308f, 0.0031308f, 0.0f } } }; + static const XMVECTORF32 Exp = { { { 1.0f / 2.4f, 1.0f / 2.4f, 1.0f / 2.4f, 1.0f } } }; + + XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); + XMVECTOR lclr = XMVector3Transform(xyz, M); + + XMVECTOR sel = XMVectorGreater(lclr, Cutoff); + + // clr = 12.92 * lclr for lclr <= 0.0031308f + XMVECTOR smallC = XMVectorMultiply(lclr, g_XMsrgbScale); + + // clr = (1+a)*pow(lclr, 1/2.4) - a for lclr > 0.0031308 (where a = 0.055) + 
XMVECTOR largeC = XMVectorSubtract(XMVectorMultiply(g_XMsrgbA1, XMVectorPow(lclr, Exp)), g_XMsrgbA); + + XMVECTOR clr = XMVectorSelect(smallC, largeC, sel); + + return XMVectorSelect(xyz, clr, g_XMSelect1110); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorSRGBToXYZ(FXMVECTOR srgb) noexcept +{ + static const XMVECTORF32 Scale0 = { { { 0.4124f, 0.2126f, 0.0193f, 0.0f } } }; + static const XMVECTORF32 Scale1 = { { { 0.3576f, 0.7152f, 0.1192f, 0.0f } } }; + static const XMVECTORF32 Scale2 = { { { 0.1805f, 0.0722f, 0.9505f, 0.0f } } }; + static const XMVECTORF32 Cutoff = { { { 0.04045f, 0.04045f, 0.04045f, 0.0f } } }; + static const XMVECTORF32 Exp = { { { 2.4f, 2.4f, 2.4f, 1.0f } } }; + + XMVECTOR sel = XMVectorGreater(srgb, Cutoff); + + // lclr = clr / 12.92 + XMVECTOR smallC = XMVectorDivide(srgb, g_XMsrgbScale); + + // lclr = pow( (clr + a) / (1+a), 2.4 ) + XMVECTOR largeC = XMVectorPow(XMVectorDivide(XMVectorAdd(srgb, g_XMsrgbA), g_XMsrgbA1), Exp); + + XMVECTOR lclr = XMVectorSelect(smallC, largeC, sel); + + XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); + XMVECTOR clr = XMVector3Transform(lclr, M); + + return XMVectorSelect(srgb, clr, g_XMSelect1110); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorRGBToSRGB(FXMVECTOR rgb) noexcept +{ + static const XMVECTORF32 Cutoff = { { { 0.0031308f, 0.0031308f, 0.0031308f, 1.f } } }; + static const XMVECTORF32 Linear = { { { 12.92f, 12.92f, 12.92f, 1.f } } }; + static const XMVECTORF32 Scale = { { { 1.055f, 1.055f, 1.055f, 1.f } } }; + static const XMVECTORF32 Bias = { { { 0.055f, 0.055f, 0.055f, 0.f } } }; + static const XMVECTORF32 InvGamma = { { { 1.0f / 2.4f, 1.0f / 2.4f, 1.0f / 2.4f, 1.f } } }; + + XMVECTOR V = XMVectorSaturate(rgb); + XMVECTOR V0 = XMVectorMultiply(V, Linear); + XMVECTOR V1 = XMVectorSubtract(XMVectorMultiply(Scale, XMVectorPow(V, InvGamma)), Bias); + 
XMVECTOR select = XMVectorLess(V, Cutoff); + V = XMVectorSelect(V1, V0, select); + return XMVectorSelect(rgb, V, g_XMSelect1110); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMColorSRGBToRGB(FXMVECTOR srgb) noexcept +{ + static const XMVECTORF32 Cutoff = { { { 0.04045f, 0.04045f, 0.04045f, 1.f } } }; + static const XMVECTORF32 ILinear = { { { 1.f / 12.92f, 1.f / 12.92f, 1.f / 12.92f, 1.f } } }; + static const XMVECTORF32 Scale = { { { 1.f / 1.055f, 1.f / 1.055f, 1.f / 1.055f, 1.f } } }; + static const XMVECTORF32 Bias = { { { 0.055f, 0.055f, 0.055f, 0.f } } }; + static const XMVECTORF32 Gamma = { { { 2.4f, 2.4f, 2.4f, 1.f } } }; + + XMVECTOR V = XMVectorSaturate(srgb); + XMVECTOR V0 = XMVectorMultiply(V, ILinear); + XMVECTOR V1 = XMVectorPow(XMVectorMultiply(XMVectorAdd(V, Bias), Scale), Gamma); + XMVECTOR select = XMVectorGreater(V, Cutoff); + V = XMVectorSelect(V0, V1, select); + return XMVectorSelect(srgb, V, g_XMSelect1110); +} + +/**************************************************************************** + * + * Miscellaneous + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline bool XMVerifyCPUSupport() noexcept +{ +#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + int CPUInfo[4] = { -1 }; +#if defined(__clang__) || defined(__GNUC__) + __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 0); +#endif + +#ifdef __AVX2__ + if (CPUInfo[0] < 7) + return false; +#else + if (CPUInfo[0] < 1) + return false; +#endif + +#if defined(__clang__) || defined(__GNUC__) + __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuid(CPUInfo, 1); +#endif + +#if defined(__AVX2__) || defined(_XM_AVX2_INTRINSICS_) + // The compiler can emit FMA3 instructions even without explicit intrinsics use + if ((CPUInfo[2] & 
0x38081001) != 0x38081001) + return false; // No F16C/AVX/OSXSAVE/SSE4.1/FMA3/SSE3 support +#elif defined(_XM_FMA3_INTRINSICS_) && defined(_XM_F16C_INTRINSICS_) + if ((CPUInfo[2] & 0x38081001) != 0x38081001) + return false; // No F16C/AVX/OSXSAVE/SSE4.1/FMA3/SSE3 support +#elif defined(_XM_FMA3_INTRINSICS_) + if ((CPUInfo[2] & 0x18081001) != 0x18081001) + return false; // No AVX/OSXSAVE/SSE4.1/FMA3/SSE3 support +#elif defined(_XM_F16C_INTRINSICS_) + if ((CPUInfo[2] & 0x38080001) != 0x38080001) + return false; // No F16C/AVX/OSXSAVE/SSE4.1/SSE3 support +#elif defined(__AVX__) || defined(_XM_AVX_INTRINSICS_) + if ((CPUInfo[2] & 0x18080001) != 0x18080001) + return false; // No AVX/OSXSAVE/SSE4.1/SSE3 support +#elif defined(_XM_SSE4_INTRINSICS_) + if ((CPUInfo[2] & 0x80001) != 0x80001) + return false; // No SSE3/SSE4.1 support +#elif defined(_XM_SSE3_INTRINSICS_) + if (!(CPUInfo[2] & 0x1)) + return false; // No SSE3 support +#endif + + // The x64 processor model requires SSE2 support, but no harm in checking + if ((CPUInfo[3] & 0x6000000) != 0x6000000) + return false; // No SSE2/SSE support + +#if defined(__AVX2__) || defined(_XM_AVX2_INTRINSICS_) +#if defined(__clang__) || defined(__GNUC__) + __cpuid_count(7, 0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +#else + __cpuidex(CPUInfo, 7, 0); +#endif + if (!(CPUInfo[1] & 0x20)) + return false; // No AVX2 support +#endif + + return true; +#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + // ARM-NEON support is required for the Windows on ARM platform + return true; +#else + // No intrinsics path always supported + return true; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMFresnelTerm +( + FXMVECTOR CosIncidentAngle, + FXMVECTOR RefractionIndex +) noexcept +{ + assert(!XMVector4IsInfinite(CosIncidentAngle)); + + // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where + // c 
= CosIncidentAngle + // g = sqrt(c^2 + RefractionIndex^2 - 1) + +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + XMVECTOR G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v); + G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G); + G = XMVectorAbs(G); + G = XMVectorSqrt(G); + + XMVECTOR S = XMVectorAdd(G, CosIncidentAngle); + XMVECTOR D = XMVectorSubtract(G, CosIncidentAngle); + + XMVECTOR V0 = XMVectorMultiply(D, D); + XMVECTOR V1 = XMVectorMultiply(S, S); + V1 = XMVectorReciprocal(V1); + V0 = XMVectorMultiply(g_XMOneHalf.v, V0); + V0 = XMVectorMultiply(V0, V1); + + XMVECTOR V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v); + XMVECTOR V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v); + V2 = XMVectorMultiply(V2, V2); + V3 = XMVectorMultiply(V3, V3); + V3 = XMVectorReciprocal(V3); + V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v); + + XMVECTOR Result = XMVectorMultiply(V0, V2); + + Result = XMVectorSaturate(Result); + + return Result; + +#elif defined(_XM_SSE_INTRINSICS_) + // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2)) + XMVECTOR G = _mm_mul_ps(RefractionIndex, RefractionIndex); + XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle, CosIncidentAngle); + G = _mm_sub_ps(G, g_XMOne); + vTemp = _mm_add_ps(vTemp, G); + // max((0-vTemp),vTemp) == abs(vTemp) + // The abs is needed to deal with refraction and cosine being zero + G = _mm_setzero_ps(); + G = _mm_sub_ps(G, vTemp); + G = _mm_max_ps(G, vTemp); + // Last operation, the sqrt() + G = _mm_sqrt_ps(G); + + // Calc G-C and G+C + XMVECTOR GAddC = _mm_add_ps(G, CosIncidentAngle); + XMVECTOR GSubC = _mm_sub_ps(G, CosIncidentAngle); + // Perform the term (0.5f *(g - c)^2) / (g + c)^2 + XMVECTOR vResult = _mm_mul_ps(GSubC, GSubC); + vTemp = _mm_mul_ps(GAddC, GAddC); + vResult = _mm_mul_ps(vResult, g_XMOneHalf); + vResult = _mm_div_ps(vResult, vTemp); + // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) + GAddC = 
_mm_mul_ps(GAddC, CosIncidentAngle); + GSubC = _mm_mul_ps(GSubC, CosIncidentAngle); + GAddC = _mm_sub_ps(GAddC, g_XMOne); + GSubC = _mm_add_ps(GSubC, g_XMOne); + GAddC = _mm_mul_ps(GAddC, GAddC); + GSubC = _mm_mul_ps(GSubC, GSubC); + GAddC = _mm_div_ps(GAddC, GSubC); + GAddC = _mm_add_ps(GAddC, g_XMOne); + // Multiply the two term parts + vResult = _mm_mul_ps(vResult, GAddC); + // Clamp to 0.0 - 1.0f + vResult = _mm_max_ps(vResult, g_XMZero); + vResult = _mm_min_ps(vResult, g_XMOne); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XMScalarNearEqual +( + float S1, + float S2, + float Epsilon +) noexcept +{ + float Delta = S1 - S2; + return (fabsf(Delta) <= Epsilon); +} + +//------------------------------------------------------------------------------ +// Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI +inline float XMScalarModAngle(float Angle) noexcept +{ + // Note: The modulo is performed with unsigned math only to work + // around a precision error on numbers that are close to PI + + // Normalize the range from 0.0f to XM_2PI + Angle = Angle + XM_PI; + // Perform the modulo, unsigned + float fTemp = fabsf(Angle); + fTemp = fTemp - (XM_2PI * static_cast(static_cast(fTemp / XM_2PI))); + // Restore the number to the range of -XM_PI to XM_PI-epsilon + fTemp = fTemp - XM_PI; + // If the modulo'd value was negative, restore negation + if (Angle < 0.0f) + { + fTemp = -fTemp; + } + return fTemp; +} + +//------------------------------------------------------------------------------ + +inline float XMScalarSin(float Value) noexcept +{ + // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. 
+ float quotient = XM_1DIV2PI * Value; + if (Value >= 0.0f) + { + quotient = static_cast(static_cast(quotient + 0.5f)); + } + else + { + quotient = static_cast(static_cast(quotient - 0.5f)); + } + float y = Value - XM_2PI * quotient; + + // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). + if (y > XM_PIDIV2) + { + y = XM_PI - y; + } + else if (y < -XM_PIDIV2) + { + y = -XM_PI - y; + } + + // 11-degree minimax approximation + float y2 = y * y; + return (((((-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f) * y2 + 0.0083333310f) * y2 - 0.16666667f) * y2 + 1.0f) * y; +} + +//------------------------------------------------------------------------------ + +inline float XMScalarSinEst(float Value) noexcept +{ + // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. + float quotient = XM_1DIV2PI * Value; + if (Value >= 0.0f) + { + quotient = static_cast(static_cast(quotient + 0.5f)); + } + else + { + quotient = static_cast(static_cast(quotient - 0.5f)); + } + float y = Value - XM_2PI * quotient; + + // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). + if (y > XM_PIDIV2) + { + y = XM_PI - y; + } + else if (y < -XM_PIDIV2) + { + y = -XM_PI - y; + } + + // 7-degree minimax approximation + float y2 = y * y; + return (((-0.00018524670f * y2 + 0.0083139502f) * y2 - 0.16665852f) * y2 + 1.0f) * y; +} + +//------------------------------------------------------------------------------ + +inline float XMScalarCos(float Value) noexcept +{ + // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. + float quotient = XM_1DIV2PI * Value; + if (Value >= 0.0f) + { + quotient = static_cast(static_cast(quotient + 0.5f)); + } + else + { + quotient = static_cast(static_cast(quotient - 0.5f)); + } + float y = Value - XM_2PI * quotient; + + // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x). 
+ float sign; + if (y > XM_PIDIV2) + { + y = XM_PI - y; + sign = -1.0f; + } + else if (y < -XM_PIDIV2) + { + y = -XM_PI - y; + sign = -1.0f; + } + else + { + sign = +1.0f; + } + + // 10-degree minimax approximation + float y2 = y * y; + float p = ((((-2.6051615e-07f * y2 + 2.4760495e-05f) * y2 - 0.0013888378f) * y2 + 0.041666638f) * y2 - 0.5f) * y2 + 1.0f; + return sign * p; +} + +//------------------------------------------------------------------------------ + +inline float XMScalarCosEst(float Value) noexcept +{ + // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. + float quotient = XM_1DIV2PI * Value; + if (Value >= 0.0f) + { + quotient = static_cast(static_cast(quotient + 0.5f)); + } + else + { + quotient = static_cast(static_cast(quotient - 0.5f)); + } + float y = Value - XM_2PI * quotient; + + // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x). + float sign; + if (y > XM_PIDIV2) + { + y = XM_PI - y; + sign = -1.0f; + } + else if (y < -XM_PIDIV2) + { + y = -XM_PI - y; + sign = -1.0f; + } + else + { + sign = +1.0f; + } + + // 6-degree minimax approximation + float y2 = y * y; + float p = ((-0.0012712436f * y2 + 0.041493919f) * y2 - 0.49992746f) * y2 + 1.0f; + return sign * p; +} + +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline void XMScalarSinCos +( + float* pSin, + float* pCos, + float Value +) noexcept +{ + assert(pSin); + assert(pCos); + + // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. + float quotient = XM_1DIV2PI * Value; + if (Value >= 0.0f) + { + quotient = static_cast(static_cast(quotient + 0.5f)); + } + else + { + quotient = static_cast(static_cast(quotient - 0.5f)); + } + float y = Value - XM_2PI * quotient; + + // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). 
+ float sign; + if (y > XM_PIDIV2) + { + y = XM_PI - y; + sign = -1.0f; + } + else if (y < -XM_PIDIV2) + { + y = -XM_PI - y; + sign = -1.0f; + } + else + { + sign = +1.0f; + } + + float y2 = y * y; + + // 11-degree minimax approximation + *pSin = (((((-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f) * y2 + 0.0083333310f) * y2 - 0.16666667f) * y2 + 1.0f) * y; + + // 10-degree minimax approximation + float p = ((((-2.6051615e-07f * y2 + 2.4760495e-05f) * y2 - 0.0013888378f) * y2 + 0.041666638f) * y2 - 0.5f) * y2 + 1.0f; + *pCos = sign * p; +} + +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline void XMScalarSinCosEst +( + float* pSin, + float* pCos, + float Value +) noexcept +{ + assert(pSin); + assert(pCos); + + // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. + float quotient = XM_1DIV2PI * Value; + if (Value >= 0.0f) + { + quotient = static_cast(static_cast(quotient + 0.5f)); + } + else + { + quotient = static_cast(static_cast(quotient - 0.5f)); + } + float y = Value - XM_2PI * quotient; + + // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). + float sign; + if (y > XM_PIDIV2) + { + y = XM_PI - y; + sign = -1.0f; + } + else if (y < -XM_PIDIV2) + { + y = -XM_PI - y; + sign = -1.0f; + } + else + { + sign = +1.0f; + } + + float y2 = y * y; + + // 7-degree minimax approximation + *pSin = (((-0.00018524670f * y2 + 0.0083139502f) * y2 - 0.16665852f) * y2 + 1.0f) * y; + + // 6-degree minimax approximation + float p = ((-0.0012712436f * y2 + 0.041493919f) * y2 - 0.49992746f) * y2 + 1.0f; + *pCos = sign * p; +} + +//------------------------------------------------------------------------------ + +inline float XMScalarASin(float Value) noexcept +{ + // Clamp input to [-1,1]. 
+ bool nonnegative = (Value >= 0.0f); + float x = fabsf(Value); + float omx = 1.0f - x; + if (omx < 0.0f) + { + omx = 0.0f; + } + float root = sqrtf(omx); + + // 7-degree minimax approximation + float result = ((((((-0.0012624911f * x + 0.0066700901f) * x - 0.0170881256f) * x + 0.0308918810f) * x - 0.0501743046f) * x + 0.0889789874f) * x - 0.2145988016f) * x + 1.5707963050f; + result *= root; // acos(|x|) + + // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x) + return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2); +} + +//------------------------------------------------------------------------------ + +inline float XMScalarASinEst(float Value) noexcept +{ + // Clamp input to [-1,1]. + bool nonnegative = (Value >= 0.0f); + float x = fabsf(Value); + float omx = 1.0f - x; + if (omx < 0.0f) + { + omx = 0.0f; + } + float root = sqrtf(omx); + + // 3-degree minimax approximation + float result = ((-0.0187293f * x + 0.0742610f) * x - 0.2121144f) * x + 1.5707288f; + result *= root; // acos(|x|) + + // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x) + return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2); +} + +//------------------------------------------------------------------------------ + +inline float XMScalarACos(float Value) noexcept +{ + // Clamp input to [-1,1]. + bool nonnegative = (Value >= 0.0f); + float x = fabsf(Value); + float omx = 1.0f - x; + if (omx < 0.0f) + { + omx = 0.0f; + } + float root = sqrtf(omx); + + // 7-degree minimax approximation + float result = ((((((-0.0012624911f * x + 0.0066700901f) * x - 0.0170881256f) * x + 0.0308918810f) * x - 0.0501743046f) * x + 0.0889789874f) * x - 0.2145988016f) * x + 1.5707963050f; + result *= root; + + // acos(x) = pi - acos(-x) when x < 0 + return (nonnegative ? result : XM_PI - result); +} + +//------------------------------------------------------------------------------ + +inline float XMScalarACosEst(float Value) noexcept +{ + // Clamp input to [-1,1]. 
+ bool nonnegative = (Value >= 0.0f); + float x = fabsf(Value); + float omx = 1.0f - x; + if (omx < 0.0f) + { + omx = 0.0f; + } + float root = sqrtf(omx); + + // 3-degree minimax approximation + float result = ((-0.0187293f * x + 0.0742610f) * x - 0.2121144f) * x + 1.5707288f; + result *= root; + + // acos(x) = pi - acos(-x) when x < 0 + return (nonnegative ? result : XM_PI - result); +} + diff --git a/Sdk/External/DirectXMath/Inc/DirectXMathVector.inl b/Sdk/External/DirectXMath/Inc/DirectXMathVector.inl new file mode 100644 index 0000000..35d2e2a --- /dev/null +++ b/Sdk/External/DirectXMath/Inc/DirectXMathVector.inl @@ -0,0 +1,14819 @@ +//------------------------------------------------------------------------------------- +// DirectXMathVector.inl -- SIMD C++ Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_XM_NO_INTRINSICS_) +#define XMISNAN(x) isnan(x) +#define XMISINF(x) isinf(x) +#endif + +#if defined(_XM_SSE_INTRINSICS_) + +#define XM3UNPACK3INTO4(l1, l2, l3) \ + XMVECTOR V3 = _mm_shuffle_ps(l2, l3, _MM_SHUFFLE(0, 0, 3, 2));\ + XMVECTOR V2 = _mm_shuffle_ps(l2, l1, _MM_SHUFFLE(3, 3, 1, 0));\ + V2 = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 0, 2));\ + XMVECTOR V4 = _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(L3), 32 / 8)) + +#define XM3PACK4INTO3(v2x) \ + v2x = _mm_shuffle_ps(V2, V3, _MM_SHUFFLE(1, 0, 2, 1));\ + V2 = _mm_shuffle_ps(V2, V1, _MM_SHUFFLE(2, 2, 0, 0));\ + V1 = _mm_shuffle_ps(V1, V2, _MM_SHUFFLE(0, 2, 1, 0));\ + V3 = _mm_shuffle_ps(V3, V4, _MM_SHUFFLE(0, 0, 2, 2));\ + V3 = _mm_shuffle_ps(V3, V4, _MM_SHUFFLE(2, 1, 2, 0)) + +#endif + +/**************************************************************************** + * + * General Vector + * + 
****************************************************************************/ + + //------------------------------------------------------------------------------ + // Assignment operations + //------------------------------------------------------------------------------ + + //------------------------------------------------------------------------------ + // Return a vector with all elements equaling zero +inline XMVECTOR XM_CALLCONV XMVectorZero() noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { 0.0f, 0.0f, 0.0f, 0.0f } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vdupq_n_f32(0); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_setzero_ps(); +#endif +} + +//------------------------------------------------------------------------------ +// Initialize a vector with four floating point values +inline XMVECTOR XM_CALLCONV XMVectorSet +( + float x, + float y, + float z, + float w +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { x, y, z, w } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t V0 = vcreate_f32( + static_cast(*reinterpret_cast(&x)) + | (static_cast(*reinterpret_cast(&y)) << 32)); + float32x2_t V1 = vcreate_f32( + static_cast(*reinterpret_cast(&z)) + | (static_cast(*reinterpret_cast(&w)) << 32)); + return vcombine_f32(V0, V1); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_set_ps(w, z, y, x); +#endif +} + +//------------------------------------------------------------------------------ +// Initialize a vector with four integer values +inline XMVECTOR XM_CALLCONV XMVectorSetInt +( + uint32_t x, + uint32_t y, + uint32_t z, + uint32_t w +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 vResult = { { { x, y, z, w } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t V0 = vcreate_u32(static_cast(x) | (static_cast(y) << 32)); + uint32x2_t V1 = vcreate_u32(static_cast(z) | (static_cast(w) << 32)); + return 
vreinterpretq_f32_u32(vcombine_u32(V0, V1)); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i V = _mm_set_epi32(static_cast(w), static_cast(z), static_cast(y), static_cast(x)); + return _mm_castsi128_ps(V); +#endif +} + +//------------------------------------------------------------------------------ +// Initialize a vector with a replicated floating point value +inline XMVECTOR XM_CALLCONV XMVectorReplicate(float Value) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult; + vResult.f[0] = + vResult.f[1] = + vResult.f[2] = + vResult.f[3] = Value; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vdupq_n_f32(Value); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_set_ps1(Value); +#endif +} + +//------------------------------------------------------------------------------ +// Initialize a vector with a replicated floating point value passed by pointer +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorReplicatePtr(const float* pValue) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + float Value = pValue[0]; + XMVECTORF32 vResult; + vResult.f[0] = + vResult.f[1] = + vResult.f[2] = + vResult.f[3] = Value; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vld1q_dup_f32(pValue); +#elif defined(_XM_AVX_INTRINSICS_) + return _mm_broadcast_ss(pValue); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_load_ps1(pValue); +#endif +} + +//------------------------------------------------------------------------------ +// Initialize a vector with a replicated integer value +inline XMVECTOR XM_CALLCONV XMVectorReplicateInt(uint32_t Value) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 vResult; + vResult.u[0] = + vResult.u[1] = + vResult.u[2] = + vResult.u[3] = Value; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vdupq_n_u32(Value)); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i vTemp = _mm_set1_epi32(static_cast(Value)); + return _mm_castsi128_ps(vTemp); +#endif +} 
+ +//------------------------------------------------------------------------------ +// Initialize a vector with a replicated integer value passed by pointer +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorReplicateIntPtr(const uint32_t* pValue) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + uint32_t Value = pValue[0]; + XMVECTORU32 vResult; + vResult.u[0] = + vResult.u[1] = + vResult.u[2] = + vResult.u[3] = Value; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vld1q_dup_u32(pValue)); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_load_ps1(reinterpret_cast(pValue)); +#endif +} + +//------------------------------------------------------------------------------ +// Initialize a vector with all bits set (true mask) +inline XMVECTOR XM_CALLCONV XMVectorTrueInt() noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 vResult = { { { 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_s32(vdupq_n_s32(-1)); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i V = _mm_set1_epi32(-1); + return _mm_castsi128_ps(V); +#endif +} + +//------------------------------------------------------------------------------ +// Initialize a vector with all bits clear (false mask) +inline XMVECTOR XM_CALLCONV XMVectorFalseInt() noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { 0.0f, 0.0f, 0.0f, 0.0f } } }; + return vResult; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vdupq_n_u32(0)); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_setzero_ps(); +#endif +} + +//------------------------------------------------------------------------------ +// Replicate the x component of the vector +inline XMVECTOR XM_CALLCONV XMVectorSplatX(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult; + vResult.f[0] = + vResult.f[1] = + vResult.f[2] = + vResult.f[3] = V.vector4_f32[0]; + 
return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vdupq_lane_f32(vget_low_f32(V), 0); +#elif defined(_XM_AVX2_INTRINSICS_) && defined(_XM_FAVOR_INTEL_) + return _mm_broadcastss_ps(V); +#elif defined(_XM_SSE_INTRINSICS_) + return XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); +#endif +} + +//------------------------------------------------------------------------------ +// Replicate the y component of the vector +inline XMVECTOR XM_CALLCONV XMVectorSplatY(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult; + vResult.f[0] = + vResult.f[1] = + vResult.f[2] = + vResult.f[3] = V.vector4_f32[1]; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vdupq_lane_f32(vget_low_f32(V), 1); +#elif defined(_XM_SSE_INTRINSICS_) + return XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); +#endif +} + +//------------------------------------------------------------------------------ +// Replicate the z component of the vector +inline XMVECTOR XM_CALLCONV XMVectorSplatZ(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult; + vResult.f[0] = + vResult.f[1] = + vResult.f[2] = + vResult.f[3] = V.vector4_f32[2]; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vdupq_lane_f32(vget_high_f32(V), 0); +#elif defined(_XM_SSE_INTRINSICS_) + return XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); +#endif +} + +//------------------------------------------------------------------------------ +// Replicate the w component of the vector +inline XMVECTOR XM_CALLCONV XMVectorSplatW(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult; + vResult.f[0] = + vResult.f[1] = + vResult.f[2] = + vResult.f[3] = V.vector4_f32[3]; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vdupq_lane_f32(vget_high_f32(V), 1); +#elif defined(_XM_SSE_INTRINSICS_) + return XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); +#endif +} + 
+//------------------------------------------------------------------------------ +// Return a vector of 1.0f,1.0f,1.0f,1.0f +inline XMVECTOR XM_CALLCONV XMVectorSplatOne() noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult; + vResult.f[0] = + vResult.f[1] = + vResult.f[2] = + vResult.f[3] = 1.0f; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vdupq_n_f32(1.0f); +#elif defined(_XM_SSE_INTRINSICS_) + return g_XMOne; +#endif +} + +//------------------------------------------------------------------------------ +// Return a vector of INF,INF,INF,INF +inline XMVECTOR XM_CALLCONV XMVectorSplatInfinity() noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 vResult; + vResult.u[0] = + vResult.u[1] = + vResult.u[2] = + vResult.u[3] = 0x7F800000; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vdupq_n_u32(0x7F800000)); +#elif defined(_XM_SSE_INTRINSICS_) + return g_XMInfinity; +#endif +} + +//------------------------------------------------------------------------------ +// Return a vector of Q_NAN,Q_NAN,Q_NAN,Q_NAN +inline XMVECTOR XM_CALLCONV XMVectorSplatQNaN() noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 vResult; + vResult.u[0] = + vResult.u[1] = + vResult.u[2] = + vResult.u[3] = 0x7FC00000; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vdupq_n_u32(0x7FC00000)); +#elif defined(_XM_SSE_INTRINSICS_) + return g_XMQNaN; +#endif +} + +//------------------------------------------------------------------------------ +// Return a vector of 1.192092896e-7f,1.192092896e-7f,1.192092896e-7f,1.192092896e-7f +inline XMVECTOR XM_CALLCONV XMVectorSplatEpsilon() noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 vResult; + vResult.u[0] = + vResult.u[1] = + vResult.u[2] = + vResult.u[3] = 0x34000000; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vdupq_n_u32(0x34000000)); +#elif 
defined(_XM_SSE_INTRINSICS_) + return g_XMEpsilon; +#endif +} + +//------------------------------------------------------------------------------ +// Return a vector of -0.0f (0x80000000),-0.0f,-0.0f,-0.0f +inline XMVECTOR XM_CALLCONV XMVectorSplatSignMask() noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 vResult; + vResult.u[0] = + vResult.u[1] = + vResult.u[2] = + vResult.u[3] = 0x80000000U; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vdupq_n_u32(0x80000000U)); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i V = _mm_set1_epi32(static_cast(0x80000000)); + return _mm_castsi128_ps(V); +#endif +} + +//------------------------------------------------------------------------------ +// Return a floating point value via an index. This is not a recommended +// function to use due to performance loss. +inline float XM_CALLCONV XMVectorGetByIndex(FXMVECTOR V, size_t i) noexcept +{ + assert(i < 4); + _Analysis_assume_(i < 4); +#if defined(_XM_NO_INTRINSICS_) + return V.vector4_f32[i]; +#else + XMVECTORF32 U; + U.v = V; + return U.f[i]; +#endif +} + +//------------------------------------------------------------------------------ +// Return the X component in an FPU register. +inline float XM_CALLCONV XMVectorGetX(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return V.vector4_f32[0]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vgetq_lane_f32(V, 0); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_cvtss_f32(V); +#endif +} + +// Return the Y component in an FPU register. +inline float XM_CALLCONV XMVectorGetY(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return V.vector4_f32[1]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vgetq_lane_f32(V, 1); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + return _mm_cvtss_f32(vTemp); +#endif +} + +// Return the Z component in an FPU register. 
+inline float XM_CALLCONV XMVectorGetZ(FXMVECTOR V) noexcept
+{
+#if defined(_XM_NO_INTRINSICS_)
+    return V.vector4_f32[2];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    return vgetq_lane_f32(V, 2);
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Broadcast z to all lanes and read it back from lane 0.
+    return _mm_cvtss_f32(XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)));
+#endif
+}
+
+// Return the W component in an FPU register.
+inline float XM_CALLCONV XMVectorGetW(FXMVECTOR V) noexcept
+{
+#if defined(_XM_NO_INTRINSICS_)
+    return V.vector4_f32[3];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    return vgetq_lane_f32(V, 3);
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Broadcast w to all lanes and read it back from lane 0.
+    return _mm_cvtss_f32(XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)));
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Write component i (0-3) of V to the float pointed to by f.
+_Use_decl_annotations_
+inline void XM_CALLCONV XMVectorGetByIndexPtr(float* f, FXMVECTOR V, size_t i) noexcept
+{
+    assert(f != nullptr);
+    assert(i < 4);
+    _Analysis_assume_(i < 4);
+#if defined(_XM_NO_INTRINSICS_)
+    *f = V.vector4_f32[i];
+#else
+    // Read the lane through the XMVECTORF32 wrapper's float array.
+    XMVECTORF32 lanes;
+    lanes.v = V;
+    *f = lanes.f[i];
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Write the X component of V to the float pointed to by x.
+_Use_decl_annotations_
+inline void XM_CALLCONV XMVectorGetXPtr(float* x, FXMVECTOR V) noexcept
+{
+    assert(x != nullptr);
+#if defined(_XM_NO_INTRINSICS_)
+    *x = V.vector4_f32[0];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    vst1q_lane_f32(x, V, 0);
+#elif defined(_XM_SSE_INTRINSICS_)
+    _mm_store_ss(x, V);
+#endif
+}
+
+// Store the Y component into a 32 bit float location in memory.
+_Use_decl_annotations_ +inline void XM_CALLCONV XMVectorGetYPtr(float* y, FXMVECTOR V) noexcept +{ + assert(y != nullptr); +#if defined(_XM_NO_INTRINSICS_) + *y = V.vector4_f32[1]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + vst1q_lane_f32(y, V, 1); +#elif defined(_XM_SSE4_INTRINSICS_) + * (reinterpret_cast(y)) = _mm_extract_ps(V, 1); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + _mm_store_ss(y, vResult); +#endif +} + +// Store the Z component into a 32 bit float location in memory. +_Use_decl_annotations_ +inline void XM_CALLCONV XMVectorGetZPtr(float* z, FXMVECTOR V) noexcept +{ + assert(z != nullptr); +#if defined(_XM_NO_INTRINSICS_) + *z = V.vector4_f32[2]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + vst1q_lane_f32(z, V, 2); +#elif defined(_XM_SSE4_INTRINSICS_) + * (reinterpret_cast(z)) = _mm_extract_ps(V, 2); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + _mm_store_ss(z, vResult); +#endif +} + +// Store the W component into a 32 bit float location in memory. +_Use_decl_annotations_ +inline void XM_CALLCONV XMVectorGetWPtr(float* w, FXMVECTOR V) noexcept +{ + assert(w != nullptr); +#if defined(_XM_NO_INTRINSICS_) + *w = V.vector4_f32[3]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + vst1q_lane_f32(w, V, 3); +#elif defined(_XM_SSE4_INTRINSICS_) + * (reinterpret_cast(w)) = _mm_extract_ps(V, 3); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + _mm_store_ss(w, vResult); +#endif +} + +//------------------------------------------------------------------------------ + +// Return an integer value via an index. This is not a recommended +// function to use due to performance loss. 
+inline uint32_t XM_CALLCONV XMVectorGetIntByIndex(FXMVECTOR V, size_t i) noexcept
+{
+    assert(i < 4);
+    _Analysis_assume_(i < 4);
+#if defined(_XM_NO_INTRINSICS_)
+    return V.vector4_u32[i];
+#else
+    // Copy into the XMVECTORU32 wrapper so the lane can be read through its uint32_t array.
+    XMVECTORU32 U;
+    U.v = V;
+    return U.u[i];
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Return the X component in an integer register.
+inline uint32_t XM_CALLCONV XMVectorGetIntX(FXMVECTOR V) noexcept
+{
+#if defined(_XM_NO_INTRINSICS_)
+    return V.vector4_u32[0];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    return vgetq_lane_u32(vreinterpretq_u32_f32(V), 0);
+#elif defined(_XM_SSE_INTRINSICS_)
+    // _mm_cvtsi128_si32 yields a signed int; convert to the unsigned return type.
+    return static_cast<uint32_t>(_mm_cvtsi128_si32(_mm_castps_si128(V)));
+#endif
+}
+
+// Return the Y component in an integer register.
+inline uint32_t XM_CALLCONV XMVectorGetIntY(FXMVECTOR V) noexcept
+{
+#if defined(_XM_NO_INTRINSICS_)
+    return V.vector4_u32[1];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    return vgetq_lane_u32(vreinterpretq_u32_f32(V), 1);
+#elif defined(_XM_SSE4_INTRINSICS_)
+    __m128i V1 = _mm_castps_si128(V);
+    return static_cast<uint32_t>(_mm_extract_epi32(V1, 1));
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Splat y into every lane, then read lane 0.
+    __m128i vResulti = _mm_shuffle_epi32(_mm_castps_si128(V), _MM_SHUFFLE(1, 1, 1, 1));
+    return static_cast<uint32_t>(_mm_cvtsi128_si32(vResulti));
+#endif
+}
+
+// Return the Z component in an integer register.
+inline uint32_t XM_CALLCONV XMVectorGetIntZ(FXMVECTOR V) noexcept
+{
+#if defined(_XM_NO_INTRINSICS_)
+    return V.vector4_u32[2];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    return vgetq_lane_u32(vreinterpretq_u32_f32(V), 2);
+#elif defined(_XM_SSE4_INTRINSICS_)
+    __m128i V1 = _mm_castps_si128(V);
+    return static_cast<uint32_t>(_mm_extract_epi32(V1, 2));
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Splat z into every lane, then read lane 0.
+    __m128i vResulti = _mm_shuffle_epi32(_mm_castps_si128(V), _MM_SHUFFLE(2, 2, 2, 2));
+    return static_cast<uint32_t>(_mm_cvtsi128_si32(vResulti));
+#endif
+}
+
+// Return the W component in an integer register.
+inline uint32_t XM_CALLCONV XMVectorGetIntW(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return V.vector4_u32[3]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vgetq_lane_u32(vreinterpretq_u32_f32(V), 3); +#elif defined(_XM_SSE4_INTRINSICS_) + __m128i V1 = _mm_castps_si128(V); + return static_cast(_mm_extract_epi32(V1, 3)); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i vResulti = _mm_shuffle_epi32(_mm_castps_si128(V), _MM_SHUFFLE(3, 3, 3, 3)); + return static_cast(_mm_cvtsi128_si32(vResulti)); +#endif +} + +//------------------------------------------------------------------------------ + +// Store a component indexed by i into a 32 bit integer location in memory. +_Use_decl_annotations_ +inline void XM_CALLCONV XMVectorGetIntByIndexPtr(uint32_t* x, FXMVECTOR V, size_t i) noexcept +{ + assert(x != nullptr); + assert(i < 4); + _Analysis_assume_(i < 4); +#if defined(_XM_NO_INTRINSICS_) + *x = V.vector4_u32[i]; +#else + XMVECTORU32 U; + U.v = V; + *x = U.u[i]; +#endif +} + +//------------------------------------------------------------------------------ + +// Store the X component into a 32 bit integer location in memory. +_Use_decl_annotations_ +inline void XM_CALLCONV XMVectorGetIntXPtr(uint32_t* x, FXMVECTOR V) noexcept +{ + assert(x != nullptr); +#if defined(_XM_NO_INTRINSICS_) + *x = V.vector4_u32[0]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + vst1q_lane_u32(x, *reinterpret_cast(&V), 0); +#elif defined(_XM_SSE_INTRINSICS_) + _mm_store_ss(reinterpret_cast(x), V); +#endif +} + +// Store the Y component into a 32 bit integer location in memory. 
+_Use_decl_annotations_ +inline void XM_CALLCONV XMVectorGetIntYPtr(uint32_t* y, FXMVECTOR V) noexcept +{ + assert(y != nullptr); +#if defined(_XM_NO_INTRINSICS_) + *y = V.vector4_u32[1]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + vst1q_lane_u32(y, *reinterpret_cast(&V), 1); +#elif defined(_XM_SSE4_INTRINSICS_) + __m128i V1 = _mm_castps_si128(V); + *y = static_cast(_mm_extract_epi32(V1, 1)); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + _mm_store_ss(reinterpret_cast(y), vResult); +#endif +} + +// Store the Z component into a 32 bit integer locaCantion in memory. +_Use_decl_annotations_ +inline void XM_CALLCONV XMVectorGetIntZPtr(uint32_t* z, FXMVECTOR V) noexcept +{ + assert(z != nullptr); +#if defined(_XM_NO_INTRINSICS_) + *z = V.vector4_u32[2]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + vst1q_lane_u32(z, *reinterpret_cast(&V), 2); +#elif defined(_XM_SSE4_INTRINSICS_) + __m128i V1 = _mm_castps_si128(V); + *z = static_cast(_mm_extract_epi32(V1, 2)); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + _mm_store_ss(reinterpret_cast(z), vResult); +#endif +} + +// Store the W component into a 32 bit integer location in memory. 
+_Use_decl_annotations_ +inline void XM_CALLCONV XMVectorGetIntWPtr(uint32_t* w, FXMVECTOR V) noexcept +{ + assert(w != nullptr); +#if defined(_XM_NO_INTRINSICS_) + *w = V.vector4_u32[3]; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + vst1q_lane_u32(w, *reinterpret_cast(&V), 3); +#elif defined(_XM_SSE4_INTRINSICS_) + __m128i V1 = _mm_castps_si128(V); + *w = static_cast(_mm_extract_epi32(V1, 3)); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + _mm_store_ss(reinterpret_cast(w), vResult); +#endif +} + +//------------------------------------------------------------------------------ + +// Set a single indexed floating point component +inline XMVECTOR XM_CALLCONV XMVectorSetByIndex(FXMVECTOR V, float f, size_t i) noexcept +{ + assert(i < 4); + _Analysis_assume_(i < 4); + XMVECTORF32 U; + U.v = V; + U.f[i] = f; + return U.v; +} + +//------------------------------------------------------------------------------ + +// Sets the X component of a vector to a passed floating point value +inline XMVECTOR XM_CALLCONV XMVectorSetX(FXMVECTOR V, float x) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 U = { { { + x, + V.vector4_f32[1], + V.vector4_f32[2], + V.vector4_f32[3] + } } }; + return U.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vsetq_lane_f32(x, V, 0); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = _mm_set_ss(x); + vResult = _mm_move_ss(V, vResult); + return vResult; +#endif +} + +// Sets the Y component of a vector to a passed floating point value +inline XMVECTOR XM_CALLCONV XMVectorSetY(FXMVECTOR V, float y) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 U = { { { + V.vector4_f32[0], + y, + V.vector4_f32[2], + V.vector4_f32[3] + } } }; + return U.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vsetq_lane_f32(y, V, 1); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vResult = _mm_set_ss(y); + vResult = _mm_insert_ps(V, vResult, 0x10); + return vResult; +#elif 
defined(_XM_SSE_INTRINSICS_) + // Swap y and x + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 2, 0, 1)); + // Convert input to vector + XMVECTOR vTemp = _mm_set_ss(y); + // Replace the x component + vResult = _mm_move_ss(vResult, vTemp); + // Swap y and x again + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 2, 0, 1)); + return vResult; +#endif +} +// Sets the Z component of a vector to a passed floating point value +inline XMVECTOR XM_CALLCONV XMVectorSetZ(FXMVECTOR V, float z) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 U = { { { + V.vector4_f32[0], + V.vector4_f32[1], + z, + V.vector4_f32[3] + } } }; + return U.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vsetq_lane_f32(z, V, 2); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vResult = _mm_set_ss(z); + vResult = _mm_insert_ps(V, vResult, 0x20); + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + // Swap z and x + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 0, 1, 2)); + // Convert input to vector + XMVECTOR vTemp = _mm_set_ss(z); + // Replace the x component + vResult = _mm_move_ss(vResult, vTemp); + // Swap z and x again + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 0, 1, 2)); + return vResult; +#endif +} + +// Sets the W component of a vector to a passed floating point value +inline XMVECTOR XM_CALLCONV XMVectorSetW(FXMVECTOR V, float w) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 U = { { { + V.vector4_f32[0], + V.vector4_f32[1], + V.vector4_f32[2], + w + } } }; + return U.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vsetq_lane_f32(w, V, 3); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vResult = _mm_set_ss(w); + vResult = _mm_insert_ps(V, vResult, 0x30); + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + // Swap w and x + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 2, 1, 3)); + // Convert input to vector + XMVECTOR vTemp = _mm_set_ss(w); + // Replace the x component + vResult = _mm_move_ss(vResult, vTemp); + // Swap w and x 
again + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(0, 2, 1, 3)); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +// Sets a component of a vector to a floating point value passed by pointer +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorSetByIndexPtr(FXMVECTOR V, const float* f, size_t i) noexcept +{ + assert(f != nullptr); + assert(i < 4); + _Analysis_assume_(i < 4); + XMVECTORF32 U; + U.v = V; + U.f[i] = *f; + return U.v; +} + +//------------------------------------------------------------------------------ + +// Sets the X component of a vector to a floating point value passed by pointer +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorSetXPtr(FXMVECTOR V, const float* x) noexcept +{ + assert(x != nullptr); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 U = { { { + *x, + V.vector4_f32[1], + V.vector4_f32[2], + V.vector4_f32[3] + } } }; + return U.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vld1q_lane_f32(x, V, 0); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = _mm_load_ss(x); + vResult = _mm_move_ss(V, vResult); + return vResult; +#endif +} + +// Sets the Y component of a vector to a floating point value passed by pointer +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorSetYPtr(FXMVECTOR V, const float* y) noexcept +{ + assert(y != nullptr); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 U = { { { + V.vector4_f32[0], + *y, + V.vector4_f32[2], + V.vector4_f32[3] + } } }; + return U.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vld1q_lane_f32(y, V, 1); +#elif defined(_XM_SSE_INTRINSICS_) + // Swap y and x + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 2, 0, 1)); + // Convert input to vector + XMVECTOR vTemp = _mm_load_ss(y); + // Replace the x component + vResult = _mm_move_ss(vResult, vTemp); + // Swap y and x again + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 2, 0, 1)); + return vResult; +#endif +} + +// Sets the Z 
component of a vector to a floating point value passed by pointer +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorSetZPtr(FXMVECTOR V, const float* z) noexcept +{ + assert(z != nullptr); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 U = { { { + V.vector4_f32[0], + V.vector4_f32[1], + *z, + V.vector4_f32[3] + } } }; + return U.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vld1q_lane_f32(z, V, 2); +#elif defined(_XM_SSE_INTRINSICS_) + // Swap z and x + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 0, 1, 2)); + // Convert input to vector + XMVECTOR vTemp = _mm_load_ss(z); + // Replace the x component + vResult = _mm_move_ss(vResult, vTemp); + // Swap z and x again + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 0, 1, 2)); + return vResult; +#endif +} + +// Sets the W component of a vector to a floating point value passed by pointer +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorSetWPtr(FXMVECTOR V, const float* w) noexcept +{ + assert(w != nullptr); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 U = { { { + V.vector4_f32[0], + V.vector4_f32[1], + V.vector4_f32[2], + *w + } } }; + return U.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vld1q_lane_f32(w, V, 3); +#elif defined(_XM_SSE_INTRINSICS_) + // Swap w and x + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 2, 1, 3)); + // Convert input to vector + XMVECTOR vTemp = _mm_load_ss(w); + // Replace the x component + vResult = _mm_move_ss(vResult, vTemp); + // Swap w and x again + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(0, 2, 1, 3)); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +// Sets a component of a vector to an integer passed by value +inline XMVECTOR XM_CALLCONV XMVectorSetIntByIndex(FXMVECTOR V, uint32_t x, size_t i) noexcept +{ + assert(i < 4); + _Analysis_assume_(i < 4); + XMVECTORU32 tmp; + tmp.v = V; + tmp.u[i] = x; + return tmp; +} + 
+//------------------------------------------------------------------------------ + +// Sets the X component of a vector to an integer passed by value +inline XMVECTOR XM_CALLCONV XMVectorSetIntX(FXMVECTOR V, uint32_t x) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 U = { { { + x, + V.vector4_u32[1], + V.vector4_u32[2], + V.vector4_u32[3] + } } }; + return U.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vsetq_lane_u32(x, vreinterpretq_u32_f32(V), 0)); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i vTemp = _mm_cvtsi32_si128(static_cast(x)); + XMVECTOR vResult = _mm_move_ss(V, _mm_castsi128_ps(vTemp)); + return vResult; +#endif +} + +// Sets the Y component of a vector to an integer passed by value +inline XMVECTOR XM_CALLCONV XMVectorSetIntY(FXMVECTOR V, uint32_t y) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 U = { { { + V.vector4_u32[0], + y, + V.vector4_u32[2], + V.vector4_u32[3] + } } }; + return U.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vsetq_lane_u32(y, vreinterpretq_u32_f32(V), 1)); +#elif defined(_XM_SSE4_INTRINSICS_) + __m128i vResult = _mm_castps_si128(V); + vResult = _mm_insert_epi32(vResult, static_cast(y), 1); + return _mm_castsi128_ps(vResult); +#elif defined(_XM_SSE_INTRINSICS_) + // Swap y and x + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 2, 0, 1)); + // Convert input to vector + __m128i vTemp = _mm_cvtsi32_si128(static_cast(y)); + // Replace the x component + vResult = _mm_move_ss(vResult, _mm_castsi128_ps(vTemp)); + // Swap y and x again + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 2, 0, 1)); + return vResult; +#endif +} + +// Sets the Z component of a vector to an integer passed by value +inline XMVECTOR XM_CALLCONV XMVectorSetIntZ(FXMVECTOR V, uint32_t z) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 U = { { { + V.vector4_u32[0], + V.vector4_u32[1], + z, + V.vector4_u32[3] + } } }; + return U.v; +#elif 
defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vsetq_lane_u32(z, vreinterpretq_u32_f32(V), 2)); +#elif defined(_XM_SSE4_INTRINSICS_) + __m128i vResult = _mm_castps_si128(V); + vResult = _mm_insert_epi32(vResult, static_cast(z), 2); + return _mm_castsi128_ps(vResult); +#elif defined(_XM_SSE_INTRINSICS_) + // Swap z and x + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 0, 1, 2)); + // Convert input to vector + __m128i vTemp = _mm_cvtsi32_si128(static_cast(z)); + // Replace the x component + vResult = _mm_move_ss(vResult, _mm_castsi128_ps(vTemp)); + // Swap z and x again + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 0, 1, 2)); + return vResult; +#endif +} + +// Sets the W component of a vector to an integer passed by value +inline XMVECTOR XM_CALLCONV XMVectorSetIntW(FXMVECTOR V, uint32_t w) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 U = { { { + V.vector4_u32[0], + V.vector4_u32[1], + V.vector4_u32[2], + w + } } }; + return U.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vsetq_lane_u32(w, vreinterpretq_u32_f32(V), 3)); +#elif defined(_XM_SSE4_INTRINSICS_) + __m128i vResult = _mm_castps_si128(V); + vResult = _mm_insert_epi32(vResult, static_cast(w), 3); + return _mm_castsi128_ps(vResult); +#elif defined(_XM_SSE_INTRINSICS_) + // Swap w and x + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 2, 1, 3)); + // Convert input to vector + __m128i vTemp = _mm_cvtsi32_si128(static_cast(w)); + // Replace the x component + vResult = _mm_move_ss(vResult, _mm_castsi128_ps(vTemp)); + // Swap w and x again + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(0, 2, 1, 3)); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +// Sets a component of a vector to an integer value passed by pointer +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorSetIntByIndexPtr(FXMVECTOR V, const uint32_t* x, size_t i) noexcept +{ + assert(x != nullptr); + 
assert(i < 4); + _Analysis_assume_(i < 4); + XMVECTORU32 tmp; + tmp.v = V; + tmp.u[i] = *x; + return tmp; +} + +//------------------------------------------------------------------------------ + +// Sets the X component of a vector to an integer value passed by pointer +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorSetIntXPtr(FXMVECTOR V, const uint32_t* x) noexcept +{ + assert(x != nullptr); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 U = { { { + *x, + V.vector4_u32[1], + V.vector4_u32[2], + V.vector4_u32[3] + } } }; + return U.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vld1q_lane_u32(x, *reinterpret_cast(&V), 0)); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_load_ss(reinterpret_cast(x)); + XMVECTOR vResult = _mm_move_ss(V, vTemp); + return vResult; +#endif +} + +// Sets the Y component of a vector to an integer value passed by pointer +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorSetIntYPtr(FXMVECTOR V, const uint32_t* y) noexcept +{ + assert(y != nullptr); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 U = { { { + V.vector4_u32[0], + *y, + V.vector4_u32[2], + V.vector4_u32[3] + } } }; + return U.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vld1q_lane_u32(y, *reinterpret_cast(&V), 1)); +#elif defined(_XM_SSE_INTRINSICS_) + // Swap y and x + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 2, 0, 1)); + // Convert input to vector + XMVECTOR vTemp = _mm_load_ss(reinterpret_cast(y)); + // Replace the x component + vResult = _mm_move_ss(vResult, vTemp); + // Swap y and x again + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 2, 0, 1)); + return vResult; +#endif +} + +// Sets the Z component of a vector to an integer value passed by pointer +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorSetIntZPtr(FXMVECTOR V, const uint32_t* z) noexcept +{ + assert(z != nullptr); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 U = { { { + V.vector4_u32[0], + 
V.vector4_u32[1], + *z, + V.vector4_u32[3] + } } }; + return U.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vld1q_lane_u32(z, *reinterpret_cast(&V), 2)); +#elif defined(_XM_SSE_INTRINSICS_) + // Swap z and x + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 0, 1, 2)); + // Convert input to vector + XMVECTOR vTemp = _mm_load_ss(reinterpret_cast(z)); + // Replace the x component + vResult = _mm_move_ss(vResult, vTemp); + // Swap z and x again + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 0, 1, 2)); + return vResult; +#endif +} + +// Sets the W component of a vector to an integer value passed by pointer +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorSetIntWPtr(FXMVECTOR V, const uint32_t* w) noexcept +{ + assert(w != nullptr); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORU32 U = { { { + V.vector4_u32[0], + V.vector4_u32[1], + V.vector4_u32[2], + *w + } } }; + return U.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vld1q_lane_u32(w, *reinterpret_cast(&V), 3)); +#elif defined(_XM_SSE_INTRINSICS_) + // Swap w and x + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 2, 1, 3)); + // Convert input to vector + XMVECTOR vTemp = _mm_load_ss(reinterpret_cast(w)); + // Replace the x component + vResult = _mm_move_ss(vResult, vTemp); + // Swap w and x again + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(0, 2, 1, 3)); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorSwizzle +( + FXMVECTOR V, + uint32_t E0, + uint32_t E1, + uint32_t E2, + uint32_t E3 +) noexcept +{ + assert((E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4)); + _Analysis_assume_((E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4)); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result = { { { + V.vector4_f32[E0], + V.vector4_f32[E1], + V.vector4_f32[E2], + V.vector4_f32[E3] + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + 
static const uint32_t ControlElement[4] = + { + 0x03020100, // XM_SWIZZLE_X + 0x07060504, // XM_SWIZZLE_Y + 0x0B0A0908, // XM_SWIZZLE_Z + 0x0F0E0D0C, // XM_SWIZZLE_W + }; + + uint8x8x2_t tbl; + tbl.val[0] = vreinterpret_u8_f32(vget_low_f32(V)); + tbl.val[1] = vreinterpret_u8_f32(vget_high_f32(V)); + + uint32x2_t idx = vcreate_u32(static_cast(ControlElement[E0]) | (static_cast(ControlElement[E1]) << 32)); + const uint8x8_t rL = vtbl2_u8(tbl, vreinterpret_u8_u32(idx)); + + idx = vcreate_u32(static_cast(ControlElement[E2]) | (static_cast(ControlElement[E3]) << 32)); + const uint8x8_t rH = vtbl2_u8(tbl, vreinterpret_u8_u32(idx)); + + return vcombine_f32(vreinterpret_f32_u8(rL), vreinterpret_f32_u8(rH)); +#elif defined(_XM_AVX_INTRINSICS_) + unsigned int elem[4] = { E0, E1, E2, E3 }; + __m128i vControl = _mm_loadu_si128(reinterpret_cast(&elem[0])); + return _mm_permutevar_ps(V, vControl); +#else + auto aPtr = reinterpret_cast(&V); + + XMVECTOR Result; + auto pWork = reinterpret_cast(&Result); + + pWork[0] = aPtr[E0]; + pWork[1] = aPtr[E1]; + pWork[2] = aPtr[E2]; + pWork[3] = aPtr[E3]; + + return Result; +#endif +} + +//------------------------------------------------------------------------------ +inline XMVECTOR XM_CALLCONV XMVectorPermute +( + FXMVECTOR V1, + FXMVECTOR V2, + uint32_t PermuteX, + uint32_t PermuteY, + uint32_t PermuteZ, + uint32_t PermuteW +) noexcept +{ + assert(PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7); + _Analysis_assume_(PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7); + +#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + static const uint32_t ControlElement[8] = + { + 0x03020100, // XM_PERMUTE_0X + 0x07060504, // XM_PERMUTE_0Y + 0x0B0A0908, // XM_PERMUTE_0Z + 0x0F0E0D0C, // XM_PERMUTE_0W + 0x13121110, // XM_PERMUTE_1X + 0x17161514, // XM_PERMUTE_1Y + 0x1B1A1918, // XM_PERMUTE_1Z + 0x1F1E1D1C, // XM_PERMUTE_1W + }; + + uint8x8x4_t tbl; + tbl.val[0] = 
vreinterpret_u8_f32(vget_low_f32(V1)); + tbl.val[1] = vreinterpret_u8_f32(vget_high_f32(V1)); + tbl.val[2] = vreinterpret_u8_f32(vget_low_f32(V2)); + tbl.val[3] = vreinterpret_u8_f32(vget_high_f32(V2)); + + uint32x2_t idx = vcreate_u32(static_cast(ControlElement[PermuteX]) | (static_cast(ControlElement[PermuteY]) << 32)); + const uint8x8_t rL = vtbl4_u8(tbl, vreinterpret_u8_u32(idx)); + + idx = vcreate_u32(static_cast(ControlElement[PermuteZ]) | (static_cast(ControlElement[PermuteW]) << 32)); + const uint8x8_t rH = vtbl4_u8(tbl, vreinterpret_u8_u32(idx)); + + return vcombine_f32(vreinterpret_f32_u8(rL), vreinterpret_f32_u8(rH)); +#elif defined(_XM_AVX_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + static const XMVECTORU32 three = { { { 3, 3, 3, 3 } } }; + + XM_ALIGNED_DATA(16) unsigned int elem[4] = { PermuteX, PermuteY, PermuteZ, PermuteW }; + __m128i vControl = _mm_load_si128(reinterpret_cast(&elem[0])); + + __m128i vSelect = _mm_cmpgt_epi32(vControl, three); + vControl = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(vControl), three)); + + __m128 shuffled1 = _mm_permutevar_ps(V1, vControl); + __m128 shuffled2 = _mm_permutevar_ps(V2, vControl); + + __m128 masked1 = _mm_andnot_ps(_mm_castsi128_ps(vSelect), shuffled1); + __m128 masked2 = _mm_and_ps(_mm_castsi128_ps(vSelect), shuffled2); + + return _mm_or_ps(masked1, masked2); +#else + + const uint32_t* aPtr[2]; + aPtr[0] = reinterpret_cast(&V1); + aPtr[1] = reinterpret_cast(&V2); + + XMVECTOR Result; + auto pWork = reinterpret_cast(&Result); + + const uint32_t i0 = PermuteX & 3; + const uint32_t vi0 = PermuteX >> 2; + pWork[0] = aPtr[vi0][i0]; + + const uint32_t i1 = PermuteY & 3; + const uint32_t vi1 = PermuteY >> 2; + pWork[1] = aPtr[vi1][i1]; + + const uint32_t i2 = PermuteZ & 3; + const uint32_t vi2 = PermuteZ >> 2; + pWork[2] = aPtr[vi2][i2]; + + const uint32_t i3 = PermuteW & 3; + const uint32_t vi3 = PermuteW >> 2; + pWork[3] = aPtr[vi3][i3]; + + return Result; +#endif +} + 
+//------------------------------------------------------------------------------ +// Define a control vector to be used in XMVectorSelect +// operations. The four integers specified in XMVectorSelectControl +// serve as indices to select between components in two vectors. +// The first index controls selection for the first component of +// the vectors involved in a select operation, the second index +// controls selection for the second component etc. A value of +// zero for an index causes the corresponding component from the first +// vector to be selected whereas a one causes the component from the +// second vector to be selected instead. + +inline XMVECTOR XM_CALLCONV XMVectorSelectControl +( + uint32_t VectorIndex0, + uint32_t VectorIndex1, + uint32_t VectorIndex2, + uint32_t VectorIndex3 +) noexcept +{ +#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + // x=Index0,y=Index1,z=Index2,w=Index3 + __m128i vTemp = _mm_set_epi32(static_cast(VectorIndex3), static_cast(VectorIndex2), static_cast(VectorIndex1), static_cast(VectorIndex0)); + // Any non-zero entries become 0xFFFFFFFF else 0 + vTemp = _mm_cmpgt_epi32(vTemp, g_XMZero); + return _mm_castsi128_ps(vTemp); +#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + int32x2_t V0 = vcreate_s32(static_cast(VectorIndex0) | (static_cast(VectorIndex1) << 32)); + int32x2_t V1 = vcreate_s32(static_cast(VectorIndex2) | (static_cast(VectorIndex3) << 32)); + int32x4_t vTemp = vcombine_s32(V0, V1); + // Any non-zero entries become 0xFFFFFFFF else 0 + return vreinterpretq_f32_u32(vcgtq_s32(vTemp, g_XMZero)); +#else + XMVECTOR ControlVector; + const uint32_t ControlElement[] = + { + XM_SELECT_0, + XM_SELECT_1 + }; + + assert(VectorIndex0 < 2); + assert(VectorIndex1 < 2); + assert(VectorIndex2 < 2); + assert(VectorIndex3 < 2); + _Analysis_assume_(VectorIndex0 < 2); + _Analysis_assume_(VectorIndex1 < 2); + _Analysis_assume_(VectorIndex2 < 2); + _Analysis_assume_(VectorIndex3 < 2); + + 
ControlVector.vector4_u32[0] = ControlElement[VectorIndex0]; + ControlVector.vector4_u32[1] = ControlElement[VectorIndex1]; + ControlVector.vector4_u32[2] = ControlElement[VectorIndex2]; + ControlVector.vector4_u32[3] = ControlElement[VectorIndex3]; + + return ControlVector; + +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorSelect +( + FXMVECTOR V1, + FXMVECTOR V2, + FXMVECTOR Control +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Result = { { { + (V1.vector4_u32[0] & ~Control.vector4_u32[0]) | (V2.vector4_u32[0] & Control.vector4_u32[0]), + (V1.vector4_u32[1] & ~Control.vector4_u32[1]) | (V2.vector4_u32[1] & Control.vector4_u32[1]), + (V1.vector4_u32[2] & ~Control.vector4_u32[2]) | (V2.vector4_u32[2] & Control.vector4_u32[2]), + (V1.vector4_u32[3] & ~Control.vector4_u32[3]) | (V2.vector4_u32[3] & Control.vector4_u32[3]), + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vbslq_f32(vreinterpretq_u32_f32(Control), V2, V1); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp1 = _mm_andnot_ps(Control, V1); + XMVECTOR vTemp2 = _mm_and_ps(V2, Control); + return _mm_or_ps(vTemp1, vTemp2); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorMergeXY +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Result = { { { + V1.vector4_u32[0], + V2.vector4_u32[0], + V1.vector4_u32[1], + V2.vector4_u32[1], + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vzipq_f32(V1, V2).val[0]; +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_unpacklo_ps(V1, V2); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorMergeZW +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Result = { 
{ { + V1.vector4_u32[2], + V2.vector4_u32[2], + V1.vector4_u32[3], + V2.vector4_u32[3] + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vzipq_f32(V1, V2).val[1]; +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_unpackhi_ps(V1, V2); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2, uint32_t Elements) noexcept +{ + assert(Elements < 4); + _Analysis_assume_(Elements < 4); + return XMVectorPermute(V1, V2, Elements, ((Elements)+1), ((Elements)+2), ((Elements)+3)); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V, uint32_t Elements) noexcept +{ + assert(Elements < 4); + _Analysis_assume_(Elements < 4); + return XMVectorSwizzle(V, Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V, uint32_t Elements) noexcept +{ + assert(Elements < 4); + _Analysis_assume_(Elements < 4); + return XMVectorSwizzle(V, (4 - (Elements)) & 3, (5 - (Elements)) & 3, (6 - (Elements)) & 3, (7 - (Elements)) & 3); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorInsert( + FXMVECTOR VD, FXMVECTOR VS, + uint32_t VSLeftRotateElements, + uint32_t Select0, uint32_t Select1, uint32_t Select2, uint32_t Select3) noexcept +{ + XMVECTOR Control = XMVectorSelectControl(Select0 & 1, Select1 & 1, Select2 & 1, Select3 & 1); + return XMVectorSelect(VD, XMVectorRotateLeft(VS, VSLeftRotateElements), Control); +} + +//------------------------------------------------------------------------------ +// Comparison operations +//------------------------------------------------------------------------------ + 
+//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorEqual +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Control = { { { + (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFF : 0, + } } }; + return Control.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vceqq_f32(V1, V2)); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_cmpeq_ps(V1, V2); +#endif +} + +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorEqualR +( + uint32_t* pCR, + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ + assert(pCR != nullptr); +#if defined(_XM_NO_INTRINSICS_) + uint32_t ux = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0; + uint32_t uy = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0; + uint32_t uz = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0; + uint32_t uw = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 
0xFFFFFFFFU : 0; + uint32_t CR = 0; + if (ux & uy & uz & uw) + { + // All elements are equal + CR = XM_CRMASK_CR6TRUE; + } + else if (!(ux | uy | uz | uw)) + { + // All elements are not equal + CR = XM_CRMASK_CR6FALSE; + } + *pCR = CR; + + XMVECTORU32 Control = { { { ux, uy, uz, uw } } }; + return Control; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vceqq_f32(V1, V2); + uint8x8x2_t vTemp = vzip_u8(vreinterpret_u8_u32(vget_low_u32(vResult)), vreinterpret_u8_u32(vget_high_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); + uint32_t CR = 0; + if (r == 0xFFFFFFFFU) + { + // All elements are equal + CR = XM_CRMASK_CR6TRUE; + } + else if (!r) + { + // All elements are not equal + CR = XM_CRMASK_CR6FALSE; + } + *pCR = CR; + return vreinterpretq_f32_u32(vResult); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); + uint32_t CR = 0; + int iTest = _mm_movemask_ps(vTemp); + if (iTest == 0xf) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!iTest) + { + // All elements are not equal + CR = XM_CRMASK_CR6FALSE; + } + *pCR = CR; + return vTemp; +#endif +} + +//------------------------------------------------------------------------------ +// Treat the components of the vectors as unsigned integers and +// compare individual bits between the two. This is useful for +// comparing control vectors and result vectors returned from +// other comparison operations. + +inline XMVECTOR XM_CALLCONV XMVectorEqualInt +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Control = { { { + (V1.vector4_u32[0] == V2.vector4_u32[0]) ? 0xFFFFFFFF : 0, + (V1.vector4_u32[1] == V2.vector4_u32[1]) ? 0xFFFFFFFF : 0, + (V1.vector4_u32[2] == V2.vector4_u32[2]) ? 0xFFFFFFFF : 0, + (V1.vector4_u32[3] == V2.vector4_u32[3]) ? 
0xFFFFFFFF : 0, + } } }; + return Control.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vceqq_s32(vreinterpretq_s32_f32(V1), vreinterpretq_s32_f32(V2))); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i V = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); + return _mm_castsi128_ps(V); +#endif +} + +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorEqualIntR +( + uint32_t* pCR, + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ + assert(pCR != nullptr); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Control = XMVectorEqualInt(V1, V2); + + *pCR = 0; + if (XMVector4EqualInt(Control, XMVectorTrueInt())) + { + // All elements are equal + *pCR |= XM_CRMASK_CR6TRUE; + } + else if (XMVector4EqualInt(Control, XMVectorFalseInt())) + { + // All elements are not equal + *pCR |= XM_CRMASK_CR6FALSE; + } + return Control; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2)); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); + uint32_t CR = 0; + if (r == 0xFFFFFFFFU) + { + // All elements are equal + CR = XM_CRMASK_CR6TRUE; + } + else if (!r) + { + // All elements are not equal + CR = XM_CRMASK_CR6FALSE; + } + *pCR = CR; + return vreinterpretq_f32_u32(vResult); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i V = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); + int iTemp = _mm_movemask_ps(_mm_castsi128_ps(V)); + uint32_t CR = 0; + if (iTemp == 0x0F) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!iTemp) + { + CR = XM_CRMASK_CR6FALSE; + } + *pCR = CR; + return _mm_castsi128_ps(V); +#endif +} + 
+//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorNearEqual +( + FXMVECTOR V1, + FXMVECTOR V2, + FXMVECTOR Epsilon +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + float fDeltax = V1.vector4_f32[0] - V2.vector4_f32[0]; + float fDeltay = V1.vector4_f32[1] - V2.vector4_f32[1]; + float fDeltaz = V1.vector4_f32[2] - V2.vector4_f32[2]; + float fDeltaw = V1.vector4_f32[3] - V2.vector4_f32[3]; + + fDeltax = fabsf(fDeltax); + fDeltay = fabsf(fDeltay); + fDeltaz = fabsf(fDeltaz); + fDeltaw = fabsf(fDeltaw); + + XMVECTORU32 Control = { { { + (fDeltax <= Epsilon.vector4_f32[0]) ? 0xFFFFFFFFU : 0, + (fDeltay <= Epsilon.vector4_f32[1]) ? 0xFFFFFFFFU : 0, + (fDeltaz <= Epsilon.vector4_f32[2]) ? 0xFFFFFFFFU : 0, + (fDeltaw <= Epsilon.vector4_f32[3]) ? 0xFFFFFFFFU : 0, + } } }; + return Control.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t vDelta = vsubq_f32(V1, V2); +#ifdef _MSC_VER + return vacleq_f32(vDelta, Epsilon); +#else + return vreinterpretq_f32_u32(vcleq_f32(vabsq_f32(vDelta), Epsilon)); +#endif +#elif defined(_XM_SSE_INTRINSICS_) + // Get the difference + XMVECTOR vDelta = _mm_sub_ps(V1, V2); + // Get the absolute value of the difference + XMVECTOR vTemp = _mm_setzero_ps(); + vTemp = _mm_sub_ps(vTemp, vDelta); + vTemp = _mm_max_ps(vTemp, vDelta); + vTemp = _mm_cmple_ps(vTemp, Epsilon); + return vTemp; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorNotEqual +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Control = { { { + (V1.vector4_f32[0] != V2.vector4_f32[0]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[1] != V2.vector4_f32[1]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[2] != V2.vector4_f32[2]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[3] != V2.vector4_f32[3]) ? 
0xFFFFFFFF : 0, + } } }; + return Control.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(V1, V2))); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_cmpneq_ps(V1, V2); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorNotEqualInt +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Control = { { { + (V1.vector4_u32[0] != V2.vector4_u32[0]) ? 0xFFFFFFFFU : 0, + (V1.vector4_u32[1] != V2.vector4_u32[1]) ? 0xFFFFFFFFU : 0, + (V1.vector4_u32[2] != V2.vector4_u32[2]) ? 0xFFFFFFFFU : 0, + (V1.vector4_u32[3] != V2.vector4_u32[3]) ? 0xFFFFFFFFU : 0 + } } }; + return Control.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vmvnq_u32( + vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2)))); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i V = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); + return _mm_xor_ps(_mm_castsi128_ps(V), g_XMNegOneMask); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorGreater +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Control = { { { + (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 
0xFFFFFFFF : 0 + } } }; + return Control.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vcgtq_f32(V1, V2)); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_cmpgt_ps(V1, V2); +#endif +} + +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorGreaterR +( + uint32_t* pCR, + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ + assert(pCR != nullptr); +#if defined(_XM_NO_INTRINSICS_) + + uint32_t ux = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0; + uint32_t uy = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0; + uint32_t uz = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0; + uint32_t uw = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0; + uint32_t CR = 0; + if (ux & uy & uz & uw) + { + // All elements are greater + CR = XM_CRMASK_CR6TRUE; + } + else if (!(ux | uy | uz | uw)) + { + // All elements are not greater + CR = XM_CRMASK_CR6FALSE; + } + *pCR = CR; + + XMVECTORU32 Control = { { { ux, uy, uz, uw } } }; + return Control.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vcgtq_f32(V1, V2); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); + uint32_t CR = 0; + if (r == 0xFFFFFFFFU) + { + // All elements are greater + CR = XM_CRMASK_CR6TRUE; + } + else if (!r) + { + // All elements are not greater + CR = XM_CRMASK_CR6FALSE; + } + *pCR = CR; + return vreinterpretq_f32_u32(vResult); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpgt_ps(V1, V2); + uint32_t CR = 0; + int iTest = _mm_movemask_ps(vTemp); + if (iTest == 0xf) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!iTest) + { + // All elements are not greater + CR = XM_CRMASK_CR6FALSE; + } + 
*pCR = CR; + return vTemp; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorGreaterOrEqual +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Control = { { { + (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0 + } } }; + return Control.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vcgeq_f32(V1, V2)); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_cmpge_ps(V1, V2); +#endif +} + +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorGreaterOrEqualR +( + uint32_t* pCR, + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ + assert(pCR != nullptr); +#if defined(_XM_NO_INTRINSICS_) + + uint32_t ux = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0; + uint32_t uy = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0; + uint32_t uz = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0; + uint32_t uw = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 
0xFFFFFFFFU : 0; + uint32_t CR = 0; + if (ux & uy & uz & uw) + { + // All elements are greater or equal + CR = XM_CRMASK_CR6TRUE; + } + else if (!(ux | uy | uz | uw)) + { + // All elements are not greater or equal + CR = XM_CRMASK_CR6FALSE; + } + *pCR = CR; + + XMVECTORU32 Control = { { { ux, uy, uz, uw } } }; + return Control.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vcgeq_f32(V1, V2); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); + uint32_t CR = 0; + if (r == 0xFFFFFFFFU) + { + // All elements are greater or equal + CR = XM_CRMASK_CR6TRUE; + } + else if (!r) + { + // All elements are not greater or equal + CR = XM_CRMASK_CR6FALSE; + } + *pCR = CR; + return vreinterpretq_f32_u32(vResult); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpge_ps(V1, V2); + uint32_t CR = 0; + int iTest = _mm_movemask_ps(vTemp); + if (iTest == 0xf) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!iTest) + { + // All elements are not greater or equal + CR = XM_CRMASK_CR6FALSE; + } + *pCR = CR; + return vTemp; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorLess +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Control = { { { + (V1.vector4_f32[0] < V2.vector4_f32[0]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[1] < V2.vector4_f32[1]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[2] < V2.vector4_f32[2]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[3] < V2.vector4_f32[3]) ? 
0xFFFFFFFF : 0 + } } }; + return Control.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vcltq_f32(V1, V2)); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_cmplt_ps(V1, V2); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorLessOrEqual +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Control = { { { + (V1.vector4_f32[0] <= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[1] <= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[2] <= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0, + (V1.vector4_f32[3] <= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0 + } } }; + return Control.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vcleq_f32(V1, V2)); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_cmple_ps(V1, V2); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorInBounds +( + FXMVECTOR V, + FXMVECTOR Bounds +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Control = { { { + (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFF : 0, + (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFF : 0, + (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFF : 0, + (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 
0xFFFFFFFF : 0 + } } }; + return Control.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Test if less than or equal + uint32x4_t vTemp1 = vcleq_f32(V, Bounds); + // Negate the bounds + uint32x4_t vTemp2 = vreinterpretq_u32_f32(vnegq_f32(Bounds)); + // Test if greater or equal (Reversed) + vTemp2 = vcleq_f32(vreinterpretq_f32_u32(vTemp2), V); + // Blend answers + vTemp1 = vandq_u32(vTemp1, vTemp2); + return vreinterpretq_f32_u32(vTemp1); +#elif defined(_XM_SSE_INTRINSICS_) + // Test if less than or equal + XMVECTOR vTemp1 = _mm_cmple_ps(V, Bounds); + // Negate the bounds + XMVECTOR vTemp2 = _mm_mul_ps(Bounds, g_XMNegativeOne); + // Test if greater or equal (Reversed) + vTemp2 = _mm_cmple_ps(vTemp2, V); + // Blend answers + vTemp1 = _mm_and_ps(vTemp1, vTemp2); + return vTemp1; +#endif +} + +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMVectorInBoundsR +( + uint32_t* pCR, + FXMVECTOR V, + FXMVECTOR Bounds +) noexcept +{ + assert(pCR != nullptr); +#if defined(_XM_NO_INTRINSICS_) + + uint32_t ux = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFFU : 0; + uint32_t uy = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFFU : 0; + uint32_t uz = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFFU : 0; + uint32_t uw = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 
0xFFFFFFFFU : 0; + + uint32_t CR = 0; + if (ux & uy & uz & uw) + { + // All elements are in bounds + CR = XM_CRMASK_CR6BOUNDS; + } + *pCR = CR; + + XMVECTORU32 Control = { { { ux, uy, uz, uw } } }; + return Control.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Test if less than or equal + uint32x4_t vTemp1 = vcleq_f32(V, Bounds); + // Negate the bounds + uint32x4_t vTemp2 = vreinterpretq_u32_f32(vnegq_f32(Bounds)); + // Test if greater or equal (Reversed) + vTemp2 = vcleq_f32(vreinterpretq_f32_u32(vTemp2), V); + // Blend answers + vTemp1 = vandq_u32(vTemp1, vTemp2); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vTemp1)), vget_high_u8(vreinterpretq_u8_u32(vTemp1))); + uint16x4x2_t vTemp3 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp3.val[1]), 1); + uint32_t CR = 0; + if (r == 0xFFFFFFFFU) + { + // All elements are in bounds + CR = XM_CRMASK_CR6BOUNDS; + } + *pCR = CR; + return vreinterpretq_f32_u32(vTemp1); +#elif defined(_XM_SSE_INTRINSICS_) + // Test if less than or equal + XMVECTOR vTemp1 = _mm_cmple_ps(V, Bounds); + // Negate the bounds + XMVECTOR vTemp2 = _mm_mul_ps(Bounds, g_XMNegativeOne); + // Test if greater or equal (Reversed) + vTemp2 = _mm_cmple_ps(vTemp2, V); + // Blend answers + vTemp1 = _mm_and_ps(vTemp1, vTemp2); + + uint32_t CR = 0; + if (_mm_movemask_ps(vTemp1) == 0xf) + { + // All elements are in bounds + CR = XM_CRMASK_CR6BOUNDS; + } + *pCR = CR; + return vTemp1; +#endif +} + +//------------------------------------------------------------------------------ + +#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER) +#pragma float_control(push) +#pragma float_control(precise, on) +#endif + +inline XMVECTOR XM_CALLCONV XMVectorIsNaN(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Control = { { { + XMISNAN(V.vector4_f32[0]) ? 
0xFFFFFFFFU : 0, + XMISNAN(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0, + XMISNAN(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0, + XMISNAN(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0 + } } }; + return Control.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Test against itself. NaN never compares equal to itself + uint32x4_t vTempNan = vceqq_f32(V, V); + // Flip results + return vreinterpretq_f32_u32(vmvnq_u32(vTempNan)); +#elif defined(_XM_SSE_INTRINSICS_) + // Test against itself. NaN never compares equal to itself + return _mm_cmpneq_ps(V, V); +#endif +} + +#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER) +#pragma float_control(pop) +#endif + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorIsInfinite(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Control = { { { + XMISINF(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0, + XMISINF(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0, + XMISINF(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0, + XMISINF(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0 + } } }; + return Control.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Mask off the sign bit + uint32x4_t vTemp = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); + // Compare to infinity + vTemp = vceqq_f32(vreinterpretq_f32_u32(vTemp), g_XMInfinity); + // Each infinite component yields an all-ones mask; all others yield zero. + return vreinterpretq_f32_u32(vTemp); +#elif defined(_XM_SSE_INTRINSICS_) + // Mask off the sign bit + __m128 vTemp = _mm_and_ps(V, g_XMAbsMask); + // Compare to infinity + vTemp = _mm_cmpeq_ps(vTemp, g_XMInfinity); + // Each infinite component yields an all-ones mask; all others yield zero. 
+ return vTemp; +#endif +} + +//------------------------------------------------------------------------------ +// Rounding and clamping operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorMin +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result = { { { + (V1.vector4_f32[0] < V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0], + (V1.vector4_f32[1] < V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1], + (V1.vector4_f32[2] < V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2], + (V1.vector4_f32[3] < V2.vector4_f32[3]) ? V1.vector4_f32[3] : V2.vector4_f32[3] + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vminq_f32(V1, V2); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_min_ps(V1, V2); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorMax +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result = { { { + (V1.vector4_f32[0] > V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0], + (V1.vector4_f32[1] > V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1], + (V1.vector4_f32[2] > V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2], + (V1.vector4_f32[3] > V2.vector4_f32[3]) ? V1.vector4_f32[3] : V2.vector4_f32[3] + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vmaxq_f32(V1, V2); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_max_ps(V1, V2); +#endif +} + +//------------------------------------------------------------------------------ + +namespace Internal +{ + // Round to nearest (even) a.k.a. 
banker's rounding + inline float round_to_nearest(float x) noexcept + { + float i = floorf(x); + x -= i; + if (x < 0.5f) + return i; + if (x > 0.5f) + return i + 1.f; + + float int_part; + (void)modff(i / 2.f, &int_part); + if ((2.f * int_part) == i) + { + return i; + } + + return i + 1.f; + } +} + +#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER) +#pragma float_control(push) +#pragma float_control(precise, on) +#endif + +inline XMVECTOR XM_CALLCONV XMVectorRound(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result = { { { + Internal::round_to_nearest(V.vector4_f32[0]), + Internal::round_to_nearest(V.vector4_f32[1]), + Internal::round_to_nearest(V.vector4_f32[2]), + Internal::round_to_nearest(V.vector4_f32[3]) + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + return vrndnq_f32(V); +#else + uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(V), g_XMNegativeZero); + float32x4_t sMagic = vreinterpretq_f32_u32(vorrq_u32(g_XMNoFraction, sign)); + float32x4_t R1 = vaddq_f32(V, sMagic); + R1 = vsubq_f32(R1, sMagic); + float32x4_t R2 = vabsq_f32(V); + uint32x4_t mask = vcleq_f32(R2, g_XMNoFraction); + return vbslq_f32(mask, R1, V); +#endif +#elif defined(_XM_SSE4_INTRINSICS_) + return _mm_round_ps(V, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +#elif defined(_XM_SSE_INTRINSICS_) + __m128 sign = _mm_and_ps(V, g_XMNegativeZero); + __m128 sMagic = _mm_or_ps(g_XMNoFraction, sign); + __m128 R1 = _mm_add_ps(V, sMagic); + R1 = _mm_sub_ps(R1, sMagic); + __m128 R2 = _mm_and_ps(V, g_XMAbsMask); + __m128 mask = _mm_cmple_ps(R2, g_XMNoFraction); + R2 = _mm_andnot_ps(mask, V); + R1 = _mm_and_ps(R1, mask); + XMVECTOR vResult = _mm_xor_ps(R1, R2); + return vResult; +#endif +} + +#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER) +#pragma 
float_control(pop) +#endif + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorTruncate(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR Result; + uint32_t i; + + // Avoid C4701 + Result.vector4_f32[0] = 0.0f; + + for (i = 0; i < 4; i++) + { + if (XMISNAN(V.vector4_f32[i])) + { + Result.vector4_u32[i] = 0x7FC00000; + } + else if (fabsf(V.vector4_f32[i]) < 8388608.0f) + { + Result.vector4_f32[i] = static_cast<float>(static_cast<int32_t>(V.vector4_f32[i])); + } + else + { + Result.vector4_f32[i] = V.vector4_f32[i]; + } + } + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + return vrndq_f32(V); +#else + float32x4_t vTest = vabsq_f32(V); + vTest = vreinterpretq_f32_u32(vcltq_f32(vTest, g_XMNoFraction)); + + int32x4_t vInt = vcvtq_s32_f32(V); + float32x4_t vResult = vcvtq_f32_s32(vInt); + + // All numbers less than 8388608 will use the round to int + // All others, use the ORIGINAL value + return vbslq_f32(vreinterpretq_u32_f32(vTest), vResult, V); +#endif +#elif defined(_XM_SSE4_INTRINSICS_) + return _mm_round_ps(V, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +#elif defined(_XM_SSE_INTRINSICS_) + // To handle NAN, INF and numbers greater than 8388608, use masking + // Get the abs value + __m128i vTest = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); + // Test for greater than 8388608 (All floats with NO fractionals, NAN and INF) + vTest = _mm_cmplt_epi32(vTest, g_XMNoFraction); + // Convert to int and back to float for rounding with truncation + __m128i vInt = _mm_cvttps_epi32(V); + // Convert back to floats + XMVECTOR vResult = _mm_cvtepi32_ps(vInt); + // All numbers less than 8388608 will use the round to int + vResult = _mm_and_ps(vResult, _mm_castsi128_ps(vTest)); + // All others, use the ORIGINAL value + vTest = _mm_andnot_si128(vTest, _mm_castps_si128(V)); + vResult = _mm_or_ps(vResult, _mm_castsi128_ps(vTest)); 
+ return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorFloor(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + floorf(V.vector4_f32[0]), + floorf(V.vector4_f32[1]), + floorf(V.vector4_f32[2]), + floorf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + return vrndmq_f32(V); +#else + float32x4_t vTest = vabsq_f32(V); + vTest = vreinterpretq_f32_u32(vcltq_f32(vTest, g_XMNoFraction)); + // Truncate + int32x4_t vInt = vcvtq_s32_f32(V); + float32x4_t vResult = vcvtq_f32_s32(vInt); + uint32x4_t vLargerMask = vcgtq_f32(vResult, V); + // 0 -> 0, 0xffffffff -> -1.0f + float32x4_t vLarger = vcvtq_f32_s32(vreinterpretq_s32_u32(vLargerMask)); + vResult = vaddq_f32(vResult, vLarger); + // All numbers less than 8388608 will use the round to int + // All others, use the ORIGINAL value + return vbslq_f32(vreinterpretq_u32_f32(vTest), vResult, V); +#endif +#elif defined(_XM_SSE4_INTRINSICS_) + return _mm_floor_ps(V); +#elif defined(_XM_SSE_INTRINSICS_) + // To handle NAN, INF and numbers greater than 8388608, use masking + __m128i vTest = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); + vTest = _mm_cmplt_epi32(vTest, g_XMNoFraction); + // Truncate + __m128i vInt = _mm_cvttps_epi32(V); + XMVECTOR vResult = _mm_cvtepi32_ps(vInt); + __m128 vLarger = _mm_cmpgt_ps(vResult, V); + // 0 -> 0, 0xffffffff -> -1.0f + vLarger = _mm_cvtepi32_ps(_mm_castps_si128(vLarger)); + vResult = _mm_add_ps(vResult, vLarger); + // All numbers less than 8388608 will use the round to int + vResult = _mm_and_ps(vResult, _mm_castsi128_ps(vTest)); + // All others, use the ORIGINAL value + vTest = _mm_andnot_si128(vTest, _mm_castps_si128(V)); + vResult = _mm_or_ps(vResult, _mm_castsi128_ps(vTest)); + return vResult; +#endif +} + 
+//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorCeiling(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + ceilf(V.vector4_f32[0]), + ceilf(V.vector4_f32[1]), + ceilf(V.vector4_f32[2]), + ceilf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + return vrndpq_f32(V); +#else + float32x4_t vTest = vabsq_f32(V); + vTest = vreinterpretq_f32_u32(vcltq_f32(vTest, g_XMNoFraction)); + // Truncate + int32x4_t vInt = vcvtq_s32_f32(V); + float32x4_t vResult = vcvtq_f32_s32(vInt); + uint32x4_t vSmallerMask = vcltq_f32(vResult, V); + // 0 -> 0, 0xffffffff -> -1.0f + float32x4_t vSmaller = vcvtq_f32_s32(vreinterpretq_s32_u32(vSmallerMask)); + vResult = vsubq_f32(vResult, vSmaller); + // All numbers less than 8388608 will use the round to int + // All others, use the ORIGINAL value + return vbslq_f32(vreinterpretq_u32_f32(vTest), vResult, V); +#endif +#elif defined(_XM_SSE4_INTRINSICS_) + return _mm_ceil_ps(V); +#elif defined(_XM_SSE_INTRINSICS_) + // To handle NAN, INF and numbers greater than 8388608, use masking + __m128i vTest = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); + vTest = _mm_cmplt_epi32(vTest, g_XMNoFraction); + // Truncate + __m128i vInt = _mm_cvttps_epi32(V); + XMVECTOR vResult = _mm_cvtepi32_ps(vInt); + __m128 vSmaller = _mm_cmplt_ps(vResult, V); + // 0 -> 0, 0xffffffff -> -1.0f + vSmaller = _mm_cvtepi32_ps(_mm_castps_si128(vSmaller)); + vResult = _mm_sub_ps(vResult, vSmaller); + // All numbers less than 8388608 will use the round to int + vResult = _mm_and_ps(vResult, _mm_castsi128_ps(vTest)); + // All others, use the ORIGINAL value + vTest = _mm_andnot_si128(vTest, _mm_castps_si128(V)); + vResult = _mm_or_ps(vResult, _mm_castsi128_ps(vTest)); + return vResult; +#endif +} + 
+//------------------------------------------------------------------------------ + +// Clamps each component of V to [Min, Max]: max(Min, V) followed by min(Max, ...). +// Asserts XMVector4LessOrEqual(Min, Max) before doing so. +inline XMVECTOR XM_CALLCONV XMVectorClamp +( + FXMVECTOR V, + FXMVECTOR Min, + FXMVECTOR Max +) noexcept +{ + assert(XMVector4LessOrEqual(Min, Max)); + +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + Result = XMVectorMax(Min, V); + Result = XMVectorMin(Max, Result); + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t vResult = vmaxq_f32(Min, V); + vResult = vminq_f32(Max, vResult); + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult; + vResult = _mm_max_ps(Min, V); + vResult = _mm_min_ps(Max, vResult); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +// Clamps each component of V to the range [0, 1]. +inline XMVECTOR XM_CALLCONV XMVectorSaturate(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + const XMVECTOR Zero = XMVectorZero(); + + return XMVectorClamp(V, Zero, g_XMOne.v); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Set <0 to 0 + float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0)); + // Set >1 to 1 + return vminq_f32(vResult, vdupq_n_f32(1.0f)); +#elif defined(_XM_SSE_INTRINSICS_) + // Set <0 to 0 + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + // Set >1 to 1 + return _mm_min_ps(vResult, g_XMOne); +#endif +} + +//------------------------------------------------------------------------------ +// Bitwise logical operations +//------------------------------------------------------------------------------ + +// Bitwise AND of V1 and V2, with each component treated as a 32-bit integer. +inline XMVECTOR XM_CALLCONV XMVectorAndInt +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Result = { { { + V1.vector4_u32[0] & V2.vector4_u32[0], + V1.vector4_u32[1] & V2.vector4_u32[1], + V1.vector4_u32[2] & V2.vector4_u32[2], + V1.vector4_u32[3] & V2.vector4_u32[3] + } } }; + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2))); +#elif
defined(_XM_SSE_INTRINSICS_) + return _mm_and_ps(V1, V2); +#endif +} + +//------------------------------------------------------------------------------ + +// Bitwise AND of V1 with the complement of V2 (V1 & ~V2), per 32-bit component. +inline XMVECTOR XM_CALLCONV XMVectorAndCInt +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Result = { { { + V1.vector4_u32[0] & ~V2.vector4_u32[0], + V1.vector4_u32[1] & ~V2.vector4_u32[1], + V1.vector4_u32[2] & ~V2.vector4_u32[2], + V1.vector4_u32[3] & ~V2.vector4_u32[3] + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2))); +#elif defined(_XM_SSE_INTRINSICS_) + // _mm_andnot_si128 complements its FIRST operand, hence V2 is passed first + __m128i V = _mm_andnot_si128(_mm_castps_si128(V2), _mm_castps_si128(V1)); + return _mm_castsi128_ps(V); +#endif +} + +//------------------------------------------------------------------------------ + +// Bitwise OR of V1 and V2, per 32-bit component. +inline XMVECTOR XM_CALLCONV XMVectorOrInt +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Result = { { { + V1.vector4_u32[0] | V2.vector4_u32[0], + V1.vector4_u32[1] | V2.vector4_u32[1], + V1.vector4_u32[2] | V2.vector4_u32[2], + V1.vector4_u32[3] | V2.vector4_u32[3] + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2))); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i V = _mm_or_si128(_mm_castps_si128(V1), _mm_castps_si128(V2)); + return _mm_castsi128_ps(V); +#endif +} + +//------------------------------------------------------------------------------ + +// Bitwise NOR of V1 and V2, ~(V1 | V2), per 32-bit component. +inline XMVECTOR XM_CALLCONV XMVectorNorInt +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Result = { { { + ~(V1.vector4_u32[0] | V2.vector4_u32[0]), + ~(V1.vector4_u32[1] | V2.vector4_u32[1]), + ~(V1.vector4_u32[2] | V2.vector4_u32[2]), + ~(V1.vector4_u32[3] | V2.vector4_u32[3]) + } } }; + return Result.v; + +#elif
defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t Result = vorrq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2)); + return vreinterpretq_f32_u32(vbicq_u32(g_XMNegOneMask, Result)); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i Result; + Result = _mm_or_si128(_mm_castps_si128(V1), _mm_castps_si128(V2)); + Result = _mm_andnot_si128(Result, g_XMNegOneMask); + return _mm_castsi128_ps(Result); +#endif +} + +//------------------------------------------------------------------------------ + +// Bitwise XOR of V1 and V2, per 32-bit component. +inline XMVECTOR XM_CALLCONV XMVectorXorInt +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORU32 Result = { { { + V1.vector4_u32[0] ^ V2.vector4_u32[0], + V1.vector4_u32[1] ^ V2.vector4_u32[1], + V1.vector4_u32[2] ^ V2.vector4_u32[2], + V1.vector4_u32[3] ^ V2.vector4_u32[3] + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2))); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i V = _mm_xor_si128(_mm_castps_si128(V1), _mm_castps_si128(V2)); + return _mm_castsi128_ps(V); +#endif +} + +//------------------------------------------------------------------------------ +// Computation operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +// Negates each component of V. +inline XMVECTOR XM_CALLCONV XMVectorNegate(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result = { { { + -V.vector4_f32[0], + -V.vector4_f32[1], + -V.vector4_f32[2], + -V.vector4_f32[3] + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vnegq_f32(V); +#elif defined(_XM_SSE_INTRINSICS_) + // Computed as 0 - V + XMVECTOR Z; + + Z = _mm_setzero_ps(); + + return _mm_sub_ps(Z, V); +#endif +} + +//------------------------------------------------------------------------------ + +// Computes V1 + V2 per component. +inline XMVECTOR XM_CALLCONV XMVectorAdd +( + FXMVECTOR V1, + FXMVECTOR V2
+) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result = { { { + V1.vector4_f32[0] + V2.vector4_f32[0], + V1.vector4_f32[1] + V2.vector4_f32[1], + V1.vector4_f32[2] + V2.vector4_f32[2], + V1.vector4_f32[3] + V2.vector4_f32[3] + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vaddq_f32(V1, V2); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_add_ps(V1, V2); +#endif +} + +//------------------------------------------------------------------------------ + +// Adds the four components of V together and replicates that sum into every component of the result. +inline XMVECTOR XM_CALLCONV XMVectorSum(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result; + Result.f[0] = + Result.f[1] = + Result.f[2] = + Result.f[3] = V.vector4_f32[0] + V.vector4_f32[1] + V.vector4_f32[2] + V.vector4_f32[3]; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + float32x4_t vTemp = vpaddq_f32(V, V); + return vpaddq_f32(vTemp, vTemp); +#else + float32x2_t v1 = vget_low_f32(V); + float32x2_t v2 = vget_high_f32(V); + v1 = vadd_f32(v1, v2); + v1 = vpadd_f32(v1, v1); + return vcombine_f32(v1, v1); +#endif +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vTemp = _mm_hadd_ps(V, V); + return _mm_hadd_ps(vTemp, vTemp); +#elif defined(_XM_SSE_INTRINSICS_) + // Two permute-and-add steps: swap neighbours within each pair, then swap the pairs + XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 3, 0, 1)); + XMVECTOR vTemp2 = _mm_add_ps(V, vTemp); + vTemp = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(1, 0, 3, 2)); + return _mm_add_ps(vTemp, vTemp2); +#endif +} + +//------------------------------------------------------------------------------ + +// Computes V1 + V2 per component, then adds 2Pi to results below -Pi and subtracts 2Pi from results at or above Pi. +inline XMVECTOR XM_CALLCONV XMVectorAddAngles +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + const XMVECTOR Zero = XMVectorZero(); + + // Add the given angles together. If the range of V1 is such + // that -Pi <= V1 < Pi and the range of V2 is such that + // -2Pi <= V2 <= 2Pi, then the range of the resulting angle + // will be -Pi <= Result < Pi.
+ XMVECTOR Result = XMVectorAdd(V1, V2); + + XMVECTOR Mask = XMVectorLess(Result, g_XMNegativePi.v); + XMVECTOR Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask); + + Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v); + Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask); + + Result = XMVectorAdd(Result, Offset); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Adjust the angles + float32x4_t vResult = vaddq_f32(V1, V2); + // Less than -Pi? + uint32x4_t vOffset = vcltq_f32(vResult, g_XMNegativePi); + vOffset = vandq_u32(vOffset, g_XMTwoPi); + // Add 2Pi to all entries less than -Pi + vResult = vaddq_f32(vResult, vreinterpretq_f32_u32(vOffset)); + // Greater than or equal to Pi? + vOffset = vcgeq_f32(vResult, g_XMPi); + vOffset = vandq_u32(vOffset, g_XMTwoPi); + // Subtract 2Pi from all entries greater than or equal to Pi + vResult = vsubq_f32(vResult, vreinterpretq_f32_u32(vOffset)); + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + // Adjust the angles + XMVECTOR vResult = _mm_add_ps(V1, V2); + // Less than -Pi? + XMVECTOR vOffset = _mm_cmplt_ps(vResult, g_XMNegativePi); + vOffset = _mm_and_ps(vOffset, g_XMTwoPi); + // Add 2Pi to all entries less than -Pi + vResult = _mm_add_ps(vResult, vOffset); + // Greater than or equal to Pi?
+ vOffset = _mm_cmpge_ps(vResult, g_XMPi); + vOffset = _mm_and_ps(vOffset, g_XMTwoPi); + // Subtract 2Pi from all entries greater than or equal to Pi + vResult = _mm_sub_ps(vResult, vOffset); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +// Computes V1 - V2 per component. +inline XMVECTOR XM_CALLCONV XMVectorSubtract +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result = { { { + V1.vector4_f32[0] - V2.vector4_f32[0], + V1.vector4_f32[1] - V2.vector4_f32[1], + V1.vector4_f32[2] - V2.vector4_f32[2], + V1.vector4_f32[3] - V2.vector4_f32[3] + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vsubq_f32(V1, V2); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_sub_ps(V1, V2); +#endif +} + +//------------------------------------------------------------------------------ + +// Computes V1 - V2 per component, then adds 2Pi to results below -Pi and subtracts 2Pi from results at or above Pi. +inline XMVECTOR XM_CALLCONV XMVectorSubtractAngles +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + const XMVECTOR Zero = XMVectorZero(); + + // Subtract the given angles. If the range of V1 is such + // that -Pi <= V1 < Pi and the range of V2 is such that + // -2Pi <= V2 <= 2Pi, then the range of the resulting angle + // will be -Pi <= Result < Pi. + XMVECTOR Result = XMVectorSubtract(V1, V2); + + XMVECTOR Mask = XMVectorLess(Result, g_XMNegativePi.v); + XMVECTOR Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask); + + Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v); + Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask); + + Result = XMVectorAdd(Result, Offset); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Adjust the angles + XMVECTOR vResult = vsubq_f32(V1, V2); + // Less than -Pi? + uint32x4_t vOffset = vcltq_f32(vResult, g_XMNegativePi); + vOffset = vandq_u32(vOffset, g_XMTwoPi); + // Add 2Pi to all entries less than -Pi + vResult = vaddq_f32(vResult, vreinterpretq_f32_u32(vOffset)); + // Greater than or equal to Pi?
+ vOffset = vcgeq_f32(vResult, g_XMPi); + vOffset = vandq_u32(vOffset, g_XMTwoPi); + // Subtract 2Pi from all entries greater than or equal to Pi + vResult = vsubq_f32(vResult, vreinterpretq_f32_u32(vOffset)); + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + // Adjust the angles + XMVECTOR vResult = _mm_sub_ps(V1, V2); + // Less than -Pi? + XMVECTOR vOffset = _mm_cmplt_ps(vResult, g_XMNegativePi); + vOffset = _mm_and_ps(vOffset, g_XMTwoPi); + // Add 2Pi to all entries less than -Pi + vResult = _mm_add_ps(vResult, vOffset); + // Greater than or equal to Pi? + vOffset = _mm_cmpge_ps(vResult, g_XMPi); + vOffset = _mm_and_ps(vOffset, g_XMTwoPi); + // Subtract 2Pi from all entries greater than or equal to Pi + vResult = _mm_sub_ps(vResult, vOffset); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +// Computes V1 * V2 per component. +inline XMVECTOR XM_CALLCONV XMVectorMultiply +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + V1.vector4_f32[0] * V2.vector4_f32[0], + V1.vector4_f32[1] * V2.vector4_f32[1], + V1.vector4_f32[2] * V2.vector4_f32[2], + V1.vector4_f32[3] * V2.vector4_f32[3] + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vmulq_f32(V1, V2); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_mul_ps(V1, V2); +#endif +} + +//------------------------------------------------------------------------------ + +// Computes V1 * V2 + V3 per component. +inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd +( + FXMVECTOR V1, + FXMVECTOR V2, + FXMVECTOR V3 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + V1.vector4_f32[0] * V2.vector4_f32[0] + V3.vector4_f32[0], + V1.vector4_f32[1] * V2.vector4_f32[1] + V3.vector4_f32[1], + V1.vector4_f32[2] * V2.vector4_f32[2] + V3.vector4_f32[2], + V1.vector4_f32[3] * V2.vector4_f32[3] + V3.vector4_f32[3] + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + return vfmaq_f32(V3,
V1, V2); +#else + return vmlaq_f32(V3, V1, V2); +#endif +#elif defined(_XM_SSE_INTRINSICS_) + return XM_FMADD_PS(V1, V2, V3); +#endif +} + +//------------------------------------------------------------------------------ + +// Computes V1 / V2 per component. +inline XMVECTOR XM_CALLCONV XMVectorDivide +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + V1.vector4_f32[0] / V2.vector4_f32[0], + V1.vector4_f32[1] / V2.vector4_f32[1], + V1.vector4_f32[2] / V2.vector4_f32[2], + V1.vector4_f32[3] / V2.vector4_f32[3] + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + return vdivq_f32(V1, V2); +#else + // 2 iterations of Newton-Raphson refinement of reciprocal + float32x4_t Reciprocal = vrecpeq_f32(V2); + float32x4_t S = vrecpsq_f32(Reciprocal, V2); + Reciprocal = vmulq_f32(S, Reciprocal); + S = vrecpsq_f32(Reciprocal, V2); + Reciprocal = vmulq_f32(S, Reciprocal); + // The quotient is V1 times the refined reciprocal of V2 + return vmulq_f32(V1, Reciprocal); +#endif +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_div_ps(V1, V2); +#endif +} + +//------------------------------------------------------------------------------ + +// Computes V3 - (V1 * V2) per component. +inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract +( + FXMVECTOR V1, + FXMVECTOR V2, + FXMVECTOR V3 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + V3.vector4_f32[0] - (V1.vector4_f32[0] * V2.vector4_f32[0]), + V3.vector4_f32[1] - (V1.vector4_f32[1] * V2.vector4_f32[1]), + V3.vector4_f32[2] - (V1.vector4_f32[2] * V2.vector4_f32[2]), + V3.vector4_f32[3] - (V1.vector4_f32[3] * V2.vector4_f32[3]) + } } }; + return Result; +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + return vfmsq_f32(V3, V1, V2); +#else + return vmlsq_f32(V3, V1, V2); +#endif +#elif defined(_XM_SSE_INTRINSICS_) + return XM_FNMADD_PS(V1, V2, V3); +#endif +} +
+//------------------------------------------------------------------------------ + +// Multiplies each component of V by the scalar ScaleFactor. +inline XMVECTOR XM_CALLCONV XMVectorScale +( + FXMVECTOR V, + float ScaleFactor +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + V.vector4_f32[0] * ScaleFactor, + V.vector4_f32[1] * ScaleFactor, + V.vector4_f32[2] * ScaleFactor, + V.vector4_f32[3] * ScaleFactor + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vmulq_n_f32(V, ScaleFactor); +#elif defined(_XM_SSE_INTRINSICS_) + // Replicate the scalar into all four lanes, then multiply + XMVECTOR vResult = _mm_set_ps1(ScaleFactor); + return _mm_mul_ps(vResult, V); +#endif +} + +//------------------------------------------------------------------------------ + +// Estimated reciprocal of each component of V. The scalar path computes 1.f / x directly; +// the NEON and SSE paths use the vrecpeq_f32 / _mm_rcp_ps instructions with no refinement step. +inline XMVECTOR XM_CALLCONV XMVectorReciprocalEst(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + 1.f / V.vector4_f32[0], + 1.f / V.vector4_f32[1], + 1.f / V.vector4_f32[2], + 1.f / V.vector4_f32[3] + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vrecpeq_f32(V); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_rcp_ps(V); +#endif +} + +//------------------------------------------------------------------------------ + +// Computes 1 / V per component. +inline XMVECTOR XM_CALLCONV XMVectorReciprocal(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + 1.f / V.vector4_f32[0], + 1.f / V.vector4_f32[1], + 1.f / V.vector4_f32[2], + 1.f / V.vector4_f32[3] + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + float32x4_t one = vdupq_n_f32(1.0f); + return vdivq_f32(one, V); +#else + // 2 iterations of Newton-Raphson refinement + float32x4_t Reciprocal = vrecpeq_f32(V); + float32x4_t S = vrecpsq_f32(Reciprocal, V); + Reciprocal = vmulq_f32(S, Reciprocal); + S = vrecpsq_f32(Reciprocal, V); + return vmulq_f32(S, Reciprocal); +#endif +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_div_ps(g_XMOne, V); +#endif +} +
+//------------------------------------------------------------------------------ +// Return an estimated square root +inline XMVECTOR XM_CALLCONV XMVectorSqrtEst(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + sqrtf(V.vector4_f32[0]), + sqrtf(V.vector4_f32[1]), + sqrtf(V.vector4_f32[2]), + sqrtf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // 1 iteration of Newton-Raphson refinement of sqrt + float32x4_t S0 = vrsqrteq_f32(V); + float32x4_t P0 = vmulq_f32(V, S0); + float32x4_t R0 = vrsqrtsq_f32(P0, S0); + float32x4_t S1 = vmulq_f32(S0, R0); + + // Result = V * rsqrt(V); lanes where V equals g_XMInfinity or zero fail the Select test and keep V itself + XMVECTOR VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v); + XMVECTOR VEqualsZero = XMVectorEqual(V, vdupq_n_f32(0)); + XMVECTOR Result = vmulq_f32(V, S1); + XMVECTOR Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero); + return XMVectorSelect(V, Result, Select); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_sqrt_ps(V); +#endif +} + +//------------------------------------------------------------------------------ + +// Computes the square root of each component of V. +inline XMVECTOR XM_CALLCONV XMVectorSqrt(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + sqrtf(V.vector4_f32[0]), + sqrtf(V.vector4_f32[1]), + sqrtf(V.vector4_f32[2]), + sqrtf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // 3 iterations of Newton-Raphson refinement of sqrt + float32x4_t S0 = vrsqrteq_f32(V); + float32x4_t P0 = vmulq_f32(V, S0); + float32x4_t R0 = vrsqrtsq_f32(P0, S0); + float32x4_t S1 = vmulq_f32(S0, R0); + float32x4_t P1 = vmulq_f32(V, S1); + float32x4_t R1 = vrsqrtsq_f32(P1, S1); + float32x4_t S2 = vmulq_f32(S1, R1); + float32x4_t P2 = vmulq_f32(V, S2); + float32x4_t R2 = vrsqrtsq_f32(P2, S2); + float32x4_t S3 = vmulq_f32(S2, R2); + + // Result = V * rsqrt(V); lanes where V equals g_XMInfinity or zero fail the Select test and keep V itself + XMVECTOR VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v); + XMVECTOR VEqualsZero = XMVectorEqual(V, vdupq_n_f32(0)); + XMVECTOR Result = vmulq_f32(V, S3); + XMVECTOR Select =
XMVectorEqualInt(VEqualsInfinity, VEqualsZero); + return XMVectorSelect(V, Result, Select); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_sqrt_ps(V); +#endif +} + +//------------------------------------------------------------------------------ + +// Estimated reciprocal square root of each component of V. The scalar path computes 1.f / sqrtf(x) directly; +// the NEON and SSE paths use the vrsqrteq_f32 / _mm_rsqrt_ps instructions with no refinement step. +inline XMVECTOR XM_CALLCONV XMVectorReciprocalSqrtEst(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + 1.f / sqrtf(V.vector4_f32[0]), + 1.f / sqrtf(V.vector4_f32[1]), + 1.f / sqrtf(V.vector4_f32[2]), + 1.f / sqrtf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vrsqrteq_f32(V); +#elif defined(_XM_SSE_INTRINSICS_) + return _mm_rsqrt_ps(V); +#endif +} + +//------------------------------------------------------------------------------ + +// Computes 1 / sqrt(V) per component. +inline XMVECTOR XM_CALLCONV XMVectorReciprocalSqrt(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + 1.f / sqrtf(V.vector4_f32[0]), + 1.f / sqrtf(V.vector4_f32[1]), + 1.f / sqrtf(V.vector4_f32[2]), + 1.f / sqrtf(V.vector4_f32[3]) + } } }; + return Result; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // 2 iterations of Newton-Raphson refinement of the reciprocal square root estimate + float32x4_t S0 = vrsqrteq_f32(V); + + float32x4_t P0 = vmulq_f32(V, S0); + float32x4_t R0 = vrsqrtsq_f32(P0, S0); + + float32x4_t S1 = vmulq_f32(S0, R0); + float32x4_t P1 = vmulq_f32(V, S1); + float32x4_t R1 = vrsqrtsq_f32(P1, S1); + + return vmulq_f32(S1, R1); +#elif defined(_XM_SSE_INTRINSICS_) + // Full-precision path: 1 / sqrt(V) via _mm_sqrt_ps and _mm_div_ps rather than _mm_rsqrt_ps + XMVECTOR vResult = _mm_sqrt_ps(V); + vResult = _mm_div_ps(g_XMOne, vResult); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +// Computes 2 raised to the power of each component of V. +// The SIMD paths split V into a truncated integer part (placed in the float exponent field) and a +// fractional part y, evaluate a polynomial in y, and then select special-case results (see table below). +inline XMVECTOR XM_CALLCONV XMVectorExp2(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + exp2f(V.vector4_f32[0]), + exp2f(V.vector4_f32[1]), + exp2f(V.vector4_f32[2]), + exp2f(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + int32x4_t itrunc =
vcvtq_s32_f32(V); + float32x4_t ftrunc = vcvtq_f32_s32(itrunc); + float32x4_t y = vsubq_f32(V, ftrunc); + + float32x4_t poly = vmlaq_f32(g_XMExpEst6, g_XMExpEst7, y); + poly = vmlaq_f32(g_XMExpEst5, poly, y); + poly = vmlaq_f32(g_XMExpEst4, poly, y); + poly = vmlaq_f32(g_XMExpEst3, poly, y); + poly = vmlaq_f32(g_XMExpEst2, poly, y); + poly = vmlaq_f32(g_XMExpEst1, poly, y); + poly = vmlaq_f32(g_XMOne, poly, y); + + int32x4_t biased = vaddq_s32(itrunc, g_XMExponentBias); + biased = vshlq_n_s32(biased, 23); + float32x4_t result0 = XMVectorDivide(vreinterpretq_f32_s32(biased), poly); + + biased = vaddq_s32(itrunc, g_XM253); + biased = vshlq_n_s32(biased, 23); + float32x4_t result1 = XMVectorDivide(vreinterpretq_f32_s32(biased), poly); + result1 = vmulq_f32(g_XMMinNormal.v, result1); + + // Use selection to handle the cases + // if (V is NaN) -> QNaN; + // else if (V sign bit set) + // if (V > -150) + // if (V.exponent < -126) -> result1 + // else -> result0 + // else -> +0 + // else + // if (V < 128) -> result0 + // else -> +inf + + uint32x4_t comp = vcltq_s32(vreinterpretq_s32_f32(V), g_XMBin128); + float32x4_t result2 = vbslq_f32(comp, result0, g_XMInfinity); + + comp = vcltq_s32(itrunc, g_XMSubnormalExponent); + float32x4_t result3 = vbslq_f32(comp, result1, result0); + + comp = vcltq_s32(vreinterpretq_s32_f32(V), g_XMBinNeg150); + float32x4_t result4 = vbslq_f32(comp, result3, g_XMZero); + + int32x4_t sign = vandq_s32(vreinterpretq_s32_f32(V), g_XMNegativeZero); + comp = vceqq_s32(sign, g_XMNegativeZero); + float32x4_t result5 = vbslq_f32(comp, result4, result2); + + // isNaN = (V & g_XMInfinity) == g_XMInfinity (t1) and (V & g_XMQNaNTest) != 0 (t0 clear) + int32x4_t t0 = vandq_s32(vreinterpretq_s32_f32(V), g_XMQNaNTest); + int32x4_t t1 = vandq_s32(vreinterpretq_s32_f32(V), g_XMInfinity); + t0 = vreinterpretq_s32_u32(vceqq_s32(t0, g_XMZero)); + t1 = vreinterpretq_s32_u32(vceqq_s32(t1, g_XMInfinity)); + int32x4_t isNaN = vbicq_s32(t1, t0); + + float32x4_t vResult = vbslq_f32(vreinterpretq_u32_s32(isNaN), g_XMQNaN, result5); + return vResult; +#elif
defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_exp2_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + __m128i itrunc = _mm_cvttps_epi32(V); + __m128 ftrunc = _mm_cvtepi32_ps(itrunc); + __m128 y = _mm_sub_ps(V, ftrunc); + + __m128 poly = XM_FMADD_PS(g_XMExpEst7, y, g_XMExpEst6); + poly = XM_FMADD_PS(poly, y, g_XMExpEst5); + poly = XM_FMADD_PS(poly, y, g_XMExpEst4); + poly = XM_FMADD_PS(poly, y, g_XMExpEst3); + poly = XM_FMADD_PS(poly, y, g_XMExpEst2); + poly = XM_FMADD_PS(poly, y, g_XMExpEst1); + poly = XM_FMADD_PS(poly, y, g_XMOne); + + __m128i biased = _mm_add_epi32(itrunc, g_XMExponentBias); + biased = _mm_slli_epi32(biased, 23); + __m128 result0 = _mm_div_ps(_mm_castsi128_ps(biased), poly); + + biased = _mm_add_epi32(itrunc, g_XM253); + biased = _mm_slli_epi32(biased, 23); + __m128 result1 = _mm_div_ps(_mm_castsi128_ps(biased), poly); + result1 = _mm_mul_ps(g_XMMinNormal.v, result1); + + // Use selection to handle the cases + // if (V is NaN) -> QNaN; + // else if (V sign bit set) + // if (V > -150) + // if (V.exponent < -126) -> result1 + // else -> result0 + // else -> +0 + // else + // if (V < 128) -> result0 + // else -> +inf + + // Each select below is (comp & a) | (~comp & b), built from and/andnot/or + __m128i comp = _mm_cmplt_epi32(_mm_castps_si128(V), g_XMBin128); + __m128i select0 = _mm_and_si128(comp, _mm_castps_si128(result0)); + __m128i select1 = _mm_andnot_si128(comp, g_XMInfinity); + __m128i result2 = _mm_or_si128(select0, select1); + + comp = _mm_cmplt_epi32(itrunc, g_XMSubnormalExponent); + select1 = _mm_and_si128(comp, _mm_castps_si128(result1)); + select0 = _mm_andnot_si128(comp, _mm_castps_si128(result0)); + __m128i result3 = _mm_or_si128(select0, select1); + + comp = _mm_cmplt_epi32(_mm_castps_si128(V), g_XMBinNeg150); + select0 = _mm_and_si128(comp, result3); + select1 = _mm_andnot_si128(comp, g_XMZero); + __m128i result4 = _mm_or_si128(select0, select1); + + __m128i sign = _mm_and_si128(_mm_castps_si128(V), g_XMNegativeZero); + comp = _mm_cmpeq_epi32(sign, g_XMNegativeZero); + select0 =
_mm_and_si128(comp, result4); + select1 = _mm_andnot_si128(comp, result2); + __m128i result5 = _mm_or_si128(select0, select1); + + __m128i t0 = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest); + __m128i t1 = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity); + t0 = _mm_cmpeq_epi32(t0, g_XMZero); + t1 = _mm_cmpeq_epi32(t1, g_XMInfinity); + __m128i isNaN = _mm_andnot_si128(t0, t1); + + select0 = _mm_and_si128(isNaN, g_XMQNaN); + select1 = _mm_andnot_si128(isNaN, result5); + __m128i vResult = _mm_or_si128(select0, select1); + + return _mm_castsi128_ps(vResult); +#endif +} + +//------------------------------------------------------------------------------ + +// Computes 10 raised to the power of each component of V. +inline XMVECTOR XM_CALLCONV XMVectorExp10(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result = { { { + powf(10.0f, V.vector4_f32[0]), + powf(10.0f, V.vector4_f32[1]), + powf(10.0f, V.vector4_f32[2]), + powf(10.0f, V.vector4_f32[3]) + } } }; + return Result.v; + +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_exp10_ps(V); + return Result; +#else + // exp10(V) = exp2(V*log2(10)) + XMVECTOR Vten = XMVectorMultiply(g_XMLg10, V); + return XMVectorExp2(Vten); +#endif +} + +//------------------------------------------------------------------------------ + +// Computes e raised to the power of each component of V. +inline XMVECTOR XM_CALLCONV XMVectorExpE(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result = { { { + expf(V.vector4_f32[0]), + expf(V.vector4_f32[1]), + expf(V.vector4_f32[2]), + expf(V.vector4_f32[3]) + } } }; + return Result.v; + +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_exp_ps(V); + return Result; +#else + // expE(V) = exp2(V*log2(e)) + XMVECTOR Ve = XMVectorMultiply(g_XMLgE, V); + return XMVectorExp2(Ve); +#endif +} + +//------------------------------------------------------------------------------ + +// Forwards to XMVectorExp2, so this returns 2^V (base 2), not e^V; use XMVectorExpE for base e. +inline XMVECTOR XM_CALLCONV XMVectorExp(FXMVECTOR V) noexcept +{ + return XMVectorExp2(V); +} +
+//------------------------------------------------------------------------------ + +#if defined(_XM_SSE_INTRINSICS_) + +namespace Internal +{ + // Per-lane variable shift left: lane i of value is shifted by lane i of count. + // Each lane is broadcast and shifted with its own _mm_sll_epi32, then the four results are recombined. + inline __m128i multi_sll_epi32(__m128i value, __m128i count) noexcept + { + __m128i v = _mm_shuffle_epi32(value, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i c = _mm_shuffle_epi32(count, _MM_SHUFFLE(0, 0, 0, 0)); + c = _mm_and_si128(c, g_XMMaskX); + __m128i r0 = _mm_sll_epi32(v, c); + + v = _mm_shuffle_epi32(value, _MM_SHUFFLE(1, 1, 1, 1)); + c = _mm_shuffle_epi32(count, _MM_SHUFFLE(1, 1, 1, 1)); + c = _mm_and_si128(c, g_XMMaskX); + __m128i r1 = _mm_sll_epi32(v, c); + + v = _mm_shuffle_epi32(value, _MM_SHUFFLE(2, 2, 2, 2)); + c = _mm_shuffle_epi32(count, _MM_SHUFFLE(2, 2, 2, 2)); + c = _mm_and_si128(c, g_XMMaskX); + __m128i r2 = _mm_sll_epi32(v, c); + + v = _mm_shuffle_epi32(value, _MM_SHUFFLE(3, 3, 3, 3)); + c = _mm_shuffle_epi32(count, _MM_SHUFFLE(3, 3, 3, 3)); + c = _mm_and_si128(c, g_XMMaskX); + __m128i r3 = _mm_sll_epi32(v, c); + + // (r0,r0,r1,r1) + __m128 r01 = _mm_shuffle_ps(_mm_castsi128_ps(r0), _mm_castsi128_ps(r1), _MM_SHUFFLE(0, 0, 0, 0)); + // (r2,r2,r3,r3) + __m128 r23 = _mm_shuffle_ps(_mm_castsi128_ps(r2), _mm_castsi128_ps(r3), _MM_SHUFFLE(0, 0, 0, 0)); + // (r0,r1,r2,r3) + __m128 result = _mm_shuffle_ps(r01, r23, _MM_SHUFFLE(2, 0, 2, 0)); + return _mm_castps_si128(result); + } + + // Per-lane variable logical shift right: lane i of value is shifted by lane i of count. + // Same structure as multi_sll_epi32, using _mm_srl_epi32. + inline __m128i multi_srl_epi32(__m128i value, __m128i count) noexcept + { + __m128i v = _mm_shuffle_epi32(value, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i c = _mm_shuffle_epi32(count, _MM_SHUFFLE(0, 0, 0, 0)); + c = _mm_and_si128(c, g_XMMaskX); + __m128i r0 = _mm_srl_epi32(v, c); + + v = _mm_shuffle_epi32(value, _MM_SHUFFLE(1, 1, 1, 1)); + c = _mm_shuffle_epi32(count, _MM_SHUFFLE(1, 1, 1, 1)); + c = _mm_and_si128(c, g_XMMaskX); + __m128i r1 = _mm_srl_epi32(v, c); + + v = _mm_shuffle_epi32(value, _MM_SHUFFLE(2, 2, 2, 2)); + c = _mm_shuffle_epi32(count, _MM_SHUFFLE(2, 2, 2, 2)); + c = _mm_and_si128(c, g_XMMaskX); + __m128i r2 = _mm_srl_epi32(v,
c); + + v = _mm_shuffle_epi32(value, _MM_SHUFFLE(3, 3, 3, 3)); + c = _mm_shuffle_epi32(count, _MM_SHUFFLE(3, 3, 3, 3)); + c = _mm_and_si128(c, g_XMMaskX); + __m128i r3 = _mm_srl_epi32(v, c); + + // (r0,r0,r1,r1) + __m128 r01 = _mm_shuffle_ps(_mm_castsi128_ps(r0), _mm_castsi128_ps(r1), _MM_SHUFFLE(0, 0, 0, 0)); + // (r2,r2,r3,r3) + __m128 r23 = _mm_shuffle_ps(_mm_castsi128_ps(r2), _mm_castsi128_ps(r3), _MM_SHUFFLE(0, 0, 0, 0)); + // (r0,r1,r2,r3) + __m128 result = _mm_shuffle_ps(r01, r23, _MM_SHUFFLE(2, 0, 2, 0)); + return _mm_castps_si128(result); + } + + // Per 32-bit lane, narrows down the index of the highest set bit: the value is compared against + // 0xFFFF, 0xFF, 0xF and 0x3 in turn, shifted right by 16/8/4/2 when larger, and the shift amounts are OR-ed into r. + inline __m128i GetLeadingBit(const __m128i value) noexcept + { + static const XMVECTORI32 g_XM0000FFFF = { { { 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF } } }; + static const XMVECTORI32 g_XM000000FF = { { { 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF } } }; + static const XMVECTORI32 g_XM0000000F = { { { 0x0000000F, 0x0000000F, 0x0000000F, 0x0000000F } } }; + static const XMVECTORI32 g_XM00000003 = { { { 0x00000003, 0x00000003, 0x00000003, 0x00000003 } } }; + + __m128i v = value, r, c, b, s; + + c = _mm_cmpgt_epi32(v, g_XM0000FFFF); // c = (v > 0xFFFF) + b = _mm_srli_epi32(c, 31); // b = (c ? 1 : 0) + r = _mm_slli_epi32(b, 4); // r = (b << 4) + v = multi_srl_epi32(v, r); // v = (v >> r) + + c = _mm_cmpgt_epi32(v, g_XM000000FF); // c = (v > 0xFF) + b = _mm_srli_epi32(c, 31); // b = (c ? 1 : 0) + s = _mm_slli_epi32(b, 3); // s = (b << 3) + v = multi_srl_epi32(v, s); // v = (v >> s) + r = _mm_or_si128(r, s); // r = (r | s) + + c = _mm_cmpgt_epi32(v, g_XM0000000F); // c = (v > 0xF) + b = _mm_srli_epi32(c, 31); // b = (c ? 1 : 0) + s = _mm_slli_epi32(b, 2); // s = (b << 2) + v = multi_srl_epi32(v, s); // v = (v >> s) + r = _mm_or_si128(r, s); // r = (r | s) + + c = _mm_cmpgt_epi32(v, g_XM00000003); // c = (v > 0x3) + b = _mm_srli_epi32(c, 31); // b = (c ?
1 : 0) + s = _mm_slli_epi32(b, 1); // s = (b << 1) + v = multi_srl_epi32(v, s); // v = (v >> s) + r = _mm_or_si128(r, s); // r = (r | s) + + // Last step: fold (v >> 1) into the accumulated index + s = _mm_srli_epi32(v, 1); + r = _mm_or_si128(r, s); + return r; + } +} // namespace Internal + +#endif // _XM_SSE_INTRINSICS_ + +#if defined(_XM_ARM_NEON_INTRINSICS_) + +namespace Internal +{ + // NEON counterpart of the SSE GetLeadingBit above: same 0xFFFF / 0xFF / 0xF / 0x3 narrowing, with the + // right shifts expressed as vshlq_s32 by a negated count. + // NOTE(review): vshrq_n_s32 is a signed (arithmetic) shift, so an all-ones compare mask looks like it yields + // b = -1 rather than the 1 the trailing comments state (the SSE version uses the logical _mm_srli_epi32). + // That would flip the sign of every count passed to vshlq_s32 -- confirm against the NEON intrinsics reference. + inline int32x4_t GetLeadingBit(const int32x4_t value) noexcept + { + static const XMVECTORI32 g_XM0000FFFF = { { { 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF } } }; + static const XMVECTORI32 g_XM000000FF = { { { 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF } } }; + static const XMVECTORI32 g_XM0000000F = { { { 0x0000000F, 0x0000000F, 0x0000000F, 0x0000000F } } }; + static const XMVECTORI32 g_XM00000003 = { { { 0x00000003, 0x00000003, 0x00000003, 0x00000003 } } }; + + uint32x4_t c = vcgtq_s32(value, g_XM0000FFFF); // c = (v > 0xFFFF) + int32x4_t b = vshrq_n_s32(vreinterpretq_s32_u32(c), 31); // b = (c ? 1 : 0) + int32x4_t r = vshlq_n_s32(b, 4); // r = (b << 4) + r = vnegq_s32(r); + int32x4_t v = vshlq_s32(value, r); // v = (v >> r) + + c = vcgtq_s32(v, g_XM000000FF); // c = (v > 0xFF) + b = vshrq_n_s32(vreinterpretq_s32_u32(c), 31); // b = (c ? 1 : 0) + int32x4_t s = vshlq_n_s32(b, 3); // s = (b << 3) + s = vnegq_s32(s); + v = vshlq_s32(v, s); // v = (v >> s) + r = vorrq_s32(r, s); // r = (r | s) + + c = vcgtq_s32(v, g_XM0000000F); // c = (v > 0xF) + b = vshrq_n_s32(vreinterpretq_s32_u32(c), 31); // b = (c ? 1 : 0) + s = vshlq_n_s32(b, 2); // s = (b << 2) + s = vnegq_s32(s); + v = vshlq_s32(v, s); // v = (v >> s) + r = vorrq_s32(r, s); // r = (r | s) + + c = vcgtq_s32(v, g_XM00000003); // c = (v > 0x3) + b = vshrq_n_s32(vreinterpretq_s32_u32(c), 31); // b = (c ?
1 : 0) + s = vshlq_n_s32(b, 1); // s = (b << 1) + s = vnegq_s32(s); + v = vshlq_s32(v, s); // v = (v >> s) + r = vorrq_s32(r, s); // r = (r | s) + + // Last step: fold (v >> 1) into the accumulated index + s = vshrq_n_s32(v, 1); + r = vorrq_s32(r, s); + return r; + } + +} // namespace Internal + +#endif + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorLog2(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + log2f(V.vector4_f32[0]), + log2f(V.vector4_f32[1]), + log2f(V.vector4_f32[2]), + log2f(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + int32x4_t rawBiased = vandq_s32(vreinterpretq_s32_f32(V), g_XMInfinity); + int32x4_t trailing = vandq_s32(vreinterpretq_s32_f32(V), g_XMQNaNTest); + uint32x4_t isExponentZero = vceqq_s32(vreinterpretq_s32_f32(g_XMZero), rawBiased); + + // Compute exponent and significand for normals. + int32x4_t biased = vshrq_n_s32(rawBiased, 23); + int32x4_t exponentNor = vsubq_s32(biased, g_XMExponentBias); + int32x4_t trailingNor = trailing; + + // Compute exponent and significand for subnormals. + int32x4_t leading = Internal::GetLeadingBit(trailing); + int32x4_t shift = vsubq_s32(g_XMNumTrailing, leading); + int32x4_t exponentSub = vsubq_s32(g_XMSubnormalExponent, shift); + int32x4_t trailingSub = vshlq_s32(trailing, shift); + trailingSub = vandq_s32(trailingSub, g_XMQNaNTest); + int32x4_t e = vbslq_s32(isExponentZero, exponentSub, exponentNor); + int32x4_t t = vbslq_s32(isExponentZero, trailingSub, trailingNor); + + // Compute the approximation.
+ int32x4_t tmp = vorrq_s32(vreinterpretq_s32_f32(g_XMOne), t); + float32x4_t y = vsubq_f32(vreinterpretq_f32_s32(tmp), g_XMOne); + + float32x4_t log2 = vmlaq_f32(g_XMLogEst6, g_XMLogEst7, y); + log2 = vmlaq_f32(g_XMLogEst5, log2, y); + log2 = vmlaq_f32(g_XMLogEst4, log2, y); + log2 = vmlaq_f32(g_XMLogEst3, log2, y); + log2 = vmlaq_f32(g_XMLogEst2, log2, y); + log2 = vmlaq_f32(g_XMLogEst1, log2, y); + log2 = vmlaq_f32(g_XMLogEst0, log2, y); + log2 = vmlaq_f32(vcvtq_f32_s32(e), log2, y); + + // if (x is NaN) -> QNaN + // else if (V is positive) + // if (V is infinite) -> +inf + // else -> log2(V) + // else + // if (V is zero) -> -inf + // else -> -QNaN + + uint32x4_t isInfinite = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); + isInfinite = vceqq_u32(isInfinite, g_XMInfinity); + + uint32x4_t isGreaterZero = vcgtq_f32(V, g_XMZero); + uint32x4_t isNotFinite = vcgtq_f32(V, g_XMInfinity); + uint32x4_t isPositive = vbicq_u32(isGreaterZero, isNotFinite); + + uint32x4_t isZero = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); + isZero = vceqq_u32(isZero, g_XMZero); + + uint32x4_t t0 = vandq_u32(vreinterpretq_u32_f32(V), g_XMQNaNTest); + uint32x4_t t1 = vandq_u32(vreinterpretq_u32_f32(V), g_XMInfinity); + t0 = vceqq_u32(t0, g_XMZero); + t1 = vceqq_u32(t1, g_XMInfinity); + uint32x4_t isNaN = vbicq_u32(t1, t0); + + float32x4_t result = vbslq_f32(isInfinite, g_XMInfinity, log2); + float32x4_t tmp2 = vbslq_f32(isZero, g_XMNegInfinity, g_XMNegQNaN); + result = vbslq_f32(isPositive, result, tmp2); + result = vbslq_f32(isNaN, g_XMQNaN, result); + return result; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_log2_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + __m128i rawBiased = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity); + __m128i trailing = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest); + __m128i isExponentZero = _mm_cmpeq_epi32(g_XMZero, rawBiased); + + // Compute exponent and significand for normals. 
+ __m128i biased = _mm_srli_epi32(rawBiased, 23); + __m128i exponentNor = _mm_sub_epi32(biased, g_XMExponentBias); + __m128i trailingNor = trailing; + + // Compute exponent and significand for subnormals. + __m128i leading = Internal::GetLeadingBit(trailing); + __m128i shift = _mm_sub_epi32(g_XMNumTrailing, leading); + __m128i exponentSub = _mm_sub_epi32(g_XMSubnormalExponent, shift); + __m128i trailingSub = Internal::multi_sll_epi32(trailing, shift); + trailingSub = _mm_and_si128(trailingSub, g_XMQNaNTest); + + __m128i select0 = _mm_and_si128(isExponentZero, exponentSub); + __m128i select1 = _mm_andnot_si128(isExponentZero, exponentNor); + __m128i e = _mm_or_si128(select0, select1); + + select0 = _mm_and_si128(isExponentZero, trailingSub); + select1 = _mm_andnot_si128(isExponentZero, trailingNor); + __m128i t = _mm_or_si128(select0, select1); + + // Compute the approximation. + __m128i tmp = _mm_or_si128(g_XMOne, t); + __m128 y = _mm_sub_ps(_mm_castsi128_ps(tmp), g_XMOne); + + __m128 log2 = XM_FMADD_PS(g_XMLogEst7, y, g_XMLogEst6); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst5); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst4); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst3); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst2); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst1); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst0); + log2 = XM_FMADD_PS(log2, y, _mm_cvtepi32_ps(e)); + + // if (x is NaN) -> QNaN + // else if (V is positive) + // if (V is infinite) -> +inf + // else -> log2(V) + // else + // if (V is zero) -> -inf + // else -> -QNaN + + __m128i isInfinite = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); + isInfinite = _mm_cmpeq_epi32(isInfinite, g_XMInfinity); + + __m128i isGreaterZero = _mm_cmpgt_epi32(_mm_castps_si128(V), g_XMZero); + __m128i isNotFinite = _mm_cmpgt_epi32(_mm_castps_si128(V), g_XMInfinity); + __m128i isPositive = _mm_andnot_si128(isNotFinite, isGreaterZero); + + __m128i isZero = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); + isZero = _mm_cmpeq_epi32(isZero, 
g_XMZero); + + __m128i t0 = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest); + __m128i t1 = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity); + t0 = _mm_cmpeq_epi32(t0, g_XMZero); + t1 = _mm_cmpeq_epi32(t1, g_XMInfinity); + __m128i isNaN = _mm_andnot_si128(t0, t1); + + select0 = _mm_and_si128(isInfinite, g_XMInfinity); + select1 = _mm_andnot_si128(isInfinite, _mm_castps_si128(log2)); + __m128i result = _mm_or_si128(select0, select1); + + select0 = _mm_and_si128(isZero, g_XMNegInfinity); + select1 = _mm_andnot_si128(isZero, g_XMNegQNaN); + tmp = _mm_or_si128(select0, select1); + + select0 = _mm_and_si128(isPositive, result); + select1 = _mm_andnot_si128(isPositive, tmp); + result = _mm_or_si128(select0, select1); + + select0 = _mm_and_si128(isNaN, g_XMQNaN); + select1 = _mm_andnot_si128(isNaN, result); + result = _mm_or_si128(select0, select1); + + return _mm_castsi128_ps(result); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorLog10(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result = { { { + log10f(V.vector4_f32[0]), + log10f(V.vector4_f32[1]), + log10f(V.vector4_f32[2]), + log10f(V.vector4_f32[3]) + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + int32x4_t rawBiased = vandq_s32(vreinterpretq_s32_f32(V), g_XMInfinity); + int32x4_t trailing = vandq_s32(vreinterpretq_s32_f32(V), g_XMQNaNTest); + uint32x4_t isExponentZero = vceqq_s32(g_XMZero, rawBiased); + + // Compute exponent and significand for normals. + int32x4_t biased = vshrq_n_s32(rawBiased, 23); + int32x4_t exponentNor = vsubq_s32(biased, g_XMExponentBias); + int32x4_t trailingNor = trailing; + + // Compute exponent and significand for subnormals. 
+ int32x4_t leading = Internal::GetLeadingBit(trailing); + int32x4_t shift = vsubq_s32(g_XMNumTrailing, leading); + int32x4_t exponentSub = vsubq_s32(g_XMSubnormalExponent, shift); + int32x4_t trailingSub = vshlq_s32(trailing, shift); + trailingSub = vandq_s32(trailingSub, g_XMQNaNTest); + int32x4_t e = vbslq_s32(isExponentZero, exponentSub, exponentNor); + int32x4_t t = vbslq_s32(isExponentZero, trailingSub, trailingNor); + + // Compute the approximation. + int32x4_t tmp = vorrq_s32(g_XMOne, t); + float32x4_t y = vsubq_f32(vreinterpretq_f32_s32(tmp), g_XMOne); + + float32x4_t log2 = vmlaq_f32(g_XMLogEst6, g_XMLogEst7, y); + log2 = vmlaq_f32(g_XMLogEst5, log2, y); + log2 = vmlaq_f32(g_XMLogEst4, log2, y); + log2 = vmlaq_f32(g_XMLogEst3, log2, y); + log2 = vmlaq_f32(g_XMLogEst2, log2, y); + log2 = vmlaq_f32(g_XMLogEst1, log2, y); + log2 = vmlaq_f32(g_XMLogEst0, log2, y); + log2 = vmlaq_f32(vcvtq_f32_s32(e), log2, y); + + log2 = vmulq_f32(g_XMInvLg10, log2); + + // if (x is NaN) -> QNaN + // else if (V is positive) + // if (V is infinite) -> +inf + // else -> log2(V) + // else + // if (V is zero) -> -inf + // else -> -QNaN + + uint32x4_t isInfinite = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); + isInfinite = vceqq_u32(isInfinite, g_XMInfinity); + + uint32x4_t isGreaterZero = vcgtq_s32(vreinterpretq_s32_f32(V), g_XMZero); + uint32x4_t isNotFinite = vcgtq_s32(vreinterpretq_s32_f32(V), g_XMInfinity); + uint32x4_t isPositive = vbicq_u32(isGreaterZero, isNotFinite); + + uint32x4_t isZero = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); + isZero = vceqq_u32(isZero, g_XMZero); + + uint32x4_t t0 = vandq_u32(vreinterpretq_u32_f32(V), g_XMQNaNTest); + uint32x4_t t1 = vandq_u32(vreinterpretq_u32_f32(V), g_XMInfinity); + t0 = vceqq_u32(t0, g_XMZero); + t1 = vceqq_u32(t1, g_XMInfinity); + uint32x4_t isNaN = vbicq_u32(t1, t0); + + float32x4_t result = vbslq_f32(isInfinite, g_XMInfinity, log2); + float32x4_t tmp2 = vbslq_f32(isZero, g_XMNegInfinity, g_XMNegQNaN); + result 
= vbslq_f32(isPositive, result, tmp2); + result = vbslq_f32(isNaN, g_XMQNaN, result); + return result; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_log10_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + __m128i rawBiased = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity); + __m128i trailing = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest); + __m128i isExponentZero = _mm_cmpeq_epi32(g_XMZero, rawBiased); + + // Compute exponent and significand for normals. + __m128i biased = _mm_srli_epi32(rawBiased, 23); + __m128i exponentNor = _mm_sub_epi32(biased, g_XMExponentBias); + __m128i trailingNor = trailing; + + // Compute exponent and significand for subnormals. + __m128i leading = Internal::GetLeadingBit(trailing); + __m128i shift = _mm_sub_epi32(g_XMNumTrailing, leading); + __m128i exponentSub = _mm_sub_epi32(g_XMSubnormalExponent, shift); + __m128i trailingSub = Internal::multi_sll_epi32(trailing, shift); + trailingSub = _mm_and_si128(trailingSub, g_XMQNaNTest); + + __m128i select0 = _mm_and_si128(isExponentZero, exponentSub); + __m128i select1 = _mm_andnot_si128(isExponentZero, exponentNor); + __m128i e = _mm_or_si128(select0, select1); + + select0 = _mm_and_si128(isExponentZero, trailingSub); + select1 = _mm_andnot_si128(isExponentZero, trailingNor); + __m128i t = _mm_or_si128(select0, select1); + + // Compute the approximation. 
+ __m128i tmp = _mm_or_si128(g_XMOne, t); + __m128 y = _mm_sub_ps(_mm_castsi128_ps(tmp), g_XMOne); + + __m128 log2 = XM_FMADD_PS(g_XMLogEst7, y, g_XMLogEst6); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst5); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst4); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst3); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst2); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst1); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst0); + log2 = XM_FMADD_PS(log2, y, _mm_cvtepi32_ps(e)); + + log2 = _mm_mul_ps(g_XMInvLg10, log2); + + // if (x is NaN) -> QNaN + // else if (V is positive) + // if (V is infinite) -> +inf + // else -> log2(V) + // else + // if (V is zero) -> -inf + // else -> -QNaN + + __m128i isInfinite = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); + isInfinite = _mm_cmpeq_epi32(isInfinite, g_XMInfinity); + + __m128i isGreaterZero = _mm_cmpgt_epi32(_mm_castps_si128(V), g_XMZero); + __m128i isNotFinite = _mm_cmpgt_epi32(_mm_castps_si128(V), g_XMInfinity); + __m128i isPositive = _mm_andnot_si128(isNotFinite, isGreaterZero); + + __m128i isZero = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); + isZero = _mm_cmpeq_epi32(isZero, g_XMZero); + + __m128i t0 = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest); + __m128i t1 = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity); + t0 = _mm_cmpeq_epi32(t0, g_XMZero); + t1 = _mm_cmpeq_epi32(t1, g_XMInfinity); + __m128i isNaN = _mm_andnot_si128(t0, t1); + + select0 = _mm_and_si128(isInfinite, g_XMInfinity); + select1 = _mm_andnot_si128(isInfinite, _mm_castps_si128(log2)); + __m128i result = _mm_or_si128(select0, select1); + + select0 = _mm_and_si128(isZero, g_XMNegInfinity); + select1 = _mm_andnot_si128(isZero, g_XMNegQNaN); + tmp = _mm_or_si128(select0, select1); + + select0 = _mm_and_si128(isPositive, result); + select1 = _mm_andnot_si128(isPositive, tmp); + result = _mm_or_si128(select0, select1); + + select0 = _mm_and_si128(isNaN, g_XMQNaN); + select1 = _mm_andnot_si128(isNaN, result); + result = _mm_or_si128(select0, select1); 
+ + return _mm_castsi128_ps(result); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorLogE(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result = { { { + logf(V.vector4_f32[0]), + logf(V.vector4_f32[1]), + logf(V.vector4_f32[2]), + logf(V.vector4_f32[3]) + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + int32x4_t rawBiased = vandq_s32(vreinterpretq_s32_f32(V), g_XMInfinity); + int32x4_t trailing = vandq_s32(vreinterpretq_s32_f32(V), g_XMQNaNTest); + uint32x4_t isExponentZero = vceqq_s32(g_XMZero, rawBiased); + + // Compute exponent and significand for normals. + int32x4_t biased = vshrq_n_s32(rawBiased, 23); + int32x4_t exponentNor = vsubq_s32(biased, g_XMExponentBias); + int32x4_t trailingNor = trailing; + + // Compute exponent and significand for subnormals. + int32x4_t leading = Internal::GetLeadingBit(trailing); + int32x4_t shift = vsubq_s32(g_XMNumTrailing, leading); + int32x4_t exponentSub = vsubq_s32(g_XMSubnormalExponent, shift); + int32x4_t trailingSub = vshlq_s32(trailing, shift); + trailingSub = vandq_s32(trailingSub, g_XMQNaNTest); + int32x4_t e = vbslq_s32(isExponentZero, exponentSub, exponentNor); + int32x4_t t = vbslq_s32(isExponentZero, trailingSub, trailingNor); + + // Compute the approximation. 
+ int32x4_t tmp = vorrq_s32(g_XMOne, t); + float32x4_t y = vsubq_f32(vreinterpretq_f32_s32(tmp), g_XMOne); + + float32x4_t log2 = vmlaq_f32(g_XMLogEst6, g_XMLogEst7, y); + log2 = vmlaq_f32(g_XMLogEst5, log2, y); + log2 = vmlaq_f32(g_XMLogEst4, log2, y); + log2 = vmlaq_f32(g_XMLogEst3, log2, y); + log2 = vmlaq_f32(g_XMLogEst2, log2, y); + log2 = vmlaq_f32(g_XMLogEst1, log2, y); + log2 = vmlaq_f32(g_XMLogEst0, log2, y); + log2 = vmlaq_f32(vcvtq_f32_s32(e), log2, y); + + log2 = vmulq_f32(g_XMInvLgE, log2); + + // if (x is NaN) -> QNaN + // else if (V is positive) + // if (V is infinite) -> +inf + // else -> log2(V) + // else + // if (V is zero) -> -inf + // else -> -QNaN + + uint32x4_t isInfinite = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); + isInfinite = vceqq_u32(isInfinite, g_XMInfinity); + + uint32x4_t isGreaterZero = vcgtq_s32(vreinterpretq_s32_f32(V), g_XMZero); + uint32x4_t isNotFinite = vcgtq_s32(vreinterpretq_s32_f32(V), g_XMInfinity); + uint32x4_t isPositive = vbicq_u32(isGreaterZero, isNotFinite); + + uint32x4_t isZero = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); + isZero = vceqq_u32(isZero, g_XMZero); + + uint32x4_t t0 = vandq_u32(vreinterpretq_u32_f32(V), g_XMQNaNTest); + uint32x4_t t1 = vandq_u32(vreinterpretq_u32_f32(V), g_XMInfinity); + t0 = vceqq_u32(t0, g_XMZero); + t1 = vceqq_u32(t1, g_XMInfinity); + uint32x4_t isNaN = vbicq_u32(t1, t0); + + float32x4_t result = vbslq_f32(isInfinite, g_XMInfinity, log2); + float32x4_t tmp2 = vbslq_f32(isZero, g_XMNegInfinity, g_XMNegQNaN); + result = vbslq_f32(isPositive, result, tmp2); + result = vbslq_f32(isNaN, g_XMQNaN, result); + return result; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_log_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + __m128i rawBiased = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity); + __m128i trailing = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest); + __m128i isExponentZero = _mm_cmpeq_epi32(g_XMZero, rawBiased); + + // Compute exponent 
and significand for normals. + __m128i biased = _mm_srli_epi32(rawBiased, 23); + __m128i exponentNor = _mm_sub_epi32(biased, g_XMExponentBias); + __m128i trailingNor = trailing; + + // Compute exponent and significand for subnormals. + __m128i leading = Internal::GetLeadingBit(trailing); + __m128i shift = _mm_sub_epi32(g_XMNumTrailing, leading); + __m128i exponentSub = _mm_sub_epi32(g_XMSubnormalExponent, shift); + __m128i trailingSub = Internal::multi_sll_epi32(trailing, shift); + trailingSub = _mm_and_si128(trailingSub, g_XMQNaNTest); + + __m128i select0 = _mm_and_si128(isExponentZero, exponentSub); + __m128i select1 = _mm_andnot_si128(isExponentZero, exponentNor); + __m128i e = _mm_or_si128(select0, select1); + + select0 = _mm_and_si128(isExponentZero, trailingSub); + select1 = _mm_andnot_si128(isExponentZero, trailingNor); + __m128i t = _mm_or_si128(select0, select1); + + // Compute the approximation. + __m128i tmp = _mm_or_si128(g_XMOne, t); + __m128 y = _mm_sub_ps(_mm_castsi128_ps(tmp), g_XMOne); + + __m128 log2 = XM_FMADD_PS(g_XMLogEst7, y, g_XMLogEst6); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst5); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst4); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst3); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst2); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst1); + log2 = XM_FMADD_PS(log2, y, g_XMLogEst0); + log2 = XM_FMADD_PS(log2, y, _mm_cvtepi32_ps(e)); + + log2 = _mm_mul_ps(g_XMInvLgE, log2); + + // if (x is NaN) -> QNaN + // else if (V is positive) + // if (V is infinite) -> +inf + // else -> log2(V) + // else + // if (V is zero) -> -inf + // else -> -QNaN + + __m128i isInfinite = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); + isInfinite = _mm_cmpeq_epi32(isInfinite, g_XMInfinity); + + __m128i isGreaterZero = _mm_cmpgt_epi32(_mm_castps_si128(V), g_XMZero); + __m128i isNotFinite = _mm_cmpgt_epi32(_mm_castps_si128(V), g_XMInfinity); + __m128i isPositive = _mm_andnot_si128(isNotFinite, isGreaterZero); + + __m128i isZero = 
_mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); + isZero = _mm_cmpeq_epi32(isZero, g_XMZero); + + __m128i t0 = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest); + __m128i t1 = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity); + t0 = _mm_cmpeq_epi32(t0, g_XMZero); + t1 = _mm_cmpeq_epi32(t1, g_XMInfinity); + __m128i isNaN = _mm_andnot_si128(t0, t1); + + select0 = _mm_and_si128(isInfinite, g_XMInfinity); + select1 = _mm_andnot_si128(isInfinite, _mm_castps_si128(log2)); + __m128i result = _mm_or_si128(select0, select1); + + select0 = _mm_and_si128(isZero, g_XMNegInfinity); + select1 = _mm_andnot_si128(isZero, g_XMNegQNaN); + tmp = _mm_or_si128(select0, select1); + + select0 = _mm_and_si128(isPositive, result); + select1 = _mm_andnot_si128(isPositive, tmp); + result = _mm_or_si128(select0, select1); + + select0 = _mm_and_si128(isNaN, g_XMQNaN); + select1 = _mm_andnot_si128(isNaN, result); + result = _mm_or_si128(select0, select1); + + return _mm_castsi128_ps(result); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorLog(FXMVECTOR V) noexcept +{ + return XMVectorLog2(V); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorPow +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result = { { { + powf(V1.vector4_f32[0], V2.vector4_f32[0]), + powf(V1.vector4_f32[1], V2.vector4_f32[1]), + powf(V1.vector4_f32[2], V2.vector4_f32[2]), + powf(V1.vector4_f32[3], V2.vector4_f32[3]) + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTORF32 vResult = { { { + powf(vgetq_lane_f32(V1, 0), vgetq_lane_f32(V2, 0)), + powf(vgetq_lane_f32(V1, 1), vgetq_lane_f32(V2, 1)), + powf(vgetq_lane_f32(V1, 2), vgetq_lane_f32(V2, 2)), + powf(vgetq_lane_f32(V1, 3), vgetq_lane_f32(V2, 3)) + } } }; + return vResult.v; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = 
_mm_pow_ps(V1, V2); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + XM_ALIGNED_DATA(16) float a[4]; + XM_ALIGNED_DATA(16) float b[4]; + _mm_store_ps(a, V1); + _mm_store_ps(b, V2); + XMVECTOR vResult = _mm_setr_ps( + powf(a[0], b[0]), + powf(a[1], b[1]), + powf(a[2], b[2]), + powf(a[3], b[3])); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorAbs(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + fabsf(V.vector4_f32[0]), + fabsf(V.vector4_f32[1]), + fabsf(V.vector4_f32[2]), + fabsf(V.vector4_f32[3]) + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + return vabsq_f32(V); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = _mm_setzero_ps(); + vResult = _mm_sub_ps(vResult, V); + vResult = _mm_max_ps(vResult, V); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorMod +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ + // V1 % V2 = V1 - V2 * truncate(V1 / V2) + +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Quotient = XMVectorDivide(V1, V2); + Quotient = XMVectorTruncate(Quotient); + XMVECTOR Result = XMVectorNegativeMultiplySubtract(V2, Quotient, V1); + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTOR vResult = XMVectorDivide(V1, V2); + vResult = XMVectorTruncate(vResult); + return vmlsq_f32(V1, vResult, V2); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = _mm_div_ps(V1, V2); + vResult = XMVectorTruncate(vResult); + return XM_FNMADD_PS(vResult, V2, V1); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorModAngles(FXMVECTOR Angles) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR V; + XMVECTOR Result; + + // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI + V 
= XMVectorMultiply(Angles, g_XMReciprocalTwoPi.v); + V = XMVectorRound(V); + Result = XMVectorNegativeMultiplySubtract(g_XMTwoPi.v, V, Angles); + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI + XMVECTOR vResult = vmulq_f32(Angles, g_XMReciprocalTwoPi); + // Use the inline function due to complexity for rounding + vResult = XMVectorRound(vResult); + return vmlsq_f32(Angles, vResult, g_XMTwoPi); +#elif defined(_XM_SSE_INTRINSICS_) + // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI + XMVECTOR vResult = _mm_mul_ps(Angles, g_XMReciprocalTwoPi); + // Use the inline function due to complexity for rounding + vResult = XMVectorRound(vResult); + return XM_FNMADD_PS(vResult, g_XMTwoPi, Angles); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorSin(FXMVECTOR V) noexcept +{ + // 11-degree minimax approximation + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + sinf(V.vector4_f32[0]), + sinf(V.vector4_f32[1]), + sinf(V.vector4_f32[2]), + sinf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Force the value within the bounds of pi + XMVECTOR x = XMVectorModAngles(V); + + // Map in [-pi/2,pi/2] with sin(y) = sin(x). 
+ uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(x), g_XMNegativeZero); + uint32x4_t c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 + float32x4_t absx = vabsq_f32(x); + float32x4_t rflx = vsubq_f32(vreinterpretq_f32_u32(c), x); + uint32x4_t comp = vcleq_f32(absx, g_XMHalfPi); + x = vbslq_f32(comp, x, rflx); + + float32x4_t x2 = vmulq_f32(x, x); + + // Compute polynomial approximation + const XMVECTOR SC1 = g_XMSinCoefficients1; + const XMVECTOR SC0 = g_XMSinCoefficients0; + XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(SC0), 1); + XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_low_f32(SC1), 0); + + vConstants = vdupq_lane_f32(vget_high_f32(SC0), 0); + Result = vmlaq_f32(vConstants, Result, x2); + + vConstants = vdupq_lane_f32(vget_low_f32(SC0), 1); + Result = vmlaq_f32(vConstants, Result, x2); + + vConstants = vdupq_lane_f32(vget_low_f32(SC0), 0); + Result = vmlaq_f32(vConstants, Result, x2); + + Result = vmlaq_f32(g_XMOne, Result, x2); + Result = vmulq_f32(Result, x); + return Result; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_sin_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + // Force the value within the bounds of pi + XMVECTOR x = XMVectorModAngles(V); + + // Map in [-pi/2,pi/2] with sin(y) = sin(x). 
+ __m128 sign = _mm_and_ps(x, g_XMNegativeZero); + __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 + __m128 absx = _mm_andnot_ps(sign, x); // |x| + __m128 rflx = _mm_sub_ps(c, x); + __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi); + __m128 select0 = _mm_and_ps(comp, x); + __m128 select1 = _mm_andnot_ps(comp, rflx); + x = _mm_or_ps(select0, select1); + + __m128 x2 = _mm_mul_ps(x, x); + + // Compute polynomial approximation + const XMVECTOR SC1 = g_XMSinCoefficients1; + __m128 vConstantsB = XM_PERMUTE_PS(SC1, _MM_SHUFFLE(0, 0, 0, 0)); + const XMVECTOR SC0 = g_XMSinCoefficients0; + __m128 vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); + + vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(2, 2, 2, 2)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(1, 1, 1, 1)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(0, 0, 0, 0)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + Result = XM_FMADD_PS(Result, x2, g_XMOne); + Result = _mm_mul_ps(Result, x); + return Result; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorCos(FXMVECTOR V) noexcept +{ + // 10-degree minimax approximation + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + cosf(V.vector4_f32[0]), + cosf(V.vector4_f32[1]), + cosf(V.vector4_f32[2]), + cosf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Map V to x in [-pi,pi]. + XMVECTOR x = XMVectorModAngles(V); + + // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x). 
+ uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(x), g_XMNegativeZero); + uint32x4_t c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 + float32x4_t absx = vabsq_f32(x); + float32x4_t rflx = vsubq_f32(vreinterpretq_f32_u32(c), x); + uint32x4_t comp = vcleq_f32(absx, g_XMHalfPi); + x = vbslq_f32(comp, x, rflx); + float32x4_t fsign = vbslq_f32(comp, g_XMOne, g_XMNegativeOne); + + float32x4_t x2 = vmulq_f32(x, x); + + // Compute polynomial approximation + const XMVECTOR CC1 = g_XMCosCoefficients1; + const XMVECTOR CC0 = g_XMCosCoefficients0; + XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(CC0), 1); + XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_low_f32(CC1), 0); + + vConstants = vdupq_lane_f32(vget_high_f32(CC0), 0); + Result = vmlaq_f32(vConstants, Result, x2); + + vConstants = vdupq_lane_f32(vget_low_f32(CC0), 1); + Result = vmlaq_f32(vConstants, Result, x2); + + vConstants = vdupq_lane_f32(vget_low_f32(CC0), 0); + Result = vmlaq_f32(vConstants, Result, x2); + + Result = vmlaq_f32(g_XMOne, Result, x2); + Result = vmulq_f32(Result, fsign); + return Result; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_cos_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + // Map V to x in [-pi,pi]. + XMVECTOR x = XMVectorModAngles(V); + + // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x). 
+ XMVECTOR sign = _mm_and_ps(x, g_XMNegativeZero); + __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 + __m128 absx = _mm_andnot_ps(sign, x); // |x| + __m128 rflx = _mm_sub_ps(c, x); + __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi); + __m128 select0 = _mm_and_ps(comp, x); + __m128 select1 = _mm_andnot_ps(comp, rflx); + x = _mm_or_ps(select0, select1); + select0 = _mm_and_ps(comp, g_XMOne); + select1 = _mm_andnot_ps(comp, g_XMNegativeOne); + sign = _mm_or_ps(select0, select1); + + __m128 x2 = _mm_mul_ps(x, x); + + // Compute polynomial approximation + const XMVECTOR CC1 = g_XMCosCoefficients1; + __m128 vConstantsB = XM_PERMUTE_PS(CC1, _MM_SHUFFLE(0, 0, 0, 0)); + const XMVECTOR CC0 = g_XMCosCoefficients0; + __m128 vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); + + vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(2, 2, 2, 2)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(1, 1, 1, 1)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(0, 0, 0, 0)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + Result = XM_FMADD_PS(Result, x2, g_XMOne); + Result = _mm_mul_ps(Result, sign); + return Result; +#endif +} + +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline void XM_CALLCONV XMVectorSinCos +( + XMVECTOR* pSin, + XMVECTOR* pCos, + FXMVECTOR V +) noexcept +{ + assert(pSin != nullptr); + assert(pCos != nullptr); + + // 11/10-degree minimax approximation + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Sin = { { { + sinf(V.vector4_f32[0]), + sinf(V.vector4_f32[1]), + sinf(V.vector4_f32[2]), + sinf(V.vector4_f32[3]) + } } }; + + XMVECTORF32 Cos = { { { + cosf(V.vector4_f32[0]), + cosf(V.vector4_f32[1]), + cosf(V.vector4_f32[2]), + cosf(V.vector4_f32[3]) + } } }; + + *pSin = Sin.v; + *pCos = Cos.v; +#elif 
defined(_XM_ARM_NEON_INTRINSICS_) + // Force the value within the bounds of pi + XMVECTOR x = XMVectorModAngles(V); + + // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x). + uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(x), g_XMNegativeZero); + uint32x4_t c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 + float32x4_t absx = vabsq_f32(x); + float32x4_t rflx = vsubq_f32(vreinterpretq_f32_u32(c), x); + uint32x4_t comp = vcleq_f32(absx, g_XMHalfPi); + x = vbslq_f32(comp, x, rflx); + float32x4_t fsign = vbslq_f32(comp, g_XMOne, g_XMNegativeOne); + + float32x4_t x2 = vmulq_f32(x, x); + + // Compute polynomial approximation for sine + const XMVECTOR SC1 = g_XMSinCoefficients1; + const XMVECTOR SC0 = g_XMSinCoefficients0; + XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(SC0), 1); + XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_low_f32(SC1), 0); + + vConstants = vdupq_lane_f32(vget_high_f32(SC0), 0); + Result = vmlaq_f32(vConstants, Result, x2); + + vConstants = vdupq_lane_f32(vget_low_f32(SC0), 1); + Result = vmlaq_f32(vConstants, Result, x2); + + vConstants = vdupq_lane_f32(vget_low_f32(SC0), 0); + Result = vmlaq_f32(vConstants, Result, x2); + + Result = vmlaq_f32(g_XMOne, Result, x2); + *pSin = vmulq_f32(Result, x); + + // Compute polynomial approximation for cosine + const XMVECTOR CC1 = g_XMCosCoefficients1; + const XMVECTOR CC0 = g_XMCosCoefficients0; + vConstants = vdupq_lane_f32(vget_high_f32(CC0), 1); + Result = vmlaq_lane_f32(vConstants, x2, vget_low_f32(CC1), 0); + + vConstants = vdupq_lane_f32(vget_high_f32(CC0), 0); + Result = vmlaq_f32(vConstants, Result, x2); + + vConstants = vdupq_lane_f32(vget_low_f32(CC0), 1); + Result = vmlaq_f32(vConstants, Result, x2); + + vConstants = vdupq_lane_f32(vget_low_f32(CC0), 0); + Result = vmlaq_f32(vConstants, Result, x2); + + Result = vmlaq_f32(g_XMOne, Result, x2); + *pCos = vmulq_f32(Result, fsign); +#elif defined(_XM_SVML_INTRINSICS_) + *pSin = _mm_sincos_ps(pCos, V); +#elif 
defined(_XM_SSE_INTRINSICS_) + // Force the value within the bounds of pi + XMVECTOR x = XMVectorModAngles(V); + + // Map in [-pi/2,pi/2] with sin(y) = sin(x), cos(y) = sign*cos(x). + XMVECTOR sign = _mm_and_ps(x, g_XMNegativeZero); + __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 + __m128 absx = _mm_andnot_ps(sign, x); // |x| + __m128 rflx = _mm_sub_ps(c, x); + __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi); + __m128 select0 = _mm_and_ps(comp, x); + __m128 select1 = _mm_andnot_ps(comp, rflx); + x = _mm_or_ps(select0, select1); + select0 = _mm_and_ps(comp, g_XMOne); + select1 = _mm_andnot_ps(comp, g_XMNegativeOne); + sign = _mm_or_ps(select0, select1); + + __m128 x2 = _mm_mul_ps(x, x); + + // Compute polynomial approximation of sine + const XMVECTOR SC1 = g_XMSinCoefficients1; + __m128 vConstantsB = XM_PERMUTE_PS(SC1, _MM_SHUFFLE(0, 0, 0, 0)); + const XMVECTOR SC0 = g_XMSinCoefficients0; + __m128 vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); + + vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(2, 2, 2, 2)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(1, 1, 1, 1)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(0, 0, 0, 0)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + Result = XM_FMADD_PS(Result, x2, g_XMOne); + Result = _mm_mul_ps(Result, x); + *pSin = Result; + + // Compute polynomial approximation of cosine + const XMVECTOR CC1 = g_XMCosCoefficients1; + vConstantsB = XM_PERMUTE_PS(CC1, _MM_SHUFFLE(0, 0, 0, 0)); + const XMVECTOR CC0 = g_XMCosCoefficients0; + vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(3, 3, 3, 3)); + Result = XM_FMADD_PS(vConstantsB, x2, vConstants); + + vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(2, 2, 2, 2)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(1, 1, 1, 1)); + Result = XM_FMADD_PS(Result, x2, 
vConstants); + + vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(0, 0, 0, 0)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + Result = XM_FMADD_PS(Result, x2, g_XMOne); + Result = _mm_mul_ps(Result, sign); + *pCos = Result; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorTan(FXMVECTOR V) noexcept +{ + // Cody and Waite algorithm to compute tangent. + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + tanf(V.vector4_f32[0]), + tanf(V.vector4_f32[1]), + tanf(V.vector4_f32[2]), + tanf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_tan_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + static const XMVECTORF32 TanCoefficients0 = { { { 1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f } } }; + static const XMVECTORF32 TanCoefficients1 = { { { 4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f } } }; + static const XMVECTORF32 TanConstants = { { { 1.570796371f, 6.077100628e-11f, 0.000244140625f, 0.63661977228f /*2 / Pi*/ } } }; + static const XMVECTORU32 Mask = { { { 0x1, 0x1, 0x1, 0x1 } } }; + + XMVECTOR TwoDivPi = XMVectorSplatW(TanConstants.v); + + XMVECTOR Zero = XMVectorZero(); + + XMVECTOR C0 = XMVectorSplatX(TanConstants.v); + XMVECTOR C1 = XMVectorSplatY(TanConstants.v); + XMVECTOR Epsilon = XMVectorSplatZ(TanConstants.v); + + XMVECTOR VA = XMVectorMultiply(V, TwoDivPi); + + VA = XMVectorRound(VA); + + XMVECTOR VC = XMVectorNegativeMultiplySubtract(VA, C0, V); + + XMVECTOR VB = XMVectorAbs(VA); + + VC = XMVectorNegativeMultiplySubtract(VA, C1, VC); + +#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + VB = vreinterpretq_f32_u32(vcvtq_u32_f32(VB)); +#elif defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + reinterpret_cast<__m128i*>(&VB)[0] = _mm_cvttps_epi32(VB); +#else + for (size_t i = 0; i < 4; 
i++) + { + VB.vector4_u32[i] = static_cast(VB.vector4_f32[i]); + } +#endif + + XMVECTOR VC2 = XMVectorMultiply(VC, VC); + + XMVECTOR T7 = XMVectorSplatW(TanCoefficients1.v); + XMVECTOR T6 = XMVectorSplatZ(TanCoefficients1.v); + XMVECTOR T4 = XMVectorSplatX(TanCoefficients1.v); + XMVECTOR T3 = XMVectorSplatW(TanCoefficients0.v); + XMVECTOR T5 = XMVectorSplatY(TanCoefficients1.v); + XMVECTOR T2 = XMVectorSplatZ(TanCoefficients0.v); + XMVECTOR T1 = XMVectorSplatY(TanCoefficients0.v); + XMVECTOR T0 = XMVectorSplatX(TanCoefficients0.v); + + XMVECTOR VBIsEven = XMVectorAndInt(VB, Mask.v); + VBIsEven = XMVectorEqualInt(VBIsEven, Zero); + + XMVECTOR N = XMVectorMultiplyAdd(VC2, T7, T6); + XMVECTOR D = XMVectorMultiplyAdd(VC2, T4, T3); + N = XMVectorMultiplyAdd(VC2, N, T5); + D = XMVectorMultiplyAdd(VC2, D, T2); + N = XMVectorMultiply(VC2, N); + D = XMVectorMultiplyAdd(VC2, D, T1); + N = XMVectorMultiplyAdd(VC, N, VC); + XMVECTOR VCNearZero = XMVectorInBounds(VC, Epsilon); + D = XMVectorMultiplyAdd(VC2, D, T0); + + N = XMVectorSelect(N, VC, VCNearZero); + D = XMVectorSelect(D, g_XMOne.v, VCNearZero); + + XMVECTOR R0 = XMVectorNegate(N); + XMVECTOR R1 = XMVectorDivide(N, D); + R0 = XMVectorDivide(D, R0); + + XMVECTOR VIsZero = XMVectorEqual(V, Zero); + + XMVECTOR Result = XMVectorSelect(R0, R1, VBIsEven); + + Result = XMVectorSelect(Result, Zero, VIsZero); + + return Result; + +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorSinH(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + sinhf(V.vector4_f32[0]), + sinhf(V.vector4_f32[1]), + sinhf(V.vector4_f32[2]), + sinhf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 Scale = { { { 1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f } } }; // 1.0f / ln(2.0f) + + XMVECTOR V1 = vmlaq_f32(g_XMNegativeOne.v, V, Scale.v); + 
XMVECTOR V2 = vmlsq_f32(g_XMNegativeOne.v, V, Scale.v); + XMVECTOR E1 = XMVectorExp(V1); + XMVECTOR E2 = XMVectorExp(V2); + + return vsubq_f32(E1, E2); +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_sinh_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 Scale = { { { 1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f } } }; // 1.0f / ln(2.0f) + + XMVECTOR V1 = XM_FMADD_PS(V, Scale, g_XMNegativeOne); + XMVECTOR V2 = XM_FNMADD_PS(V, Scale, g_XMNegativeOne); + XMVECTOR E1 = XMVectorExp(V1); + XMVECTOR E2 = XMVectorExp(V2); + + return _mm_sub_ps(E1, E2); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorCosH(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + coshf(V.vector4_f32[0]), + coshf(V.vector4_f32[1]), + coshf(V.vector4_f32[2]), + coshf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 Scale = { { { 1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f } } }; // 1.0f / ln(2.0f) + + XMVECTOR V1 = vmlaq_f32(g_XMNegativeOne.v, V, Scale.v); + XMVECTOR V2 = vmlsq_f32(g_XMNegativeOne.v, V, Scale.v); + XMVECTOR E1 = XMVectorExp(V1); + XMVECTOR E2 = XMVectorExp(V2); + return vaddq_f32(E1, E2); +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_cosh_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 Scale = { { { 1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f } } }; // 1.0f / ln(2.0f) + + XMVECTOR V1 = XM_FMADD_PS(V, Scale.v, g_XMNegativeOne.v); + XMVECTOR V2 = XM_FNMADD_PS(V, Scale.v, g_XMNegativeOne.v); + XMVECTOR E1 = XMVectorExp(V1); + XMVECTOR E2 = XMVectorExp(V2); + return _mm_add_ps(E1, E2); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR 
XM_CALLCONV XMVectorTanH(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + tanhf(V.vector4_f32[0]), + tanhf(V.vector4_f32[1]), + tanhf(V.vector4_f32[2]), + tanhf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 Scale = { { { 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f } } }; // 2.0f / ln(2.0f) + + XMVECTOR E = vmulq_f32(V, Scale.v); + E = XMVectorExp(E); + E = vmlaq_f32(g_XMOneHalf.v, E, g_XMOneHalf.v); + E = XMVectorReciprocal(E); + return vsubq_f32(g_XMOne.v, E); +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_tanh_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 Scale = { { { 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f } } }; // 2.0f / ln(2.0f) + + XMVECTOR E = _mm_mul_ps(V, Scale.v); + E = XMVectorExp(E); + E = XM_FMADD_PS(E, g_XMOneHalf.v, g_XMOneHalf.v); + E = _mm_div_ps(g_XMOne.v, E); + return _mm_sub_ps(g_XMOne.v, E); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorASin(FXMVECTOR V) noexcept +{ + // 7-degree minimax approximation + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + asinf(V.vector4_f32[0]), + asinf(V.vector4_f32[1]), + asinf(V.vector4_f32[2]), + asinf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t nonnegative = vcgeq_f32(V, g_XMZero); + float32x4_t x = vabsq_f32(V); + + // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. 
+ float32x4_t oneMValue = vsubq_f32(g_XMOne, x); + float32x4_t clampOneMValue = vmaxq_f32(g_XMZero, oneMValue); + float32x4_t root = XMVectorSqrt(clampOneMValue); + + // Compute polynomial approximation + const XMVECTOR AC1 = g_XMArcCoefficients1; + XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(AC1), 0); + XMVECTOR t0 = vmlaq_lane_f32(vConstants, x, vget_high_f32(AC1), 1); + + vConstants = vdupq_lane_f32(vget_low_f32(AC1), 1); + t0 = vmlaq_f32(vConstants, t0, x); + + vConstants = vdupq_lane_f32(vget_low_f32(AC1), 0); + t0 = vmlaq_f32(vConstants, t0, x); + + const XMVECTOR AC0 = g_XMArcCoefficients0; + vConstants = vdupq_lane_f32(vget_high_f32(AC0), 1); + t0 = vmlaq_f32(vConstants, t0, x); + + vConstants = vdupq_lane_f32(vget_high_f32(AC0), 0); + t0 = vmlaq_f32(vConstants, t0, x); + + vConstants = vdupq_lane_f32(vget_low_f32(AC0), 1); + t0 = vmlaq_f32(vConstants, t0, x); + + vConstants = vdupq_lane_f32(vget_low_f32(AC0), 0); + t0 = vmlaq_f32(vConstants, t0, x); + t0 = vmulq_f32(t0, root); + + float32x4_t t1 = vsubq_f32(g_XMPi, t0); + t0 = vbslq_f32(nonnegative, t0, t1); + t0 = vsubq_f32(g_XMHalfPi, t0); + return t0; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_asin_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + __m128 nonnegative = _mm_cmpge_ps(V, g_XMZero); + __m128 mvalue = _mm_sub_ps(g_XMZero, V); + __m128 x = _mm_max_ps(V, mvalue); // |V| + + // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. 
+ __m128 oneMValue = _mm_sub_ps(g_XMOne, x); + __m128 clampOneMValue = _mm_max_ps(g_XMZero, oneMValue); + __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|V|) + + // Compute polynomial approximation + const XMVECTOR AC1 = g_XMArcCoefficients1; + __m128 vConstantsB = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 vConstants = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 t0 = XM_FMADD_PS(vConstantsB, x, vConstants); + + vConstants = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(1, 1, 1, 1)); + t0 = XM_FMADD_PS(t0, x, vConstants); + + vConstants = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(0, 0, 0, 0)); + t0 = XM_FMADD_PS(t0, x, vConstants); + + const XMVECTOR AC0 = g_XMArcCoefficients0; + vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(3, 3, 3, 3)); + t0 = XM_FMADD_PS(t0, x, vConstants); + + vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(2, 2, 2, 2)); + t0 = XM_FMADD_PS(t0, x, vConstants); + + vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(1, 1, 1, 1)); + t0 = XM_FMADD_PS(t0, x, vConstants); + + vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(0, 0, 0, 0)); + t0 = XM_FMADD_PS(t0, x, vConstants); + t0 = _mm_mul_ps(t0, root); + + __m128 t1 = _mm_sub_ps(g_XMPi, t0); + t0 = _mm_and_ps(nonnegative, t0); + t1 = _mm_andnot_ps(nonnegative, t1); + t0 = _mm_or_ps(t0, t1); + t0 = _mm_sub_ps(g_XMHalfPi, t0); + return t0; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorACos(FXMVECTOR V) noexcept +{ + // 7-degree minimax approximation + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + acosf(V.vector4_f32[0]), + acosf(V.vector4_f32[1]), + acosf(V.vector4_f32[2]), + acosf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t nonnegative = vcgeq_f32(V, g_XMZero); + float32x4_t x = vabsq_f32(V); + + // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. 
+ float32x4_t oneMValue = vsubq_f32(g_XMOne, x); + float32x4_t clampOneMValue = vmaxq_f32(g_XMZero, oneMValue); + float32x4_t root = XMVectorSqrt(clampOneMValue); + + // Compute polynomial approximation + const XMVECTOR AC1 = g_XMArcCoefficients1; + XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(AC1), 0); + XMVECTOR t0 = vmlaq_lane_f32(vConstants, x, vget_high_f32(AC1), 1); + + vConstants = vdupq_lane_f32(vget_low_f32(AC1), 1); + t0 = vmlaq_f32(vConstants, t0, x); + + vConstants = vdupq_lane_f32(vget_low_f32(AC1), 0); + t0 = vmlaq_f32(vConstants, t0, x); + + const XMVECTOR AC0 = g_XMArcCoefficients0; + vConstants = vdupq_lane_f32(vget_high_f32(AC0), 1); + t0 = vmlaq_f32(vConstants, t0, x); + + vConstants = vdupq_lane_f32(vget_high_f32(AC0), 0); + t0 = vmlaq_f32(vConstants, t0, x); + + vConstants = vdupq_lane_f32(vget_low_f32(AC0), 1); + t0 = vmlaq_f32(vConstants, t0, x); + + vConstants = vdupq_lane_f32(vget_low_f32(AC0), 0); + t0 = vmlaq_f32(vConstants, t0, x); + t0 = vmulq_f32(t0, root); + + float32x4_t t1 = vsubq_f32(g_XMPi, t0); + t0 = vbslq_f32(nonnegative, t0, t1); + return t0; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_acos_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + __m128 nonnegative = _mm_cmpge_ps(V, g_XMZero); + __m128 mvalue = _mm_sub_ps(g_XMZero, V); + __m128 x = _mm_max_ps(V, mvalue); // |V| + + // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. 
+ __m128 oneMValue = _mm_sub_ps(g_XMOne, x); + __m128 clampOneMValue = _mm_max_ps(g_XMZero, oneMValue); + __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|V|) + + // Compute polynomial approximation + const XMVECTOR AC1 = g_XMArcCoefficients1; + __m128 vConstantsB = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 vConstants = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 t0 = XM_FMADD_PS(vConstantsB, x, vConstants); + + vConstants = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(1, 1, 1, 1)); + t0 = XM_FMADD_PS(t0, x, vConstants); + + vConstants = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(0, 0, 0, 0)); + t0 = XM_FMADD_PS(t0, x, vConstants); + + const XMVECTOR AC0 = g_XMArcCoefficients0; + vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(3, 3, 3, 3)); + t0 = XM_FMADD_PS(t0, x, vConstants); + + vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(2, 2, 2, 2)); + t0 = XM_FMADD_PS(t0, x, vConstants); + + vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(1, 1, 1, 1)); + t0 = XM_FMADD_PS(t0, x, vConstants); + + vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(0, 0, 0, 0)); + t0 = XM_FMADD_PS(t0, x, vConstants); + t0 = _mm_mul_ps(t0, root); + + __m128 t1 = _mm_sub_ps(g_XMPi, t0); + t0 = _mm_and_ps(nonnegative, t0); + t1 = _mm_andnot_ps(nonnegative, t1); + t0 = _mm_or_ps(t0, t1); + return t0; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorATan(FXMVECTOR V) noexcept +{ + // 17-degree minimax approximation + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + atanf(V.vector4_f32[0]), + atanf(V.vector4_f32[1]), + atanf(V.vector4_f32[2]), + atanf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t absV = vabsq_f32(V); + float32x4_t invV = XMVectorReciprocal(V); + uint32x4_t comp = vcgtq_f32(V, g_XMOne); + float32x4_t sign = vbslq_f32(comp, g_XMOne, g_XMNegativeOne); + comp = vcleq_f32(absV, g_XMOne); + sign = vbslq_f32(comp, g_XMZero, sign); + float32x4_t x = 
vbslq_f32(comp, V, invV); + + float32x4_t x2 = vmulq_f32(x, x); + + // Compute polynomial approximation + const XMVECTOR TC1 = g_XMATanCoefficients1; + XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(TC1), 0); + XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_high_f32(TC1), 1); + + vConstants = vdupq_lane_f32(vget_low_f32(TC1), 1); + Result = vmlaq_f32(vConstants, Result, x2); + + vConstants = vdupq_lane_f32(vget_low_f32(TC1), 0); + Result = vmlaq_f32(vConstants, Result, x2); + + const XMVECTOR TC0 = g_XMATanCoefficients0; + vConstants = vdupq_lane_f32(vget_high_f32(TC0), 1); + Result = vmlaq_f32(vConstants, Result, x2); + + vConstants = vdupq_lane_f32(vget_high_f32(TC0), 0); + Result = vmlaq_f32(vConstants, Result, x2); + + vConstants = vdupq_lane_f32(vget_low_f32(TC0), 1); + Result = vmlaq_f32(vConstants, Result, x2); + + vConstants = vdupq_lane_f32(vget_low_f32(TC0), 0); + Result = vmlaq_f32(vConstants, Result, x2); + + Result = vmlaq_f32(g_XMOne, Result, x2); + Result = vmulq_f32(Result, x); + + float32x4_t result1 = vmulq_f32(sign, g_XMHalfPi); + result1 = vsubq_f32(result1, Result); + + comp = vceqq_f32(sign, g_XMZero); + Result = vbslq_f32(comp, Result, result1); + return Result; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_atan_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + __m128 absV = XMVectorAbs(V); + __m128 invV = _mm_div_ps(g_XMOne, V); + __m128 comp = _mm_cmpgt_ps(V, g_XMOne); + __m128 select0 = _mm_and_ps(comp, g_XMOne); + __m128 select1 = _mm_andnot_ps(comp, g_XMNegativeOne); + __m128 sign = _mm_or_ps(select0, select1); + comp = _mm_cmple_ps(absV, g_XMOne); + select0 = _mm_and_ps(comp, g_XMZero); + select1 = _mm_andnot_ps(comp, sign); + sign = _mm_or_ps(select0, select1); + select0 = _mm_and_ps(comp, V); + select1 = _mm_andnot_ps(comp, invV); + __m128 x = _mm_or_ps(select0, select1); + + __m128 x2 = _mm_mul_ps(x, x); + + // Compute polynomial approximation + const XMVECTOR TC1 = g_XMATanCoefficients1; + 
__m128 vConstantsB = XM_PERMUTE_PS(TC1, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 vConstants = XM_PERMUTE_PS(TC1, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); + + vConstants = XM_PERMUTE_PS(TC1, _MM_SHUFFLE(1, 1, 1, 1)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + vConstants = XM_PERMUTE_PS(TC1, _MM_SHUFFLE(0, 0, 0, 0)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + const XMVECTOR TC0 = g_XMATanCoefficients0; + vConstants = XM_PERMUTE_PS(TC0, _MM_SHUFFLE(3, 3, 3, 3)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + vConstants = XM_PERMUTE_PS(TC0, _MM_SHUFFLE(2, 2, 2, 2)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + vConstants = XM_PERMUTE_PS(TC0, _MM_SHUFFLE(1, 1, 1, 1)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + vConstants = XM_PERMUTE_PS(TC0, _MM_SHUFFLE(0, 0, 0, 0)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + Result = XM_FMADD_PS(Result, x2, g_XMOne); + + Result = _mm_mul_ps(Result, x); + __m128 result1 = _mm_mul_ps(sign, g_XMHalfPi); + result1 = _mm_sub_ps(result1, Result); + + comp = _mm_cmpeq_ps(sign, g_XMZero); + select0 = _mm_and_ps(comp, Result); + select1 = _mm_andnot_ps(comp, result1); + Result = _mm_or_ps(select0, select1); + return Result; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorATan2 +( + FXMVECTOR Y, + FXMVECTOR X +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + atan2f(Y.vector4_f32[0], X.vector4_f32[0]), + atan2f(Y.vector4_f32[1], X.vector4_f32[1]), + atan2f(Y.vector4_f32[2], X.vector4_f32[2]), + atan2f(Y.vector4_f32[3], X.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_atan2_ps(Y, X); + return Result; +#else + + // Return the inverse tangent of Y / X in the range of -Pi to Pi with the following exceptions: + + // Y == 0 and X is Negative -> Pi with the sign of Y + // y == 0 and x is positive 
-> 0 with the sign of y + // Y != 0 and X == 0 -> Pi / 2 with the sign of Y + // Y != 0 and X is Negative -> atan(y/x) + (PI with the sign of Y) + // X == -Infinity and Finite Y -> Pi with the sign of Y + // X == +Infinity and Finite Y -> 0 with the sign of Y + // Y == Infinity and X is Finite -> Pi / 2 with the sign of Y + // Y == Infinity and X == -Infinity -> 3Pi / 4 with the sign of Y + // Y == Infinity and X == +Infinity -> Pi / 4 with the sign of Y + + static const XMVECTORF32 ATan2Constants = { { { XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f } } }; + + XMVECTOR Zero = XMVectorZero(); + XMVECTOR ATanResultValid = XMVectorTrueInt(); + + XMVECTOR Pi = XMVectorSplatX(ATan2Constants); + XMVECTOR PiOverTwo = XMVectorSplatY(ATan2Constants); + XMVECTOR PiOverFour = XMVectorSplatZ(ATan2Constants); + XMVECTOR ThreePiOverFour = XMVectorSplatW(ATan2Constants); + + XMVECTOR YEqualsZero = XMVectorEqual(Y, Zero); + XMVECTOR XEqualsZero = XMVectorEqual(X, Zero); + XMVECTOR XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v); + XIsPositive = XMVectorEqualInt(XIsPositive, Zero); + XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y); + XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X); + + XMVECTOR YSign = XMVectorAndInt(Y, g_XMNegativeZero.v); + Pi = XMVectorOrInt(Pi, YSign); + PiOverTwo = XMVectorOrInt(PiOverTwo, YSign); + PiOverFour = XMVectorOrInt(PiOverFour, YSign); + ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign); + + XMVECTOR R1 = XMVectorSelect(Pi, YSign, XIsPositive); + XMVECTOR R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero); + XMVECTOR R3 = XMVectorSelect(R2, R1, YEqualsZero); + XMVECTOR R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive); + XMVECTOR R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity); + XMVECTOR Result = XMVectorSelect(R3, R5, YEqualsInfinity); + ATanResultValid = XMVectorEqualInt(Result, ATanResultValid); + + XMVECTOR V = XMVectorDivide(Y, X); + + XMVECTOR R0 = XMVectorATan(V); + + R1 = XMVectorSelect(Pi, 
g_XMNegativeZero, XIsPositive); + R2 = XMVectorAdd(R0, R1); + + return XMVectorSelect(Result, R2, ATanResultValid); + +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorSinEst(FXMVECTOR V) noexcept +{ + // 7-degree minimax approximation + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + sinf(V.vector4_f32[0]), + sinf(V.vector4_f32[1]), + sinf(V.vector4_f32[2]), + sinf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Force the value within the bounds of pi + XMVECTOR x = XMVectorModAngles(V); + + // Map in [-pi/2,pi/2] with sin(y) = sin(x). + uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(x), g_XMNegativeZero); + uint32x4_t c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 + float32x4_t absx = vabsq_f32(x); + float32x4_t rflx = vsubq_f32(vreinterpretq_f32_u32(c), x); + uint32x4_t comp = vcleq_f32(absx, g_XMHalfPi); + x = vbslq_f32(comp, x, rflx); + + float32x4_t x2 = vmulq_f32(x, x); + + // Compute polynomial approximation + const XMVECTOR SEC = g_XMSinCoefficients1; + XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(SEC), 0); + XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_high_f32(SEC), 1); + + vConstants = vdupq_lane_f32(vget_low_f32(SEC), 1); + Result = vmlaq_f32(vConstants, Result, x2); + + Result = vmlaq_f32(g_XMOne, Result, x2); + Result = vmulq_f32(Result, x); + return Result; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_sin_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + // Force the value within the bounds of pi + XMVECTOR x = XMVectorModAngles(V); + + // Map in [-pi/2,pi/2] with sin(y) = sin(x). 
+ __m128 sign = _mm_and_ps(x, g_XMNegativeZero); + __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 + __m128 absx = _mm_andnot_ps(sign, x); // |x| + __m128 rflx = _mm_sub_ps(c, x); + __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi); + __m128 select0 = _mm_and_ps(comp, x); + __m128 select1 = _mm_andnot_ps(comp, rflx); + x = _mm_or_ps(select0, select1); + + __m128 x2 = _mm_mul_ps(x, x); + + // Compute polynomial approximation + const XMVECTOR SEC = g_XMSinCoefficients1; + __m128 vConstantsB = XM_PERMUTE_PS(SEC, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 vConstants = XM_PERMUTE_PS(SEC, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); + + vConstants = XM_PERMUTE_PS(SEC, _MM_SHUFFLE(1, 1, 1, 1)); + Result = XM_FMADD_PS(Result, x2, vConstants); + Result = XM_FMADD_PS(Result, x2, g_XMOne); + Result = _mm_mul_ps(Result, x); + return Result; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorCosEst(FXMVECTOR V) noexcept +{ + // 6-degree minimax approximation + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + cosf(V.vector4_f32[0]), + cosf(V.vector4_f32[1]), + cosf(V.vector4_f32[2]), + cosf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Map V to x in [-pi,pi]. + XMVECTOR x = XMVectorModAngles(V); + + // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x). 
+ uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(x), g_XMNegativeZero); + uint32x4_t c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 + float32x4_t absx = vabsq_f32(x); + float32x4_t rflx = vsubq_f32(vreinterpretq_f32_u32(c), x); + uint32x4_t comp = vcleq_f32(absx, g_XMHalfPi); + x = vbslq_f32(comp, x, rflx); + float32x4_t fsign = vbslq_f32(comp, g_XMOne, g_XMNegativeOne); + + float32x4_t x2 = vmulq_f32(x, x); + + // Compute polynomial approximation + const XMVECTOR CEC = g_XMCosCoefficients1; + XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(CEC), 0); + XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_high_f32(CEC), 1); + + vConstants = vdupq_lane_f32(vget_low_f32(CEC), 1); + Result = vmlaq_f32(vConstants, Result, x2); + + Result = vmlaq_f32(g_XMOne, Result, x2); + Result = vmulq_f32(Result, fsign); + return Result; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_cos_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + // Map V to x in [-pi,pi]. + XMVECTOR x = XMVectorModAngles(V); + + // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x). 
+ XMVECTOR sign = _mm_and_ps(x, g_XMNegativeZero); + __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 + __m128 absx = _mm_andnot_ps(sign, x); // |x| + __m128 rflx = _mm_sub_ps(c, x); + __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi); + __m128 select0 = _mm_and_ps(comp, x); + __m128 select1 = _mm_andnot_ps(comp, rflx); + x = _mm_or_ps(select0, select1); + select0 = _mm_and_ps(comp, g_XMOne); + select1 = _mm_andnot_ps(comp, g_XMNegativeOne); + sign = _mm_or_ps(select0, select1); + + __m128 x2 = _mm_mul_ps(x, x); + + // Compute polynomial approximation + const XMVECTOR CEC = g_XMCosCoefficients1; + __m128 vConstantsB = XM_PERMUTE_PS(CEC, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 vConstants = XM_PERMUTE_PS(CEC, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); + + vConstants = XM_PERMUTE_PS(CEC, _MM_SHUFFLE(1, 1, 1, 1)); + Result = XM_FMADD_PS(Result, x2, vConstants); + Result = XM_FMADD_PS(Result, x2, g_XMOne); + Result = _mm_mul_ps(Result, sign); + return Result; +#endif +} + +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline void XM_CALLCONV XMVectorSinCosEst +( + XMVECTOR* pSin, + XMVECTOR* pCos, + FXMVECTOR V +) noexcept +{ + assert(pSin != nullptr); + assert(pCos != nullptr); + + // 7/6-degree minimax approximation + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Sin = { { { + sinf(V.vector4_f32[0]), + sinf(V.vector4_f32[1]), + sinf(V.vector4_f32[2]), + sinf(V.vector4_f32[3]) + } } }; + + XMVECTORF32 Cos = { { { + cosf(V.vector4_f32[0]), + cosf(V.vector4_f32[1]), + cosf(V.vector4_f32[2]), + cosf(V.vector4_f32[3]) + } } }; + + *pSin = Sin.v; + *pCos = Cos.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Force the value within the bounds of pi + XMVECTOR x = XMVectorModAngles(V); + + // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x). 
+ uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(x), g_XMNegativeZero); + uint32x4_t c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 + float32x4_t absx = vabsq_f32(x); + float32x4_t rflx = vsubq_f32(vreinterpretq_f32_u32(c), x); + uint32x4_t comp = vcleq_f32(absx, g_XMHalfPi); + x = vbslq_f32(comp, x, rflx); + float32x4_t fsign = vbslq_f32(comp, g_XMOne, g_XMNegativeOne); + + float32x4_t x2 = vmulq_f32(x, x); + + // Compute polynomial approximation for sine + const XMVECTOR SEC = g_XMSinCoefficients1; + XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(SEC), 0); + XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_high_f32(SEC), 1); + + vConstants = vdupq_lane_f32(vget_low_f32(SEC), 1); + Result = vmlaq_f32(vConstants, Result, x2); + + Result = vmlaq_f32(g_XMOne, Result, x2); + *pSin = vmulq_f32(Result, x); + + // Compute polynomial approximation + const XMVECTOR CEC = g_XMCosCoefficients1; + vConstants = vdupq_lane_f32(vget_high_f32(CEC), 0); + Result = vmlaq_lane_f32(vConstants, x2, vget_high_f32(CEC), 1); + + vConstants = vdupq_lane_f32(vget_low_f32(CEC), 1); + Result = vmlaq_f32(vConstants, Result, x2); + + Result = vmlaq_f32(g_XMOne, Result, x2); + *pCos = vmulq_f32(Result, fsign); +#elif defined(_XM_SSE_INTRINSICS_) + // Force the value within the bounds of pi + XMVECTOR x = XMVectorModAngles(V); + + // Map in [-pi/2,pi/2] with sin(y) = sin(x), cos(y) = sign*cos(x). 
+ XMVECTOR sign = _mm_and_ps(x, g_XMNegativeZero); + __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 + __m128 absx = _mm_andnot_ps(sign, x); // |x| + __m128 rflx = _mm_sub_ps(c, x); + __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi); + __m128 select0 = _mm_and_ps(comp, x); + __m128 select1 = _mm_andnot_ps(comp, rflx); + x = _mm_or_ps(select0, select1); + select0 = _mm_and_ps(comp, g_XMOne); + select1 = _mm_andnot_ps(comp, g_XMNegativeOne); + sign = _mm_or_ps(select0, select1); + + __m128 x2 = _mm_mul_ps(x, x); + + // Compute polynomial approximation for sine + const XMVECTOR SEC = g_XMSinCoefficients1; + __m128 vConstantsB = XM_PERMUTE_PS(SEC, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 vConstants = XM_PERMUTE_PS(SEC, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); + + vConstants = XM_PERMUTE_PS(SEC, _MM_SHUFFLE(1, 1, 1, 1)); + Result = XM_FMADD_PS(Result, x2, vConstants); + Result = XM_FMADD_PS(Result, x2, g_XMOne); + Result = _mm_mul_ps(Result, x); + *pSin = Result; + + // Compute polynomial approximation for cosine + const XMVECTOR CEC = g_XMCosCoefficients1; + vConstantsB = XM_PERMUTE_PS(CEC, _MM_SHUFFLE(3, 3, 3, 3)); + vConstants = XM_PERMUTE_PS(CEC, _MM_SHUFFLE(2, 2, 2, 2)); + Result = XM_FMADD_PS(vConstantsB, x2, vConstants); + + vConstants = XM_PERMUTE_PS(CEC, _MM_SHUFFLE(1, 1, 1, 1)); + Result = XM_FMADD_PS(Result, x2, vConstants); + Result = XM_FMADD_PS(Result, x2, g_XMOne); + Result = _mm_mul_ps(Result, sign); + *pCos = Result; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorTanEst(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + tanf(V.vector4_f32[0]), + tanf(V.vector4_f32[1]), + tanf(V.vector4_f32[2]), + tanf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_tan_ps(V); + return Result; +#else + + XMVECTOR OneOverPi = 
XMVectorSplatW(g_XMTanEstCoefficients.v); + + XMVECTOR V1 = XMVectorMultiply(V, OneOverPi); + V1 = XMVectorRound(V1); + + V1 = XMVectorNegativeMultiplySubtract(g_XMPi.v, V1, V); + + XMVECTOR T0 = XMVectorSplatX(g_XMTanEstCoefficients.v); + XMVECTOR T1 = XMVectorSplatY(g_XMTanEstCoefficients.v); + XMVECTOR T2 = XMVectorSplatZ(g_XMTanEstCoefficients.v); + + XMVECTOR V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2); + XMVECTOR V2 = XMVectorMultiply(V1, V1); + XMVECTOR V1T0 = XMVectorMultiply(V1, T0); + XMVECTOR V1T1 = XMVectorMultiply(V1, T1); + + XMVECTOR D = XMVectorReciprocalEst(V2T2); + XMVECTOR N = XMVectorMultiplyAdd(V2, V1T1, V1T0); + + return XMVectorMultiply(N, D); + +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorASinEst(FXMVECTOR V) noexcept +{ + // 3-degree minimax approximation + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result; + Result.f[0] = asinf(V.vector4_f32[0]); + Result.f[1] = asinf(V.vector4_f32[1]); + Result.f[2] = asinf(V.vector4_f32[2]); + Result.f[3] = asinf(V.vector4_f32[3]); + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t nonnegative = vcgeq_f32(V, g_XMZero); + float32x4_t x = vabsq_f32(V); + + // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. 
+ float32x4_t oneMValue = vsubq_f32(g_XMOne, x); + float32x4_t clampOneMValue = vmaxq_f32(g_XMZero, oneMValue); + float32x4_t root = XMVectorSqrt(clampOneMValue); + + // Compute polynomial approximation + const XMVECTOR AEC = g_XMArcEstCoefficients; + XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(AEC), 0); + XMVECTOR t0 = vmlaq_lane_f32(vConstants, x, vget_high_f32(AEC), 1); + + vConstants = vdupq_lane_f32(vget_low_f32(AEC), 1); + t0 = vmlaq_f32(vConstants, t0, x); + + vConstants = vdupq_lane_f32(vget_low_f32(AEC), 0); + t0 = vmlaq_f32(vConstants, t0, x); + t0 = vmulq_f32(t0, root); + + float32x4_t t1 = vsubq_f32(g_XMPi, t0); + t0 = vbslq_f32(nonnegative, t0, t1); + t0 = vsubq_f32(g_XMHalfPi, t0); + return t0; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_asin_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + __m128 nonnegative = _mm_cmpge_ps(V, g_XMZero); + __m128 mvalue = _mm_sub_ps(g_XMZero, V); + __m128 x = _mm_max_ps(V, mvalue); // |V| + + // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. 
+ __m128 oneMValue = _mm_sub_ps(g_XMOne, x); + __m128 clampOneMValue = _mm_max_ps(g_XMZero, oneMValue); + __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|V|) + + // Compute polynomial approximation + const XMVECTOR AEC = g_XMArcEstCoefficients; + __m128 vConstantsB = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 t0 = XM_FMADD_PS(vConstantsB, x, vConstants); + + vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(1, 1, 1, 1)); + t0 = XM_FMADD_PS(t0, x, vConstants); + + vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(0, 0, 0, 0)); + t0 = XM_FMADD_PS(t0, x, vConstants); + t0 = _mm_mul_ps(t0, root); + + __m128 t1 = _mm_sub_ps(g_XMPi, t0); + t0 = _mm_and_ps(nonnegative, t0); + t1 = _mm_andnot_ps(nonnegative, t1); + t0 = _mm_or_ps(t0, t1); + t0 = _mm_sub_ps(g_XMHalfPi, t0); + return t0; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorACosEst(FXMVECTOR V) noexcept +{ + // 3-degree minimax approximation + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + acosf(V.vector4_f32[0]), + acosf(V.vector4_f32[1]), + acosf(V.vector4_f32[2]), + acosf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t nonnegative = vcgeq_f32(V, g_XMZero); + float32x4_t x = vabsq_f32(V); + + // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. 
+ float32x4_t oneMValue = vsubq_f32(g_XMOne, x); + float32x4_t clampOneMValue = vmaxq_f32(g_XMZero, oneMValue); + float32x4_t root = XMVectorSqrt(clampOneMValue); + + // Compute polynomial approximation + const XMVECTOR AEC = g_XMArcEstCoefficients; + XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(AEC), 0); + XMVECTOR t0 = vmlaq_lane_f32(vConstants, x, vget_high_f32(AEC), 1); + + vConstants = vdupq_lane_f32(vget_low_f32(AEC), 1); + t0 = vmlaq_f32(vConstants, t0, x); + + vConstants = vdupq_lane_f32(vget_low_f32(AEC), 0); + t0 = vmlaq_f32(vConstants, t0, x); + t0 = vmulq_f32(t0, root); + + float32x4_t t1 = vsubq_f32(g_XMPi, t0); + t0 = vbslq_f32(nonnegative, t0, t1); + return t0; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_acos_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + __m128 nonnegative = _mm_cmpge_ps(V, g_XMZero); + __m128 mvalue = _mm_sub_ps(g_XMZero, V); + __m128 x = _mm_max_ps(V, mvalue); // |V| + + // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. 
+ __m128 oneMValue = _mm_sub_ps(g_XMOne, x); + __m128 clampOneMValue = _mm_max_ps(g_XMZero, oneMValue); + __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|V|) + + // Compute polynomial approximation + const XMVECTOR AEC = g_XMArcEstCoefficients; + __m128 vConstantsB = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 t0 = XM_FMADD_PS(vConstantsB, x, vConstants); + + vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(1, 1, 1, 1)); + t0 = XM_FMADD_PS(t0, x, vConstants); + + vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(0, 0, 0, 0)); + t0 = XM_FMADD_PS(t0, x, vConstants); + t0 = _mm_mul_ps(t0, root); + + __m128 t1 = _mm_sub_ps(g_XMPi, t0); + t0 = _mm_and_ps(nonnegative, t0); + t1 = _mm_andnot_ps(nonnegative, t1); + t0 = _mm_or_ps(t0, t1); + return t0; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorATanEst(FXMVECTOR V) noexcept +{ + // 9-degree minimax approximation + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + atanf(V.vector4_f32[0]), + atanf(V.vector4_f32[1]), + atanf(V.vector4_f32[2]), + atanf(V.vector4_f32[3]) + } } }; + return Result.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t absV = vabsq_f32(V); + float32x4_t invV = XMVectorReciprocalEst(V); + uint32x4_t comp = vcgtq_f32(V, g_XMOne); + float32x4_t sign = vbslq_f32(comp, g_XMOne, g_XMNegativeOne); + comp = vcleq_f32(absV, g_XMOne); + sign = vbslq_f32(comp, g_XMZero, sign); + float32x4_t x = vbslq_f32(comp, V, invV); + + float32x4_t x2 = vmulq_f32(x, x); + + // Compute polynomial approximation + const XMVECTOR AEC = g_XMATanEstCoefficients1; + XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(AEC), 0); + XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_high_f32(AEC), 1); + + vConstants = vdupq_lane_f32(vget_low_f32(AEC), 1); + Result = vmlaq_f32(vConstants, Result, x2); + + vConstants = vdupq_lane_f32(vget_low_f32(AEC), 0); 
+ Result = vmlaq_f32(vConstants, Result, x2); + + // ATanEstCoefficients0 is already splatted + Result = vmlaq_f32(g_XMATanEstCoefficients0, Result, x2); + Result = vmulq_f32(Result, x); + + float32x4_t result1 = vmulq_f32(sign, g_XMHalfPi); + result1 = vsubq_f32(result1, Result); + + comp = vceqq_f32(sign, g_XMZero); + Result = vbslq_f32(comp, Result, result1); + return Result; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_atan_ps(V); + return Result; +#elif defined(_XM_SSE_INTRINSICS_) + __m128 absV = XMVectorAbs(V); + __m128 invV = _mm_div_ps(g_XMOne, V); + __m128 comp = _mm_cmpgt_ps(V, g_XMOne); + __m128 select0 = _mm_and_ps(comp, g_XMOne); + __m128 select1 = _mm_andnot_ps(comp, g_XMNegativeOne); + __m128 sign = _mm_or_ps(select0, select1); + comp = _mm_cmple_ps(absV, g_XMOne); + select0 = _mm_and_ps(comp, g_XMZero); + select1 = _mm_andnot_ps(comp, sign); + sign = _mm_or_ps(select0, select1); + select0 = _mm_and_ps(comp, V); + select1 = _mm_andnot_ps(comp, invV); + __m128 x = _mm_or_ps(select0, select1); + + __m128 x2 = _mm_mul_ps(x, x); + + // Compute polynomial approximation + const XMVECTOR AEC = g_XMATanEstCoefficients1; + __m128 vConstantsB = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); + + vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(1, 1, 1, 1)); + Result = XM_FMADD_PS(Result, x2, vConstants); + + vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(0, 0, 0, 0)); + Result = XM_FMADD_PS(Result, x2, vConstants); + // ATanEstCoefficients0 is already splatted + Result = XM_FMADD_PS(Result, x2, g_XMATanEstCoefficients0); + Result = _mm_mul_ps(Result, x); + __m128 result1 = _mm_mul_ps(sign, g_XMHalfPi); + result1 = _mm_sub_ps(result1, Result); + + comp = _mm_cmpeq_ps(sign, g_XMZero); + select0 = _mm_and_ps(comp, Result); + select1 = _mm_andnot_ps(comp, result1); + Result = _mm_or_ps(select0, select1); + return Result; +#endif +} + 
+//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorATan2Est +( + FXMVECTOR Y, + FXMVECTOR X +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 Result = { { { + atan2f(Y.vector4_f32[0], X.vector4_f32[0]), + atan2f(Y.vector4_f32[1], X.vector4_f32[1]), + atan2f(Y.vector4_f32[2], X.vector4_f32[2]), + atan2f(Y.vector4_f32[3], X.vector4_f32[3]), + } } }; + return Result.v; +#elif defined(_XM_SVML_INTRINSICS_) + XMVECTOR Result = _mm_atan2_ps(Y, X); + return Result; +#else + + static const XMVECTORF32 ATan2Constants = { { { XM_PI, XM_PIDIV2, XM_PIDIV4, 2.3561944905f /* Pi*3/4 */ } } }; + + const XMVECTOR Zero = XMVectorZero(); + XMVECTOR ATanResultValid = XMVectorTrueInt(); + + XMVECTOR Pi = XMVectorSplatX(ATan2Constants); + XMVECTOR PiOverTwo = XMVectorSplatY(ATan2Constants); + XMVECTOR PiOverFour = XMVectorSplatZ(ATan2Constants); + XMVECTOR ThreePiOverFour = XMVectorSplatW(ATan2Constants); + + XMVECTOR YEqualsZero = XMVectorEqual(Y, Zero); + XMVECTOR XEqualsZero = XMVectorEqual(X, Zero); + XMVECTOR XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v); + XIsPositive = XMVectorEqualInt(XIsPositive, Zero); + XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y); + XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X); + + XMVECTOR YSign = XMVectorAndInt(Y, g_XMNegativeZero.v); + Pi = XMVectorOrInt(Pi, YSign); + PiOverTwo = XMVectorOrInt(PiOverTwo, YSign); + PiOverFour = XMVectorOrInt(PiOverFour, YSign); + ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign); + + XMVECTOR R1 = XMVectorSelect(Pi, YSign, XIsPositive); + XMVECTOR R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero); + XMVECTOR R3 = XMVectorSelect(R2, R1, YEqualsZero); + XMVECTOR R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive); + XMVECTOR R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity); + XMVECTOR Result = XMVectorSelect(R3, R5, YEqualsInfinity); + ATanResultValid = XMVectorEqualInt(Result, ATanResultValid); + + 
XMVECTOR Reciprocal = XMVectorReciprocalEst(X); + XMVECTOR V = XMVectorMultiply(Y, Reciprocal); + XMVECTOR R0 = XMVectorATanEst(V); + + R1 = XMVectorSelect(Pi, g_XMNegativeZero, XIsPositive); + R2 = XMVectorAdd(R0, R1); + + Result = XMVectorSelect(Result, R2, ATanResultValid); + + return Result; + +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorLerp +( + FXMVECTOR V0, + FXMVECTOR V1, + float t +) noexcept +{ + // V0 + t * (V1 - V0) + +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Scale = XMVectorReplicate(t); + XMVECTOR Length = XMVectorSubtract(V1, V0); + return XMVectorMultiplyAdd(Length, Scale, V0); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTOR L = vsubq_f32(V1, V0); + return vmlaq_n_f32(V0, L, t); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR L = _mm_sub_ps(V1, V0); + XMVECTOR S = _mm_set_ps1(t); + return XM_FMADD_PS(L, S, V0); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorLerpV +( + FXMVECTOR V0, + FXMVECTOR V1, + FXMVECTOR T +) noexcept +{ + // V0 + T * (V1 - V0) + +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Length = XMVectorSubtract(V1, V0); + return XMVectorMultiplyAdd(Length, T, V0); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTOR L = vsubq_f32(V1, V0); + return vmlaq_f32(V0, L, T); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR Length = _mm_sub_ps(V1, V0); + return XM_FMADD_PS(Length, T, V0); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorHermite +( + FXMVECTOR Position0, + FXMVECTOR Tangent0, + FXMVECTOR Position1, + GXMVECTOR Tangent1, + float t +) noexcept +{ + // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 + + // (t^3 - 2 * t^2 + t) * Tangent0 + + // (-2 * t^3 + 3 * t^2) * Position1 + + // (t^3 - t^2) * Tangent1 + +#if defined(_XM_NO_INTRINSICS_) + + float t2 = t * t; + float t3 
= t * t2; + + XMVECTOR P0 = XMVectorReplicate(2.0f * t3 - 3.0f * t2 + 1.0f); + XMVECTOR T0 = XMVectorReplicate(t3 - 2.0f * t2 + t); + XMVECTOR P1 = XMVectorReplicate(-2.0f * t3 + 3.0f * t2); + XMVECTOR T1 = XMVectorReplicate(t3 - t2); + + XMVECTOR Result = XMVectorMultiply(P0, Position0); + Result = XMVectorMultiplyAdd(T0, Tangent0, Result); + Result = XMVectorMultiplyAdd(P1, Position1, Result); + Result = XMVectorMultiplyAdd(T1, Tangent1, Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float t2 = t * t; + float t3 = t * t2; + + float p0 = 2.0f * t3 - 3.0f * t2 + 1.0f; + float t0 = t3 - 2.0f * t2 + t; + float p1 = -2.0f * t3 + 3.0f * t2; + float t1 = t3 - t2; + + XMVECTOR vResult = vmulq_n_f32(Position0, p0); + vResult = vmlaq_n_f32(vResult, Tangent0, t0); + vResult = vmlaq_n_f32(vResult, Position1, p1); + vResult = vmlaq_n_f32(vResult, Tangent1, t1); + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + float t2 = t * t; + float t3 = t * t2; + + XMVECTOR P0 = _mm_set_ps1(2.0f * t3 - 3.0f * t2 + 1.0f); + XMVECTOR T0 = _mm_set_ps1(t3 - 2.0f * t2 + t); + XMVECTOR P1 = _mm_set_ps1(-2.0f * t3 + 3.0f * t2); + XMVECTOR T1 = _mm_set_ps1(t3 - t2); + + XMVECTOR vResult = _mm_mul_ps(P0, Position0); + vResult = XM_FMADD_PS(Tangent0, T0, vResult); + vResult = XM_FMADD_PS(Position1, P1, vResult); + vResult = XM_FMADD_PS(Tangent1, T1, vResult); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorHermiteV +( + FXMVECTOR Position0, + FXMVECTOR Tangent0, + FXMVECTOR Position1, + GXMVECTOR Tangent1, + HXMVECTOR T +) noexcept +{ + // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 + + // (t^3 - 2 * t^2 + t) * Tangent0 + + // (-2 * t^3 + 3 * t^2) * Position1 + + // (t^3 - t^2) * Tangent1 + +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR T2 = XMVectorMultiply(T, T); + XMVECTOR T3 = XMVectorMultiply(T, T2); + + XMVECTOR P0 = XMVectorReplicate(2.0f * T3.vector4_f32[0] 
- 3.0f * T2.vector4_f32[0] + 1.0f); + XMVECTOR T0 = XMVectorReplicate(T3.vector4_f32[1] - 2.0f * T2.vector4_f32[1] + T.vector4_f32[1]); + XMVECTOR P1 = XMVectorReplicate(-2.0f * T3.vector4_f32[2] + 3.0f * T2.vector4_f32[2]); + XMVECTOR T1 = XMVectorReplicate(T3.vector4_f32[3] - T2.vector4_f32[3]); + + XMVECTOR Result = XMVectorMultiply(P0, Position0); + Result = XMVectorMultiplyAdd(T0, Tangent0, Result); + Result = XMVectorMultiplyAdd(P1, Position1, Result); + Result = XMVectorMultiplyAdd(T1, Tangent1, Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 CatMulT2 = { { { -3.0f, -2.0f, 3.0f, -1.0f } } }; + static const XMVECTORF32 CatMulT3 = { { { 2.0f, 1.0f, -2.0f, 1.0f } } }; + + XMVECTOR T2 = vmulq_f32(T, T); + XMVECTOR T3 = vmulq_f32(T, T2); + // Mul by the constants against t^2 + T2 = vmulq_f32(T2, CatMulT2); + // Mul by the constants against t^3 + T3 = vmlaq_f32(T2, T3, CatMulT3); + // T3 now has the pre-result. + // I need to add t.y only + T2 = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T), g_XMMaskY)); + T3 = vaddq_f32(T3, T2); + // Add 1.0f to x + T3 = vaddq_f32(T3, g_XMIdentityR0); + // Now, I have the constants created + // Mul the x constant to Position0 + XMVECTOR vResult = vmulq_lane_f32(Position0, vget_low_f32(T3), 0); // T3[0] + // Mul the y constant to Tangent0 + vResult = vmlaq_lane_f32(vResult, Tangent0, vget_low_f32(T3), 1); // T3[1] + // Mul the z constant to Position1 + vResult = vmlaq_lane_f32(vResult, Position1, vget_high_f32(T3), 0); // T3[2] + // Mul the w constant to Tangent1 + vResult = vmlaq_lane_f32(vResult, Tangent1, vget_high_f32(T3), 1); // T3[3] + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 CatMulT2 = { { { -3.0f, -2.0f, 3.0f, -1.0f } } }; + static const XMVECTORF32 CatMulT3 = { { { 2.0f, 1.0f, -2.0f, 1.0f } } }; + + XMVECTOR T2 = _mm_mul_ps(T, T); + XMVECTOR T3 = _mm_mul_ps(T, T2); + // Mul by the constants against t^2 + T2 = _mm_mul_ps(T2, 
CatMulT2); + // Mul by the constants against t^3 + T3 = XM_FMADD_PS(T3, CatMulT3, T2); + // T3 now has the pre-result. + // I need to add t.y only + T2 = _mm_and_ps(T, g_XMMaskY); + T3 = _mm_add_ps(T3, T2); + // Add 1.0f to x + T3 = _mm_add_ps(T3, g_XMIdentityR0); + // Now, I have the constants created + // Mul the x constant to Position0 + XMVECTOR vResult = XM_PERMUTE_PS(T3, _MM_SHUFFLE(0, 0, 0, 0)); + vResult = _mm_mul_ps(vResult, Position0); + // Mul the y constant to Tangent0 + T2 = XM_PERMUTE_PS(T3, _MM_SHUFFLE(1, 1, 1, 1)); + vResult = XM_FMADD_PS(T2, Tangent0, vResult); + // Mul the z constant to Position1 + T2 = XM_PERMUTE_PS(T3, _MM_SHUFFLE(2, 2, 2, 2)); + vResult = XM_FMADD_PS(T2, Position1, vResult); + // Mul the w constant to Tangent1 + T3 = XM_PERMUTE_PS(T3, _MM_SHUFFLE(3, 3, 3, 3)); + vResult = XM_FMADD_PS(T3, Tangent1, vResult); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorCatmullRom +( + FXMVECTOR Position0, + FXMVECTOR Position1, + FXMVECTOR Position2, + GXMVECTOR Position3, + float t +) noexcept +{ + // Result = ((-t^3 + 2 * t^2 - t) * Position0 + + // (3 * t^3 - 5 * t^2 + 2) * Position1 + + // (-3 * t^3 + 4 * t^2 + t) * Position2 + + // (t^3 - t^2) * Position3) * 0.5 + +#if defined(_XM_NO_INTRINSICS_) + + float t2 = t * t; + float t3 = t * t2; + + XMVECTOR P0 = XMVectorReplicate((-t3 + 2.0f * t2 - t) * 0.5f); + XMVECTOR P1 = XMVectorReplicate((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f); + XMVECTOR P2 = XMVectorReplicate((-3.0f * t3 + 4.0f * t2 + t) * 0.5f); + XMVECTOR P3 = XMVectorReplicate((t3 - t2) * 0.5f); + + XMVECTOR Result = XMVectorMultiply(P0, Position0); + Result = XMVectorMultiplyAdd(P1, Position1, Result); + Result = XMVectorMultiplyAdd(P2, Position2, Result); + Result = XMVectorMultiplyAdd(P3, Position3, Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float t2 = t * t; + float t3 = t * t2; + + float p0 = (-t3 
+ 2.0f * t2 - t) * 0.5f; + float p1 = (3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f; + float p2 = (-3.0f * t3 + 4.0f * t2 + t) * 0.5f; + float p3 = (t3 - t2) * 0.5f; + + XMVECTOR P1 = vmulq_n_f32(Position1, p1); + XMVECTOR P0 = vmlaq_n_f32(P1, Position0, p0); + XMVECTOR P3 = vmulq_n_f32(Position3, p3); + XMVECTOR P2 = vmlaq_n_f32(P3, Position2, p2); + P0 = vaddq_f32(P0, P2); + return P0; +#elif defined(_XM_SSE_INTRINSICS_) + float t2 = t * t; + float t3 = t * t2; + + XMVECTOR P0 = _mm_set_ps1((-t3 + 2.0f * t2 - t) * 0.5f); + XMVECTOR P1 = _mm_set_ps1((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f); + XMVECTOR P2 = _mm_set_ps1((-3.0f * t3 + 4.0f * t2 + t) * 0.5f); + XMVECTOR P3 = _mm_set_ps1((t3 - t2) * 0.5f); + + P1 = _mm_mul_ps(Position1, P1); + P0 = XM_FMADD_PS(Position0, P0, P1); + P3 = _mm_mul_ps(Position3, P3); + P2 = XM_FMADD_PS(Position2, P2, P3); + P0 = _mm_add_ps(P0, P2); + return P0; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorCatmullRomV +( + FXMVECTOR Position0, + FXMVECTOR Position1, + FXMVECTOR Position2, + GXMVECTOR Position3, + HXMVECTOR T +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + float fx = T.vector4_f32[0]; + float fy = T.vector4_f32[1]; + float fz = T.vector4_f32[2]; + float fw = T.vector4_f32[3]; + XMVECTORF32 vResult = { { { + 0.5f * ((-fx * fx * fx + 2 * fx * fx - fx) * Position0.vector4_f32[0] + + (3 * fx * fx * fx - 5 * fx * fx + 2) * Position1.vector4_f32[0] + + (-3 * fx * fx * fx + 4 * fx * fx + fx) * Position2.vector4_f32[0] + + (fx * fx * fx - fx * fx) * Position3.vector4_f32[0]), + + 0.5f * ((-fy * fy * fy + 2 * fy * fy - fy) * Position0.vector4_f32[1] + + (3 * fy * fy * fy - 5 * fy * fy + 2) * Position1.vector4_f32[1] + + (-3 * fy * fy * fy + 4 * fy * fy + fy) * Position2.vector4_f32[1] + + (fy * fy * fy - fy * fy) * Position3.vector4_f32[1]), + + 0.5f * ((-fz * fz * fz + 2 * fz * fz - fz) * Position0.vector4_f32[2] + + (3 * fz * fz * fz - 5 * fz * fz + 2) 
* Position1.vector4_f32[2] + + (-3 * fz * fz * fz + 4 * fz * fz + fz) * Position2.vector4_f32[2] + + (fz * fz * fz - fz * fz) * Position3.vector4_f32[2]), + + 0.5f * ((-fw * fw * fw + 2 * fw * fw - fw) * Position0.vector4_f32[3] + + (3 * fw * fw * fw - 5 * fw * fw + 2) * Position1.vector4_f32[3] + + (-3 * fw * fw * fw + 4 * fw * fw + fw) * Position2.vector4_f32[3] + + (fw * fw * fw - fw * fw) * Position3.vector4_f32[3]) + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 Catmul2 = { { { 2.0f, 2.0f, 2.0f, 2.0f } } }; + static const XMVECTORF32 Catmul3 = { { { 3.0f, 3.0f, 3.0f, 3.0f } } }; + static const XMVECTORF32 Catmul4 = { { { 4.0f, 4.0f, 4.0f, 4.0f } } }; + static const XMVECTORF32 Catmul5 = { { { 5.0f, 5.0f, 5.0f, 5.0f } } }; + // Cache T^2 and T^3 + XMVECTOR T2 = vmulq_f32(T, T); + XMVECTOR T3 = vmulq_f32(T, T2); + // Perform the Position0 term + XMVECTOR vResult = vaddq_f32(T2, T2); + vResult = vsubq_f32(vResult, T); + vResult = vsubq_f32(vResult, T3); + vResult = vmulq_f32(vResult, Position0); + // Perform the Position1 term and add + XMVECTOR vTemp = vmulq_f32(T3, Catmul3); + vTemp = vmlsq_f32(vTemp, T2, Catmul5); + vTemp = vaddq_f32(vTemp, Catmul2); + vResult = vmlaq_f32(vResult, vTemp, Position1); + // Perform the Position2 term and add + vTemp = vmulq_f32(T2, Catmul4); + vTemp = vmlsq_f32(vTemp, T3, Catmul3); + vTemp = vaddq_f32(vTemp, T); + vResult = vmlaq_f32(vResult, vTemp, Position2); + // Position3 is the last term + T3 = vsubq_f32(T3, T2); + vResult = vmlaq_f32(vResult, T3, Position3); + // Multiply by 0.5f and exit + vResult = vmulq_f32(vResult, g_XMOneHalf); + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 Catmul2 = { { { 2.0f, 2.0f, 2.0f, 2.0f } } }; + static const XMVECTORF32 Catmul3 = { { { 3.0f, 3.0f, 3.0f, 3.0f } } }; + static const XMVECTORF32 Catmul4 = { { { 4.0f, 4.0f, 4.0f, 4.0f } } }; + static const XMVECTORF32 Catmul5 = { { { 5.0f, 5.0f, 5.0f, 5.0f } } }; + 
// Cache T^2 and T^3 + XMVECTOR T2 = _mm_mul_ps(T, T); + XMVECTOR T3 = _mm_mul_ps(T, T2); + // Perform the Position0 term + XMVECTOR vResult = _mm_add_ps(T2, T2); + vResult = _mm_sub_ps(vResult, T); + vResult = _mm_sub_ps(vResult, T3); + vResult = _mm_mul_ps(vResult, Position0); + // Perform the Position1 term and add + XMVECTOR vTemp = _mm_mul_ps(T3, Catmul3); + vTemp = XM_FNMADD_PS(T2, Catmul5, vTemp); + vTemp = _mm_add_ps(vTemp, Catmul2); + vResult = XM_FMADD_PS(vTemp, Position1, vResult); + // Perform the Position2 term and add + vTemp = _mm_mul_ps(T2, Catmul4); + vTemp = XM_FNMADD_PS(T3, Catmul3, vTemp); + vTemp = _mm_add_ps(vTemp, T); + vResult = XM_FMADD_PS(vTemp, Position2, vResult); + // Position3 is the last term + T3 = _mm_sub_ps(T3, T2); + vResult = XM_FMADD_PS(T3, Position3, vResult); + // Multiply by 0.5f and exit + vResult = _mm_mul_ps(vResult, g_XMOneHalf); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorBaryCentric +( + FXMVECTOR Position0, + FXMVECTOR Position1, + FXMVECTOR Position2, + float f, + float g +) noexcept +{ + // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0) + +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR P10 = XMVectorSubtract(Position1, Position0); + XMVECTOR ScaleF = XMVectorReplicate(f); + + XMVECTOR P20 = XMVectorSubtract(Position2, Position0); + XMVECTOR ScaleG = XMVectorReplicate(g); + + XMVECTOR Result = XMVectorMultiplyAdd(P10, ScaleF, Position0); + Result = XMVectorMultiplyAdd(P20, ScaleG, Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTOR R1 = vsubq_f32(Position1, Position0); + XMVECTOR R2 = vsubq_f32(Position2, Position0); + R1 = vmlaq_n_f32(Position0, R1, f); + return vmlaq_n_f32(R1, R2, g); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR R1 = _mm_sub_ps(Position1, Position0); + XMVECTOR R2 = _mm_sub_ps(Position2, Position0); + XMVECTOR SF = _mm_set_ps1(f); + 
R1 = XM_FMADD_PS(R1, SF, Position0); + XMVECTOR SG = _mm_set_ps1(g); + return XM_FMADD_PS(R2, SG, R1); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVectorBaryCentricV +( + FXMVECTOR Position0, + FXMVECTOR Position1, + FXMVECTOR Position2, + GXMVECTOR F, + HXMVECTOR G +) noexcept +{ + // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0) + +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR P10 = XMVectorSubtract(Position1, Position0); + XMVECTOR P20 = XMVectorSubtract(Position2, Position0); + + XMVECTOR Result = XMVectorMultiplyAdd(P10, F, Position0); + Result = XMVectorMultiplyAdd(P20, G, Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTOR R1 = vsubq_f32(Position1, Position0); + XMVECTOR R2 = vsubq_f32(Position2, Position0); + R1 = vmlaq_f32(Position0, R1, F); + return vmlaq_f32(R1, R2, G); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR R1 = _mm_sub_ps(Position1, Position0); + XMVECTOR R2 = _mm_sub_ps(Position2, Position0); + R1 = XM_FMADD_PS(R1, F, Position0); + return XM_FMADD_PS(R2, G, R1); +#endif +} + +/**************************************************************************** + * + * 2D Vector + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + // Comparison operations + //------------------------------------------------------------------------------ + + //------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector2Equal +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vTemp = vceq_f32(vget_low_f32(V1), vget_low_f32(V2)); + return 
(vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) == 0xFFFFFFFFFFFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); + // z and w are don't care + return (((_mm_movemask_ps(vTemp) & 3) == 3) != 0); +#endif +} + + +//------------------------------------------------------------------------------ + +inline uint32_t XM_CALLCONV XMVector2EqualR +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + uint32_t CR = 0; + if ((V1.vector4_f32[0] == V2.vector4_f32[0]) && + (V1.vector4_f32[1] == V2.vector4_f32[1])) + { + CR = XM_CRMASK_CR6TRUE; + } + else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) && + (V1.vector4_f32[1] != V2.vector4_f32[1])) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vTemp = vceq_f32(vget_low_f32(V1), vget_low_f32(V2)); + uint64_t r = vget_lane_u64(vreinterpret_u64_u32(vTemp), 0); + uint32_t CR = 0; + if (r == 0xFFFFFFFFFFFFFFFFU) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!r) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); + // z and w are don't care + int iTest = _mm_movemask_ps(vTemp) & 3; + uint32_t CR = 0; + if (iTest == 3) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!iTest) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector2EqualInt +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vTemp = vceq_u32(vget_low_u32(vreinterpretq_u32_f32(V1)), vget_low_u32(vreinterpretq_u32_f32(V2))); + return (vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) == 0xFFFFFFFFFFFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i vTemp = 
_mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); + return (((_mm_movemask_ps(_mm_castsi128_ps(vTemp)) & 3) == 3) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +inline uint32_t XM_CALLCONV XMVector2EqualIntR +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + uint32_t CR = 0; + if ((V1.vector4_u32[0] == V2.vector4_u32[0]) && + (V1.vector4_u32[1] == V2.vector4_u32[1])) + { + CR = XM_CRMASK_CR6TRUE; + } + else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) && + (V1.vector4_u32[1] != V2.vector4_u32[1])) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vTemp = vceq_u32(vget_low_u32(vreinterpretq_u32_f32(V1)), vget_low_u32(vreinterpretq_u32_f32(V2))); + uint64_t r = vget_lane_u64(vreinterpret_u64_u32(vTemp), 0); + uint32_t CR = 0; + if (r == 0xFFFFFFFFFFFFFFFFU) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!r) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#elif defined(_XM_SSE_INTRINSICS_) + __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); + int iTest = _mm_movemask_ps(_mm_castsi128_ps(vTemp)) & 3; + uint32_t CR = 0; + if (iTest == 3) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!iTest) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector2NearEqual +( + FXMVECTOR V1, + FXMVECTOR V2, + FXMVECTOR Epsilon +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + float dx = fabsf(V1.vector4_f32[0] - V2.vector4_f32[0]); + float dy = fabsf(V1.vector4_f32[1] - V2.vector4_f32[1]); + return ((dx <= Epsilon.vector4_f32[0]) && + (dy <= Epsilon.vector4_f32[1])); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t vDelta = vsub_f32(vget_low_f32(V1), vget_low_f32(V2)); +#ifdef _MSC_VER + uint32x2_t vTemp = vacle_f32(vDelta, vget_low_u32(Epsilon)); +#else + uint32x2_t vTemp = 
vcle_f32(vabs_f32(vDelta), vget_low_f32(Epsilon)); +#endif + uint64_t r = vget_lane_u64(vreinterpret_u64_u32(vTemp), 0); + return (r == 0xFFFFFFFFFFFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + // Get the difference + XMVECTOR vDelta = _mm_sub_ps(V1, V2); + // Get the absolute value of the difference + XMVECTOR vTemp = _mm_setzero_ps(); + vTemp = _mm_sub_ps(vTemp, vDelta); + vTemp = _mm_max_ps(vTemp, vDelta); + vTemp = _mm_cmple_ps(vTemp, Epsilon); + // z and w are don't care + return (((_mm_movemask_ps(vTemp) & 3) == 0x3) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector2NotEqual +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vTemp = vceq_f32(vget_low_f32(V1), vget_low_f32(V2)); + return (vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) != 0xFFFFFFFFFFFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); + // z and w are don't care + return (((_mm_movemask_ps(vTemp) & 3) != 3) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector2NotEqualInt +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vTemp = vceq_u32(vget_low_u32(vreinterpretq_u32_f32(V1)), vget_low_u32(vreinterpretq_u32_f32(V2))); + return (vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) != 0xFFFFFFFFFFFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); + return (((_mm_movemask_ps(_mm_castsi128_ps(vTemp)) & 3) != 3) != 0); +#endif +} + 
+//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector2Greater +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vTemp = vcgt_f32(vget_low_f32(V1), vget_low_f32(V2)); + return (vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) == 0xFFFFFFFFFFFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpgt_ps(V1, V2); + // z and w are don't care + return (((_mm_movemask_ps(vTemp) & 3) == 3) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +inline uint32_t XM_CALLCONV XMVector2GreaterR +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + uint32_t CR = 0; + if ((V1.vector4_f32[0] > V2.vector4_f32[0]) && + (V1.vector4_f32[1] > V2.vector4_f32[1])) + { + CR = XM_CRMASK_CR6TRUE; + } + else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) && + (V1.vector4_f32[1] <= V2.vector4_f32[1])) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vTemp = vcgt_f32(vget_low_f32(V1), vget_low_f32(V2)); + uint64_t r = vget_lane_u64(vreinterpret_u64_u32(vTemp), 0); + uint32_t CR = 0; + if (r == 0xFFFFFFFFFFFFFFFFU) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!r) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpgt_ps(V1, V2); + int iTest = _mm_movemask_ps(vTemp) & 3; + uint32_t CR = 0; + if (iTest == 3) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!iTest) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector2GreaterOrEqual +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return 
(((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vTemp = vcge_f32(vget_low_f32(V1), vget_low_f32(V2)); + return (vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) == 0xFFFFFFFFFFFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpge_ps(V1, V2); + return (((_mm_movemask_ps(vTemp) & 3) == 3) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +inline uint32_t XM_CALLCONV XMVector2GreaterOrEqualR +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + uint32_t CR = 0; + if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) && + (V1.vector4_f32[1] >= V2.vector4_f32[1])) + { + CR = XM_CRMASK_CR6TRUE; + } + else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) && + (V1.vector4_f32[1] < V2.vector4_f32[1])) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vTemp = vcge_f32(vget_low_f32(V1), vget_low_f32(V2)); + uint64_t r = vget_lane_u64(vreinterpret_u64_u32(vTemp), 0); + uint32_t CR = 0; + if (r == 0xFFFFFFFFFFFFFFFFU) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!r) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpge_ps(V1, V2); + int iTest = _mm_movemask_ps(vTemp) & 3; + uint32_t CR = 0; + if (iTest == 3) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!iTest) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector2Less +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vTemp = vclt_f32(vget_low_f32(V1), vget_low_f32(V2)); + return (vget_lane_u64(vreinterpret_u64_u32(vTemp), 
0) == 0xFFFFFFFFFFFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmplt_ps(V1, V2); + return (((_mm_movemask_ps(vTemp) & 3) == 3) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector2LessOrEqual +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vTemp = vcle_f32(vget_low_f32(V1), vget_low_f32(V2)); + return (vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) == 0xFFFFFFFFFFFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmple_ps(V1, V2); + return (((_mm_movemask_ps(vTemp) & 3) == 3) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector2InBounds +( + FXMVECTOR V, + FXMVECTOR Bounds +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && + (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); + float32x2_t B = vget_low_f32(Bounds); + // Test if less than or equal + uint32x2_t ivTemp1 = vcle_f32(VL, B); + // Negate the bounds + float32x2_t vTemp2 = vneg_f32(B); + // Test if greater or equal (Reversed) + uint32x2_t ivTemp2 = vcle_f32(vTemp2, VL); + // Blend answers + ivTemp1 = vand_u32(ivTemp1, ivTemp2); + // x and y in bounds? 
+ return (vget_lane_u64(vreinterpret_u64_u32(ivTemp1), 0) == 0xFFFFFFFFFFFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + // Test if less than or equal + XMVECTOR vTemp1 = _mm_cmple_ps(V, Bounds); + // Negate the bounds + XMVECTOR vTemp2 = _mm_mul_ps(Bounds, g_XMNegativeOne); + // Test if greater or equal (Reversed) + vTemp2 = _mm_cmple_ps(vTemp2, V); + // Blend answers + vTemp1 = _mm_and_ps(vTemp1, vTemp2); + // x and y in bounds? (z and w are don't care) + return (((_mm_movemask_ps(vTemp1) & 0x3) == 0x3) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER) +#pragma float_control(push) +#pragma float_control(precise, on) +#endif + +inline bool XM_CALLCONV XMVector2IsNaN(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (XMISNAN(V.vector4_f32[0]) || + XMISNAN(V.vector4_f32[1])); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); + // Test against itself. NaN is always not equal + uint32x2_t vTempNan = vceq_f32(VL, VL); + // If x or y are NaN, the mask is zero + return (vget_lane_u64(vreinterpret_u64_u32(vTempNan), 0) != 0xFFFFFFFFFFFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + // Test against itself. 
NaN is always not equal + XMVECTOR vTempNan = _mm_cmpneq_ps(V, V); + // If x or y are NaN, the mask is non-zero + return ((_mm_movemask_ps(vTempNan) & 3) != 0); +#endif +} + +#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER) +#pragma float_control(pop) +#endif + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector2IsInfinite(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + return (XMISINF(V.vector4_f32[0]) || + XMISINF(V.vector4_f32[1])); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Mask off the sign bit + uint32x2_t vTemp = vand_u32(vget_low_u32(vreinterpretq_u32_f32(V)), vget_low_u32(g_XMAbsMask)); + // Compare to infinity + vTemp = vceq_f32(vreinterpret_f32_u32(vTemp), vget_low_f32(g_XMInfinity)); + // If any are infinity, the signs are true. + return vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) != 0; +#elif defined(_XM_SSE_INTRINSICS_) + // Mask off the sign bit + __m128 vTemp = _mm_and_ps(V, g_XMAbsMask); + // Compare to infinity + vTemp = _mm_cmpeq_ps(vTemp, g_XMInfinity); + // If x or y are infinity, the signs are true.
+ return ((_mm_movemask_ps(vTemp) & 3) != 0); +#endif +} + +//------------------------------------------------------------------------------ +// Computation operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2Dot +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result; + Result.f[0] = + Result.f[1] = + Result.f[2] = + Result.f[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1]; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Perform the dot product on x and y + float32x2_t vTemp = vmul_f32(vget_low_f32(V1), vget_low_f32(V2)); + vTemp = vpadd_f32(vTemp, vTemp); + return vcombine_f32(vTemp, vTemp); +#elif defined(_XM_SSE4_INTRINSICS_) + return _mm_dp_ps(V1, V2, 0x3f); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vDot = _mm_mul_ps(V1, V2); + vDot = _mm_hadd_ps(vDot, vDot); + vDot = _mm_moveldup_ps(vDot); + return vDot; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x and y + XMVECTOR vLengthSq = _mm_mul_ps(V1, V2); + // vTemp has y splatted + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1)); + // x+y + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + return vLengthSq; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2Cross +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ + // [ V1.x*V2.y - V1.y*V2.x, V1.x*V2.y - V1.y*V2.x ] + +#if defined(_XM_NO_INTRINSICS_) + float fCross = (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]); + XMVECTORF32 vResult; + vResult.f[0] = + vResult.f[1] = + vResult.f[2] = + vResult.f[3] = fCross; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static 
const XMVECTORF32 Negate = { { { 1.f, -1.f, 0, 0 } } }; + + float32x2_t vTemp = vmul_f32(vget_low_f32(V1), vrev64_f32(vget_low_f32(V2))); + vTemp = vmul_f32(vTemp, vget_low_f32(Negate)); + vTemp = vpadd_f32(vTemp, vTemp); + return vcombine_f32(vTemp, vTemp); +#elif defined(_XM_SSE_INTRINSICS_) + // Swap x and y + XMVECTOR vResult = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 1, 0, 1)); + // Perform the muls + vResult = _mm_mul_ps(vResult, V1); + // Splat y + XMVECTOR vTemp = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(1, 1, 1, 1)); + // Sub the values + vResult = _mm_sub_ss(vResult, vTemp); + // Splat the cross product + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(0, 0, 0, 0)); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2LengthSq(FXMVECTOR V) noexcept +{ + return XMVector2Dot(V, V); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2ReciprocalLengthEst(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + Result = XMVector2LengthSq(V); + Result = XMVectorReciprocalSqrtEst(Result); + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); + // Dot2 + float32x2_t vTemp = vmul_f32(VL, VL); + vTemp = vpadd_f32(vTemp, vTemp); + // Reciprocal sqrt (estimate) + vTemp = vrsqrte_f32(vTemp); + return vcombine_f32(vTemp, vTemp); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(V, V, 0x3f); + return _mm_rsqrt_ps(vTemp); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + XMVECTOR vTemp = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_rsqrt_ss(vTemp); + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + return vLengthSq; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x and y + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + // vTemp has y splatted + XMVECTOR vTemp = 
XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1)); + // x+y + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + vLengthSq = _mm_rsqrt_ss(vLengthSq); + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + return vLengthSq; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2ReciprocalLength(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + Result = XMVector2LengthSq(V); + Result = XMVectorReciprocalSqrt(Result); + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); + // Dot2 + float32x2_t vTemp = vmul_f32(VL, VL); + vTemp = vpadd_f32(vTemp, vTemp); + // Reciprocal sqrt + float32x2_t S0 = vrsqrte_f32(vTemp); + float32x2_t P0 = vmul_f32(vTemp, S0); + float32x2_t R0 = vrsqrts_f32(P0, S0); + float32x2_t S1 = vmul_f32(S0, R0); + float32x2_t P1 = vmul_f32(vTemp, S1); + float32x2_t R1 = vrsqrts_f32(P1, S1); + float32x2_t Result = vmul_f32(S1, R1); + return vcombine_f32(Result, Result); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(V, V, 0x3f); + XMVECTOR vLengthSq = _mm_sqrt_ps(vTemp); + return _mm_div_ps(g_XMOne, vLengthSq); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + XMVECTOR vTemp = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_sqrt_ss(vTemp); + vLengthSq = _mm_div_ss(g_XMOne, vLengthSq); + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + return vLengthSq; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x and y + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + // vTemp has y splatted + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1)); + // x+y + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + vLengthSq = _mm_sqrt_ss(vLengthSq); + vLengthSq = _mm_div_ss(g_XMOne, vLengthSq); + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + return vLengthSq; +#endif +} + 
+//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2LengthEst(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + Result = XMVector2LengthSq(V); + Result = XMVectorSqrtEst(Result); + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); + // Dot2 + float32x2_t vTemp = vmul_f32(VL, VL); + vTemp = vpadd_f32(vTemp, vTemp); + const float32x2_t zero = vdup_n_f32(0); + uint32x2_t VEqualsZero = vceq_f32(vTemp, zero); + // Sqrt (estimate) + float32x2_t Result = vrsqrte_f32(vTemp); + Result = vmul_f32(vTemp, Result); + Result = vbsl_f32(VEqualsZero, zero, Result); + return vcombine_f32(Result, Result); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(V, V, 0x3f); + return _mm_sqrt_ps(vTemp); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + XMVECTOR vTemp = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_sqrt_ss(vTemp); + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + return vLengthSq; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x and y + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + // vTemp has y splatted + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1)); + // x+y + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + vLengthSq = _mm_sqrt_ss(vLengthSq); + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + return vLengthSq; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2Length(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + Result = XMVector2LengthSq(V); + Result = XMVectorSqrt(Result); + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); + // Dot2 + float32x2_t vTemp = vmul_f32(VL, VL); + vTemp = vpadd_f32(vTemp, vTemp); + const float32x2_t zero = vdup_n_f32(0); + 
uint32x2_t VEqualsZero = vceq_f32(vTemp, zero); + // Sqrt + float32x2_t S0 = vrsqrte_f32(vTemp); + float32x2_t P0 = vmul_f32(vTemp, S0); + float32x2_t R0 = vrsqrts_f32(P0, S0); + float32x2_t S1 = vmul_f32(S0, R0); + float32x2_t P1 = vmul_f32(vTemp, S1); + float32x2_t R1 = vrsqrts_f32(P1, S1); + float32x2_t Result = vmul_f32(S1, R1); + Result = vmul_f32(vTemp, Result); + Result = vbsl_f32(VEqualsZero, zero, Result); + return vcombine_f32(Result, Result); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(V, V, 0x3f); + return _mm_sqrt_ps(vTemp); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + XMVECTOR vTemp = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_sqrt_ss(vTemp); + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + return vLengthSq; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x and y + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + // vTemp has y splatted + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1)); + // x+y + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + vLengthSq = _mm_sqrt_ps(vLengthSq); + return vLengthSq; +#endif +} + +//------------------------------------------------------------------------------ +// XMVector2NormalizeEst uses a reciprocal estimate and +// returns QNaN on zero and infinite vectors. 
+ +inline XMVECTOR XM_CALLCONV XMVector2NormalizeEst(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + Result = XMVector2ReciprocalLength(V); + Result = XMVectorMultiply(V, Result); + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); + // Dot2 + float32x2_t vTemp = vmul_f32(VL, VL); + vTemp = vpadd_f32(vTemp, vTemp); + // Reciprocal sqrt (estimate) + vTemp = vrsqrte_f32(vTemp); + // Normalize + float32x2_t Result = vmul_f32(VL, vTemp); + return vcombine_f32(Result, Result); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(V, V, 0x3f); + XMVECTOR vResult = _mm_rsqrt_ps(vTemp); + return _mm_mul_ps(vResult, V); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_rsqrt_ss(vLengthSq); + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + vLengthSq = _mm_mul_ps(vLengthSq, V); + return vLengthSq; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x and y + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + // vTemp has y splatted + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1)); + // x+y + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + vLengthSq = _mm_rsqrt_ss(vLengthSq); + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + vLengthSq = _mm_mul_ps(vLengthSq, V); + return vLengthSq; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2Normalize(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR vResult = XMVector2Length(V); + float fLength = vResult.vector4_f32[0]; + + // Prevent divide by zero + if (fLength > 0) + { + fLength = 1.0f / fLength; + } + + vResult.vector4_f32[0] = V.vector4_f32[0] * fLength; + vResult.vector4_f32[1] = V.vector4_f32[1] * fLength; + vResult.vector4_f32[2] = V.vector4_f32[2] * fLength; + vResult.vector4_f32[3] = 
V.vector4_f32[3] * fLength; + return vResult; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); + // Dot2 + float32x2_t vTemp = vmul_f32(VL, VL); + vTemp = vpadd_f32(vTemp, vTemp); + uint32x2_t VEqualsZero = vceq_f32(vTemp, vdup_n_f32(0)); + uint32x2_t VEqualsInf = vceq_f32(vTemp, vget_low_f32(g_XMInfinity)); + // Reciprocal sqrt (2 iterations of Newton-Raphson) + float32x2_t S0 = vrsqrte_f32(vTemp); + float32x2_t P0 = vmul_f32(vTemp, S0); + float32x2_t R0 = vrsqrts_f32(P0, S0); + float32x2_t S1 = vmul_f32(S0, R0); + float32x2_t P1 = vmul_f32(vTemp, S1); + float32x2_t R1 = vrsqrts_f32(P1, S1); + vTemp = vmul_f32(S1, R1); + // Normalize + float32x2_t Result = vmul_f32(VL, vTemp); + Result = vbsl_f32(VEqualsZero, vdup_n_f32(0), Result); + Result = vbsl_f32(VEqualsInf, vget_low_f32(g_XMQNaN), Result); + return vcombine_f32(Result, Result); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vLengthSq = _mm_dp_ps(V, V, 0x3f); + // Prepare for the division + XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Create zero with a single instruction + XMVECTOR vZeroMask = _mm_setzero_ps(); + // Test for a divide by zero (Must be FP to detect -0.0) + vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + // Failsafe on zero (or epsilon) length vectors + // Build a mask that is clear where the squared length is infinity + vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); + // Divide to perform the normalization + vResult = _mm_div_ps(V, vResult); + // Zero length vectors are set to zero + vResult = _mm_and_ps(vResult, vZeroMask); + // Select qnan or result based on infinite length + XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); + XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); + vResult = _mm_or_ps(vTemp1, vTemp2); + return vResult; +#elif defined(_XM_SSE3_INTRINSICS_) + // Perform the dot product on x and y only + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_moveldup_ps(vLengthSq); + //
Prepare for the division + XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Create zero with a single instruction + XMVECTOR vZeroMask = _mm_setzero_ps(); + // Test for a divide by zero (Must be FP to detect -0.0) + vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + // Failsafe on zero (or epsilon) length vectors + // Build a mask that is clear where the squared length is infinity + vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); + // Divide to perform the normalization + vResult = _mm_div_ps(V, vResult); + // Zero length vectors are set to zero + vResult = _mm_and_ps(vResult, vZeroMask); + // Select qnan or result based on infinite length + XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); + XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); + vResult = _mm_or_ps(vTemp1, vTemp2); + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x and y only + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1)); + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + // Prepare for the division + XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Create zero with a single instruction + XMVECTOR vZeroMask = _mm_setzero_ps(); + // Test for a divide by zero (Must be FP to detect -0.0) + vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + // Failsafe on zero (or epsilon) length vectors + // Build a mask that is clear where the squared length is infinity + vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); + // Divide to perform the normalization + vResult = _mm_div_ps(V, vResult); + // Zero length vectors are set to zero + vResult = _mm_and_ps(vResult, vZeroMask); + // Select qnan or result based on infinite length + XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); + XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); + vResult = _mm_or_ps(vTemp1, vTemp2); + return vResult; +#endif +} +
+//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2ClampLength +( + FXMVECTOR V, + float LengthMin, + float LengthMax +) noexcept +{ + XMVECTOR ClampMax = XMVectorReplicate(LengthMax); + XMVECTOR ClampMin = XMVectorReplicate(LengthMin); + return XMVector2ClampLengthV(V, ClampMin, ClampMax); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2ClampLengthV +( + FXMVECTOR V, + FXMVECTOR LengthMin, + FXMVECTOR LengthMax +) noexcept +{ + assert((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin))); + assert((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax))); + assert(XMVector2GreaterOrEqual(LengthMin, g_XMZero)); + assert(XMVector2GreaterOrEqual(LengthMax, g_XMZero)); + assert(XMVector2GreaterOrEqual(LengthMax, LengthMin)); + + XMVECTOR LengthSq = XMVector2LengthSq(V); + + const XMVECTOR Zero = XMVectorZero(); + + XMVECTOR RcpLength = XMVectorReciprocalSqrt(LengthSq); + + XMVECTOR InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v); + XMVECTOR ZeroLength = XMVectorEqual(LengthSq, Zero); + + XMVECTOR Length = XMVectorMultiply(LengthSq, RcpLength); + + XMVECTOR Normal = XMVectorMultiply(V, RcpLength); + + XMVECTOR Select = XMVectorEqualInt(InfiniteLength, ZeroLength); + Length = XMVectorSelect(LengthSq, Length, Select); + Normal = XMVectorSelect(LengthSq, Normal, Select); + + XMVECTOR ControlMax = XMVectorGreater(Length, LengthMax); + XMVECTOR ControlMin = XMVectorLess(Length, LengthMin); + + XMVECTOR ClampLength = XMVectorSelect(Length, LengthMax, ControlMax); + ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin); + + XMVECTOR Result = XMVectorMultiply(Normal, ClampLength); + + // Preserve the original vector (with no precision loss) if the length falls within the given range + XMVECTOR Control = XMVectorEqualInt(ControlMax, ControlMin); + Result = XMVectorSelect(Result, V, Control); + + return Result; 
+} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2Reflect +( + FXMVECTOR Incident, + FXMVECTOR Normal +) noexcept +{ + // Result = Incident - (2 * dot(Incident, Normal)) * Normal + + XMVECTOR Result; + Result = XMVector2Dot(Incident, Normal); + Result = XMVectorAdd(Result, Result); + Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident); + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2Refract +( + FXMVECTOR Incident, + FXMVECTOR Normal, + float RefractionIndex +) noexcept +{ + XMVECTOR Index = XMVectorReplicate(RefractionIndex); + return XMVector2RefractV(Incident, Normal, Index); +} + +//------------------------------------------------------------------------------ + +// Return the refraction of a 2D vector +inline XMVECTOR XM_CALLCONV XMVector2RefractV +( + FXMVECTOR Incident, + FXMVECTOR Normal, + FXMVECTOR RefractionIndex +) noexcept +{ + // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + + // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal)))) + +#if defined(_XM_NO_INTRINSICS_) + + float IDotN = (Incident.vector4_f32[0] * Normal.vector4_f32[0]) + (Incident.vector4_f32[1] * Normal.vector4_f32[1]); + // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) + float RY = 1.0f - (IDotN * IDotN); + float RX = 1.0f - (RY * RefractionIndex.vector4_f32[0] * RefractionIndex.vector4_f32[0]); + RY = 1.0f - (RY * RefractionIndex.vector4_f32[1] * RefractionIndex.vector4_f32[1]); + if (RX >= 0.0f) + { + RX = (RefractionIndex.vector4_f32[0] * Incident.vector4_f32[0]) - (Normal.vector4_f32[0] * ((RefractionIndex.vector4_f32[0] * IDotN) + sqrtf(RX))); + } + else + { + RX = 0.0f; + } + if (RY >= 0.0f) + { + RY = (RefractionIndex.vector4_f32[1] * Incident.vector4_f32[1]) - 
(Normal.vector4_f32[1] * ((RefractionIndex.vector4_f32[1] * IDotN) + sqrtf(RY))); + } + else + { + RY = 0.0f; + } + + XMVECTOR vResult; + vResult.vector4_f32[0] = RX; + vResult.vector4_f32[1] = RY; + vResult.vector4_f32[2] = 0.0f; + vResult.vector4_f32[3] = 0.0f; + return vResult; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t IL = vget_low_f32(Incident); + float32x2_t NL = vget_low_f32(Normal); + float32x2_t RIL = vget_low_f32(RefractionIndex); + // Get the 2D Dot product of Incident-Normal + float32x2_t vTemp = vmul_f32(IL, NL); + float32x2_t IDotN = vpadd_f32(vTemp, vTemp); + // vTemp = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) + vTemp = vmls_f32(vget_low_f32(g_XMOne), IDotN, IDotN); + vTemp = vmul_f32(vTemp, RIL); + vTemp = vmls_f32(vget_low_f32(g_XMOne), vTemp, RIL); + // If any terms are <=0, sqrt() will fail, punt to zero + uint32x2_t vMask = vcgt_f32(vTemp, vget_low_f32(g_XMZero)); + // Sqrt(vTemp) + float32x2_t S0 = vrsqrte_f32(vTemp); + float32x2_t P0 = vmul_f32(vTemp, S0); + float32x2_t R0 = vrsqrts_f32(P0, S0); + float32x2_t S1 = vmul_f32(S0, R0); + float32x2_t P1 = vmul_f32(vTemp, S1); + float32x2_t R1 = vrsqrts_f32(P1, S1); + float32x2_t S2 = vmul_f32(S1, R1); + vTemp = vmul_f32(vTemp, S2); + // R = RefractionIndex * IDotN + sqrt(R) + vTemp = vmla_f32(vTemp, RIL, IDotN); + // Result = RefractionIndex * Incident - Normal * R + float32x2_t vResult = vmul_f32(RIL, IL); + vResult = vmls_f32(vResult, vTemp, NL); + vResult = vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(vResult), vMask)); + return vcombine_f32(vResult, vResult); +#elif defined(_XM_SSE_INTRINSICS_) + // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + + // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal)))) + // Get the 2D Dot product of Incident-Normal + XMVECTOR IDotN = XMVector2Dot(Incident, Normal); + // vTemp = 1.0f - RefractionIndex * RefractionIndex * (1.0f - 
IDotN * IDotN) + XMVECTOR vTemp = XM_FNMADD_PS(IDotN, IDotN, g_XMOne); + vTemp = _mm_mul_ps(vTemp, RefractionIndex); + vTemp = XM_FNMADD_PS(vTemp, RefractionIndex, g_XMOne); + // If any terms are <=0, sqrt() will fail, punt to zero + XMVECTOR vMask = _mm_cmpgt_ps(vTemp, g_XMZero); + // R = RefractionIndex * IDotN + sqrt(R) + vTemp = _mm_sqrt_ps(vTemp); + vTemp = XM_FMADD_PS(RefractionIndex, IDotN, vTemp); + // Result = RefractionIndex * Incident - Normal * R + XMVECTOR vResult = _mm_mul_ps(RefractionIndex, Incident); + vResult = XM_FNMADD_PS(vTemp, Normal, vResult); + vResult = _mm_and_ps(vResult, vMask); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2Orthogonal(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result = { { { + -V.vector4_f32[1], + V.vector4_f32[0], + 0.f, + 0.f + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 Negate = { { { -1.f, 1.f, 0, 0 } } }; + const float32x2_t zero = vdup_n_f32(0); + + float32x2_t VL = vget_low_f32(V); + float32x2_t Result = vmul_f32(vrev64_f32(VL), vget_low_f32(Negate)); + return vcombine_f32(Result, zero); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 2, 0, 1)); + vResult = _mm_mul_ps(vResult, g_XMNegateX); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2AngleBetweenNormalsEst +( + FXMVECTOR N1, + FXMVECTOR N2 +) noexcept +{ + XMVECTOR Result = XMVector2Dot(N1, N2); + Result = XMVectorClamp(Result, g_XMNegativeOne.v, g_XMOne.v); + Result = XMVectorACosEst(Result); + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2AngleBetweenNormals +( + FXMVECTOR N1, + FXMVECTOR N2 +) noexcept +{ + XMVECTOR Result = 
XMVector2Dot(N1, N2); + Result = XMVectorClamp(Result, g_XMNegativeOne, g_XMOne); + Result = XMVectorACos(Result); + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2AngleBetweenVectors +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ + XMVECTOR L1 = XMVector2ReciprocalLength(V1); + XMVECTOR L2 = XMVector2ReciprocalLength(V2); + + XMVECTOR Dot = XMVector2Dot(V1, V2); + + L1 = XMVectorMultiply(L1, L2); + + XMVECTOR CosAngle = XMVectorMultiply(Dot, L1); + CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne.v, g_XMOne.v); + + return XMVectorACos(CosAngle); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2LinePointDistance +( + FXMVECTOR LinePoint1, + FXMVECTOR LinePoint2, + FXMVECTOR Point +) noexcept +{ + // Given a vector PointVector from LinePoint1 to Point and a vector + // LineVector from LinePoint1 to LinePoint2, the scaled distance + // PointProjectionScale from LinePoint1 to the perpendicular projection + // of PointVector onto the line is defined as: + // + // PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector) + + XMVECTOR PointVector = XMVectorSubtract(Point, LinePoint1); + XMVECTOR LineVector = XMVectorSubtract(LinePoint2, LinePoint1); + + XMVECTOR LengthSq = XMVector2LengthSq(LineVector); + + XMVECTOR PointProjectionScale = XMVector2Dot(PointVector, LineVector); + PointProjectionScale = XMVectorDivide(PointProjectionScale, LengthSq); + + XMVECTOR DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale); + DistanceVector = XMVectorSubtract(PointVector, DistanceVector); + + return XMVector2Length(DistanceVector); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2IntersectLine +( + FXMVECTOR Line1Point1, + FXMVECTOR Line1Point2, + FXMVECTOR Line2Point1, + GXMVECTOR Line2Point2 +) 
noexcept +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + XMVECTOR V1 = XMVectorSubtract(Line1Point2, Line1Point1); + XMVECTOR V2 = XMVectorSubtract(Line2Point2, Line2Point1); + XMVECTOR V3 = XMVectorSubtract(Line1Point1, Line2Point1); + + XMVECTOR C1 = XMVector2Cross(V1, V2); + XMVECTOR C2 = XMVector2Cross(V2, V3); + + XMVECTOR Result; + const XMVECTOR Zero = XMVectorZero(); + if (XMVector2NearEqual(C1, Zero, g_XMEpsilon.v)) + { + if (XMVector2NearEqual(C2, Zero, g_XMEpsilon.v)) + { + // Coincident + Result = g_XMInfinity.v; + } + else + { + // Parallel + Result = g_XMQNaN.v; + } + } + else + { + // Intersection point = Line1Point1 + V1 * (C2 / C1) + XMVECTOR Scale = XMVectorReciprocal(C1); + Scale = XMVectorMultiply(C2, Scale); + Result = XMVectorMultiplyAdd(V1, Scale, Line1Point1); + } + + return Result; + +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR V1 = _mm_sub_ps(Line1Point2, Line1Point1); + XMVECTOR V2 = _mm_sub_ps(Line2Point2, Line2Point1); + XMVECTOR V3 = _mm_sub_ps(Line1Point1, Line2Point1); + // Generate the cross products + XMVECTOR C1 = XMVector2Cross(V1, V2); + XMVECTOR C2 = XMVector2Cross(V2, V3); + // If C1 is not close to epsilon, use the calculated value + XMVECTOR vResultMask = _mm_setzero_ps(); + vResultMask = _mm_sub_ps(vResultMask, C1); + vResultMask = _mm_max_ps(vResultMask, C1); + // 0xFFFFFFFF if the calculated value is to be used + vResultMask = _mm_cmpgt_ps(vResultMask, g_XMEpsilon); + // If C1 is close to epsilon, which fail type is it? INFINITY or NAN? 
+ XMVECTOR vFailMask = _mm_setzero_ps(); + vFailMask = _mm_sub_ps(vFailMask, C2); + vFailMask = _mm_max_ps(vFailMask, C2); + vFailMask = _mm_cmple_ps(vFailMask, g_XMEpsilon); + XMVECTOR vFail = _mm_and_ps(vFailMask, g_XMInfinity); + vFailMask = _mm_andnot_ps(vFailMask, g_XMQNaN); + // vFail is NAN or INF + vFail = _mm_or_ps(vFail, vFailMask); + // Intersection point = Line1Point1 + V1 * (C2 / C1) + XMVECTOR vResult = _mm_div_ps(C2, C1); + vResult = XM_FMADD_PS(vResult, V1, Line1Point1); + // Use result, or failure value + vResult = _mm_and_ps(vResult, vResultMask); + vResultMask = _mm_andnot_ps(vResultMask, vFail); + vResult = _mm_or_ps(vResult, vResultMask); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2Transform +( + FXMVECTOR V, + FXMMATRIX M +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Y = XMVectorSplatY(V); + XMVECTOR X = XMVectorSplatX(V); + + XMVECTOR Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]); + Result = XMVectorMultiplyAdd(X, M.r[0], Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); + float32x4_t Result = vmlaq_lane_f32(M.r[3], M.r[1], VL, 1); // Y + return vmlaq_lane_f32(Result, M.r[0], VL, 0); // X +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); // Y + vResult = XM_FMADD_PS(vResult, M.r[1], M.r[3]); + XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); // X + vResult = XM_FMADD_PS(vTemp, M.r[0], vResult); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline XMFLOAT4* XM_CALLCONV XMVector2TransformStream +( + XMFLOAT4* pOutputStream, + size_t OutputStride, + const XMFLOAT2* pInputStream, + size_t InputStride, + size_t VectorCount, + FXMMATRIX M +) noexcept +{ + assert(pOutputStream != nullptr); + assert(pInputStream != 
nullptr); + + assert(InputStride >= sizeof(XMFLOAT2)); + _Analysis_assume_(InputStride >= sizeof(XMFLOAT2)); + + assert(OutputStride >= sizeof(XMFLOAT4)); + _Analysis_assume_(OutputStride >= sizeof(XMFLOAT4)); + +#if defined(_XM_NO_INTRINSICS_) + + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row3 = M.r[3]; + + for (size_t i = 0; i < VectorCount; i++) + { + XMVECTOR V = XMLoadFloat2(reinterpret_cast(pInputVector)); + XMVECTOR Y = XMVectorSplatY(V); + XMVECTOR X = XMVectorSplatX(V); + + XMVECTOR Result = XMVectorMultiplyAdd(Y, row1, row3); + Result = XMVectorMultiplyAdd(X, row0, Result); + +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma prefast(disable : 26015, "PREfast noise: Esp:1307" ) +#endif + + XMStoreFloat4(reinterpret_cast(pOutputVector), Result); + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + + pInputVector += InputStride; + pOutputVector += OutputStride; + } + + return pOutputStream; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row3 = M.r[3]; + + size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { + if ((InputStride == sizeof(XMFLOAT2)) && (OutputStride == sizeof(XMFLOAT4))) + { + for (size_t j = 0; j < four; ++j) + { + float32x4x2_t V = vld2q_f32(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 4; + + float32x2_t r3 = vget_low_f32(row3); + float32x2_t r = vget_low_f32(row0); + XMVECTOR vResult0 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), V.val[0], r, 0); // Ax+M + XMVECTOR vResult1 = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), V.val[0], r, 1); // Bx+N + + XM_PREFETCH(pInputVector); + + r3 = vget_high_f32(row3); + r = vget_high_f32(row0); + XMVECTOR vResult2 = 
vmlaq_lane_f32(vdupq_lane_f32(r3, 0), V.val[0], r, 0); // Cx+O + XMVECTOR vResult3 = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), V.val[0], r, 1); // Dx+P + + XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); + + r = vget_low_f32(row1); + vResult0 = vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey+M + vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy+N + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); + + r = vget_high_f32(row1); + vResult2 = vmlaq_lane_f32(vResult2, V.val[1], r, 0); // Cx+Gy+O + vResult3 = vmlaq_lane_f32(vResult3, V.val[1], r, 1); // Dx+Hy+P + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); + + float32x4x4_t R; + R.val[0] = vResult0; + R.val[1] = vResult1; + R.val[2] = vResult2; + R.val[3] = vResult3; + + vst4q_f32(reinterpret_cast(pOutputVector), R); + pOutputVector += sizeof(XMFLOAT4) * 4; + + i += 4; + } + } + } + + for (; i < VectorCount; i++) + { + float32x2_t V = vld1_f32(reinterpret_cast(pInputVector)); + pInputVector += InputStride; + + XMVECTOR vResult = vmlaq_lane_f32(row3, row0, V, 0); // X + vResult = vmlaq_lane_f32(vResult, row1, V, 1); // Y + + vst1q_f32(reinterpret_cast(pOutputVector), vResult); + pOutputVector += OutputStride; + } + + return pOutputStream; +#elif defined(_XM_AVX2_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { + __m256 row0 = _mm256_broadcast_ps(&M.r[0]); + __m256 row1 = _mm256_broadcast_ps(&M.r[1]); + __m256 row3 = _mm256_broadcast_ps(&M.r[3]); + + if (InputStride == sizeof(XMFLOAT2)) + { + if (OutputStride == sizeof(XMFLOAT4)) + { + if (!(reinterpret_cast(pOutputStream) & 0x1F)) + { + // Packed input, aligned & packed output + for (size_t j = 0; j < four; ++j) + { + __m256 VV = _mm256_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 4; + + __m256 Y2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); + __m256 X2 = 
_mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); + __m256 Y1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); + __m256 X1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); + + __m256 vTempB = _mm256_fmadd_ps(Y1, row1, row3); + __m256 vTempB2 = _mm256_fmadd_ps(Y2, row1, row3); + __m256 vTempA = _mm256_mul_ps(X1, row0); + __m256 vTempA2 = _mm256_mul_ps(X2, row0); + vTempA = _mm256_add_ps(vTempA, vTempB); + vTempA2 = _mm256_add_ps(vTempA2, vTempB2); + + X1 = _mm256_insertf128_ps(vTempA, _mm256_castps256_ps128(vTempA2), 1); + XM256_STREAM_PS(reinterpret_cast(pOutputVector), X1); + pOutputVector += sizeof(XMFLOAT4) * 2; + + X2 = _mm256_insertf128_ps(vTempA2, _mm256_extractf128_ps(vTempA, 1), 0); + XM256_STREAM_PS(reinterpret_cast(pOutputVector), X2); + pOutputVector += sizeof(XMFLOAT4) * 2; + + i += 4; + } + } + else + { + // Packed input, packed output + for (size_t j = 0; j < four; ++j) + { + __m256 VV = _mm256_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 4; + + __m256 Y2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); + __m256 X2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); + __m256 Y1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); + __m256 X1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); + + __m256 vTempB = _mm256_fmadd_ps(Y1, row1, row3); + __m256 vTempB2 = _mm256_fmadd_ps(Y2, row1, row3); + __m256 vTempA = _mm256_mul_ps(X1, row0); + __m256 vTempA2 = _mm256_mul_ps(X2, row0); + vTempA = _mm256_add_ps(vTempA, vTempB); + vTempA2 = _mm256_add_ps(vTempA2, vTempB2); + + X1 = _mm256_insertf128_ps(vTempA, _mm256_castps256_ps128(vTempA2), 1); + _mm256_storeu_ps(reinterpret_cast(pOutputVector), X1); + pOutputVector += sizeof(XMFLOAT4) * 2; + + X2 = _mm256_insertf128_ps(vTempA2, _mm256_extractf128_ps(vTempA, 1), 0); + _mm256_storeu_ps(reinterpret_cast(pOutputVector), X2); + pOutputVector += sizeof(XMFLOAT4) * 2; + + i += 4; + } + } + } + else + { + // Packed input, unpacked output + for (size_t j = 0; 
j < four; ++j) + { + __m256 VV = _mm256_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 4; + + __m256 Y2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); + __m256 X2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); + __m256 Y1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); + __m256 X1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); + + __m256 vTempB = _mm256_fmadd_ps(Y1, row1, row3); + __m256 vTempB2 = _mm256_fmadd_ps(Y2, row1, row3); + __m256 vTempA = _mm256_mul_ps(X1, row0); + __m256 vTempA2 = _mm256_mul_ps(X2, row0); + vTempA = _mm256_add_ps(vTempA, vTempB); + vTempA2 = _mm256_add_ps(vTempA2, vTempB2); + + _mm_storeu_ps(reinterpret_cast(pOutputVector), _mm256_castps256_ps128(vTempA)); + pOutputVector += OutputStride; + + _mm_storeu_ps(reinterpret_cast(pOutputVector), _mm256_castps256_ps128(vTempA2)); + pOutputVector += OutputStride; + + _mm_storeu_ps(reinterpret_cast(pOutputVector), _mm256_extractf128_ps(vTempA, 1)); + pOutputVector += OutputStride; + + _mm_storeu_ps(reinterpret_cast(pOutputVector), _mm256_extractf128_ps(vTempA2, 1)); + pOutputVector += OutputStride; + + i += 4; + } + } + } + } + + if (i < VectorCount) + { + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row3 = M.r[3]; + + for (; i < VectorCount; i++) + { + __m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pInputVector))); + pInputVector += InputStride; + + XMVECTOR Y = XM_PERMUTE_PS(xy, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(xy, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); + XMVECTOR vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + } + } + + XM_SFENCE(); + + return pOutputStream; +#elif defined(_XM_SSE_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = 
M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row3 = M.r[3]; + + size_t i = 0; + size_t two = VectorCount >> 1; + if (two > 0) + { + if (InputStride == sizeof(XMFLOAT2)) + { + if (!(reinterpret_cast(pOutputStream) & 0xF) && !(OutputStride & 0xF)) + { + // Packed input, aligned output + for (size_t j = 0; j < two; ++j) + { + XMVECTOR V = _mm_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 2; + + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); + XMVECTOR vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + XM_STREAM_PS(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + + vTemp = XM_FMADD_PS(Y, row1, row3); + vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + XM_STREAM_PS(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + i += 2; + } + } + else + { + // Packed input, unaligned output + for (size_t j = 0; j < two; ++j) + { + XMVECTOR V = _mm_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 2; + + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); + XMVECTOR vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + + vTemp = XM_FMADD_PS(Y, row1, row3); + vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + i += 2; + } + } + } + } + + if (!(reinterpret_cast(pInputVector) & 0xF) && 
!(InputStride & 0xF)) + { + if (!(reinterpret_cast(pOutputStream) & 0xF) && !(OutputStride & 0xF)) + { + // Aligned input, aligned output + for (; i < VectorCount; i++) + { + XMVECTOR V = _mm_castsi128_ps(_mm_loadl_epi64(reinterpret_cast(pInputVector))); + pInputVector += InputStride; + + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); + XMVECTOR vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + XM_STREAM_PS(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + } + } + else + { + // Aligned input, unaligned output + for (; i < VectorCount; i++) + { + XMVECTOR V = _mm_castsi128_ps(_mm_loadl_epi64(reinterpret_cast(pInputVector))); + pInputVector += InputStride; + + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); + XMVECTOR vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + } + } + } + else + { + // Unaligned input + for (; i < VectorCount; i++) + { + __m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pInputVector))); + pInputVector += InputStride; + + XMVECTOR Y = XM_PERMUTE_PS(xy, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(xy, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); + XMVECTOR vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + } + } + + XM_SFENCE(); + + return pOutputStream; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2TransformCoord +( + FXMVECTOR V, + FXMMATRIX M +) noexcept +{ + XMVECTOR Y = XMVectorSplatY(V); + XMVECTOR X = 
XMVectorSplatX(V); + + XMVECTOR Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]); + Result = XMVectorMultiplyAdd(X, M.r[0], Result); + + XMVECTOR W = XMVectorSplatW(Result); + return XMVectorDivide(Result, W); +} + +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline XMFLOAT2* XM_CALLCONV XMVector2TransformCoordStream +( + XMFLOAT2* pOutputStream, + size_t OutputStride, + const XMFLOAT2* pInputStream, + size_t InputStride, + size_t VectorCount, + FXMMATRIX M +) noexcept +{ + assert(pOutputStream != nullptr); + assert(pInputStream != nullptr); + + assert(InputStride >= sizeof(XMFLOAT2)); + _Analysis_assume_(InputStride >= sizeof(XMFLOAT2)); + + assert(OutputStride >= sizeof(XMFLOAT2)); + _Analysis_assume_(OutputStride >= sizeof(XMFLOAT2)); + +#if defined(_XM_NO_INTRINSICS_) + + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row3 = M.r[3]; + + for (size_t i = 0; i < VectorCount; i++) + { + XMVECTOR V = XMLoadFloat2(reinterpret_cast(pInputVector)); + XMVECTOR Y = XMVectorSplatY(V); + XMVECTOR X = XMVectorSplatX(V); + + XMVECTOR Result = XMVectorMultiplyAdd(Y, row1, row3); + Result = XMVectorMultiplyAdd(X, row0, Result); + + XMVECTOR W = XMVectorSplatW(Result); + + Result = XMVectorDivide(Result, W); + +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma prefast(disable : 26015, "PREfast noise: Esp:1307" ) +#endif + + XMStoreFloat2(reinterpret_cast(pOutputVector), Result); + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + + pInputVector += InputStride; + pOutputVector += OutputStride; + } + + return pOutputStream; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row3 = M.r[3]; + + 
size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { + if ((InputStride == sizeof(XMFLOAT2)) && (OutputStride == sizeof(XMFLOAT2))) + { + for (size_t j = 0; j < four; ++j) + { + float32x4x2_t V = vld2q_f32(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 4; + + float32x2_t r3 = vget_low_f32(row3); + float32x2_t r = vget_low_f32(row0); + XMVECTOR vResult0 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), V.val[0], r, 0); // Ax+M + XMVECTOR vResult1 = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), V.val[0], r, 1); // Bx+N + + XM_PREFETCH(pInputVector); + + r3 = vget_high_f32(row3); + r = vget_high_f32(row0); + XMVECTOR W = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), V.val[0], r, 1); // Dx+P + + XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); + + r = vget_low_f32(row1); + vResult0 = vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey+M + vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy+N + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); + + r = vget_high_f32(row1); + W = vmlaq_lane_f32(W, V.val[1], r, 1); // Dx+Hy+P + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); + +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + V.val[0] = vdivq_f32(vResult0, W); + V.val[1] = vdivq_f32(vResult1, W); +#else + // 2 iterations of Newton-Raphson refinement of reciprocal + float32x4_t Reciprocal = vrecpeq_f32(W); + float32x4_t S = vrecpsq_f32(Reciprocal, W); + Reciprocal = vmulq_f32(S, Reciprocal); + S = vrecpsq_f32(Reciprocal, W); + Reciprocal = vmulq_f32(S, Reciprocal); + + V.val[0] = vmulq_f32(vResult0, Reciprocal); + V.val[1] = vmulq_f32(vResult1, Reciprocal); +#endif + + vst2q_f32(reinterpret_cast(pOutputVector), V); + pOutputVector += sizeof(XMFLOAT2) * 4; + + i += 4; + } + } + } + + for (; i < VectorCount; i++) + { + float32x2_t V = vld1_f32(reinterpret_cast(pInputVector)); + pInputVector += InputStride; + + XMVECTOR vResult = vmlaq_lane_f32(row3, row0, V, 0); // X + vResult = vmlaq_lane_f32(vResult, row1, V, 1); // Y 
+ + V = vget_high_f32(vResult); + float32x2_t W = vdup_lane_f32(V, 1); + +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + V = vget_low_f32(vResult); + V = vdiv_f32(V, W); +#else + // 2 iterations of Newton-Raphson refinement of reciprocal for W + float32x2_t Reciprocal = vrecpe_f32(W); + float32x2_t S = vrecps_f32(Reciprocal, W); + Reciprocal = vmul_f32(S, Reciprocal); + S = vrecps_f32(Reciprocal, W); + Reciprocal = vmul_f32(S, Reciprocal); + + V = vget_low_f32(vResult); + V = vmul_f32(V, Reciprocal); +#endif + + vst1_f32(reinterpret_cast(pOutputVector), V); + pOutputVector += OutputStride; + } + + return pOutputStream; +#elif defined(_XM_AVX2_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { + __m256 row0 = _mm256_broadcast_ps(&M.r[0]); + __m256 row1 = _mm256_broadcast_ps(&M.r[1]); + __m256 row3 = _mm256_broadcast_ps(&M.r[3]); + + if (InputStride == sizeof(XMFLOAT2)) + { + if (OutputStride == sizeof(XMFLOAT2)) + { + if (!(reinterpret_cast(pOutputStream) & 0x1F)) + { + // Packed input, aligned & packed output + for (size_t j = 0; j < four; ++j) + { + __m256 VV = _mm256_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 4; + + __m256 Y2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); + __m256 X2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); + __m256 Y1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); + __m256 X1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); + + __m256 vTempB = _mm256_fmadd_ps(Y1, row1, row3); + __m256 vTempB2 = _mm256_fmadd_ps(Y2, row1, row3); + __m256 vTempA = _mm256_mul_ps(X1, row0); + __m256 vTempA2 = _mm256_mul_ps(X2, row0); + vTempA = _mm256_add_ps(vTempA, vTempB); + vTempA2 = _mm256_add_ps(vTempA2, vTempB2); + + __m256 W = _mm256_shuffle_ps(vTempA, vTempA, _MM_SHUFFLE(3, 3, 3, 3)); + vTempA = _mm256_div_ps(vTempA, W); + + W 
= _mm256_shuffle_ps(vTempA2, vTempA2, _MM_SHUFFLE(3, 3, 3, 3)); + vTempA2 = _mm256_div_ps(vTempA2, W); + + X1 = _mm256_shuffle_ps(vTempA, vTempA2, 0x44); + XM256_STREAM_PS(reinterpret_cast(pOutputVector), X1); + pOutputVector += sizeof(XMFLOAT2) * 4; + + i += 4; + } + } + else + { + // Packed input, packed output + for (size_t j = 0; j < four; ++j) + { + __m256 VV = _mm256_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 4; + + __m256 Y2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); + __m256 X2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); + __m256 Y1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); + __m256 X1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); + + __m256 vTempB = _mm256_fmadd_ps(Y1, row1, row3); + __m256 vTempB2 = _mm256_fmadd_ps(Y2, row1, row3); + __m256 vTempA = _mm256_mul_ps(X1, row0); + __m256 vTempA2 = _mm256_mul_ps(X2, row0); + vTempA = _mm256_add_ps(vTempA, vTempB); + vTempA2 = _mm256_add_ps(vTempA2, vTempB2); + + __m256 W = _mm256_shuffle_ps(vTempA, vTempA, _MM_SHUFFLE(3, 3, 3, 3)); + vTempA = _mm256_div_ps(vTempA, W); + + W = _mm256_shuffle_ps(vTempA2, vTempA2, _MM_SHUFFLE(3, 3, 3, 3)); + vTempA2 = _mm256_div_ps(vTempA2, W); + + X1 = _mm256_shuffle_ps(vTempA, vTempA2, 0x44); + _mm256_storeu_ps(reinterpret_cast(pOutputVector), X1); + pOutputVector += sizeof(XMFLOAT2) * 4; + + i += 4; + } + } + } + else + { + // Packed input, unpacked output + for (size_t j = 0; j < four; ++j) + { + __m256 VV = _mm256_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 4; + + __m256 Y2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); + __m256 X2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); + __m256 Y1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); + __m256 X1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); + + __m256 vTempB = _mm256_fmadd_ps(Y1, row1, row3); + __m256 vTempB2 = _mm256_fmadd_ps(Y2, row1, row3); + __m256 vTempA = _mm256_mul_ps(X1, row0); 
+ __m256 vTempA2 = _mm256_mul_ps(X2, row0); + vTempA = _mm256_add_ps(vTempA, vTempB); + vTempA2 = _mm256_add_ps(vTempA2, vTempB2); + + __m256 W = _mm256_shuffle_ps(vTempA, vTempA, _MM_SHUFFLE(3, 3, 3, 3)); + vTempA = _mm256_div_ps(vTempA, W); + + W = _mm256_shuffle_ps(vTempA2, vTempA2, _MM_SHUFFLE(3, 3, 3, 3)); + vTempA2 = _mm256_div_ps(vTempA2, W); + + _mm_store_sd(reinterpret_cast(pOutputVector), + _mm_castps_pd(_mm256_castps256_ps128(vTempA))); + pOutputVector += OutputStride; + + _mm_store_sd(reinterpret_cast(pOutputVector), + _mm_castps_pd(_mm256_castps256_ps128(vTempA2))); + pOutputVector += OutputStride; + + _mm_store_sd(reinterpret_cast(pOutputVector), + _mm_castps_pd(_mm256_extractf128_ps(vTempA, 1))); + pOutputVector += OutputStride; + + _mm_store_sd(reinterpret_cast(pOutputVector), + _mm_castps_pd(_mm256_extractf128_ps(vTempA2, 1))); + pOutputVector += OutputStride; + + i += 4; + } + } + } + } + + if (i < VectorCount) + { + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row3 = M.r[3]; + + for (; i < VectorCount; i++) + { + __m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pInputVector))); + pInputVector += InputStride; + + XMVECTOR Y = XM_PERMUTE_PS(xy, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(xy, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); + XMVECTOR vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + + _mm_store_sd(reinterpret_cast(pOutputVector), _mm_castps_pd(vTemp)); + pOutputVector += OutputStride; + } + } + + XM_SFENCE(); + + return pOutputStream; +#elif defined(_XM_SSE_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row3 = M.r[3]; + + size_t i = 0; + size_t two = VectorCount >> 1; + if (two > 0) + { 
+ if (InputStride == sizeof(XMFLOAT2)) + { + if (OutputStride == sizeof(XMFLOAT2)) + { + if (!(reinterpret_cast(pOutputStream) & 0xF)) + { + // Packed input, aligned & packed output + for (size_t j = 0; j < two; ++j) + { + XMVECTOR V = _mm_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 2; + + // Result 1 + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); + XMVECTOR vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + XMVECTOR V1 = _mm_div_ps(vTemp, W); + + // Result 2 + Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + + vTemp = XM_FMADD_PS(Y, row1, row3); + vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + XMVECTOR V2 = _mm_div_ps(vTemp, W); + + vTemp = _mm_movelh_ps(V1, V2); + + XM_STREAM_PS(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += sizeof(XMFLOAT2) * 2; + + i += 2; + } + } + else + { + // Packed input, unaligned & packed output + for (size_t j = 0; j < two; ++j) + { + XMVECTOR V = _mm_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 2; + + // Result 1 + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); + XMVECTOR vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + XMVECTOR V1 = _mm_div_ps(vTemp, W); + + // Result 2 + Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + + vTemp = XM_FMADD_PS(Y, row1, row3); + vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + 
XMVECTOR V2 = _mm_div_ps(vTemp, W); + + vTemp = _mm_movelh_ps(V1, V2); + + _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += sizeof(XMFLOAT2) * 2; + + i += 2; + } + } + } + else + { + // Packed input, unpacked output + for (size_t j = 0; j < two; ++j) + { + XMVECTOR V = _mm_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 2; + + // Result 1 + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); + XMVECTOR vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + vTemp = _mm_div_ps(vTemp, W); + + _mm_store_sd(reinterpret_cast(pOutputVector), _mm_castps_pd(vTemp)); + pOutputVector += OutputStride; + + // Result 2 + Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + + vTemp = XM_FMADD_PS(Y, row1, row3); + vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + vTemp = _mm_div_ps(vTemp, W); + + _mm_store_sd(reinterpret_cast(pOutputVector), _mm_castps_pd(vTemp)); + pOutputVector += OutputStride; + + i += 2; + } + } + } + } + + if (!(reinterpret_cast(pInputVector) & 0xF) && !(InputStride & 0xF)) + { + // Aligned input + for (; i < VectorCount; i++) + { + XMVECTOR V = _mm_castsi128_ps(_mm_loadl_epi64(reinterpret_cast(pInputVector))); + pInputVector += InputStride; + + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); + XMVECTOR vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + vTemp = _mm_div_ps(vTemp, W); + + _mm_store_sd(reinterpret_cast(pOutputVector), _mm_castps_pd(vTemp)); + pOutputVector += OutputStride; + } + } + else + 
{ + // Unaligned input + for (; i < VectorCount; i++) + { + __m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pInputVector))); + pInputVector += InputStride; + + XMVECTOR Y = XM_PERMUTE_PS(xy, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(xy, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); + XMVECTOR vTemp2 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + vTemp = _mm_div_ps(vTemp, W); + + _mm_store_sd(reinterpret_cast(pOutputVector), _mm_castps_pd(vTemp)); + pOutputVector += OutputStride; + } + } + + XM_SFENCE(); + + return pOutputStream; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector2TransformNormal +( + FXMVECTOR V, + FXMMATRIX M +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Y = XMVectorSplatY(V); + XMVECTOR X = XMVectorSplatX(V); + + XMVECTOR Result = XMVectorMultiply(Y, M.r[1]); + Result = XMVectorMultiplyAdd(X, M.r[0], Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); + float32x4_t Result = vmulq_lane_f32(M.r[1], VL, 1); // Y + return vmlaq_lane_f32(Result, M.r[0], VL, 0); // X +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); // Y + vResult = _mm_mul_ps(vResult, M.r[1]); + XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); // X + vResult = XM_FMADD_PS(vTemp, M.r[0], vResult); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline XMFLOAT2* XM_CALLCONV XMVector2TransformNormalStream +( + XMFLOAT2* pOutputStream, + size_t OutputStride, + const XMFLOAT2* pInputStream, + size_t InputStride, + size_t VectorCount, + FXMMATRIX M +) noexcept +{ + assert(pOutputStream != nullptr); + assert(pInputStream != nullptr); + + assert(InputStride >= 
sizeof(XMFLOAT2)); + _Analysis_assume_(InputStride >= sizeof(XMFLOAT2)); + + assert(OutputStride >= sizeof(XMFLOAT2)); + _Analysis_assume_(OutputStride >= sizeof(XMFLOAT2)); + +#if defined(_XM_NO_INTRINSICS_) + + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + + for (size_t i = 0; i < VectorCount; i++) + { + XMVECTOR V = XMLoadFloat2(reinterpret_cast(pInputVector)); + XMVECTOR Y = XMVectorSplatY(V); + XMVECTOR X = XMVectorSplatX(V); + + XMVECTOR Result = XMVectorMultiply(Y, row1); + Result = XMVectorMultiplyAdd(X, row0, Result); + +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma prefast(disable : 26015, "PREfast noise: Esp:1307" ) +#endif + + XMStoreFloat2(reinterpret_cast(pOutputVector), Result); + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + + pInputVector += InputStride; + pOutputVector += OutputStride; + } + + return pOutputStream; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + + size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { + if ((InputStride == sizeof(XMFLOAT2)) && (OutputStride == sizeof(XMFLOAT2))) + { + for (size_t j = 0; j < four; ++j) + { + float32x4x2_t V = vld2q_f32(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 4; + + float32x2_t r = vget_low_f32(row0); + XMVECTOR vResult0 = vmulq_lane_f32(V.val[0], r, 0); // Ax + XMVECTOR vResult1 = vmulq_lane_f32(V.val[0], r, 1); // Bx + + XM_PREFETCH(pInputVector); + XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); + + r = vget_low_f32(row1); + vResult0 = vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey + vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); + XM_PREFETCH(pInputVector + 
(XM_CACHE_LINE_SIZE * 3)); + + V.val[0] = vResult0; + V.val[1] = vResult1; + + vst2q_f32(reinterpret_cast(pOutputVector), V); + pOutputVector += sizeof(XMFLOAT2) * 4; + + i += 4; + } + } + } + + for (; i < VectorCount; i++) + { + float32x2_t V = vld1_f32(reinterpret_cast(pInputVector)); + pInputVector += InputStride; + + XMVECTOR vResult = vmulq_lane_f32(row0, V, 0); // X + vResult = vmlaq_lane_f32(vResult, row1, V, 1); // Y + + V = vget_low_f32(vResult); + vst1_f32(reinterpret_cast(pOutputVector), V); + pOutputVector += OutputStride; + } + + return pOutputStream; +#elif defined(_XM_AVX2_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { + __m256 row0 = _mm256_broadcast_ps(&M.r[0]); + __m256 row1 = _mm256_broadcast_ps(&M.r[1]); + + if (InputStride == sizeof(XMFLOAT2)) + { + if (OutputStride == sizeof(XMFLOAT2)) + { + if (!(reinterpret_cast(pOutputStream) & 0x1F)) + { + // Packed input, aligned & packed output + for (size_t j = 0; j < four; ++j) + { + __m256 VV = _mm256_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 4; + + __m256 Y2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); + __m256 X2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); + __m256 Y1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); + __m256 X1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); + + __m256 vTempA = _mm256_mul_ps(Y1, row1); + __m256 vTempB = _mm256_mul_ps(Y2, row1); + vTempA = _mm256_fmadd_ps(X1, row0, vTempA); + vTempB = _mm256_fmadd_ps(X2, row0, vTempB); + + X1 = _mm256_shuffle_ps(vTempA, vTempB, 0x44); + XM256_STREAM_PS(reinterpret_cast(pOutputVector), X1); + pOutputVector += sizeof(XMFLOAT2) * 4; + + i += 4; + } + } + else + { + // Packed input, packed output + for (size_t j = 0; j < four; ++j) + { + __m256 VV = _mm256_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += 
sizeof(XMFLOAT2) * 4; + + __m256 Y2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); + __m256 X2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); + __m256 Y1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); + __m256 X1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); + + __m256 vTempA = _mm256_mul_ps(Y1, row1); + __m256 vTempB = _mm256_mul_ps(Y2, row1); + vTempA = _mm256_fmadd_ps(X1, row0, vTempA); + vTempB = _mm256_fmadd_ps(X2, row0, vTempB); + + X1 = _mm256_shuffle_ps(vTempA, vTempB, 0x44); + _mm256_storeu_ps(reinterpret_cast(pOutputVector), X1); + pOutputVector += sizeof(XMFLOAT2) * 4; + + i += 4; + } + } + } + else + { + // Packed input, unpacked output + for (size_t j = 0; j < four; ++j) + { + __m256 VV = _mm256_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 4; + + __m256 Y2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); + __m256 X2 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); + __m256 Y1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); + __m256 X1 = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); + + __m256 vTempA = _mm256_mul_ps(Y1, row1); + __m256 vTempB = _mm256_mul_ps(Y2, row1); + vTempA = _mm256_fmadd_ps(X1, row0, vTempA); + vTempB = _mm256_fmadd_ps(X2, row0, vTempB); + + _mm_store_sd(reinterpret_cast(pOutputVector), + _mm_castps_pd(_mm256_castps256_ps128(vTempA))); + pOutputVector += OutputStride; + + _mm_store_sd(reinterpret_cast(pOutputVector), + _mm_castps_pd(_mm256_castps256_ps128(vTempB))); + pOutputVector += OutputStride; + + _mm_store_sd(reinterpret_cast(pOutputVector), + _mm_castps_pd(_mm256_extractf128_ps(vTempA, 1))); + pOutputVector += OutputStride; + + _mm_store_sd(reinterpret_cast(pOutputVector), + _mm_castps_pd(_mm256_extractf128_ps(vTempB, 1))); + pOutputVector += OutputStride; + + i += 4; + } + } + } + } + + if (i < VectorCount) + { + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + + for (; i < VectorCount; i++) + { + __m128 xy = 
_mm_castpd_ps(_mm_load_sd(reinterpret_cast(pInputVector))); + pInputVector += InputStride; + + XMVECTOR Y = XM_PERMUTE_PS(xy, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(xy, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = _mm_mul_ps(Y, row1); + vTemp = XM_FMADD_PS(X, row0, vTemp); + + _mm_store_sd(reinterpret_cast(pOutputVector), _mm_castps_pd(vTemp)); + pOutputVector += OutputStride; + } + } + + XM_SFENCE(); + + return pOutputStream; +#elif defined(_XM_SSE_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + + size_t i = 0; + size_t two = VectorCount >> 1; + if (two > 0) + { + if (InputStride == sizeof(XMFLOAT2)) + { + if (OutputStride == sizeof(XMFLOAT2)) + { + if (!(reinterpret_cast(pOutputStream) & 0xF)) + { + // Packed input, aligned & packed output + for (size_t j = 0; j < two; ++j) + { + XMVECTOR V = _mm_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 2; + + // Result 1 + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = _mm_mul_ps(Y, row1); + XMVECTOR V1 = XM_FMADD_PS(X, row0, vTemp); + + // Result 2 + Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + + vTemp = _mm_mul_ps(Y, row1); + XMVECTOR V2 = XM_FMADD_PS(X, row0, vTemp); + + vTemp = _mm_movelh_ps(V1, V2); + + XM_STREAM_PS(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += sizeof(XMFLOAT2) * 2; + + i += 2; + } + } + else + { + // Packed input, unaligned & packed output + for (size_t j = 0; j < two; ++j) + { + XMVECTOR V = _mm_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 2; + + // Result 1 + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = _mm_mul_ps(Y, row1); + XMVECTOR 
V1 = XM_FMADD_PS(X, row0, vTemp); + + // Result 2 + Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + + vTemp = _mm_mul_ps(Y, row1); + XMVECTOR V2 = XM_FMADD_PS(X, row0, vTemp); + + vTemp = _mm_movelh_ps(V1, V2); + + _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += sizeof(XMFLOAT2) * 2; + + i += 2; + } + } + } + else + { + // Packed input, unpacked output + for (size_t j = 0; j < two; ++j) + { + XMVECTOR V = _mm_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT2) * 2; + + // Result 1 + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = _mm_mul_ps(Y, row1); + vTemp = XM_FMADD_PS(X, row0, vTemp); + + _mm_store_sd(reinterpret_cast(pOutputVector), _mm_castps_pd(vTemp)); + pOutputVector += OutputStride; + + // Result 2 + Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + + vTemp = _mm_mul_ps(Y, row1); + vTemp = XM_FMADD_PS(X, row0, vTemp); + + _mm_store_sd(reinterpret_cast(pOutputVector), _mm_castps_pd(vTemp)); + pOutputVector += OutputStride; + + i += 2; + } + } + } + } + + if (!(reinterpret_cast(pInputVector) & 0xF) && !(InputStride & 0xF)) + { + // Aligned input + for (; i < VectorCount; i++) + { + XMVECTOR V = _mm_castsi128_ps(_mm_loadl_epi64(reinterpret_cast(pInputVector))); + pInputVector += InputStride; + + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = _mm_mul_ps(Y, row1); + vTemp = XM_FMADD_PS(X, row0, vTemp); + + _mm_store_sd(reinterpret_cast(pOutputVector), _mm_castps_pd(vTemp)); + pOutputVector += OutputStride; + } + } + else + { + // Unaligned input + for (; i < VectorCount; i++) + { + __m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pInputVector))); + pInputVector += InputStride; + + XMVECTOR Y = XM_PERMUTE_PS(xy, _MM_SHUFFLE(1, 1, 1, 1)); 
+ XMVECTOR X = XM_PERMUTE_PS(xy, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = _mm_mul_ps(Y, row1); + vTemp = XM_FMADD_PS(X, row0, vTemp); + + _mm_store_sd(reinterpret_cast(pOutputVector), _mm_castps_pd(vTemp)); + pOutputVector += OutputStride; + } + } + + XM_SFENCE(); + + return pOutputStream; +#endif +} + +/**************************************************************************** + * + * 3D Vector + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + // Comparison operations + //------------------------------------------------------------------------------ + + //------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector3Equal +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vceqq_f32(V1, V2); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU) == 0xFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); + return (((_mm_movemask_ps(vTemp) & 7) == 7) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +inline uint32_t XM_CALLCONV XMVector3EqualR +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + uint32_t CR = 0; + if ((V1.vector4_f32[0] == V2.vector4_f32[0]) && + (V1.vector4_f32[1] == V2.vector4_f32[1]) && + (V1.vector4_f32[2] == V2.vector4_f32[2])) + { + CR = XM_CRMASK_CR6TRUE; + } + else if 
((V1.vector4_f32[0] != V2.vector4_f32[0]) && + (V1.vector4_f32[1] != V2.vector4_f32[1]) && + (V1.vector4_f32[2] != V2.vector4_f32[2])) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vceqq_f32(V1, V2); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU; + + uint32_t CR = 0; + if (r == 0xFFFFFFU) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!r) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); + int iTest = _mm_movemask_ps(vTemp) & 7; + uint32_t CR = 0; + if (iTest == 7) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!iTest) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector3EqualInt +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2)); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU) == 0xFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); + return (((_mm_movemask_ps(_mm_castsi128_ps(vTemp)) & 7) == 7) != 0); +#endif +} + 
+//------------------------------------------------------------------------------ + +inline uint32_t XM_CALLCONV XMVector3EqualIntR +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + uint32_t CR = 0; + if ((V1.vector4_u32[0] == V2.vector4_u32[0]) && + (V1.vector4_u32[1] == V2.vector4_u32[1]) && + (V1.vector4_u32[2] == V2.vector4_u32[2])) + { + CR = XM_CRMASK_CR6TRUE; + } + else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) && + (V1.vector4_u32[1] != V2.vector4_u32[1]) && + (V1.vector4_u32[2] != V2.vector4_u32[2])) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2)); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU; + + uint32_t CR = 0; + if (r == 0xFFFFFFU) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!r) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#elif defined(_XM_SSE_INTRINSICS_) + __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); + int iTemp = _mm_movemask_ps(_mm_castsi128_ps(vTemp)) & 7; + uint32_t CR = 0; + if (iTemp == 7) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!iTemp) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector3NearEqual +( + FXMVECTOR V1, + FXMVECTOR V2, + FXMVECTOR Epsilon +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + float dx, dy, dz; + + dx = fabsf(V1.vector4_f32[0] - V2.vector4_f32[0]); + dy = fabsf(V1.vector4_f32[1] - V2.vector4_f32[1]); + dz = fabsf(V1.vector4_f32[2] - V2.vector4_f32[2]); + return (((dx <= Epsilon.vector4_f32[0]) && + (dy <= Epsilon.vector4_f32[1]) && + (dz 
<= Epsilon.vector4_f32[2])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t vDelta = vsubq_f32(V1, V2); +#ifdef _MSC_VER + uint32x4_t vResult = vacleq_f32(vDelta, Epsilon); +#else + uint32x4_t vResult = vcleq_f32(vabsq_f32(vDelta), Epsilon); +#endif + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU) == 0xFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + // Get the difference + XMVECTOR vDelta = _mm_sub_ps(V1, V2); + // Get the absolute value of the difference + XMVECTOR vTemp = _mm_setzero_ps(); + vTemp = _mm_sub_ps(vTemp, vDelta); + vTemp = _mm_max_ps(vTemp, vDelta); + vTemp = _mm_cmple_ps(vTemp, Epsilon); + // w is don't care + return (((_mm_movemask_ps(vTemp) & 7) == 0x7) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector3NotEqual +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vceqq_f32(V1, V2); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU) != 0xFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); + return (((_mm_movemask_ps(vTemp) & 7) != 7) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector3NotEqualInt +( 
+ FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2)); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU) != 0xFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); + return (((_mm_movemask_ps(_mm_castsi128_ps(vTemp)) & 7) != 7) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector3Greater +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vcgtq_f32(V1, V2); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU) == 0xFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpgt_ps(V1, V2); + return (((_mm_movemask_ps(vTemp) & 7) == 7) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +inline uint32_t XM_CALLCONV XMVector3GreaterR +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + uint32_t CR = 0; 
+ if ((V1.vector4_f32[0] > V2.vector4_f32[0]) && + (V1.vector4_f32[1] > V2.vector4_f32[1]) && + (V1.vector4_f32[2] > V2.vector4_f32[2])) + { + CR = XM_CRMASK_CR6TRUE; + } + else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) && + (V1.vector4_f32[1] <= V2.vector4_f32[1]) && + (V1.vector4_f32[2] <= V2.vector4_f32[2])) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vcgtq_f32(V1, V2); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU; + + uint32_t CR = 0; + if (r == 0xFFFFFFU) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!r) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpgt_ps(V1, V2); + uint32_t CR = 0; + int iTest = _mm_movemask_ps(vTemp) & 7; + if (iTest == 7) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!iTest) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector3GreaterOrEqual +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vcgeq_f32(V1, V2); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU) == 0xFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpge_ps(V1, 
V2); + return (((_mm_movemask_ps(vTemp) & 7) == 7) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +inline uint32_t XM_CALLCONV XMVector3GreaterOrEqualR +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + uint32_t CR = 0; + if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) && + (V1.vector4_f32[1] >= V2.vector4_f32[1]) && + (V1.vector4_f32[2] >= V2.vector4_f32[2])) + { + CR = XM_CRMASK_CR6TRUE; + } + else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) && + (V1.vector4_f32[1] < V2.vector4_f32[1]) && + (V1.vector4_f32[2] < V2.vector4_f32[2])) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vcgeq_f32(V1, V2); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU; + + uint32_t CR = 0; + if (r == 0xFFFFFFU) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!r) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmpge_ps(V1, V2); + uint32_t CR = 0; + int iTest = _mm_movemask_ps(vTemp) & 7; + if (iTest == 7) + { + CR = XM_CRMASK_CR6TRUE; + } + else if (!iTest) + { + CR = XM_CRMASK_CR6FALSE; + } + return CR; +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector3Less +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vcltq_f32(V1, V2); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), 
vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU) == 0xFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmplt_ps(V1, V2); + return (((_mm_movemask_ps(vTemp) & 7) == 7) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector3LessOrEqual +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vResult = vcleq_f32(V1, V2); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU) == 0xFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vTemp = _mm_cmple_ps(V1, V2); + return (((_mm_movemask_ps(vTemp) & 7) == 7) != 0); +#endif +} + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector3InBounds +( + FXMVECTOR V, + FXMVECTOR Bounds +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && + (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) && + (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2])) != 0); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Test if less than or equal + uint32x4_t ivTemp1 = vcleq_f32(V, Bounds); + // Negate the bounds + float32x4_t vTemp2 = vnegq_f32(Bounds); + // Test if 
greater or equal (Reversed) + uint32x4_t ivTemp2 = vcleq_f32(vTemp2, V); + // Blend answers + ivTemp1 = vandq_u32(ivTemp1, ivTemp2); + // in bounds? + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(ivTemp1)), vget_high_u8(vreinterpretq_u8_u32(ivTemp1))); + uint16x4x2_t vTemp3 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + return ((vget_lane_u32(vreinterpret_u32_u16(vTemp3.val[1]), 1) & 0xFFFFFFU) == 0xFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + // Test if less than or equal + XMVECTOR vTemp1 = _mm_cmple_ps(V, Bounds); + // Negate the bounds + XMVECTOR vTemp2 = _mm_mul_ps(Bounds, g_XMNegativeOne); + // Test if greater or equal (Reversed) + vTemp2 = _mm_cmple_ps(vTemp2, V); + // Blend answers + vTemp1 = _mm_and_ps(vTemp1, vTemp2); + // x,y and z in bounds? (w is don't care) + return (((_mm_movemask_ps(vTemp1) & 0x7) == 0x7) != 0); +#else + return XMComparisonAllInBounds(XMVector3InBoundsR(V, Bounds)); +#endif +} + +//------------------------------------------------------------------------------ + +#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER) +#pragma float_control(push) +#pragma float_control(precise, on) +#endif + +inline bool XM_CALLCONV XMVector3IsNaN(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + return (XMISNAN(V.vector4_f32[0]) || + XMISNAN(V.vector4_f32[1]) || + XMISNAN(V.vector4_f32[2])); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Test against itself. 
NaN is always not equal + uint32x4_t vTempNan = vceqq_f32(V, V); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vTempNan)), vget_high_u8(vreinterpretq_u8_u32(vTempNan))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + // If x or y or z are NaN, the mask is zero + return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU) != 0xFFFFFFU); +#elif defined(_XM_SSE_INTRINSICS_) + // Test against itself. NaN is always not equal + XMVECTOR vTempNan = _mm_cmpneq_ps(V, V); + // If x or y or z are NaN, the mask is non-zero + return ((_mm_movemask_ps(vTempNan) & 7) != 0); +#endif +} + +#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER) +#pragma float_control(pop) +#endif + +//------------------------------------------------------------------------------ + +inline bool XM_CALLCONV XMVector3IsInfinite(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + return (XMISINF(V.vector4_f32[0]) || + XMISINF(V.vector4_f32[1]) || + XMISINF(V.vector4_f32[2])); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Mask off the sign bit + uint32x4_t vTempInf = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); + // Compare to infinity + vTempInf = vceqq_f32(vreinterpretq_f32_u32(vTempInf), g_XMInfinity); + // If any are infinity, the signs are true. + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vTempInf)), vget_high_u8(vreinterpretq_u8_u32(vTempInf))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU) != 0); +#elif defined(_XM_SSE_INTRINSICS_) + // Mask off the sign bit + __m128 vTemp = _mm_and_ps(V, g_XMAbsMask); + // Compare to infinity + vTemp = _mm_cmpeq_ps(vTemp, g_XMInfinity); + // If x,y or z are infinity, the signs are true. 
+ return ((_mm_movemask_ps(vTemp) & 7) != 0); +#endif +} + +//------------------------------------------------------------------------------ +// Computation operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3Dot +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + float fValue = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2]; + XMVECTORF32 vResult; + vResult.f[0] = + vResult.f[1] = + vResult.f[2] = + vResult.f[3] = fValue; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t vTemp = vmulq_f32(V1, V2); + float32x2_t v1 = vget_low_f32(vTemp); + float32x2_t v2 = vget_high_f32(vTemp); + v1 = vpadd_f32(v1, v1); + v2 = vdup_lane_f32(v2, 0); + v1 = vadd_f32(v1, v2); + return vcombine_f32(v1, v1); +#elif defined(_XM_SSE4_INTRINSICS_) + return _mm_dp_ps(V1, V2, 0x7f); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vTemp = _mm_mul_ps(V1, V2); + vTemp = _mm_and_ps(vTemp, g_XMMask3); + vTemp = _mm_hadd_ps(vTemp, vTemp); + return _mm_hadd_ps(vTemp, vTemp); +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product + XMVECTOR vDot = _mm_mul_ps(V1, V2); + // x=Dot.vector4_f32[1], y=Dot.vector4_f32[2] + XMVECTOR vTemp = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(2, 1, 2, 1)); + // Result.vector4_f32[0] = x+y + vDot = _mm_add_ss(vDot, vTemp); + // x=Dot.vector4_f32[2] + vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); + // Result.vector4_f32[0] = (x+y)+z + vDot = _mm_add_ss(vDot, vTemp); + // Splat x + return XM_PERMUTE_PS(vDot, _MM_SHUFFLE(0, 0, 0, 0)); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3Cross +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ + // [ V1.y*V2.z - V1.z*V2.y, V1.z*V2.x - V1.x*V2.z, 
V1.x*V2.y - V1.y*V2.x ] + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + (V1.vector4_f32[1] * V2.vector4_f32[2]) - (V1.vector4_f32[2] * V2.vector4_f32[1]), + (V1.vector4_f32[2] * V2.vector4_f32[0]) - (V1.vector4_f32[0] * V2.vector4_f32[2]), + (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]), + 0.0f + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t v1xy = vget_low_f32(V1); + float32x2_t v2xy = vget_low_f32(V2); + + float32x2_t v1yx = vrev64_f32(v1xy); + float32x2_t v2yx = vrev64_f32(v2xy); + + float32x2_t v1zz = vdup_lane_f32(vget_high_f32(V1), 0); + float32x2_t v2zz = vdup_lane_f32(vget_high_f32(V2), 0); + + XMVECTOR vResult = vmulq_f32(vcombine_f32(v1yx, v1xy), vcombine_f32(v2zz, v2yx)); + vResult = vmlsq_f32(vResult, vcombine_f32(v1zz, v1yx), vcombine_f32(v2yx, v2xy)); + vResult = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(vResult), g_XMFlipY)); + return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(vResult), g_XMMask3)); +#elif defined(_XM_SSE_INTRINSICS_) + // y1,z1,x1,w1 + XMVECTOR vTemp1 = XM_PERMUTE_PS(V1, _MM_SHUFFLE(3, 0, 2, 1)); + // z2,x2,y2,w2 + XMVECTOR vTemp2 = XM_PERMUTE_PS(V2, _MM_SHUFFLE(3, 1, 0, 2)); + // Perform the left operation + XMVECTOR vResult = _mm_mul_ps(vTemp1, vTemp2); + // z1,x1,y1,w1 + vTemp1 = XM_PERMUTE_PS(vTemp1, _MM_SHUFFLE(3, 0, 2, 1)); + // y2,z2,x2,w2 + vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(3, 1, 0, 2)); + // Perform the right operation + vResult = XM_FNMADD_PS(vTemp1, vTemp2, vResult); + // Set w to zero + return _mm_and_ps(vResult, g_XMMask3); +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3LengthSq(FXMVECTOR V) noexcept +{ + return XMVector3Dot(V, V); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLengthEst(FXMVECTOR V) noexcept +{ +#if 
defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + + Result = XMVector3LengthSq(V); + Result = XMVectorReciprocalSqrtEst(Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Dot3 + float32x4_t vTemp = vmulq_f32(V, V); + float32x2_t v1 = vget_low_f32(vTemp); + float32x2_t v2 = vget_high_f32(vTemp); + v1 = vpadd_f32(v1, v1); + v2 = vdup_lane_f32(v2, 0); + v1 = vadd_f32(v1, v2); + // Reciprocal sqrt (estimate) + v2 = vrsqrte_f32(v1); + return vcombine_f32(v2, v2); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(V, V, 0x7f); + return _mm_rsqrt_ps(vTemp); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + vLengthSq = _mm_and_ps(vLengthSq, g_XMMask3); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_rsqrt_ps(vLengthSq); + return vLengthSq; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x,y and z + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + // vTemp has z and y + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 2, 1, 2)); + // x+z, y + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + // y,y,y,y + vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); + // x+z+y,??,??,?? 
+ vLengthSq = _mm_add_ss(vLengthSq, vTemp); + // Splat the length squared + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + // Get the reciprocal + vLengthSq = _mm_rsqrt_ps(vLengthSq); + return vLengthSq; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLength(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + + Result = XMVector3LengthSq(V); + Result = XMVectorReciprocalSqrt(Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Dot3 + float32x4_t vTemp = vmulq_f32(V, V); + float32x2_t v1 = vget_low_f32(vTemp); + float32x2_t v2 = vget_high_f32(vTemp); + v1 = vpadd_f32(v1, v1); + v2 = vdup_lane_f32(v2, 0); + v1 = vadd_f32(v1, v2); + // Reciprocal sqrt + float32x2_t S0 = vrsqrte_f32(v1); + float32x2_t P0 = vmul_f32(v1, S0); + float32x2_t R0 = vrsqrts_f32(P0, S0); + float32x2_t S1 = vmul_f32(S0, R0); + float32x2_t P1 = vmul_f32(v1, S1); + float32x2_t R1 = vrsqrts_f32(P1, S1); + float32x2_t Result = vmul_f32(S1, R1); + return vcombine_f32(Result, Result); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(V, V, 0x7f); + XMVECTOR vLengthSq = _mm_sqrt_ps(vTemp); + return _mm_div_ps(g_XMOne, vLengthSq); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vDot = _mm_mul_ps(V, V); + vDot = _mm_and_ps(vDot, g_XMMask3); + vDot = _mm_hadd_ps(vDot, vDot); + vDot = _mm_hadd_ps(vDot, vDot); + vDot = _mm_sqrt_ps(vDot); + vDot = _mm_div_ps(g_XMOne, vDot); + return vDot; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product + XMVECTOR vDot = _mm_mul_ps(V, V); + // x=Dot.y, y=Dot.z + XMVECTOR vTemp = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(2, 1, 2, 1)); + // Result.x = x+y + vDot = _mm_add_ss(vDot, vTemp); + // x=Dot.z + vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); + // Result.x = (x+y)+z + vDot = _mm_add_ss(vDot, vTemp); + // Splat x + vDot = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(0, 0, 0, 0)); + // Get 
the reciprocal + vDot = _mm_sqrt_ps(vDot); + // Get the reciprocal + vDot = _mm_div_ps(g_XMOne, vDot); + return vDot; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3LengthEst(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + + Result = XMVector3LengthSq(V); + Result = XMVectorSqrtEst(Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Dot3 + float32x4_t vTemp = vmulq_f32(V, V); + float32x2_t v1 = vget_low_f32(vTemp); + float32x2_t v2 = vget_high_f32(vTemp); + v1 = vpadd_f32(v1, v1); + v2 = vdup_lane_f32(v2, 0); + v1 = vadd_f32(v1, v2); + const float32x2_t zero = vdup_n_f32(0); + uint32x2_t VEqualsZero = vceq_f32(v1, zero); + // Sqrt (estimate) + float32x2_t Result = vrsqrte_f32(v1); + Result = vmul_f32(v1, Result); + Result = vbsl_f32(VEqualsZero, zero, Result); + return vcombine_f32(Result, Result); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(V, V, 0x7f); + return _mm_sqrt_ps(vTemp); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + vLengthSq = _mm_and_ps(vLengthSq, g_XMMask3); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_sqrt_ps(vLengthSq); + return vLengthSq; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x,y and z + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + // vTemp has z and y + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 2, 1, 2)); + // x+z, y + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + // y,y,y,y + vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); + // x+z+y,??,??,?? 
+ vLengthSq = _mm_add_ss(vLengthSq, vTemp); + // Splat the length squared + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + // Get the length + vLengthSq = _mm_sqrt_ps(vLengthSq); + return vLengthSq; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3Length(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + + Result = XMVector3LengthSq(V); + Result = XMVectorSqrt(Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Dot3 + float32x4_t vTemp = vmulq_f32(V, V); + float32x2_t v1 = vget_low_f32(vTemp); + float32x2_t v2 = vget_high_f32(vTemp); + v1 = vpadd_f32(v1, v1); + v2 = vdup_lane_f32(v2, 0); + v1 = vadd_f32(v1, v2); + const float32x2_t zero = vdup_n_f32(0); + uint32x2_t VEqualsZero = vceq_f32(v1, zero); + // Sqrt + float32x2_t S0 = vrsqrte_f32(v1); + float32x2_t P0 = vmul_f32(v1, S0); + float32x2_t R0 = vrsqrts_f32(P0, S0); + float32x2_t S1 = vmul_f32(S0, R0); + float32x2_t P1 = vmul_f32(v1, S1); + float32x2_t R1 = vrsqrts_f32(P1, S1); + float32x2_t Result = vmul_f32(S1, R1); + Result = vmul_f32(v1, Result); + Result = vbsl_f32(VEqualsZero, zero, Result); + return vcombine_f32(Result, Result); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(V, V, 0x7f); + return _mm_sqrt_ps(vTemp); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + vLengthSq = _mm_and_ps(vLengthSq, g_XMMask3); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_sqrt_ps(vLengthSq); + return vLengthSq; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x,y and z + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + // vTemp has z and y + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 2, 1, 2)); + // x+z, y + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + // y,y,y,y + vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); + // 
x+z+y,??,??,?? + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + // Splat the length squared + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + // Get the length + vLengthSq = _mm_sqrt_ps(vLengthSq); + return vLengthSq; +#endif +} + +//------------------------------------------------------------------------------ +// XMVector3NormalizeEst uses a reciprocal estimate and +// returns QNaN on zero and infinite vectors. + +inline XMVECTOR XM_CALLCONV XMVector3NormalizeEst(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + Result = XMVector3ReciprocalLength(V); + Result = XMVectorMultiply(V, Result); + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Dot3 + float32x4_t vTemp = vmulq_f32(V, V); + float32x2_t v1 = vget_low_f32(vTemp); + float32x2_t v2 = vget_high_f32(vTemp); + v1 = vpadd_f32(v1, v1); + v2 = vdup_lane_f32(v2, 0); + v1 = vadd_f32(v1, v2); + // Reciprocal sqrt (estimate) + v2 = vrsqrte_f32(v1); + // Normalize + return vmulq_f32(V, vcombine_f32(v2, v2)); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(V, V, 0x7f); + XMVECTOR vResult = _mm_rsqrt_ps(vTemp); + return _mm_mul_ps(vResult, V); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vDot = _mm_mul_ps(V, V); + vDot = _mm_and_ps(vDot, g_XMMask3); + vDot = _mm_hadd_ps(vDot, vDot); + vDot = _mm_hadd_ps(vDot, vDot); + vDot = _mm_rsqrt_ps(vDot); + vDot = _mm_mul_ps(vDot, V); + return vDot; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product + XMVECTOR vDot = _mm_mul_ps(V, V); + // x=Dot.y, y=Dot.z + XMVECTOR vTemp = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(2, 1, 2, 1)); + // Result.x = x+y + vDot = _mm_add_ss(vDot, vTemp); + // x=Dot.z + vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); + // Result.x = (x+y)+z + vDot = _mm_add_ss(vDot, vTemp); + // Splat x + vDot = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(0, 0, 0, 0)); + // Get the reciprocal + vDot = _mm_rsqrt_ps(vDot); + // Perform the normalization + vDot = _mm_mul_ps(vDot, V); + return 
vDot; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3Normalize(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + float fLength; + XMVECTOR vResult; + + vResult = XMVector3Length(V); + fLength = vResult.vector4_f32[0]; + + // Prevent divide by zero + if (fLength > 0) + { + fLength = 1.0f / fLength; + } + + vResult.vector4_f32[0] = V.vector4_f32[0] * fLength; + vResult.vector4_f32[1] = V.vector4_f32[1] * fLength; + vResult.vector4_f32[2] = V.vector4_f32[2] * fLength; + vResult.vector4_f32[3] = V.vector4_f32[3] * fLength; + return vResult; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Dot3 + float32x4_t vTemp = vmulq_f32(V, V); + float32x2_t v1 = vget_low_f32(vTemp); + float32x2_t v2 = vget_high_f32(vTemp); + v1 = vpadd_f32(v1, v1); + v2 = vdup_lane_f32(v2, 0); + v1 = vadd_f32(v1, v2); + uint32x2_t VEqualsZero = vceq_f32(v1, vdup_n_f32(0)); + uint32x2_t VEqualsInf = vceq_f32(v1, vget_low_f32(g_XMInfinity)); + // Reciprocal sqrt (2 iterations of Newton-Raphson) + float32x2_t S0 = vrsqrte_f32(v1); + float32x2_t P0 = vmul_f32(v1, S0); + float32x2_t R0 = vrsqrts_f32(P0, S0); + float32x2_t S1 = vmul_f32(S0, R0); + float32x2_t P1 = vmul_f32(v1, S1); + float32x2_t R1 = vrsqrts_f32(P1, S1); + v2 = vmul_f32(S1, R1); + // Normalize + XMVECTOR vResult = vmulq_f32(V, vcombine_f32(v2, v2)); + vResult = vbslq_f32(vcombine_u32(VEqualsZero, VEqualsZero), vdupq_n_f32(0), vResult); + return vbslq_f32(vcombine_u32(VEqualsInf, VEqualsInf), g_XMQNaN, vResult); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vLengthSq = _mm_dp_ps(V, V, 0x7f); + // Prepare for the division + XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Create zero with a single instruction + XMVECTOR vZeroMask = _mm_setzero_ps(); + // Test for a divide by zero (Must be FP to detect -0.0) + vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the 
elements to zero + vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); + // Divide to perform the normalization + vResult = _mm_div_ps(V, vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult, vZeroMask); + // Select qnan or result based on infinite length + XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); + XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); + vResult = _mm_or_ps(vTemp1, vTemp2); + return vResult; +#elif defined(_XM_SSE3_INTRINSICS_) + // Perform the dot product on x,y and z only + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + vLengthSq = _mm_and_ps(vLengthSq, g_XMMask3); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + // Prepare for the division + XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Create zero with a single instruction + XMVECTOR vZeroMask = _mm_setzero_ps(); + // Test for a divide by zero (Must be FP to detect -0.0) + vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the elements to zero + vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); + // Divide to perform the normalization + vResult = _mm_div_ps(V, vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult, vZeroMask); + // Select qnan or result based on infinite length + XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); + XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); + vResult = _mm_or_ps(vTemp1, vTemp2); + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x,y and z only + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 1, 2, 1)); + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + // Prepare for the division + XMVECTOR vResult = 
_mm_sqrt_ps(vLengthSq); + // Create zero with a single instruction + XMVECTOR vZeroMask = _mm_setzero_ps(); + // Test for a divide by zero (Must be FP to detect -0.0) + vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the elements to zero + vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); + // Divide to perform the normalization + vResult = _mm_div_ps(V, vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult, vZeroMask); + // Select qnan or result based on infinite length + XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); + XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); + vResult = _mm_or_ps(vTemp1, vTemp2); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3ClampLength +( + FXMVECTOR V, + float LengthMin, + float LengthMax +) noexcept +{ + XMVECTOR ClampMax = XMVectorReplicate(LengthMax); + XMVECTOR ClampMin = XMVectorReplicate(LengthMin); + + return XMVector3ClampLengthV(V, ClampMin, ClampMax); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3ClampLengthV +( + FXMVECTOR V, + FXMVECTOR LengthMin, + FXMVECTOR LengthMax +) noexcept +{ + assert((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin))); + assert((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax))); + assert(XMVector3GreaterOrEqual(LengthMin, XMVectorZero())); + assert(XMVector3GreaterOrEqual(LengthMax, XMVectorZero())); + assert(XMVector3GreaterOrEqual(LengthMax, LengthMin)); + + XMVECTOR LengthSq = XMVector3LengthSq(V); + + const XMVECTOR Zero = XMVectorZero(); + + XMVECTOR RcpLength = XMVectorReciprocalSqrt(LengthSq); + + XMVECTOR InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v); + 
XMVECTOR ZeroLength = XMVectorEqual(LengthSq, Zero); + + XMVECTOR Normal = XMVectorMultiply(V, RcpLength); + + XMVECTOR Length = XMVectorMultiply(LengthSq, RcpLength); + + XMVECTOR Select = XMVectorEqualInt(InfiniteLength, ZeroLength); + Length = XMVectorSelect(LengthSq, Length, Select); + Normal = XMVectorSelect(LengthSq, Normal, Select); + + XMVECTOR ControlMax = XMVectorGreater(Length, LengthMax); + XMVECTOR ControlMin = XMVectorLess(Length, LengthMin); + + XMVECTOR ClampLength = XMVectorSelect(Length, LengthMax, ControlMax); + ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin); + + XMVECTOR Result = XMVectorMultiply(Normal, ClampLength); + + // Preserve the original vector (with no precision loss) if the length falls within the given range + XMVECTOR Control = XMVectorEqualInt(ControlMax, ControlMin); + Result = XMVectorSelect(Result, V, Control); + + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3Reflect +( + FXMVECTOR Incident, + FXMVECTOR Normal +) noexcept +{ + // Result = Incident - (2 * dot(Incident, Normal)) * Normal + + XMVECTOR Result = XMVector3Dot(Incident, Normal); + Result = XMVectorAdd(Result, Result); + Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident); + + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3Refract +( + FXMVECTOR Incident, + FXMVECTOR Normal, + float RefractionIndex +) noexcept +{ + XMVECTOR Index = XMVectorReplicate(RefractionIndex); + return XMVector3RefractV(Incident, Normal, Index); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3RefractV +( + FXMVECTOR Incident, + FXMVECTOR Normal, + FXMVECTOR RefractionIndex +) noexcept +{ + // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + + // sqrt(1 - 
RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal)))) + +#if defined(_XM_NO_INTRINSICS_) + + const XMVECTOR Zero = XMVectorZero(); + + XMVECTOR IDotN = XMVector3Dot(Incident, Normal); + + // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) + XMVECTOR R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v); + R = XMVectorMultiply(R, RefractionIndex); + R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v); + + if (XMVector4LessOrEqual(R, Zero)) + { + // Total internal reflection + return Zero; + } + else + { + // R = RefractionIndex * IDotN + sqrt(R) + R = XMVectorSqrt(R); + R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R); + + // Result = RefractionIndex * Incident - Normal * R + XMVECTOR Result = XMVectorMultiply(RefractionIndex, Incident); + Result = XMVectorNegativeMultiplySubtract(Normal, R, Result); + + return Result; + } + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTOR IDotN = XMVector3Dot(Incident, Normal); + + // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) + float32x4_t R = vmlsq_f32(g_XMOne, IDotN, IDotN); + R = vmulq_f32(R, RefractionIndex); + R = vmlsq_f32(g_XMOne, R, RefractionIndex); + + uint32x4_t isrzero = vcleq_f32(R, g_XMZero); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(isrzero)), vget_high_u8(vreinterpretq_u8_u32(isrzero))); + uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + + float32x4_t vResult; + if (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == 0xFFFFFFFFU) + { + // Total internal reflection + vResult = g_XMZero; + } + else + { + // Sqrt(R) + float32x4_t S0 = vrsqrteq_f32(R); + float32x4_t P0 = vmulq_f32(R, S0); + float32x4_t R0 = vrsqrtsq_f32(P0, S0); + float32x4_t S1 = vmulq_f32(S0, R0); + float32x4_t P1 = vmulq_f32(R, S1); + float32x4_t R1 = vrsqrtsq_f32(P1, S1); + float32x4_t S2 = vmulq_f32(S1, R1); + R = vmulq_f32(R, S2); + // R = RefractionIndex 
* IDotN + sqrt(R) + R = vmlaq_f32(R, RefractionIndex, IDotN); + // Result = RefractionIndex * Incident - Normal * R + vResult = vmulq_f32(RefractionIndex, Incident); + vResult = vmlsq_f32(vResult, R, Normal); + } + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + + // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal)))) + XMVECTOR IDotN = XMVector3Dot(Incident, Normal); + // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) + XMVECTOR R = XM_FNMADD_PS(IDotN, IDotN, g_XMOne); + XMVECTOR R2 = _mm_mul_ps(RefractionIndex, RefractionIndex); + R = XM_FNMADD_PS(R, R2, g_XMOne); + + XMVECTOR vResult = _mm_cmple_ps(R, g_XMZero); + if (_mm_movemask_ps(vResult) == 0x0f) + { + // Total internal reflection + vResult = g_XMZero; + } + else + { + // R = RefractionIndex * IDotN + sqrt(R) + R = _mm_sqrt_ps(R); + R = XM_FMADD_PS(RefractionIndex, IDotN, R); + // Result = RefractionIndex * Incident - Normal * R + vResult = _mm_mul_ps(RefractionIndex, Incident); + vResult = XM_FNMADD_PS(R, Normal, vResult); + } + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3Orthogonal(FXMVECTOR V) noexcept +{ + XMVECTOR Zero = XMVectorZero(); + XMVECTOR Z = XMVectorSplatZ(V); + XMVECTOR YZYY = XMVectorSwizzle(V); + + XMVECTOR NegativeV = XMVectorSubtract(Zero, V); + + XMVECTOR ZIsNegative = XMVectorLess(Z, Zero); + XMVECTOR YZYYIsNegative = XMVectorLess(YZYY, Zero); + + XMVECTOR S = XMVectorAdd(YZYY, Z); + XMVECTOR D = XMVectorSubtract(YZYY, Z); + + XMVECTOR Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative); + + XMVECTOR R0 = XMVectorPermute(NegativeV, S); + XMVECTOR R1 = XMVectorPermute(V, D); + + return XMVectorSelect(R1, R0, Select); +} + +//------------------------------------------------------------------------------ + +inline 
XMVECTOR XM_CALLCONV XMVector3AngleBetweenNormalsEst +( + FXMVECTOR N1, + FXMVECTOR N2 +) noexcept +{ + XMVECTOR Result = XMVector3Dot(N1, N2); + Result = XMVectorClamp(Result, g_XMNegativeOne.v, g_XMOne.v); + Result = XMVectorACosEst(Result); + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3AngleBetweenNormals +( + FXMVECTOR N1, + FXMVECTOR N2 +) noexcept +{ + XMVECTOR Result = XMVector3Dot(N1, N2); + Result = XMVectorClamp(Result, g_XMNegativeOne.v, g_XMOne.v); + Result = XMVectorACos(Result); + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3AngleBetweenVectors +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ + XMVECTOR L1 = XMVector3ReciprocalLength(V1); + XMVECTOR L2 = XMVector3ReciprocalLength(V2); + + XMVECTOR Dot = XMVector3Dot(V1, V2); + + L1 = XMVectorMultiply(L1, L2); + + XMVECTOR CosAngle = XMVectorMultiply(Dot, L1); + CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne.v, g_XMOne.v); + + return XMVectorACos(CosAngle); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3LinePointDistance +( + FXMVECTOR LinePoint1, + FXMVECTOR LinePoint2, + FXMVECTOR Point +) noexcept +{ + // Given a vector PointVector from LinePoint1 to Point and a vector + // LineVector from LinePoint1 to LinePoint2, the scaled distance + // PointProjectionScale from LinePoint1 to the perpendicular projection + // of PointVector onto the line is defined as: + // + // PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector) + + XMVECTOR PointVector = XMVectorSubtract(Point, LinePoint1); + XMVECTOR LineVector = XMVectorSubtract(LinePoint2, LinePoint1); + + XMVECTOR LengthSq = XMVector3LengthSq(LineVector); + + XMVECTOR PointProjectionScale = XMVector3Dot(PointVector, LineVector); + PointProjectionScale = 
XMVectorDivide(PointProjectionScale, LengthSq); + + XMVECTOR DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale); + DistanceVector = XMVectorSubtract(PointVector, DistanceVector); + + return XMVector3Length(DistanceVector); +} + +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline void XM_CALLCONV XMVector3ComponentsFromNormal +( + XMVECTOR* pParallel, + XMVECTOR* pPerpendicular, + FXMVECTOR V, + FXMVECTOR Normal +) noexcept +{ + assert(pParallel != nullptr); + assert(pPerpendicular != nullptr); + + XMVECTOR Scale = XMVector3Dot(V, Normal); + + XMVECTOR Parallel = XMVectorMultiply(Normal, Scale); + + *pParallel = Parallel; + *pPerpendicular = XMVectorSubtract(V, Parallel); +} + +//------------------------------------------------------------------------------ +// Transform a vector using a rotation expressed as a unit quaternion + +inline XMVECTOR XM_CALLCONV XMVector3Rotate +( + FXMVECTOR V, + FXMVECTOR RotationQuaternion +) noexcept +{ + XMVECTOR A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v); + XMVECTOR Q = XMQuaternionConjugate(RotationQuaternion); + XMVECTOR Result = XMQuaternionMultiply(Q, A); + return XMQuaternionMultiply(Result, RotationQuaternion); +} + +//------------------------------------------------------------------------------ +// Transform a vector using the inverse of a rotation expressed as a unit quaternion + +inline XMVECTOR XM_CALLCONV XMVector3InverseRotate +( + FXMVECTOR V, + FXMVECTOR RotationQuaternion +) noexcept +{ + XMVECTOR A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v); + XMVECTOR Result = XMQuaternionMultiply(RotationQuaternion, A); + XMVECTOR Q = XMQuaternionConjugate(RotationQuaternion); + return XMQuaternionMultiply(Result, Q); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3Transform +( + FXMVECTOR V, + FXMMATRIX M +) noexcept +{ +#if 
defined(_XM_NO_INTRINSICS_) + + XMVECTOR Z = XMVectorSplatZ(V); + XMVECTOR Y = XMVectorSplatY(V); + XMVECTOR X = XMVectorSplatX(V); + + XMVECTOR Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]); + Result = XMVectorMultiplyAdd(Y, M.r[1], Result); + Result = XMVectorMultiplyAdd(X, M.r[0], Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); + XMVECTOR vResult = vmlaq_lane_f32(M.r[3], M.r[0], VL, 0); // X + vResult = vmlaq_lane_f32(vResult, M.r[1], VL, 1); // Y + return vmlaq_lane_f32(vResult, M.r[2], vget_high_f32(V), 0); // Z +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); // Z + vResult = XM_FMADD_PS(vResult, M.r[2], M.r[3]); + XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); // Y + vResult = XM_FMADD_PS(vTemp, M.r[1], vResult); + vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); // X + vResult = XM_FMADD_PS(vTemp, M.r[0], vResult); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma prefast(disable : 26015 26019, "PREfast noise: Esp:1307" ) +#endif + +_Use_decl_annotations_ +inline XMFLOAT4* XM_CALLCONV XMVector3TransformStream +( + XMFLOAT4* pOutputStream, + size_t OutputStride, + const XMFLOAT3* pInputStream, + size_t InputStride, + size_t VectorCount, + FXMMATRIX M +) noexcept +{ + assert(pOutputStream != nullptr); + assert(pInputStream != nullptr); + + assert(InputStride >= sizeof(XMFLOAT3)); + _Analysis_assume_(InputStride >= sizeof(XMFLOAT3)); + + assert(OutputStride >= sizeof(XMFLOAT4)); + _Analysis_assume_(OutputStride >= sizeof(XMFLOAT4)); + +#if defined(_XM_NO_INTRINSICS_) + + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row2 = M.r[2]; + const XMVECTOR row3 = M.r[3]; + + for (size_t i 
= 0; i < VectorCount; i++) + { + XMVECTOR V = XMLoadFloat3(reinterpret_cast(pInputVector)); + XMVECTOR Z = XMVectorSplatZ(V); + XMVECTOR Y = XMVectorSplatY(V); + XMVECTOR X = XMVectorSplatX(V); + + XMVECTOR Result = XMVectorMultiplyAdd(Z, row2, row3); + Result = XMVectorMultiplyAdd(Y, row1, Result); + Result = XMVectorMultiplyAdd(X, row0, Result); + + XMStoreFloat4(reinterpret_cast(pOutputVector), Result); + + pInputVector += InputStride; + pOutputVector += OutputStride; + } + + return pOutputStream; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row2 = M.r[2]; + const XMVECTOR row3 = M.r[3]; + + size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { + if ((InputStride == sizeof(XMFLOAT3)) && (OutputStride == sizeof(XMFLOAT4))) + { + for (size_t j = 0; j < four; ++j) + { + float32x4x3_t V = vld3q_f32(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT3) * 4; + + float32x2_t r3 = vget_low_f32(row3); + float32x2_t r = vget_low_f32(row0); + XMVECTOR vResult0 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), V.val[0], r, 0); // Ax+M + XMVECTOR vResult1 = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), V.val[0], r, 1); // Bx+N + + XM_PREFETCH(pInputVector); + + r3 = vget_high_f32(row3); + r = vget_high_f32(row0); + XMVECTOR vResult2 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), V.val[0], r, 0); // Cx+O + XMVECTOR vResult3 = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), V.val[0], r, 1); // Dx+P + + XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); + + r = vget_low_f32(row1); + vResult0 = vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey+M + vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy+N + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); + + r = vget_high_f32(row1); + vResult2 = vmlaq_lane_f32(vResult2, V.val[1], r, 0); // Cx+Gy+O + vResult3 = vmlaq_lane_f32(vResult3, 
V.val[1], r, 1); // Dx+Hy+P + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); + + r = vget_low_f32(row2); + vResult0 = vmlaq_lane_f32(vResult0, V.val[2], r, 0); // Ax+Ey+Iz+M + vResult1 = vmlaq_lane_f32(vResult1, V.val[2], r, 1); // Bx+Fy+Jz+N + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 4)); + + r = vget_high_f32(row2); + vResult2 = vmlaq_lane_f32(vResult2, V.val[2], r, 0); // Cx+Gy+Kz+O + vResult3 = vmlaq_lane_f32(vResult3, V.val[2], r, 1); // Dx+Hy+Lz+P + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5)); + + float32x4x4_t R; + R.val[0] = vResult0; + R.val[1] = vResult1; + R.val[2] = vResult2; + R.val[3] = vResult3; + + vst4q_f32(reinterpret_cast(pOutputVector), R); + pOutputVector += sizeof(XMFLOAT4) * 4; + + i += 4; + } + } + } + + for (; i < VectorCount; i++) + { + float32x2_t VL = vld1_f32(reinterpret_cast(pInputVector)); + float32x2_t zero = vdup_n_f32(0); + float32x2_t VH = vld1_lane_f32(reinterpret_cast(pInputVector) + 2, zero, 0); + pInputVector += InputStride; + + XMVECTOR vResult = vmlaq_lane_f32(row3, row0, VL, 0); // X + vResult = vmlaq_lane_f32(vResult, row1, VL, 1); // Y + vResult = vmlaq_lane_f32(vResult, row2, VH, 0); // Z + + vst1q_f32(reinterpret_cast(pOutputVector), vResult); + pOutputVector += OutputStride; + } + + return pOutputStream; +#elif defined(_XM_SSE_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row2 = M.r[2]; + const XMVECTOR row3 = M.r[3]; + + size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { + if (InputStride == sizeof(XMFLOAT3)) + { + if (!(reinterpret_cast(pOutputStream) & 0xF) && !(OutputStride & 0xF)) + { + // Packed input, aligned output + for (size_t j = 0; j < four; ++j) + { + __m128 V1 = _mm_loadu_ps(reinterpret_cast(pInputVector)); + __m128 L2 = _mm_loadu_ps(reinterpret_cast(pInputVector + 16)); + __m128 L3 = 
_mm_loadu_ps(reinterpret_cast(pInputVector + 32)); + pInputVector += sizeof(XMFLOAT3) * 4; + + // Unpack the 4 vectors (.w components are junk) + XM3UNPACK3INTO4(V1, L2, L3); + + // Result 1 + XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); + XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); + XMVECTOR vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + XM_STREAM_PS(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 2 + Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, row2, row3); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + XM_STREAM_PS(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 3 + Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, row2, row3); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + XM_STREAM_PS(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 4 + Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, row2, row3); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + XM_STREAM_PS(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + i += 4; + } + } + else + { + // 
Packed input, unaligned output + for (size_t j = 0; j < four; ++j) + { + __m128 V1 = _mm_loadu_ps(reinterpret_cast(pInputVector)); + __m128 L2 = _mm_loadu_ps(reinterpret_cast(pInputVector + 16)); + __m128 L3 = _mm_loadu_ps(reinterpret_cast(pInputVector + 32)); + pInputVector += sizeof(XMFLOAT3) * 4; + + // Unpack the 4 vectors (.w components are junk) + XM3UNPACK3INTO4(V1, L2, L3); + + // Result 1 + XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); + XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); + XMVECTOR vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 2 + Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, row2, row3); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 3 + Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, row2, row3); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 4 + Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, row2, row3); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = 
_mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + i += 4; + } + } + } + } + + if (!(reinterpret_cast(pOutputStream) & 0xF) && !(OutputStride & 0xF)) + { + // Aligned output + for (; i < VectorCount; ++i) + { + XMVECTOR V = XMLoadFloat3(reinterpret_cast(pInputVector)); + pInputVector += InputStride; + + XMVECTOR Z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); + XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); + XMVECTOR vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XM_STREAM_PS(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + } + } + else + { + // Unaligned output + for (; i < VectorCount; ++i) + { + XMVECTOR V = XMLoadFloat3(reinterpret_cast(pInputVector)); + pInputVector += InputStride; + + XMVECTOR Z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); + XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); + XMVECTOR vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + } + } + + XM_SFENCE(); + + return pOutputStream; +#endif +} + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3TransformCoord +( + FXMVECTOR V, + FXMMATRIX M +) noexcept +{ + XMVECTOR Z = XMVectorSplatZ(V); + XMVECTOR Y = XMVectorSplatY(V); + XMVECTOR X = XMVectorSplatX(V); + + XMVECTOR Result = XMVectorMultiplyAdd(Z, 
M.r[2], M.r[3]); + Result = XMVectorMultiplyAdd(Y, M.r[1], Result); + Result = XMVectorMultiplyAdd(X, M.r[0], Result); + + XMVECTOR W = XMVectorSplatW(Result); + return XMVectorDivide(Result, W); +} + +//------------------------------------------------------------------------------ + +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma prefast(disable : 26015 26019, "PREfast noise: Esp:1307" ) +#endif + +_Use_decl_annotations_ +inline XMFLOAT3* XM_CALLCONV XMVector3TransformCoordStream +( + XMFLOAT3* pOutputStream, + size_t OutputStride, + const XMFLOAT3* pInputStream, + size_t InputStride, + size_t VectorCount, + FXMMATRIX M +) noexcept +{ + assert(pOutputStream != nullptr); + assert(pInputStream != nullptr); + + assert(InputStride >= sizeof(XMFLOAT3)); + _Analysis_assume_(InputStride >= sizeof(XMFLOAT3)); + + assert(OutputStride >= sizeof(XMFLOAT3)); + _Analysis_assume_(OutputStride >= sizeof(XMFLOAT3)); + +#if defined(_XM_NO_INTRINSICS_) + + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row2 = M.r[2]; + const XMVECTOR row3 = M.r[3]; + + for (size_t i = 0; i < VectorCount; i++) + { + XMVECTOR V = XMLoadFloat3(reinterpret_cast(pInputVector)); + XMVECTOR Z = XMVectorSplatZ(V); + XMVECTOR Y = XMVectorSplatY(V); + XMVECTOR X = XMVectorSplatX(V); + + XMVECTOR Result = XMVectorMultiplyAdd(Z, row2, row3); + Result = XMVectorMultiplyAdd(Y, row1, Result); + Result = XMVectorMultiplyAdd(X, row0, Result); + + XMVECTOR W = XMVectorSplatW(Result); + + Result = XMVectorDivide(Result, W); + + XMStoreFloat3(reinterpret_cast(pOutputVector), Result); + + pInputVector += InputStride; + pOutputVector += OutputStride; + } + + return pOutputStream; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const 
XMVECTOR row1 = M.r[1]; + const XMVECTOR row2 = M.r[2]; + const XMVECTOR row3 = M.r[3]; + + size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { + if ((InputStride == sizeof(XMFLOAT3)) && (OutputStride == sizeof(XMFLOAT3))) + { + for (size_t j = 0; j < four; ++j) + { + float32x4x3_t V = vld3q_f32(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT3) * 4; + + float32x2_t r3 = vget_low_f32(row3); + float32x2_t r = vget_low_f32(row0); + XMVECTOR vResult0 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), V.val[0], r, 0); // Ax+M + XMVECTOR vResult1 = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), V.val[0], r, 1); // Bx+N + + XM_PREFETCH(pInputVector); + + r3 = vget_high_f32(row3); + r = vget_high_f32(row0); + XMVECTOR vResult2 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), V.val[0], r, 0); // Cx+O + XMVECTOR W = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), V.val[0], r, 1); // Dx+P + + XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); + + r = vget_low_f32(row1); + vResult0 = vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey+M + vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy+N + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); + + r = vget_high_f32(row1); + vResult2 = vmlaq_lane_f32(vResult2, V.val[1], r, 0); // Cx+Gy+O + W = vmlaq_lane_f32(W, V.val[1], r, 1); // Dx+Hy+P + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); + + r = vget_low_f32(row2); + vResult0 = vmlaq_lane_f32(vResult0, V.val[2], r, 0); // Ax+Ey+Iz+M + vResult1 = vmlaq_lane_f32(vResult1, V.val[2], r, 1); // Bx+Fy+Jz+N + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 4)); + + r = vget_high_f32(row2); + vResult2 = vmlaq_lane_f32(vResult2, V.val[2], r, 0); // Cx+Gy+Kz+O + W = vmlaq_lane_f32(W, V.val[2], r, 1); // Dx+Hy+Lz+P + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5)); + +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + V.val[0] = vdivq_f32(vResult0, W); + V.val[1] = vdivq_f32(vResult1, W); + V.val[2] = vdivq_f32(vResult2, W); +#else + // 2 
iterations of Newton-Raphson refinement of reciprocal + float32x4_t Reciprocal = vrecpeq_f32(W); + float32x4_t S = vrecpsq_f32(Reciprocal, W); + Reciprocal = vmulq_f32(S, Reciprocal); + S = vrecpsq_f32(Reciprocal, W); + Reciprocal = vmulq_f32(S, Reciprocal); + + V.val[0] = vmulq_f32(vResult0, Reciprocal); + V.val[1] = vmulq_f32(vResult1, Reciprocal); + V.val[2] = vmulq_f32(vResult2, Reciprocal); +#endif + + vst3q_f32(reinterpret_cast(pOutputVector), V); + pOutputVector += sizeof(XMFLOAT3) * 4; + + i += 4; + } + } + } + + for (; i < VectorCount; i++) + { + float32x2_t VL = vld1_f32(reinterpret_cast(pInputVector)); + float32x2_t zero = vdup_n_f32(0); + float32x2_t VH = vld1_lane_f32(reinterpret_cast(pInputVector) + 2, zero, 0); + pInputVector += InputStride; + + XMVECTOR vResult = vmlaq_lane_f32(row3, row0, VL, 0); // X + vResult = vmlaq_lane_f32(vResult, row1, VL, 1); // Y + vResult = vmlaq_lane_f32(vResult, row2, VH, 0); // Z + + VH = vget_high_f32(vResult); + XMVECTOR W = vdupq_lane_f32(VH, 1); + +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + vResult = vdivq_f32(vResult, W); +#else + // 2 iterations of Newton-Raphson refinement of reciprocal for W + float32x4_t Reciprocal = vrecpeq_f32(W); + float32x4_t S = vrecpsq_f32(Reciprocal, W); + Reciprocal = vmulq_f32(S, Reciprocal); + S = vrecpsq_f32(Reciprocal, W); + Reciprocal = vmulq_f32(S, Reciprocal); + + vResult = vmulq_f32(vResult, Reciprocal); +#endif + + VL = vget_low_f32(vResult); + vst1_f32(reinterpret_cast(pOutputVector), VL); + vst1q_lane_f32(reinterpret_cast(pOutputVector) + 2, vResult, 2); + pOutputVector += OutputStride; + } + + return pOutputStream; +#elif defined(_XM_SSE_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row2 = M.r[2]; + const XMVECTOR row3 = M.r[3]; + + size_t i = 0; + size_t four = VectorCount >> 2; + if 
(four > 0) + { + if (InputStride == sizeof(XMFLOAT3)) + { + if (OutputStride == sizeof(XMFLOAT3)) + { + if (!(reinterpret_cast(pOutputStream) & 0xF)) + { + // Packed input, aligned & packed output + for (size_t j = 0; j < four; ++j) + { + __m128 V1 = _mm_loadu_ps(reinterpret_cast(pInputVector)); + __m128 L2 = _mm_loadu_ps(reinterpret_cast(pInputVector + 16)); + __m128 L3 = _mm_loadu_ps(reinterpret_cast(pInputVector + 32)); + pInputVector += sizeof(XMFLOAT3) * 4; + + // Unpack the 4 vectors (.w components are junk) + XM3UNPACK3INTO4(V1, L2, L3); + + // Result 1 + XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); + XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); + XMVECTOR vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + V1 = _mm_div_ps(vTemp, W); + + // Result 2 + Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, row2, row3); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + V2 = _mm_div_ps(vTemp, W); + + // Result 3 + Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, row2, row3); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + V3 = _mm_div_ps(vTemp, W); + + // Result 4 + Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V4, 
_MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, row2, row3); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + V4 = _mm_div_ps(vTemp, W); + + // Pack and store the vectors + XM3PACK4INTO3(vTemp); + XM_STREAM_PS(reinterpret_cast(pOutputVector), V1); + XM_STREAM_PS(reinterpret_cast(pOutputVector + 16), vTemp); + XM_STREAM_PS(reinterpret_cast(pOutputVector + 32), V3); + pOutputVector += sizeof(XMFLOAT3) * 4; + i += 4; + } + } + else + { + // Packed input, unaligned & packed output + for (size_t j = 0; j < four; ++j) + { + __m128 V1 = _mm_loadu_ps(reinterpret_cast(pInputVector)); + __m128 L2 = _mm_loadu_ps(reinterpret_cast(pInputVector + 16)); + __m128 L3 = _mm_loadu_ps(reinterpret_cast(pInputVector + 32)); + pInputVector += sizeof(XMFLOAT3) * 4; + + // Unpack the 4 vectors (.w components are junk) + XM3UNPACK3INTO4(V1, L2, L3); + + // Result 1 + XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); + XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); + XMVECTOR vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + V1 = _mm_div_ps(vTemp, W); + + // Result 2 + Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, row2, row3); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + V2 = _mm_div_ps(vTemp, W); + + // Result 3 + Z = 
XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, row2, row3); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + V3 = _mm_div_ps(vTemp, W); + + // Result 4 + Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, row2, row3); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + V4 = _mm_div_ps(vTemp, W); + + // Pack and store the vectors + XM3PACK4INTO3(vTemp); + _mm_storeu_ps(reinterpret_cast(pOutputVector), V1); + _mm_storeu_ps(reinterpret_cast(pOutputVector + 16), vTemp); + _mm_storeu_ps(reinterpret_cast(pOutputVector + 32), V3); + pOutputVector += sizeof(XMFLOAT3) * 4; + i += 4; + } + } + } + else + { + // Packed input, unpacked output + for (size_t j = 0; j < four; ++j) + { + __m128 V1 = _mm_loadu_ps(reinterpret_cast(pInputVector)); + __m128 L2 = _mm_loadu_ps(reinterpret_cast(pInputVector + 16)); + __m128 L3 = _mm_loadu_ps(reinterpret_cast(pInputVector + 32)); + pInputVector += sizeof(XMFLOAT3) * 4; + + // Unpack the 4 vectors (.w components are junk) + XM3UNPACK3INTO4(V1, L2, L3); + + // Result 1 + XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); + XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); + XMVECTOR vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + 
vTemp = _mm_div_ps(vTemp, W); + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 2 + Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, row2, row3); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + vTemp = _mm_div_ps(vTemp, W); + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 3 + Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, row2, row3); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + vTemp = _mm_div_ps(vTemp, W); + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 4 + Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, row2, row3); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + vTemp = _mm_div_ps(vTemp, W); + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + i += 4; + } + } + } + } + + for (; i < VectorCount; i++) + { + XMVECTOR V = XMLoadFloat3(reinterpret_cast(pInputVector)); + pInputVector += InputStride; + + XMVECTOR Z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, 
_MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); + XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); + XMVECTOR vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + + vTemp = _mm_div_ps(vTemp, W); + + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + } + + XM_SFENCE(); + + return pOutputStream; +#endif +} + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3TransformNormal +( + FXMVECTOR V, + FXMMATRIX M +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Z = XMVectorSplatZ(V); + XMVECTOR Y = XMVectorSplatY(V); + XMVECTOR X = XMVectorSplatX(V); + + XMVECTOR Result = XMVectorMultiply(Z, M.r[2]); + Result = XMVectorMultiplyAdd(Y, M.r[1], Result); + Result = XMVectorMultiplyAdd(X, M.r[0], Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); + XMVECTOR vResult = vmulq_lane_f32(M.r[0], VL, 0); // X + vResult = vmlaq_lane_f32(vResult, M.r[1], VL, 1); // Y + return vmlaq_lane_f32(vResult, M.r[2], vget_high_f32(V), 0); // Z +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); // Z + vResult = _mm_mul_ps(vResult, M.r[2]); + XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); // Y + vResult = XM_FMADD_PS(vTemp, M.r[1], vResult); + vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); // X + vResult = XM_FMADD_PS(vTemp, M.r[0], vResult); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma prefast(disable : 26015 26019, "PREfast noise: Esp:1307" ) +#endif + +_Use_decl_annotations_ +inline XMFLOAT3* XM_CALLCONV XMVector3TransformNormalStream +( + XMFLOAT3* 
pOutputStream, + size_t OutputStride, + const XMFLOAT3* pInputStream, + size_t InputStride, + size_t VectorCount, + FXMMATRIX M +) noexcept +{ + assert(pOutputStream != nullptr); + assert(pInputStream != nullptr); + + assert(InputStride >= sizeof(XMFLOAT3)); + _Analysis_assume_(InputStride >= sizeof(XMFLOAT3)); + + assert(OutputStride >= sizeof(XMFLOAT3)); + _Analysis_assume_(OutputStride >= sizeof(XMFLOAT3)); + +#if defined(_XM_NO_INTRINSICS_) + + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row2 = M.r[2]; + + for (size_t i = 0; i < VectorCount; i++) + { + XMVECTOR V = XMLoadFloat3(reinterpret_cast(pInputVector)); + XMVECTOR Z = XMVectorSplatZ(V); + XMVECTOR Y = XMVectorSplatY(V); + XMVECTOR X = XMVectorSplatX(V); + + XMVECTOR Result = XMVectorMultiply(Z, row2); + Result = XMVectorMultiplyAdd(Y, row1, Result); + Result = XMVectorMultiplyAdd(X, row0, Result); + + XMStoreFloat3(reinterpret_cast(pOutputVector), Result); + + pInputVector += InputStride; + pOutputVector += OutputStride; + } + + return pOutputStream; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row2 = M.r[2]; + + size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { + if ((InputStride == sizeof(XMFLOAT3)) && (OutputStride == sizeof(XMFLOAT3))) + { + for (size_t j = 0; j < four; ++j) + { + float32x4x3_t V = vld3q_f32(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT3) * 4; + + float32x2_t r = vget_low_f32(row0); + XMVECTOR vResult0 = vmulq_lane_f32(V.val[0], r, 0); // Ax + XMVECTOR vResult1 = vmulq_lane_f32(V.val[0], r, 1); // Bx + + XM_PREFETCH(pInputVector); + + r = vget_high_f32(row0); + XMVECTOR vResult2 = vmulq_lane_f32(V.val[0], r, 0); // 
Cx + + XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); + + r = vget_low_f32(row1); + vResult0 = vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey + vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); + + r = vget_high_f32(row1); + vResult2 = vmlaq_lane_f32(vResult2, V.val[1], r, 0); // Cx+Gy + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); + + r = vget_low_f32(row2); + vResult0 = vmlaq_lane_f32(vResult0, V.val[2], r, 0); // Ax+Ey+Iz + vResult1 = vmlaq_lane_f32(vResult1, V.val[2], r, 1); // Bx+Fy+Jz + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 4)); + + r = vget_high_f32(row2); + vResult2 = vmlaq_lane_f32(vResult2, V.val[2], r, 0); // Cx+Gy+Kz + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5)); + + V.val[0] = vResult0; + V.val[1] = vResult1; + V.val[2] = vResult2; + + vst3q_f32(reinterpret_cast(pOutputVector), V); + pOutputVector += sizeof(XMFLOAT3) * 4; + + i += 4; + } + } + } + + for (; i < VectorCount; i++) + { + float32x2_t VL = vld1_f32(reinterpret_cast(pInputVector)); + float32x2_t zero = vdup_n_f32(0); + float32x2_t VH = vld1_lane_f32(reinterpret_cast(pInputVector) + 2, zero, 0); + pInputVector += InputStride; + + XMVECTOR vResult = vmulq_lane_f32(row0, VL, 0); // X + vResult = vmlaq_lane_f32(vResult, row1, VL, 1); // Y + vResult = vmlaq_lane_f32(vResult, row2, VH, 0); // Z + + VL = vget_low_f32(vResult); + vst1_f32(reinterpret_cast(pOutputVector), VL); + vst1q_lane_f32(reinterpret_cast(pOutputVector) + 2, vResult, 2); + pOutputVector += OutputStride; + } + + return pOutputStream; +#elif defined(_XM_SSE_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row2 = M.r[2]; + + size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { + if (InputStride == sizeof(XMFLOAT3)) + { + if (OutputStride == 
sizeof(XMFLOAT3)) + { + if (!(reinterpret_cast(pOutputStream) & 0xF)) + { + // Packed input, aligned & packed output + for (size_t j = 0; j < four; ++j) + { + __m128 V1 = _mm_loadu_ps(reinterpret_cast(pInputVector)); + __m128 L2 = _mm_loadu_ps(reinterpret_cast(pInputVector + 16)); + __m128 L3 = _mm_loadu_ps(reinterpret_cast(pInputVector + 32)); + pInputVector += sizeof(XMFLOAT3) * 4; + + // Unpack the 4 vectors (.w components are junk) + XM3UNPACK3INTO4(V1, L2, L3); + + // Result 1 + XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = _mm_mul_ps(Z, row2); + XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); + XMVECTOR vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + V1 = _mm_add_ps(vTemp, vTemp3); + + // Result 2 + Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = _mm_mul_ps(Z, row2); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + V2 = _mm_add_ps(vTemp, vTemp3); + + // Result 3 + Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = _mm_mul_ps(Z, row2); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + V3 = _mm_add_ps(vTemp, vTemp3); + + // Result 4 + Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = _mm_mul_ps(Z, row2); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + V4 = _mm_add_ps(vTemp, vTemp3); + + // Pack and store the vectors + XM3PACK4INTO3(vTemp); + XM_STREAM_PS(reinterpret_cast(pOutputVector), V1); + 
XM_STREAM_PS(reinterpret_cast(pOutputVector + 16), vTemp); + XM_STREAM_PS(reinterpret_cast(pOutputVector + 32), V3); + pOutputVector += sizeof(XMFLOAT3) * 4; + i += 4; + } + } + else + { + // Packed input, unaligned & packed output + for (size_t j = 0; j < four; ++j) + { + __m128 V1 = _mm_loadu_ps(reinterpret_cast(pInputVector)); + __m128 L2 = _mm_loadu_ps(reinterpret_cast(pInputVector + 16)); + __m128 L3 = _mm_loadu_ps(reinterpret_cast(pInputVector + 32)); + pInputVector += sizeof(XMFLOAT3) * 4; + + // Unpack the 4 vectors (.w components are junk) + XM3UNPACK3INTO4(V1, L2, L3); + + // Result 1 + XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = _mm_mul_ps(Z, row2); + XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); + XMVECTOR vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + V1 = _mm_add_ps(vTemp, vTemp3); + + // Result 2 + Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = _mm_mul_ps(Z, row2); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + V2 = _mm_add_ps(vTemp, vTemp3); + + // Result 3 + Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = _mm_mul_ps(Z, row2); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + V3 = _mm_add_ps(vTemp, vTemp3); + + // Result 4 + Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = _mm_mul_ps(Z, row2); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + V4 = _mm_add_ps(vTemp, vTemp3); + + // Pack and store 
the vectors + XM3PACK4INTO3(vTemp); + _mm_storeu_ps(reinterpret_cast(pOutputVector), V1); + _mm_storeu_ps(reinterpret_cast(pOutputVector + 16), vTemp); + _mm_storeu_ps(reinterpret_cast(pOutputVector + 32), V3); + pOutputVector += sizeof(XMFLOAT3) * 4; + i += 4; + } + } + } + else + { + // Packed input, unpacked output + for (size_t j = 0; j < four; ++j) + { + __m128 V1 = _mm_loadu_ps(reinterpret_cast(pInputVector)); + __m128 L2 = _mm_loadu_ps(reinterpret_cast(pInputVector + 16)); + __m128 L3 = _mm_loadu_ps(reinterpret_cast(pInputVector + 32)); + pInputVector += sizeof(XMFLOAT3) * 4; + + // Unpack the 4 vectors (.w components are junk) + XM3UNPACK3INTO4(V1, L2, L3); + + // Result 1 + XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = _mm_mul_ps(Z, row2); + XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); + XMVECTOR vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 2 + Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = _mm_mul_ps(Z, row2); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 3 + Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = _mm_mul_ps(Z, row2); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector 
+= OutputStride; + + // Result 4 + Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = _mm_mul_ps(Z, row2); + vTemp2 = _mm_mul_ps(Y, row1); + vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + i += 4; + } + } + } + } + + for (; i < VectorCount; i++) + { + XMVECTOR V = XMLoadFloat3(reinterpret_cast(pInputVector)); + pInputVector += InputStride; + + XMVECTOR Z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = _mm_mul_ps(Z, row2); + XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); + XMVECTOR vTemp3 = _mm_mul_ps(X, row0); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + } + + XM_SFENCE(); + + return pOutputStream; +#endif +} + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3Project +( + FXMVECTOR V, + float ViewportX, + float ViewportY, + float ViewportWidth, + float ViewportHeight, + float ViewportMinZ, + float ViewportMaxZ, + FXMMATRIX Projection, + CXMMATRIX View, + CXMMATRIX World +) noexcept +{ + const float HalfViewportWidth = ViewportWidth * 0.5f; + const float HalfViewportHeight = ViewportHeight * 0.5f; + + XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, ViewportMaxZ - ViewportMinZ, 0.0f); + XMVECTOR Offset = XMVectorSet(ViewportX + HalfViewportWidth, ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f); + + XMMATRIX Transform = XMMatrixMultiply(World, View); + Transform = XMMatrixMultiply(Transform, Projection); + + XMVECTOR 
Result = XMVector3TransformCoord(V, Transform); + + Result = XMVectorMultiplyAdd(Result, Scale, Offset); + + return Result; +} + +//------------------------------------------------------------------------------ + +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma prefast(disable : 26015 26019, "PREfast noise: Esp:1307" ) +#endif + +_Use_decl_annotations_ +inline XMFLOAT3* XM_CALLCONV XMVector3ProjectStream +( + XMFLOAT3* pOutputStream, + size_t OutputStride, + const XMFLOAT3* pInputStream, + size_t InputStride, + size_t VectorCount, + float ViewportX, + float ViewportY, + float ViewportWidth, + float ViewportHeight, + float ViewportMinZ, + float ViewportMaxZ, + FXMMATRIX Projection, + CXMMATRIX View, + CXMMATRIX World +) noexcept +{ + assert(pOutputStream != nullptr); + assert(pInputStream != nullptr); + + assert(InputStride >= sizeof(XMFLOAT3)); + _Analysis_assume_(InputStride >= sizeof(XMFLOAT3)); + + assert(OutputStride >= sizeof(XMFLOAT3)); + _Analysis_assume_(OutputStride >= sizeof(XMFLOAT3)); + +#if defined(_XM_NO_INTRINSICS_) + + const float HalfViewportWidth = ViewportWidth * 0.5f; + const float HalfViewportHeight = ViewportHeight * 0.5f; + + XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, ViewportMaxZ - ViewportMinZ, 1.0f); + XMVECTOR Offset = XMVectorSet(ViewportX + HalfViewportWidth, ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f); + + XMMATRIX Transform = XMMatrixMultiply(World, View); + Transform = XMMatrixMultiply(Transform, Projection); + + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + for (size_t i = 0; i < VectorCount; i++) + { + XMVECTOR V = XMLoadFloat3(reinterpret_cast(pInputVector)); + + XMVECTOR Result = XMVector3TransformCoord(V, Transform); + Result = XMVectorMultiplyAdd(Result, Scale, Offset); + + XMStoreFloat3(reinterpret_cast(pOutputVector), Result); + + pInputVector += InputStride; + pOutputVector += OutputStride; + } + + return pOutputStream; + 
+#elif defined(_XM_ARM_NEON_INTRINSICS_) + const float HalfViewportWidth = ViewportWidth * 0.5f; + const float HalfViewportHeight = ViewportHeight * 0.5f; + + XMMATRIX Transform = XMMatrixMultiply(World, View); + Transform = XMMatrixMultiply(Transform, Projection); + + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { + if ((InputStride == sizeof(XMFLOAT3)) && (OutputStride == sizeof(XMFLOAT3))) + { + XMVECTOR ScaleX = vdupq_n_f32(HalfViewportWidth); + XMVECTOR ScaleY = vdupq_n_f32(-HalfViewportHeight); + XMVECTOR ScaleZ = vdupq_n_f32(ViewportMaxZ - ViewportMinZ); + + XMVECTOR OffsetX = vdupq_n_f32(ViewportX + HalfViewportWidth); + XMVECTOR OffsetY = vdupq_n_f32(ViewportY + HalfViewportHeight); + XMVECTOR OffsetZ = vdupq_n_f32(ViewportMinZ); + + for (size_t j = 0; j < four; ++j) + { + float32x4x3_t V = vld3q_f32(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT3) * 4; + + float32x2_t r3 = vget_low_f32(Transform.r[3]); + float32x2_t r = vget_low_f32(Transform.r[0]); + XMVECTOR vResult0 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), V.val[0], r, 0); // Ax+M + XMVECTOR vResult1 = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), V.val[0], r, 1); // Bx+N + + XM_PREFETCH(pInputVector); + + r3 = vget_high_f32(Transform.r[3]); + r = vget_high_f32(Transform.r[0]); + XMVECTOR vResult2 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), V.val[0], r, 0); // Cx+O + XMVECTOR W = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), V.val[0], r, 1); // Dx+P + + XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); + + r = vget_low_f32(Transform.r[1]); + vResult0 = vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey+M + vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy+N + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); + + r = vget_high_f32(Transform.r[1]); + vResult2 = vmlaq_lane_f32(vResult2, V.val[1], r, 0); // Cx+Gy+O + W = vmlaq_lane_f32(W, V.val[1], r, 1); // 
Dx+Hy+P + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); + + r = vget_low_f32(Transform.r[2]); + vResult0 = vmlaq_lane_f32(vResult0, V.val[2], r, 0); // Ax+Ey+Iz+M + vResult1 = vmlaq_lane_f32(vResult1, V.val[2], r, 1); // Bx+Fy+Jz+N + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 4)); + + r = vget_high_f32(Transform.r[2]); + vResult2 = vmlaq_lane_f32(vResult2, V.val[2], r, 0); // Cx+Gy+Kz+O + W = vmlaq_lane_f32(W, V.val[2], r, 1); // Dx+Hy+Lz+P + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5)); + +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + vResult0 = vdivq_f32(vResult0, W); + vResult1 = vdivq_f32(vResult1, W); + vResult2 = vdivq_f32(vResult2, W); +#else + // 2 iterations of Newton-Raphson refinement of reciprocal + float32x4_t Reciprocal = vrecpeq_f32(W); + float32x4_t S = vrecpsq_f32(Reciprocal, W); + Reciprocal = vmulq_f32(S, Reciprocal); + S = vrecpsq_f32(Reciprocal, W); + Reciprocal = vmulq_f32(S, Reciprocal); + + vResult0 = vmulq_f32(vResult0, Reciprocal); + vResult1 = vmulq_f32(vResult1, Reciprocal); + vResult2 = vmulq_f32(vResult2, Reciprocal); +#endif + + V.val[0] = vmlaq_f32(OffsetX, vResult0, ScaleX); + V.val[1] = vmlaq_f32(OffsetY, vResult1, ScaleY); + V.val[2] = vmlaq_f32(OffsetZ, vResult2, ScaleZ); + + vst3q_f32(reinterpret_cast(pOutputVector), V); + pOutputVector += sizeof(XMFLOAT3) * 4; + + i += 4; + } + } + } + + if (i < VectorCount) + { + XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, ViewportMaxZ - ViewportMinZ, 1.0f); + XMVECTOR Offset = XMVectorSet(ViewportX + HalfViewportWidth, ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f); + + for (; i < VectorCount; i++) + { + float32x2_t VL = vld1_f32(reinterpret_cast(pInputVector)); + float32x2_t zero = vdup_n_f32(0); + float32x2_t VH = vld1_lane_f32(reinterpret_cast(pInputVector) + 2, zero, 0); + pInputVector += InputStride; + + XMVECTOR vResult = vmlaq_lane_f32(Transform.r[3], Transform.r[0], VL, 0); // X + vResult = 
vmlaq_lane_f32(vResult, Transform.r[1], VL, 1); // Y + vResult = vmlaq_lane_f32(vResult, Transform.r[2], VH, 0); // Z + + VH = vget_high_f32(vResult); + XMVECTOR W = vdupq_lane_f32(VH, 1); + +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + vResult = vdivq_f32(vResult, W); +#else + // 2 iterations of Newton-Raphson refinement of reciprocal for W + float32x4_t Reciprocal = vrecpeq_f32(W); + float32x4_t S = vrecpsq_f32(Reciprocal, W); + Reciprocal = vmulq_f32(S, Reciprocal); + S = vrecpsq_f32(Reciprocal, W); + Reciprocal = vmulq_f32(S, Reciprocal); + + vResult = vmulq_f32(vResult, Reciprocal); +#endif + + vResult = vmlaq_f32(Offset, vResult, Scale); + + VL = vget_low_f32(vResult); + vst1_f32(reinterpret_cast(pOutputVector), VL); + vst1q_lane_f32(reinterpret_cast(pOutputVector) + 2, vResult, 2); + pOutputVector += OutputStride; + } + } + + return pOutputStream; +#elif defined(_XM_SSE_INTRINSICS_) + const float HalfViewportWidth = ViewportWidth * 0.5f; + const float HalfViewportHeight = ViewportHeight * 0.5f; + + XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, ViewportMaxZ - ViewportMinZ, 1.0f); + XMVECTOR Offset = XMVectorSet(ViewportX + HalfViewportWidth, ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f); + + XMMATRIX Transform = XMMatrixMultiply(World, View); + Transform = XMMatrixMultiply(Transform, Projection); + + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { + if (InputStride == sizeof(XMFLOAT3)) + { + if (OutputStride == sizeof(XMFLOAT3)) + { + if (!(reinterpret_cast(pOutputStream) & 0xF)) + { + // Packed input, aligned & packed output + for (size_t j = 0; j < four; ++j) + { + __m128 V1 = _mm_loadu_ps(reinterpret_cast(pInputVector)); + __m128 L2 = _mm_loadu_ps(reinterpret_cast(pInputVector + 16)); + __m128 L3 = _mm_loadu_ps(reinterpret_cast(pInputVector + 32)); + pInputVector += 
sizeof(XMFLOAT3) * 4; + + // Unpack the 4 vectors (.w components are junk) + XM3UNPACK3INTO4(V1, L2, L3); + + // Result 1 + XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + V1 = XM_FMADD_PS(vTemp, Scale, Offset); + + // Result 2 + Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + V2 = XM_FMADD_PS(vTemp, Scale, Offset); + + // Result 3 + Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + V3 = XM_FMADD_PS(vTemp, Scale, Offset); + + // Result 4 + Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = 
_mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + V4 = XM_FMADD_PS(vTemp, Scale, Offset); + + // Pack and store the vectors + XM3PACK4INTO3(vTemp); + XM_STREAM_PS(reinterpret_cast(pOutputVector), V1); + XM_STREAM_PS(reinterpret_cast(pOutputVector + 16), vTemp); + XM_STREAM_PS(reinterpret_cast(pOutputVector + 32), V3); + pOutputVector += sizeof(XMFLOAT3) * 4; + i += 4; + } + } + else + { + // Packed input, unaligned & packed output + for (size_t j = 0; j < four; ++j) + { + __m128 V1 = _mm_loadu_ps(reinterpret_cast(pInputVector)); + __m128 L2 = _mm_loadu_ps(reinterpret_cast(pInputVector + 16)); + __m128 L3 = _mm_loadu_ps(reinterpret_cast(pInputVector + 32)); + pInputVector += sizeof(XMFLOAT3) * 4; + + // Unpack the 4 vectors (.w components are junk) + XM3UNPACK3INTO4(V1, L2, L3); + + // Result 1 + XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + V1 = XM_FMADD_PS(vTemp, Scale, Offset); + + // Result 2 + Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + V2 = 
XM_FMADD_PS(vTemp, Scale, Offset); + + // Result 3 + Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + V3 = XM_FMADD_PS(vTemp, Scale, Offset); + + // Result 4 + Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + V4 = XM_FMADD_PS(vTemp, Scale, Offset); + + // Pack and store the vectors + XM3PACK4INTO3(vTemp); + _mm_storeu_ps(reinterpret_cast(pOutputVector), V1); + _mm_storeu_ps(reinterpret_cast(pOutputVector + 16), vTemp); + _mm_storeu_ps(reinterpret_cast(pOutputVector + 32), V3); + pOutputVector += sizeof(XMFLOAT3) * 4; + i += 4; + } + } + } + else + { + // Packed input, unpacked output + for (size_t j = 0; j < four; ++j) + { + __m128 V1 = _mm_loadu_ps(reinterpret_cast(pInputVector)); + __m128 L2 = _mm_loadu_ps(reinterpret_cast(pInputVector + 16)); + __m128 L3 = _mm_loadu_ps(reinterpret_cast(pInputVector + 32)); + pInputVector += sizeof(XMFLOAT3) * 4; + + // Unpack the 4 vectors (.w components are junk) + XM3UNPACK3INTO4(V1, L2, L3); + + // Result 1 + XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, 
Transform.r[2], Transform.r[3]); + XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + vTemp = XM_FMADD_PS(vTemp, Scale, Offset); + + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 2 + Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + vTemp = XM_FMADD_PS(vTemp, Scale, Offset); + + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 3 + Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + vTemp = XM_FMADD_PS(vTemp, Scale, Offset); + + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 4 + Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, 
vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + vTemp = XM_FMADD_PS(vTemp, Scale, Offset); + + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + i += 4; + } + } + } + } + + for (; i < VectorCount; i++) + { + XMVECTOR V = XMLoadFloat3(reinterpret_cast(pInputVector)); + pInputVector += InputStride; + + XMVECTOR Z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + vTemp = XM_FMADD_PS(vTemp, Scale, Offset); + + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + } + + XM_SFENCE(); + + return pOutputStream; +#endif +} + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector3Unproject +( + FXMVECTOR V, + float ViewportX, + float ViewportY, + float ViewportWidth, + float ViewportHeight, + float ViewportMinZ, + float ViewportMaxZ, + FXMMATRIX Projection, + CXMMATRIX View, + CXMMATRIX World +) noexcept +{ + static const XMVECTORF32 D = { { { -1.0f, 1.0f, 0.0f, 0.0f } } }; + + XMVECTOR Scale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f); + Scale = XMVectorReciprocal(Scale); + + XMVECTOR Offset = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f); + Offset = XMVectorMultiplyAdd(Scale, Offset, D.v); + + XMMATRIX Transform = XMMatrixMultiply(World, View); + Transform = XMMatrixMultiply(Transform, 
Projection); + Transform = XMMatrixInverse(nullptr, Transform); + + XMVECTOR Result = XMVectorMultiplyAdd(V, Scale, Offset); + + return XMVector3TransformCoord(Result, Transform); +} + +//------------------------------------------------------------------------------ + +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma prefast(disable : 26015 26019, "PREfast noise: Esp:1307" ) +#endif + +_Use_decl_annotations_ +inline XMFLOAT3* XM_CALLCONV XMVector3UnprojectStream +( + XMFLOAT3* pOutputStream, + size_t OutputStride, + const XMFLOAT3* pInputStream, + size_t InputStride, + size_t VectorCount, + float ViewportX, + float ViewportY, + float ViewportWidth, + float ViewportHeight, + float ViewportMinZ, + float ViewportMaxZ, + FXMMATRIX Projection, + CXMMATRIX View, + CXMMATRIX World +) noexcept +{ + assert(pOutputStream != nullptr); + assert(pInputStream != nullptr); + + assert(InputStride >= sizeof(XMFLOAT3)); + _Analysis_assume_(InputStride >= sizeof(XMFLOAT3)); + + assert(OutputStride >= sizeof(XMFLOAT3)); + _Analysis_assume_(OutputStride >= sizeof(XMFLOAT3)); + +#if defined(_XM_NO_INTRINSICS_) + + static const XMVECTORF32 D = { { { -1.0f, 1.0f, 0.0f, 0.0f } } }; + + XMVECTOR Scale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f); + Scale = XMVectorReciprocal(Scale); + + XMVECTOR Offset = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f); + Offset = XMVectorMultiplyAdd(Scale, Offset, D.v); + + XMMATRIX Transform = XMMatrixMultiply(World, View); + Transform = XMMatrixMultiply(Transform, Projection); + Transform = XMMatrixInverse(nullptr, Transform); + + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + for (size_t i = 0; i < VectorCount; i++) + { + XMVECTOR V = XMLoadFloat3(reinterpret_cast(pInputVector)); + + XMVECTOR Result = XMVectorMultiplyAdd(V, Scale, Offset); + + Result = XMVector3TransformCoord(Result, Transform); + + 
XMStoreFloat3(reinterpret_cast(pOutputVector), Result); + + pInputVector += InputStride; + pOutputVector += OutputStride; + } + + return pOutputStream; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMMATRIX Transform = XMMatrixMultiply(World, View); + Transform = XMMatrixMultiply(Transform, Projection); + Transform = XMMatrixInverse(nullptr, Transform); + + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + float sx = 1.f / (ViewportWidth * 0.5f); + float sy = 1.f / (-ViewportHeight * 0.5f); + float sz = 1.f / (ViewportMaxZ - ViewportMinZ); + + float ox = (-ViewportX * sx) - 1.f; + float oy = (-ViewportY * sy) + 1.f; + float oz = (-ViewportMinZ * sz); + + size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { + if ((InputStride == sizeof(XMFLOAT3)) && (OutputStride == sizeof(XMFLOAT3))) + { + for (size_t j = 0; j < four; ++j) + { + float32x4x3_t V = vld3q_f32(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT3) * 4; + + XMVECTOR ScaleX = vdupq_n_f32(sx); + XMVECTOR OffsetX = vdupq_n_f32(ox); + XMVECTOR VX = vmlaq_f32(OffsetX, ScaleX, V.val[0]); + + float32x2_t r3 = vget_low_f32(Transform.r[3]); + float32x2_t r = vget_low_f32(Transform.r[0]); + XMVECTOR vResult0 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), VX, r, 0); // Ax+M + XMVECTOR vResult1 = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), VX, r, 1); // Bx+N + + XM_PREFETCH(pInputVector); + + r3 = vget_high_f32(Transform.r[3]); + r = vget_high_f32(Transform.r[0]); + XMVECTOR vResult2 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), VX, r, 0); // Cx+O + XMVECTOR W = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), VX, r, 1); // Dx+P + + XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); + + XMVECTOR ScaleY = vdupq_n_f32(sy); + XMVECTOR OffsetY = vdupq_n_f32(oy); + XMVECTOR VY = vmlaq_f32(OffsetY, ScaleY, V.val[1]); + + r = vget_low_f32(Transform.r[1]); + vResult0 = vmlaq_lane_f32(vResult0, VY, r, 0); // Ax+Ey+M + vResult1 = vmlaq_lane_f32(vResult1, VY, r, 1); // 
Bx+Fy+N + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); + + r = vget_high_f32(Transform.r[1]); + vResult2 = vmlaq_lane_f32(vResult2, VY, r, 0); // Cx+Gy+O + W = vmlaq_lane_f32(W, VY, r, 1); // Dx+Hy+P + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); + + XMVECTOR ScaleZ = vdupq_n_f32(sz); + XMVECTOR OffsetZ = vdupq_n_f32(oz); + XMVECTOR VZ = vmlaq_f32(OffsetZ, ScaleZ, V.val[2]); + + r = vget_low_f32(Transform.r[2]); + vResult0 = vmlaq_lane_f32(vResult0, VZ, r, 0); // Ax+Ey+Iz+M + vResult1 = vmlaq_lane_f32(vResult1, VZ, r, 1); // Bx+Fy+Jz+N + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 4)); + + r = vget_high_f32(Transform.r[2]); + vResult2 = vmlaq_lane_f32(vResult2, VZ, r, 0); // Cx+Gy+Kz+O + W = vmlaq_lane_f32(W, VZ, r, 1); // Dx+Hy+Lz+P + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5)); + +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + V.val[0] = vdivq_f32(vResult0, W); + V.val[1] = vdivq_f32(vResult1, W); + V.val[2] = vdivq_f32(vResult2, W); +#else + // 2 iterations of Newton-Raphson refinement of reciprocal + float32x4_t Reciprocal = vrecpeq_f32(W); + float32x4_t S = vrecpsq_f32(Reciprocal, W); + Reciprocal = vmulq_f32(S, Reciprocal); + S = vrecpsq_f32(Reciprocal, W); + Reciprocal = vmulq_f32(S, Reciprocal); + + V.val[0] = vmulq_f32(vResult0, Reciprocal); + V.val[1] = vmulq_f32(vResult1, Reciprocal); + V.val[2] = vmulq_f32(vResult2, Reciprocal); +#endif + + vst3q_f32(reinterpret_cast(pOutputVector), V); + pOutputVector += sizeof(XMFLOAT3) * 4; + + i += 4; + } + } + } + + if (i < VectorCount) + { + float32x2_t ScaleL = vcreate_f32( + static_cast(*reinterpret_cast(&sx)) + | (static_cast(*reinterpret_cast(&sy)) << 32)); + float32x2_t ScaleH = vcreate_f32(static_cast(*reinterpret_cast(&sz))); + + float32x2_t OffsetL = vcreate_f32( + static_cast(*reinterpret_cast(&ox)) + | (static_cast(*reinterpret_cast(&oy)) << 32)); + float32x2_t OffsetH = vcreate_f32(static_cast(*reinterpret_cast(&oz))); + + for (; i < 
VectorCount; i++) + { + float32x2_t VL = vld1_f32(reinterpret_cast(pInputVector)); + float32x2_t zero = vdup_n_f32(0); + float32x2_t VH = vld1_lane_f32(reinterpret_cast(pInputVector) + 2, zero, 0); + pInputVector += InputStride; + + VL = vmla_f32(OffsetL, VL, ScaleL); + VH = vmla_f32(OffsetH, VH, ScaleH); + + XMVECTOR vResult = vmlaq_lane_f32(Transform.r[3], Transform.r[0], VL, 0); // X + vResult = vmlaq_lane_f32(vResult, Transform.r[1], VL, 1); // Y + vResult = vmlaq_lane_f32(vResult, Transform.r[2], VH, 0); // Z + + VH = vget_high_f32(vResult); + XMVECTOR W = vdupq_lane_f32(VH, 1); + +#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__ + vResult = vdivq_f32(vResult, W); +#else + // 2 iterations of Newton-Raphson refinement of reciprocal for W + float32x4_t Reciprocal = vrecpeq_f32(W); + float32x4_t S = vrecpsq_f32(Reciprocal, W); + Reciprocal = vmulq_f32(S, Reciprocal); + S = vrecpsq_f32(Reciprocal, W); + Reciprocal = vmulq_f32(S, Reciprocal); + + vResult = vmulq_f32(vResult, Reciprocal); +#endif + + VL = vget_low_f32(vResult); + vst1_f32(reinterpret_cast(pOutputVector), VL); + vst1q_lane_f32(reinterpret_cast(pOutputVector) + 2, vResult, 2); + pOutputVector += OutputStride; + } + } + + return pOutputStream; +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 D = { { { -1.0f, 1.0f, 0.0f, 0.0f } } }; + + XMVECTOR Scale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f); + Scale = XMVectorReciprocal(Scale); + + XMVECTOR Offset = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f); + Offset = _mm_mul_ps(Scale, Offset); + Offset = _mm_add_ps(Offset, D); + + XMMATRIX Transform = XMMatrixMultiply(World, View); + Transform = XMMatrixMultiply(Transform, Projection); + Transform = XMMatrixInverse(nullptr, Transform); + + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { 
+ if (InputStride == sizeof(XMFLOAT3)) + { + if (OutputStride == sizeof(XMFLOAT3)) + { + if (!(reinterpret_cast(pOutputStream) & 0xF)) + { + // Packed input, aligned & packed output + for (size_t j = 0; j < four; ++j) + { + __m128 V1 = _mm_loadu_ps(reinterpret_cast(pInputVector)); + __m128 L2 = _mm_loadu_ps(reinterpret_cast(pInputVector + 16)); + __m128 L3 = _mm_loadu_ps(reinterpret_cast(pInputVector + 32)); + pInputVector += sizeof(XMFLOAT3) * 4; + + // Unpack the 4 vectors (.w components are junk) + XM3UNPACK3INTO4(V1, L2, L3); + + // Result 1 + V1 = XM_FMADD_PS(V1, Scale, Offset); + + XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + V1 = _mm_div_ps(vTemp, W); + + // Result 2 + V2 = XM_FMADD_PS(V2, Scale, Offset); + + Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + V2 = _mm_div_ps(vTemp, W); + + // Result 3 + V3 = XM_FMADD_PS(V3, Scale, Offset); + + Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, 
vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + V3 = _mm_div_ps(vTemp, W); + + // Result 4 + V4 = XM_FMADD_PS(V4, Scale, Offset); + + Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + V4 = _mm_div_ps(vTemp, W); + + // Pack and store the vectors + XM3PACK4INTO3(vTemp); + XM_STREAM_PS(reinterpret_cast(pOutputVector), V1); + XM_STREAM_PS(reinterpret_cast(pOutputVector + 16), vTemp); + XM_STREAM_PS(reinterpret_cast(pOutputVector + 32), V3); + pOutputVector += sizeof(XMFLOAT3) * 4; + i += 4; + } + } + else + { + // Packed input, unaligned & packed output + for (size_t j = 0; j < four; ++j) + { + __m128 V1 = _mm_loadu_ps(reinterpret_cast(pInputVector)); + __m128 L2 = _mm_loadu_ps(reinterpret_cast(pInputVector + 16)); + __m128 L3 = _mm_loadu_ps(reinterpret_cast(pInputVector + 32)); + pInputVector += sizeof(XMFLOAT3) * 4; + + // Unpack the 4 vectors (.w components are junk) + XM3UNPACK3INTO4(V1, L2, L3); + + // Result 1 + V1 = XM_FMADD_PS(V1, Scale, Offset); + + XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + V1 = _mm_div_ps(vTemp, W); + + // Result 2 + V2 = XM_FMADD_PS(V2, Scale, Offset); + + Z = XM_PERMUTE_PS(V2, 
_MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + V2 = _mm_div_ps(vTemp, W); + + // Result 3 + V3 = XM_FMADD_PS(V3, Scale, Offset); + + Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + V3 = _mm_div_ps(vTemp, W); + + // Result 4 + V4 = XM_FMADD_PS(V4, Scale, Offset); + + Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + V4 = _mm_div_ps(vTemp, W); + + // Pack and store the vectors + XM3PACK4INTO3(vTemp); + _mm_storeu_ps(reinterpret_cast(pOutputVector), V1); + _mm_storeu_ps(reinterpret_cast(pOutputVector + 16), vTemp); + _mm_storeu_ps(reinterpret_cast(pOutputVector + 32), V3); + pOutputVector += sizeof(XMFLOAT3) * 4; + i += 4; + } + } + } + else + { + // Packed input, unpacked output + for (size_t j = 0; j < four; ++j) + { + __m128 V1 = _mm_loadu_ps(reinterpret_cast(pInputVector)); + __m128 L2 = _mm_loadu_ps(reinterpret_cast(pInputVector + 16)); + __m128 L3 = 
_mm_loadu_ps(reinterpret_cast(pInputVector + 32)); + pInputVector += sizeof(XMFLOAT3) * 4; + + // Unpack the 4 vectors (.w components are junk) + XM3UNPACK3INTO4(V1, L2, L3); + + // Result 1 + V1 = XM_FMADD_PS(V1, Scale, Offset); + + XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 2 + V2 = XM_FMADD_PS(V2, Scale, Offset); + + Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 3 + V3 = XM_FMADD_PS(V3, Scale, Offset); + + Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + + 
XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + // Result 4 + V4 = XM_FMADD_PS(V4, Scale, Offset); + + Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); + Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); + X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); + + vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + + i += 4; + } + } + } + } + + for (; i < VectorCount; i++) + { + XMVECTOR V = XMLoadFloat3(reinterpret_cast(pInputVector)); + pInputVector += InputStride; + + V = _mm_mul_ps(V, Scale); + V = _mm_add_ps(V, Offset); + + XMVECTOR Z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + + XMVECTOR vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); + XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); + XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); + vTemp = _mm_add_ps(vTemp, vTemp2); + vTemp = _mm_add_ps(vTemp, vTemp3); + + XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); + vTemp = _mm_div_ps(vTemp, W); + + XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); + pOutputVector += OutputStride; + } + + XM_SFENCE(); + + return pOutputStream; +#endif +} + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + +/**************************************************************************** + * + * 4D Vector + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + // Comparison operations + 
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// XMVector4Equal: returns true only when all four float components of V1
// compare equal to the corresponding components of V2.

inline bool XM_CALLCONV XMVector4Equal
(
    FXMVECTOR V1,
    FXMVECTOR V2
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2]) && (V1.vector4_f32[3] == V2.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    uint32x4_t vResult = vceqq_f32(V1, V2);
    // The two zips interleave the mask bytes so that 32-bit lane 1 of
    // vTemp2.val[1] holds one byte from each of the four comparison lanes;
    // 0xFFFFFFFF therefore means every lane compared true.
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult)));
    uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == 0xFFFFFFFFU);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2);
    // One mask bit per lane; 0x0f means all four lanes compared equal
    return ((_mm_movemask_ps(vTemp) == 0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------
// XMVector4EqualR: float equality test that returns a comparison record:
// XM_CRMASK_CR6TRUE when all four components are equal, XM_CRMASK_CR6FALSE
// when all four components are not equal, and 0 for a mixed result.

inline uint32_t XM_CALLCONV XMVector4EqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)

    uint32_t CR = 0;

    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] == V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] == V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] != V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] != V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    uint32x4_t vResult = vceqq_f32(V1, V2);
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult)));
    uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    // r packs one mask byte from each of the four comparison lanes
    uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1);

    uint32_t CR = 0;
    if (r == 0xFFFFFFFFU)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!r)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2);
    int iTest = _mm_movemask_ps(vTemp);
    uint32_t CR = 0;
    if (iTest == 0xf)     // All equal?
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (iTest == 0)  // All not equal?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#endif
}

//------------------------------------------------------------------------------
// XMVector4EqualInt: returns true only when all four components match when
// compared as 32-bit unsigned integers (the vector4_u32 view of the data).

inline bool XM_CALLCONV XMVector4EqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2]) && (V1.vector4_u32[3] == V2.vector4_u32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    uint32x4_t vResult = vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2));
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult)));
    uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == 0xFFFFFFFFU);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2));
    return ((_mm_movemask_ps(_mm_castsi128_ps(vTemp)) == 0xf) != 0);
#else
    return XMComparisonAllTrue(XMVector4EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------
// XMVector4EqualIntR: 32-bit integer equality test that returns a comparison
// record: XM_CRMASK_CR6TRUE when all four components are equal,
// XM_CRMASK_CR6FALSE when all four are not equal, and 0 for a mixed result.

inline uint32_t XM_CALLCONV XMVector4EqualIntR
(
    FXMVECTOR V1,
    FXMVECTOR V2
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
    uint32_t CR = 0;
    if (V1.vector4_u32[0] == V2.vector4_u32[0] &&
        V1.vector4_u32[1] == V2.vector4_u32[1] &&
        V1.vector4_u32[2] == V2.vector4_u32[2] &&
        V1.vector4_u32[3] == V2.vector4_u32[3])
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (V1.vector4_u32[0] != V2.vector4_u32[0] &&
        V1.vector4_u32[1] != V2.vector4_u32[1] &&
        V1.vector4_u32[2] != V2.vector4_u32[2] &&
        V1.vector4_u32[3] != V2.vector4_u32[3])
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    uint32x4_t vResult = vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2));
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult)));
    uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1);

    uint32_t CR = 0;
    if (r == 0xFFFFFFFFU)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!r)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2));
    int iTest = _mm_movemask_ps(_mm_castsi128_ps(vTemp));
    uint32_t CR = 0;
    if (iTest == 0xf)     // All equal?
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (iTest == 0)  // All not equal?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#endif
}

//------------------------------------------------------------------------------
// XMVector4NearEqual: returns true only when, for every component, the
// absolute difference between V1 and V2 is less than or equal to the
// corresponding component of Epsilon.

inline bool XM_CALLCONV XMVector4NearEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Epsilon
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
    float dx, dy, dz, dw;

    dx = fabsf(V1.vector4_f32[0] - V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1] - V2.vector4_f32[1]);
    dz = fabsf(V1.vector4_f32[2] - V2.vector4_f32[2]);
    dw = fabsf(V1.vector4_f32[3] - V2.vector4_f32[3]);
    return (((dx <= Epsilon.vector4_f32[0]) &&
        (dy <= Epsilon.vector4_f32[1]) &&
        (dz <= Epsilon.vector4_f32[2]) &&
        (dw <= Epsilon.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float32x4_t vDelta = vsubq_f32(V1, V2);
#ifdef _MSC_VER
    uint32x4_t vResult = vacleq_f32(vDelta, Epsilon);
#else
    uint32x4_t vResult = vcleq_f32(vabsq_f32(vDelta), Epsilon);
#endif
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult)));
    uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == 0xFFFFFFFFU);
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1, V2);
    // Get the absolute value of the difference
    // (computed as max(0 - delta, delta))
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp, vDelta);
    vTemp = _mm_max_ps(vTemp, vDelta);
    vTemp = _mm_cmple_ps(vTemp, Epsilon);
    return ((_mm_movemask_ps(vTemp) == 0xf) != 0);
#endif
}

//------------------------------------------------------------------------------
// XMVector4NotEqual: returns true when at least one float component of V1
// differs from the corresponding component of V2.

inline bool XM_CALLCONV XMVector4NotEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2]) || (V1.vector4_f32[3] != V2.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    uint32x4_t vResult = vceqq_f32(V1, V2);
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult)));
    uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    // Anything other than "all lanes equal" counts as not equal
    return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) != 0xFFFFFFFFU);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpneq_ps(V1, V2);
    return ((_mm_movemask_ps(vTemp)) != 0);
#else
    return XMComparisonAnyFalse(XMVector4EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------
// XMVector4NotEqualInt: returns true when at least one component differs when
// compared as 32-bit unsigned integers.

inline bool XM_CALLCONV XMVector4NotEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2]) || (V1.vector4_u32[3] != V2.vector4_u32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    uint32x4_t vResult = vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2));
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult)));
    uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) != 0xFFFFFFFFU);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2));
    return ((_mm_movemask_ps(_mm_castsi128_ps(vTemp)) != 0xF) != 0);
#else
    return XMComparisonAnyFalse(XMVector4EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------
// XMVector4Greater: returns true only when every component of V1 is strictly
// greater than the corresponding component of V2.

inline bool XM_CALLCONV XMVector4Greater
(
    FXMVECTOR V1,
    FXMVECTOR V2
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2]) && (V1.vector4_f32[3] > V2.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    uint32x4_t vResult = vcgtq_f32(V1, V2);
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult)));
    uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == 0xFFFFFFFFU);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1, V2);
    return ((_mm_movemask_ps(vTemp) == 0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterR(V1, V2));
#endif
}

//------------------------------------------------------------------------------
// XMVector4GreaterR: greater-than test that returns a comparison record:
// XM_CRMASK_CR6TRUE when every component of V1 is greater than V2,
// XM_CRMASK_CR6FALSE when no component is, and 0 for a mixed result.
// NOTE(review): for NaN inputs the scalar path (which requires every `<=` to
// hold) and the SIMD paths (which require the `>` mask to be all zero) can
// disagree on XM_CRMASK_CR6FALSE - confirm whether any caller depends on this.

inline uint32_t XM_CALLCONV XMVector4GreaterR
(
    FXMVECTOR V1,
    FXMVECTOR V2
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
    uint32_t CR = 0;
    if (V1.vector4_f32[0] > V2.vector4_f32[0] &&
        V1.vector4_f32[1] > V2.vector4_f32[1] &&
        V1.vector4_f32[2] > V2.vector4_f32[2] &&
        V1.vector4_f32[3] > V2.vector4_f32[3])
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (V1.vector4_f32[0] <= V2.vector4_f32[0] &&
        V1.vector4_f32[1] <= V2.vector4_f32[1] &&
        V1.vector4_f32[2] <= V2.vector4_f32[2] &&
        V1.vector4_f32[3] <= V2.vector4_f32[3])
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    uint32x4_t vResult = vcgtq_f32(V1, V2);
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult)));
    uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1);

    uint32_t CR = 0;
    if (r == 0xFFFFFFFFU)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!r)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    uint32_t CR = 0;
    XMVECTOR vTemp = _mm_cmpgt_ps(V1, V2);
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest == 0xf)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#endif
}

//------------------------------------------------------------------------------
// XMVector4GreaterOrEqual: returns true only when every component of V1 is
// greater than or equal to the corresponding component of V2.

inline bool XM_CALLCONV XMVector4GreaterOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2]) && (V1.vector4_f32[3] >= V2.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    uint32x4_t vResult = vcgeq_f32(V1, V2);
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult)));
    uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == 0xFFFFFFFFU);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1, V2);
    return ((_mm_movemask_ps(vTemp) == 0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------
// XMVector4GreaterOrEqualR: greater-or-equal test that returns a comparison
// record: XM_CRMASK_CR6TRUE when every component of V1 is >= V2,
// XM_CRMASK_CR6FALSE when no component is, and 0 for a mixed result.
// NOTE(review): for NaN inputs the scalar path (which requires every `<` to
// hold) and the SIMD paths (which require the `>=` mask to be all zero) can
// disagree on XM_CRMASK_CR6FALSE - confirm whether any caller depends on this.

inline uint32_t XM_CALLCONV XMVector4GreaterOrEqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
    uint32_t CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] >= V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] >= V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] < V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] < V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    uint32x4_t vResult = vcgeq_f32(V1, V2);
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult)));
    uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1);

    uint32_t CR = 0;
    if (r == 0xFFFFFFFFU)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!r)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    uint32_t CR = 0;
    XMVECTOR vTemp = _mm_cmpge_ps(V1, V2);
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest == 0x0f)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#endif
}

//------------------------------------------------------------------------------
// XMVector4Less: returns true only when every component of V1 is strictly
// less than the corresponding component of V2.

inline bool XM_CALLCONV XMVector4Less
(
    FXMVECTOR V1,
    FXMVECTOR V2
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2]) && (V1.vector4_f32[3] < V2.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    uint32x4_t vResult = vcltq_f32(V1, V2);
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult)));
    uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == 0xFFFFFFFFU);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmplt_ps(V1, V2);
    return ((_mm_movemask_ps(vTemp) == 0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterR(V2, V1));
#endif
}

//------------------------------------------------------------------------------
// XMVector4LessOrEqual: returns true only when every component of V1 is less
// than or equal to the corresponding component of V2.

inline bool XM_CALLCONV XMVector4LessOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2]) && (V1.vector4_f32[3] <= V2.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    uint32x4_t vResult = vcleq_f32(V1, V2);
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), vget_high_u8(vreinterpretq_u8_u32(vResult)));
    uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == 0xFFFFFFFFU);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmple_ps(V1, V2);
    return ((_mm_movemask_ps(vTemp) == 0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V2, V1));
#endif
}

//------------------------------------------------------------------------------
// XMVector4InBounds: returns true only when every component of V lies in the
// closed interval [-Bounds, +Bounds] of the corresponding Bounds component.

inline bool XM_CALLCONV XMVector4InBounds
(
    FXMVECTOR V,
    FXMVECTOR Bounds
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) &&
        (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Test if less than or equal
    uint32x4_t ivTemp1 = vcleq_f32(V, Bounds);
    // Negate the bounds
    float32x4_t vTemp2 = vnegq_f32(Bounds);
    // Test if greater or equal (Reversed)
    uint32x4_t ivTemp2 = vcleq_f32(vTemp2, V);
    // Blend answers
    ivTemp1 = vandq_u32(ivTemp1, ivTemp2);
    // in bounds?
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(ivTemp1)), vget_high_u8(vreinterpretq_u8_u32(ivTemp1)));
    uint16x4x2_t vTemp3 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    return (vget_lane_u32(vreinterpret_u32_u16(vTemp3.val[1]), 1) == 0xFFFFFFFFU);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V, Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds, g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2, V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1, vTemp2);
    // All in bounds?
    return ((_mm_movemask_ps(vTemp1) == 0x0f) != 0);
#else
    return XMComparisonAllInBounds(XMVector4InBoundsR(V, Bounds));
#endif
}

//------------------------------------------------------------------------------
// XMVector4IsNaN: returns true when at least one component of V is NaN.
// On MSVC (excluding clang and the Intel compiler) the function is compiled
// with float_control(precise, on) - presumably so the self-comparison used by
// the intrinsic paths is not optimized away; confirm against compiler docs.

#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#pragma float_control(push)
#pragma float_control(precise, on)
#endif

inline bool XM_CALLCONV XMVector4IsNaN(FXMVECTOR V) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISNAN(V.vector4_f32[0]) ||
        XMISNAN(V.vector4_f32[1]) ||
        XMISNAN(V.vector4_f32[2]) ||
        XMISNAN(V.vector4_f32[3]));
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Test against itself. NaN is always not equal
    uint32x4_t vTempNan = vceqq_f32(V, V);
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vTempNan)), vget_high_u8(vreinterpretq_u8_u32(vTempNan)));
    uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    // If any are NaN, the mask is zero
    return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) != 0xFFFFFFFFU);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test against itself. NaN is always not equal
    XMVECTOR vTempNan = _mm_cmpneq_ps(V, V);
    // If any are NaN, the mask is non-zero
    return (_mm_movemask_ps(vTempNan) != 0);
#endif
}

#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#pragma float_control(pop)
#endif

//------------------------------------------------------------------------------
// XMVector4IsInfinite: returns true when at least one component of V is
// positive or negative infinity.

inline bool XM_CALLCONV XMVector4IsInfinite(FXMVECTOR V) noexcept
{
#if defined(_XM_NO_INTRINSICS_)

    return (XMISINF(V.vector4_f32[0]) ||
        XMISINF(V.vector4_f32[1]) ||
        XMISINF(V.vector4_f32[2]) ||
        XMISINF(V.vector4_f32[3]));

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Mask off the sign bit
    uint32x4_t vTempInf = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask);
    // Compare to infinity
    vTempInf = vceqq_f32(vreinterpretq_f32_u32(vTempInf), g_XMInfinity);
    // If any are infinity, the signs are true.
    uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vTempInf)), vget_high_u8(vreinterpretq_u8_u32(vTempInf)));
    uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
    return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit
    XMVECTOR vTemp = _mm_and_ps(V, g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp, g_XMInfinity);
    // If any are infinity, the signs are true.
    return (_mm_movemask_ps(vTemp) != 0);
#endif
}

//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// XMVector4Dot: computes the 4D dot product of V1 and V2 and returns it
// replicated into all four components of the result.

inline XMVECTOR XM_CALLCONV XMVector4Dot
(
    FXMVECTOR V1,
    FXMVECTOR V2
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTORF32 Result;
    Result.f[0] =
        Result.f[1] =
        Result.f[2] =
        Result.f[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2] + V1.vector4_f32[3] * V2.vector4_f32[3];
    return Result.v;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float32x4_t vTemp = vmulq_f32(V1, V2);
    float32x2_t v1 = vget_low_f32(vTemp);
    float32x2_t v2 = vget_high_f32(vTemp);
    // Add the low and high halves, then pairwise-add to get the full sum in both lanes
    v1 = vadd_f32(v1, v2);
    v1 = vpadd_f32(v1, v1);
    return vcombine_f32(v1, v1);
#elif defined(_XM_SSE4_INTRINSICS_)
    return _mm_dp_ps(V1, V2, 0xff);
#elif defined(_XM_SSE3_INTRINSICS_)
    XMVECTOR vTemp = _mm_mul_ps(V1, V2);
    vTemp = _mm_hadd_ps(vTemp, vTemp);
    return _mm_hadd_ps(vTemp, vTemp);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp2 = V2;
    XMVECTOR vTemp = _mm_mul_ps(V1, vTemp2);
    vTemp2 = _mm_shuffle_ps(vTemp2, vTemp, _MM_SHUFFLE(1, 0, 0, 0)); // Copy X to the Z position and Y to the W position
    vTemp2 = _mm_add_ps(vTemp2, vTemp);          // Add Z = X+Z; W = Y+W;
    vTemp = _mm_shuffle_ps(vTemp, vTemp2, _MM_SHUFFLE(0, 3, 0, 0));  // Copy W to the Z position
    vTemp = _mm_add_ps(vTemp, vTemp2);           // Add Z and W together
    return XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(2, 2, 2, 2));    // Splat Z and return
#endif
}

//------------------------------------------------------------------------------
// XMVector4Cross: computes the 4D cross product of V1, V2 and V3 using the
// component formula given at the top of the function body. The intrinsic
// paths evaluate it as three (product - product) * V1-swizzle terms that are
// combined as term1 - term2 + term3.

inline XMVECTOR XM_CALLCONV XMVector4Cross
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR V3
) noexcept
{
    // [ ((v2.z*v3.w-v2.w*v3.z)*v1.y)-((v2.y*v3.w-v2.w*v3.y)*v1.z)+((v2.y*v3.z-v2.z*v3.y)*v1.w),
    //   ((v2.w*v3.z-v2.z*v3.w)*v1.x)-((v2.w*v3.x-v2.x*v3.w)*v1.z)+((v2.z*v3.x-v2.x*v3.z)*v1.w),
    //   ((v2.y*v3.w-v2.w*v3.y)*v1.x)-((v2.x*v3.w-v2.w*v3.x)*v1.y)+((v2.x*v3.y-v2.y*v3.x)*v1.w),
    //   ((v2.z*v3.y-v2.y*v3.z)*v1.x)-((v2.z*v3.x-v2.x*v3.z)*v1.y)+((v2.y*v3.x-v2.x*v3.y)*v1.z) ]

#if defined(_XM_NO_INTRINSICS_)

    XMVECTORF32 Result = { { {
            (((V2.vector4_f32[2] * V3.vector4_f32[3]) - (V2.vector4_f32[3] * V3.vector4_f32[2])) * V1.vector4_f32[1]) - (((V2.vector4_f32[1] * V3.vector4_f32[3]) - (V2.vector4_f32[3] * V3.vector4_f32[1])) * V1.vector4_f32[2]) + (((V2.vector4_f32[1] * V3.vector4_f32[2]) - (V2.vector4_f32[2] * V3.vector4_f32[1])) * V1.vector4_f32[3]),
            (((V2.vector4_f32[3] * V3.vector4_f32[2]) - (V2.vector4_f32[2] * V3.vector4_f32[3])) * V1.vector4_f32[0]) - (((V2.vector4_f32[3] * V3.vector4_f32[0]) - (V2.vector4_f32[0] * V3.vector4_f32[3])) * V1.vector4_f32[2]) + (((V2.vector4_f32[2] * V3.vector4_f32[0]) - (V2.vector4_f32[0] * V3.vector4_f32[2])) * V1.vector4_f32[3]),
            (((V2.vector4_f32[1] * V3.vector4_f32[3]) - (V2.vector4_f32[3] * V3.vector4_f32[1])) * V1.vector4_f32[0]) - (((V2.vector4_f32[0] * V3.vector4_f32[3]) - (V2.vector4_f32[3] * V3.vector4_f32[0])) * V1.vector4_f32[1]) + (((V2.vector4_f32[0] * V3.vector4_f32[1]) - (V2.vector4_f32[1] * V3.vector4_f32[0])) * V1.vector4_f32[3]),
            (((V2.vector4_f32[2] * V3.vector4_f32[1]) - (V2.vector4_f32[1] * V3.vector4_f32[2])) * V1.vector4_f32[0]) - (((V2.vector4_f32[2] * V3.vector4_f32[0]) - (V2.vector4_f32[0] * V3.vector4_f32[2])) * V1.vector4_f32[1]) + (((V2.vector4_f32[1] * V3.vector4_f32[0]) - (V2.vector4_f32[0] * V3.vector4_f32[1])) * V1.vector4_f32[2]),
        } } };
    return Result.v;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Selector used with vbsl_f32 to build the two-lane swizzles named below
    const uint32x2_t select = vget_low_u32(g_XMMaskX);

    // Term1: V2zwyz * V3wzwy
    const float32x2_t v2xy = vget_low_f32(V2);
    const float32x2_t v2zw = vget_high_f32(V2);
    const float32x2_t v2yx = vrev64_f32(v2xy);
    const float32x2_t v2wz = vrev64_f32(v2zw);
    const float32x2_t v2yz = vbsl_f32(select, v2yx, v2wz);

    const float32x2_t v3zw = vget_high_f32(V3);
    const float32x2_t v3wz = vrev64_f32(v3zw);
    const float32x2_t v3xy = vget_low_f32(V3);
    const float32x2_t v3wy = vbsl_f32(select, v3wz, v3xy);

    float32x4_t vTemp1 = vcombine_f32(v2zw, v2yz);
    float32x4_t vTemp2 = vcombine_f32(v3wz, v3wy);
    XMVECTOR vResult = vmulq_f32(vTemp1, vTemp2);

    // - V2wzwy * V3zwyz
    const float32x2_t v2wy = vbsl_f32(select, v2wz, v2xy);

    const float32x2_t v3yx = vrev64_f32(v3xy);
    const float32x2_t v3yz = vbsl_f32(select, v3yx, v3wz);

    vTemp1 = vcombine_f32(v2wz, v2wy);
    vTemp2 = vcombine_f32(v3zw, v3yz);
    vResult = vmlsq_f32(vResult, vTemp1, vTemp2);

    // term1 * V1yxxx
    const float32x2_t v1xy = vget_low_f32(V1);
    const float32x2_t v1yx = vrev64_f32(v1xy);

    vTemp1 = vcombine_f32(v1yx, vdup_lane_f32(v1yx, 1));
    vResult = vmulq_f32(vResult, vTemp1);

    // Term2: V2ywxz * V3wxwx
    const float32x2_t v2yw = vrev64_f32(v2wy);
    const float32x2_t v2xz = vbsl_f32(select, v2xy, v2wz);

    const float32x2_t v3wx = vbsl_f32(select, v3wz, v3yx);

    vTemp1 = vcombine_f32(v2yw, v2xz);
    vTemp2 = vcombine_f32(v3wx, v3wx);
    float32x4_t vTerm = vmulq_f32(vTemp1, vTemp2);

    // - V2wxwx * V3ywxz
    const float32x2_t v2wx = vbsl_f32(select, v2wz, v2yx);

    const float32x2_t v3yw = vrev64_f32(v3wy);
    const float32x2_t v3xz = vbsl_f32(select, v3xy, v3wz);

    vTemp1 = vcombine_f32(v2wx, v2wx);
    vTemp2 = vcombine_f32(v3yw, v3xz);
    vTerm = vmlsq_f32(vTerm, vTemp1, vTemp2);

    // vResult - term2 * V1zzyy
    const float32x2_t v1zw = vget_high_f32(V1);

    vTemp1 = vcombine_f32(vdup_lane_f32(v1zw, 0), vdup_lane_f32(v1yx, 0));
    vResult = vmlsq_f32(vResult, vTerm, vTemp1);

    // Term3: V2yzxy * V3zxyx
    const float32x2_t v3zx = vrev64_f32(v3xz);

    vTemp1 = vcombine_f32(v2yz, v2xy);
    vTemp2 = vcombine_f32(v3zx, v3yx);
    vTerm = vmulq_f32(vTemp1, vTemp2);

    // - V2zxyx * V3yzxy
    const float32x2_t v2zx = vrev64_f32(v2xz);

    vTemp1 = vcombine_f32(v2zx, v2yx);
    vTemp2 = vcombine_f32(v3yz, v3xy);
    vTerm = vmlsq_f32(vTerm, vTemp1, vTemp2);

    // vResult + term3 * V1wwwz
    const float32x2_t v1wz = vrev64_f32(v1zw);

    vTemp1 = vcombine_f32(vdup_lane_f32(v1wz, 0), v1wz);
    return vmlaq_f32(vResult, vTerm, vTemp1);
#elif defined(_XM_SSE_INTRINSICS_)
    // V2zwyz * V3wzwy
    XMVECTOR vResult = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 1, 3, 2));
    XMVECTOR vTemp3 = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 3, 2, 3));
    vResult = _mm_mul_ps(vResult, vTemp3);
    // - V2wzwy * V3zwyz
    XMVECTOR vTemp2 = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 3, 2, 3));
    // Note: this re-permutes the already swizzled V3 value in vTemp3
    vTemp3 = XM_PERMUTE_PS(vTemp3, _MM_SHUFFLE(1, 3, 0, 1));
    vResult = XM_FNMADD_PS(vTemp2, vTemp3, vResult);
    // term1 * V1yxxx
    XMVECTOR vTemp1 = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 1));
    vResult = _mm_mul_ps(vResult, vTemp1);

    // V2ywxz * V3wxwx
    vTemp2 = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 0, 3, 1));
    vTemp3 = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 3, 0, 3));
    vTemp3 = _mm_mul_ps(vTemp3, vTemp2);
    // - V2wxwx * V3ywxz
    vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(2, 1, 2, 1));
    vTemp1 = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 0, 3, 1));
    vTemp3 = XM_FNMADD_PS(vTemp2, vTemp1, vTemp3);
    // vResult - temp * V1zzyy
    vTemp1 = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 2, 2));
    vResult = XM_FNMADD_PS(vTemp1, vTemp3, vResult);

    // V2yzxy * V3zxyx
    vTemp2 = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 0, 2, 1));
    vTemp3 = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 1, 0, 2));
    vTemp3 = _mm_mul_ps(vTemp3, vTemp2);
    // - V2zxyx * V3yzxy
    vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(2, 0, 2, 1));
    vTemp1 = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 0, 2, 1));
    vTemp3 = XM_FNMADD_PS(vTemp1, vTemp2, vTemp3);
    // vResult + term * V1wwwz
    vTemp1 = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 3, 3, 3));
    vResult = XM_FMADD_PS(vTemp3, vTemp1, vResult);
    return vResult;
#endif
}

+//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4LengthSq(FXMVECTOR V) noexcept +{ + return XMVector4Dot(V, V); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLengthEst(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + + Result = XMVector4LengthSq(V); + Result = XMVectorReciprocalSqrtEst(Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Dot4 + float32x4_t vTemp = vmulq_f32(V, V); + float32x2_t v1 = vget_low_f32(vTemp); + float32x2_t v2 = vget_high_f32(vTemp); + v1 = vadd_f32(v1, v2); + v1 = vpadd_f32(v1, v1); + // Reciprocal sqrt (estimate) + v2 = vrsqrte_f32(v1); + return vcombine_f32(v2, v2); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(V, V, 0xff); + return _mm_rsqrt_ps(vTemp); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_rsqrt_ps(vLengthSq); + return vLengthSq; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x,y,z and w + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + // vTemp has z and w + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(3, 2, 3, 2)); + // x+z, y+w + vLengthSq = _mm_add_ps(vLengthSq, vTemp); + // x+z,x+z,x+z,y+w + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 0, 0, 0)); + // ??,??,y+w,y+w + vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0)); + // ??,??,x+z+y+w,?? 
+ vLengthSq = _mm_add_ps(vLengthSq, vTemp); + // Splat the length + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 2, 2, 2)); + // Get the reciprocal + vLengthSq = _mm_rsqrt_ps(vLengthSq); + return vLengthSq; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLength(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + + Result = XMVector4LengthSq(V); + Result = XMVectorReciprocalSqrt(Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Dot4 + float32x4_t vTemp = vmulq_f32(V, V); + float32x2_t v1 = vget_low_f32(vTemp); + float32x2_t v2 = vget_high_f32(vTemp); + v1 = vadd_f32(v1, v2); + v1 = vpadd_f32(v1, v1); + // Reciprocal sqrt + float32x2_t S0 = vrsqrte_f32(v1); + float32x2_t P0 = vmul_f32(v1, S0); + float32x2_t R0 = vrsqrts_f32(P0, S0); + float32x2_t S1 = vmul_f32(S0, R0); + float32x2_t P1 = vmul_f32(v1, S1); + float32x2_t R1 = vrsqrts_f32(P1, S1); + float32x2_t Result = vmul_f32(S1, R1); + return vcombine_f32(Result, Result); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(V, V, 0xff); + XMVECTOR vLengthSq = _mm_sqrt_ps(vTemp); + return _mm_div_ps(g_XMOne, vLengthSq); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_sqrt_ps(vLengthSq); + vLengthSq = _mm_div_ps(g_XMOne, vLengthSq); + return vLengthSq; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x,y,z and w + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + // vTemp has z and w + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(3, 2, 3, 2)); + // x+z, y+w + vLengthSq = _mm_add_ps(vLengthSq, vTemp); + // x+z,x+z,x+z,y+w + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 0, 0, 0)); + // ??,??,y+w,y+w + vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0)); + // 
??,??,x+z+y+w,?? + vLengthSq = _mm_add_ps(vLengthSq, vTemp); + // Splat the length + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 2, 2, 2)); + // Get the reciprocal + vLengthSq = _mm_sqrt_ps(vLengthSq); + // Accurate! + vLengthSq = _mm_div_ps(g_XMOne, vLengthSq); + return vLengthSq; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4LengthEst(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + + Result = XMVector4LengthSq(V); + Result = XMVectorSqrtEst(Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Dot4 + float32x4_t vTemp = vmulq_f32(V, V); + float32x2_t v1 = vget_low_f32(vTemp); + float32x2_t v2 = vget_high_f32(vTemp); + v1 = vadd_f32(v1, v2); + v1 = vpadd_f32(v1, v1); + const float32x2_t zero = vdup_n_f32(0); + uint32x2_t VEqualsZero = vceq_f32(v1, zero); + // Sqrt (estimate) + float32x2_t Result = vrsqrte_f32(v1); + Result = vmul_f32(v1, Result); + Result = vbsl_f32(VEqualsZero, zero, Result); + return vcombine_f32(Result, Result); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(V, V, 0xff); + return _mm_sqrt_ps(vTemp); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_sqrt_ps(vLengthSq); + return vLengthSq; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x,y,z and w + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + // vTemp has z and w + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(3, 2, 3, 2)); + // x+z, y+w + vLengthSq = _mm_add_ps(vLengthSq, vTemp); + // x+z,x+z,x+z,y+w + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 0, 0, 0)); + // ??,??,y+w,y+w + vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0)); + // ??,??,x+z+y+w,?? 
+ vLengthSq = _mm_add_ps(vLengthSq, vTemp); + // Splat the length + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 2, 2, 2)); + // Get the length + vLengthSq = _mm_sqrt_ps(vLengthSq); + return vLengthSq; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4Length(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + + Result = XMVector4LengthSq(V); + Result = XMVectorSqrt(Result); + + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Dot4 + float32x4_t vTemp = vmulq_f32(V, V); + float32x2_t v1 = vget_low_f32(vTemp); + float32x2_t v2 = vget_high_f32(vTemp); + v1 = vadd_f32(v1, v2); + v1 = vpadd_f32(v1, v1); + const float32x2_t zero = vdup_n_f32(0); + uint32x2_t VEqualsZero = vceq_f32(v1, zero); + // Sqrt + float32x2_t S0 = vrsqrte_f32(v1); + float32x2_t P0 = vmul_f32(v1, S0); + float32x2_t R0 = vrsqrts_f32(P0, S0); + float32x2_t S1 = vmul_f32(S0, R0); + float32x2_t P1 = vmul_f32(v1, S1); + float32x2_t R1 = vrsqrts_f32(P1, S1); + float32x2_t Result = vmul_f32(S1, R1); + Result = vmul_f32(v1, Result); + Result = vbsl_f32(VEqualsZero, zero, Result); + return vcombine_f32(Result, Result); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(V, V, 0xff); + return _mm_sqrt_ps(vTemp); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_sqrt_ps(vLengthSq); + return vLengthSq; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x,y,z and w + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + // vTemp has z and w + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(3, 2, 3, 2)); + // x+z, y+w + vLengthSq = _mm_add_ps(vLengthSq, vTemp); + // x+z,x+z,x+z,y+w + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 0, 0, 0)); + // ??,??,y+w,y+w + vTemp = _mm_shuffle_ps(vTemp, vLengthSq, 
_MM_SHUFFLE(3, 3, 0, 0)); + // ??,??,x+z+y+w,?? + vLengthSq = _mm_add_ps(vLengthSq, vTemp); + // Splat the length + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 2, 2, 2)); + // Get the length + vLengthSq = _mm_sqrt_ps(vLengthSq); + return vLengthSq; +#endif +} + +//------------------------------------------------------------------------------ +// XMVector4NormalizeEst uses a reciprocal estimate and +// returns QNaN on zero and infinite vectors. + +inline XMVECTOR XM_CALLCONV XMVector4NormalizeEst(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR Result; + Result = XMVector4ReciprocalLength(V); + Result = XMVectorMultiply(V, Result); + return Result; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Dot4 + float32x4_t vTemp = vmulq_f32(V, V); + float32x2_t v1 = vget_low_f32(vTemp); + float32x2_t v2 = vget_high_f32(vTemp); + v1 = vadd_f32(v1, v2); + v1 = vpadd_f32(v1, v1); + // Reciprocal sqrt (estimate) + v2 = vrsqrte_f32(v1); + // Normalize + return vmulq_f32(V, vcombine_f32(v2, v2)); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vTemp = _mm_dp_ps(V, V, 0xff); + XMVECTOR vResult = _mm_rsqrt_ps(vTemp); + return _mm_mul_ps(vResult, V); +#elif defined(_XM_SSE3_INTRINSICS_) + XMVECTOR vDot = _mm_mul_ps(V, V); + vDot = _mm_hadd_ps(vDot, vDot); + vDot = _mm_hadd_ps(vDot, vDot); + vDot = _mm_rsqrt_ps(vDot); + vDot = _mm_mul_ps(vDot, V); + return vDot; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x,y,z and w + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + // vTemp has z and w + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(3, 2, 3, 2)); + // x+z, y+w + vLengthSq = _mm_add_ps(vLengthSq, vTemp); + // x+z,x+z,x+z,y+w + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 0, 0, 0)); + // ??,??,y+w,y+w + vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0)); + // ??,??,x+z+y+w,?? 
+ vLengthSq = _mm_add_ps(vLengthSq, vTemp); + // Splat the length + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 2, 2, 2)); + // Get the reciprocal + XMVECTOR vResult = _mm_rsqrt_ps(vLengthSq); + // Reciprocal mul to perform the normalization + vResult = _mm_mul_ps(vResult, V); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4Normalize(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + float fLength; + XMVECTOR vResult; + + vResult = XMVector4Length(V); + fLength = vResult.vector4_f32[0]; + + // Prevent divide by zero + if (fLength > 0) + { + fLength = 1.0f / fLength; + } + + vResult.vector4_f32[0] = V.vector4_f32[0] * fLength; + vResult.vector4_f32[1] = V.vector4_f32[1] * fLength; + vResult.vector4_f32[2] = V.vector4_f32[2] * fLength; + vResult.vector4_f32[3] = V.vector4_f32[3] * fLength; + return vResult; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + // Dot4 + float32x4_t vTemp = vmulq_f32(V, V); + float32x2_t v1 = vget_low_f32(vTemp); + float32x2_t v2 = vget_high_f32(vTemp); + v1 = vadd_f32(v1, v2); + v1 = vpadd_f32(v1, v1); + uint32x2_t VEqualsZero = vceq_f32(v1, vdup_n_f32(0)); + uint32x2_t VEqualsInf = vceq_f32(v1, vget_low_f32(g_XMInfinity)); + // Reciprocal sqrt (2 iterations of Newton-Raphson) + float32x2_t S0 = vrsqrte_f32(v1); + float32x2_t P0 = vmul_f32(v1, S0); + float32x2_t R0 = vrsqrts_f32(P0, S0); + float32x2_t S1 = vmul_f32(S0, R0); + float32x2_t P1 = vmul_f32(v1, S1); + float32x2_t R1 = vrsqrts_f32(P1, S1); + v2 = vmul_f32(S1, R1); + // Normalize + XMVECTOR vResult = vmulq_f32(V, vcombine_f32(v2, v2)); + vResult = vbslq_f32(vcombine_u32(VEqualsZero, VEqualsZero), vdupq_n_f32(0), vResult); + return vbslq_f32(vcombine_u32(VEqualsInf, VEqualsInf), g_XMQNaN, vResult); +#elif defined(_XM_SSE4_INTRINSICS_) + XMVECTOR vLengthSq = _mm_dp_ps(V, V, 0xff); + // Prepare for the division + XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Create 
zero with a single instruction + XMVECTOR vZeroMask = _mm_setzero_ps(); + // Test for a divide by zero (Must be FP to detect -0.0) + vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the elements to zero + vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); + // Divide to perform the normalization + vResult = _mm_div_ps(V, vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult, vZeroMask); + // Select qnan or result based on infinite length + XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); + XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); + vResult = _mm_or_ps(vTemp1, vTemp2); + return vResult; +#elif defined(_XM_SSE3_INTRINSICS_) + // Perform the dot product on x,y,z and w + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + // Prepare for the division + XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Create zero with a single instruction + XMVECTOR vZeroMask = _mm_setzero_ps(); + // Test for a divide by zero (Must be FP to detect -0.0) + vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the elements to zero + vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); + // Divide to perform the normalization + vResult = _mm_div_ps(V, vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult, vZeroMask); + // Select qnan or result based on infinite length + XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); + XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); + vResult = _mm_or_ps(vTemp1, vTemp2); + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x,y,z and w + XMVECTOR vLengthSq = _mm_mul_ps(V, V); + // vTemp has z and w + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(3, 2, 3, 2)); + // x+z, y+w + vLengthSq = 
_mm_add_ps(vLengthSq, vTemp); + // x+z,x+z,x+z,y+w + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 0, 0, 0)); + // ??,??,y+w,y+w + vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0)); + // ??,??,x+z+y+w,?? + vLengthSq = _mm_add_ps(vLengthSq, vTemp); + // Splat the length + vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 2, 2, 2)); + // Prepare for the division + XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Create zero with a single instruction + XMVECTOR vZeroMask = _mm_setzero_ps(); + // Test for a divide by zero (Must be FP to detect -0.0) + vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the elements to zero + vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); + // Divide to perform the normalization + vResult = _mm_div_ps(V, vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult, vZeroMask); + // Select qnan or result based on infinite length + XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); + XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); + vResult = _mm_or_ps(vTemp1, vTemp2); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4ClampLength +( + FXMVECTOR V, + float LengthMin, + float LengthMax +) noexcept +{ + XMVECTOR ClampMax = XMVectorReplicate(LengthMax); + XMVECTOR ClampMin = XMVectorReplicate(LengthMin); + + return XMVector4ClampLengthV(V, ClampMin, ClampMax); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4ClampLengthV +( + FXMVECTOR V, + FXMVECTOR LengthMin, + FXMVECTOR LengthMax +) noexcept +{ + assert((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetW(LengthMin) == XMVectorGetX(LengthMin))); + assert((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && 
(XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetW(LengthMax) == XMVectorGetX(LengthMax))); + assert(XMVector4GreaterOrEqual(LengthMin, XMVectorZero())); + assert(XMVector4GreaterOrEqual(LengthMax, XMVectorZero())); + assert(XMVector4GreaterOrEqual(LengthMax, LengthMin)); + + XMVECTOR LengthSq = XMVector4LengthSq(V); + + const XMVECTOR Zero = XMVectorZero(); + + XMVECTOR RcpLength = XMVectorReciprocalSqrt(LengthSq); + + XMVECTOR InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v); + XMVECTOR ZeroLength = XMVectorEqual(LengthSq, Zero); + + XMVECTOR Normal = XMVectorMultiply(V, RcpLength); + + XMVECTOR Length = XMVectorMultiply(LengthSq, RcpLength); + + XMVECTOR Select = XMVectorEqualInt(InfiniteLength, ZeroLength); + Length = XMVectorSelect(LengthSq, Length, Select); + Normal = XMVectorSelect(LengthSq, Normal, Select); + + XMVECTOR ControlMax = XMVectorGreater(Length, LengthMax); + XMVECTOR ControlMin = XMVectorLess(Length, LengthMin); + + XMVECTOR ClampLength = XMVectorSelect(Length, LengthMax, ControlMax); + ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin); + + XMVECTOR Result = XMVectorMultiply(Normal, ClampLength); + + // Preserve the original vector (with no precision loss) if the length falls within the given range + XMVECTOR Control = XMVectorEqualInt(ControlMax, ControlMin); + Result = XMVectorSelect(Result, V, Control); + + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4Reflect +( + FXMVECTOR Incident, + FXMVECTOR Normal +) noexcept +{ + // Result = Incident - (2 * dot(Incident, Normal)) * Normal + + XMVECTOR Result = XMVector4Dot(Incident, Normal); + Result = XMVectorAdd(Result, Result); + Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident); + + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4Refract +( + 
FXMVECTOR Incident, + FXMVECTOR Normal, + float RefractionIndex +) noexcept +{ + XMVECTOR Index = XMVectorReplicate(RefractionIndex); + return XMVector4RefractV(Incident, Normal, Index); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4RefractV +( + FXMVECTOR Incident, + FXMVECTOR Normal, + FXMVECTOR RefractionIndex +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR IDotN; + XMVECTOR R; + const XMVECTOR Zero = XMVectorZero(); + + // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + + // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal)))) + + IDotN = XMVector4Dot(Incident, Normal); + + // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) + R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v); + R = XMVectorMultiply(R, RefractionIndex); + R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v); + + if (XMVector4LessOrEqual(R, Zero)) + { + // Total internal reflection + return Zero; + } + else + { + XMVECTOR Result; + + // R = RefractionIndex * IDotN + sqrt(R) + R = XMVectorSqrt(R); + R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R); + + // Result = RefractionIndex * Incident - Normal * R + Result = XMVectorMultiply(RefractionIndex, Incident); + Result = XMVectorNegativeMultiplySubtract(Normal, R, Result); + + return Result; + } + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTOR IDotN = XMVector4Dot(Incident, Normal); + + // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) + float32x4_t R = vmlsq_f32(g_XMOne, IDotN, IDotN); + R = vmulq_f32(R, RefractionIndex); + R = vmlsq_f32(g_XMOne, R, RefractionIndex); + + uint32x4_t isrzero = vcleq_f32(R, g_XMZero); + uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(isrzero)), vget_high_u8(vreinterpretq_u8_u32(isrzero))); + uint16x4x2_t vTemp2 = 
vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1])); + + float32x4_t vResult; + if (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == 0xFFFFFFFFU) + { + // Total internal reflection + vResult = g_XMZero; + } + else + { + // Sqrt(R) + float32x4_t S0 = vrsqrteq_f32(R); + float32x4_t P0 = vmulq_f32(R, S0); + float32x4_t R0 = vrsqrtsq_f32(P0, S0); + float32x4_t S1 = vmulq_f32(S0, R0); + float32x4_t P1 = vmulq_f32(R, S1); + float32x4_t R1 = vrsqrtsq_f32(P1, S1); + float32x4_t S2 = vmulq_f32(S1, R1); + R = vmulq_f32(R, S2); + // R = RefractionIndex * IDotN + sqrt(R) + R = vmlaq_f32(R, RefractionIndex, IDotN); + // Result = RefractionIndex * Incident - Normal * R + vResult = vmulq_f32(RefractionIndex, Incident); + vResult = vmlsq_f32(vResult, R, Normal); + } + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR IDotN = XMVector4Dot(Incident, Normal); + + // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) + XMVECTOR R = XM_FNMADD_PS(IDotN, IDotN, g_XMOne); + XMVECTOR R2 = _mm_mul_ps(RefractionIndex, RefractionIndex); + R = XM_FNMADD_PS(R, R2, g_XMOne); + + XMVECTOR vResult = _mm_cmple_ps(R, g_XMZero); + if (_mm_movemask_ps(vResult) == 0x0f) + { + // Total internal reflection + vResult = g_XMZero; + } + else + { + // R = RefractionIndex * IDotN + sqrt(R) + R = _mm_sqrt_ps(R); + R = XM_FMADD_PS(RefractionIndex, IDotN, R); + // Result = RefractionIndex * Incident - Normal * R + vResult = _mm_mul_ps(RefractionIndex, Incident); + vResult = XM_FNMADD_PS(R, Normal, vResult); + } + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4Orthogonal(FXMVECTOR V) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 Result = { { { + V.vector4_f32[2], + V.vector4_f32[3], + -V.vector4_f32[0], + -V.vector4_f32[1] + } } }; + return Result.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 Negate = { 
{ { 1.f, 1.f, -1.f, -1.f } } }; + + float32x4_t Result = vcombine_f32(vget_high_f32(V), vget_low_f32(V)); + return vmulq_f32(Result, Negate); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 FlipZW = { { { 1.0f, 1.0f, -1.0f, -1.0f } } }; + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 0, 3, 2)); + vResult = _mm_mul_ps(vResult, FlipZW); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4AngleBetweenNormalsEst +( + FXMVECTOR N1, + FXMVECTOR N2 +) noexcept +{ + XMVECTOR Result = XMVector4Dot(N1, N2); + Result = XMVectorClamp(Result, g_XMNegativeOne.v, g_XMOne.v); + Result = XMVectorACosEst(Result); + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4AngleBetweenNormals +( + FXMVECTOR N1, + FXMVECTOR N2 +) noexcept +{ + XMVECTOR Result = XMVector4Dot(N1, N2); + Result = XMVectorClamp(Result, g_XMNegativeOne.v, g_XMOne.v); + Result = XMVectorACos(Result); + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4AngleBetweenVectors +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ + XMVECTOR L1 = XMVector4ReciprocalLength(V1); + XMVECTOR L2 = XMVector4ReciprocalLength(V2); + + XMVECTOR Dot = XMVector4Dot(V1, V2); + + L1 = XMVectorMultiply(L1, L2); + + XMVECTOR CosAngle = XMVectorMultiply(Dot, L1); + CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne.v, g_XMOne.v); + + return XMVectorACos(CosAngle); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV XMVector4Transform +( + FXMVECTOR V, + FXMMATRIX M +) noexcept +{ +#if defined(_XM_NO_INTRINSICS_) + + float fX = (M.m[0][0] * V.vector4_f32[0]) + (M.m[1][0] * V.vector4_f32[1]) + (M.m[2][0] * V.vector4_f32[2]) + (M.m[3][0] * V.vector4_f32[3]); + float fY = (M.m[0][1] 
* V.vector4_f32[0]) + (M.m[1][1] * V.vector4_f32[1]) + (M.m[2][1] * V.vector4_f32[2]) + (M.m[3][1] * V.vector4_f32[3]); + float fZ = (M.m[0][2] * V.vector4_f32[0]) + (M.m[1][2] * V.vector4_f32[1]) + (M.m[2][2] * V.vector4_f32[2]) + (M.m[3][2] * V.vector4_f32[3]); + float fW = (M.m[0][3] * V.vector4_f32[0]) + (M.m[1][3] * V.vector4_f32[1]) + (M.m[2][3] * V.vector4_f32[2]) + (M.m[3][3] * V.vector4_f32[3]); + XMVECTORF32 vResult = { { { fX, fY, fZ, fW } } }; + return vResult.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x2_t VL = vget_low_f32(V); + XMVECTOR vResult = vmulq_lane_f32(M.r[0], VL, 0); // X + vResult = vmlaq_lane_f32(vResult, M.r[1], VL, 1); // Y + float32x2_t VH = vget_high_f32(V); + vResult = vmlaq_lane_f32(vResult, M.r[2], VH, 0); // Z + return vmlaq_lane_f32(vResult, M.r[3], VH, 1); // W +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); // W + vResult = _mm_mul_ps(vResult, M.r[3]); + XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); // Z + vResult = XM_FMADD_PS(vTemp, M.r[2], vResult); + vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); // Y + vResult = XM_FMADD_PS(vTemp, M.r[1], vResult); + vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); // X + vResult = XM_FMADD_PS(vTemp, M.r[0], vResult); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMFLOAT4* XM_CALLCONV XMVector4TransformStream +( + XMFLOAT4* pOutputStream, + size_t OutputStride, + const XMFLOAT4* pInputStream, + size_t InputStride, + size_t VectorCount, + FXMMATRIX M +) noexcept +{ + assert(pOutputStream != nullptr); + assert(pInputStream != nullptr); + + assert(InputStride >= sizeof(XMFLOAT4)); + _Analysis_assume_(InputStride >= sizeof(XMFLOAT4)); + + assert(OutputStride >= sizeof(XMFLOAT4)); + _Analysis_assume_(OutputStride >= sizeof(XMFLOAT4)); + +#if defined(_XM_NO_INTRINSICS_) + + auto pInputVector = 
reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row2 = M.r[2]; + const XMVECTOR row3 = M.r[3]; + + for (size_t i = 0; i < VectorCount; i++) + { + XMVECTOR V = XMLoadFloat4(reinterpret_cast(pInputVector)); + XMVECTOR W = XMVectorSplatW(V); + XMVECTOR Z = XMVectorSplatZ(V); + XMVECTOR Y = XMVectorSplatY(V); + XMVECTOR X = XMVectorSplatX(V); + + XMVECTOR Result = XMVectorMultiply(W, row3); + Result = XMVectorMultiplyAdd(Z, row2, Result); + Result = XMVectorMultiplyAdd(Y, row1, Result); + Result = XMVectorMultiplyAdd(X, row0, Result); + +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma prefast(disable : 26015, "PREfast noise: Esp:1307" ) +#endif + + XMStoreFloat4(reinterpret_cast(pOutputVector), Result); + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + + pInputVector += InputStride; + pOutputVector += OutputStride; + } + + return pOutputStream; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row2 = M.r[2]; + const XMVECTOR row3 = M.r[3]; + + size_t i = 0; + size_t four = VectorCount >> 2; + if (four > 0) + { + if ((InputStride == sizeof(XMFLOAT4)) && (OutputStride == sizeof(XMFLOAT4))) + { + for (size_t j = 0; j < four; ++j) + { + float32x4x4_t V = vld4q_f32(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT4) * 4; + + float32x2_t r = vget_low_f32(row0); + XMVECTOR vResult0 = vmulq_lane_f32(V.val[0], r, 0); // Ax + XMVECTOR vResult1 = vmulq_lane_f32(V.val[0], r, 1); // Bx + + XM_PREFETCH(pInputVector); + + r = vget_high_f32(row0); + XMVECTOR vResult2 = vmulq_lane_f32(V.val[0], r, 0); // Cx + XMVECTOR vResult3 = vmulq_lane_f32(V.val[0], r, 1); // Dx + + XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); + + r = vget_low_f32(row1); + vResult0 = 
vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey + vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); + + r = vget_high_f32(row1); + vResult2 = vmlaq_lane_f32(vResult2, V.val[1], r, 0); // Cx+Gy + vResult3 = vmlaq_lane_f32(vResult3, V.val[1], r, 1); // Dx+Hy + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); + + r = vget_low_f32(row2); + vResult0 = vmlaq_lane_f32(vResult0, V.val[2], r, 0); // Ax+Ey+Iz + vResult1 = vmlaq_lane_f32(vResult1, V.val[2], r, 1); // Bx+Fy+Jz + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 4)); + + r = vget_high_f32(row2); + vResult2 = vmlaq_lane_f32(vResult2, V.val[2], r, 0); // Cx+Gy+Kz + vResult3 = vmlaq_lane_f32(vResult3, V.val[2], r, 1); // Dx+Hy+Lz + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5)); + + r = vget_low_f32(row3); + vResult0 = vmlaq_lane_f32(vResult0, V.val[3], r, 0); // Ax+Ey+Iz+Mw + vResult1 = vmlaq_lane_f32(vResult1, V.val[3], r, 1); // Bx+Fy+Jz+Nw + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 6)); + + r = vget_high_f32(row3); + vResult2 = vmlaq_lane_f32(vResult2, V.val[3], r, 0); // Cx+Gy+Kz+Ow + vResult3 = vmlaq_lane_f32(vResult3, V.val[3], r, 1); // Dx+Hy+Lz+Pw + + XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 7)); + + V.val[0] = vResult0; + V.val[1] = vResult1; + V.val[2] = vResult2; + V.val[3] = vResult3; + + vst4q_f32(reinterpret_cast(pOutputVector), V); + pOutputVector += sizeof(XMFLOAT4) * 4; + + i += 4; + } + } + } + + for (; i < VectorCount; i++) + { + XMVECTOR V = vld1q_f32(reinterpret_cast(pInputVector)); + pInputVector += InputStride; + + float32x2_t VL = vget_low_f32(V); + XMVECTOR vResult = vmulq_lane_f32(row0, VL, 0); // X + vResult = vmlaq_lane_f32(vResult, row1, VL, 1); // Y + float32x2_t VH = vget_high_f32(V); + vResult = vmlaq_lane_f32(vResult, row2, VH, 0); // Z + vResult = vmlaq_lane_f32(vResult, row3, VH, 1); // W + + vst1q_f32(reinterpret_cast(pOutputVector), vResult); + pOutputVector += 
OutputStride; + } + + return pOutputStream; +#elif defined(_XM_AVX2_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + size_t i = 0; + size_t two = VectorCount >> 1; + if (two > 0) + { + __m256 row0 = _mm256_broadcast_ps(&M.r[0]); + __m256 row1 = _mm256_broadcast_ps(&M.r[1]); + __m256 row2 = _mm256_broadcast_ps(&M.r[2]); + __m256 row3 = _mm256_broadcast_ps(&M.r[3]); + + if (InputStride == sizeof(XMFLOAT4)) + { + if (OutputStride == sizeof(XMFLOAT4)) + { + if (!(reinterpret_cast(pOutputStream) & 0x1F)) + { + // Packed input, aligned & packed output + for (size_t j = 0; j < two; ++j) + { + __m256 VV = _mm256_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT4) * 2; + + __m256 vTempX = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); + __m256 vTempY = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); + __m256 vTempZ = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); + __m256 vTempW = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); + + vTempX = _mm256_mul_ps(vTempX, row0); + vTempY = _mm256_mul_ps(vTempY, row1); + vTempZ = _mm256_fmadd_ps(vTempZ, row2, vTempX); + vTempW = _mm256_fmadd_ps(vTempW, row3, vTempY); + vTempX = _mm256_add_ps(vTempZ, vTempW); + + XM256_STREAM_PS(reinterpret_cast(pOutputVector), vTempX); + pOutputVector += sizeof(XMFLOAT4) * 2; + + i += 2; + } + } + else + { + // Packed input, packed output + for (size_t j = 0; j < two; ++j) + { + __m256 VV = _mm256_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT4) * 2; + + __m256 vTempX = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); + __m256 vTempY = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); + __m256 vTempZ = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); + __m256 vTempW = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); + + vTempX = _mm256_mul_ps(vTempX, row0); + vTempY = _mm256_mul_ps(vTempY, row1); + vTempZ = _mm256_fmadd_ps(vTempZ, row2, 
vTempX); + vTempW = _mm256_fmadd_ps(vTempW, row3, vTempY); + vTempX = _mm256_add_ps(vTempZ, vTempW); + + _mm256_storeu_ps(reinterpret_cast(pOutputVector), vTempX); + pOutputVector += sizeof(XMFLOAT4) * 2; + + i += 2; + } + } + } + else + { + // Packed input, unpacked output + for (size_t j = 0; j < two; ++j) + { + __m256 VV = _mm256_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += sizeof(XMFLOAT4) * 2; + + __m256 vTempX = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); + __m256 vTempY = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); + __m256 vTempZ = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); + __m256 vTempW = _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); + + vTempX = _mm256_mul_ps(vTempX, row0); + vTempY = _mm256_mul_ps(vTempY, row1); + vTempZ = _mm256_fmadd_ps(vTempZ, row2, vTempX); + vTempW = _mm256_fmadd_ps(vTempW, row3, vTempY); + vTempX = _mm256_add_ps(vTempZ, vTempW); + + _mm_storeu_ps(reinterpret_cast(pOutputVector), _mm256_castps256_ps128(vTempX)); + pOutputVector += OutputStride; + + _mm_storeu_ps(reinterpret_cast(pOutputVector), _mm256_extractf128_ps(vTempX, 1)); + pOutputVector += OutputStride; + i += 2; + } + } + } + } + + if (i < VectorCount) + { + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row2 = M.r[2]; + const XMVECTOR row3 = M.r[3]; + + for (; i < VectorCount; i++) + { + __m128 V = _mm_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += InputStride; + + XMVECTOR vTempX = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + XMVECTOR vTempY = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR vTempZ = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR vTempW = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + + vTempX = _mm_mul_ps(vTempX, row0); + vTempY = _mm_mul_ps(vTempY, row1); + vTempZ = XM_FMADD_PS(vTempZ, row2, vTempX); + vTempW = XM_FMADD_PS(vTempW, row3, vTempY); + vTempX = _mm_add_ps(vTempZ, vTempW); + + _mm_storeu_ps(reinterpret_cast(pOutputVector), vTempX); + 
pOutputVector += OutputStride; + } + } + + XM_SFENCE(); + + return pOutputStream; +#elif defined(_XM_SSE_INTRINSICS_) + auto pInputVector = reinterpret_cast(pInputStream); + auto pOutputVector = reinterpret_cast(pOutputStream); + + const XMVECTOR row0 = M.r[0]; + const XMVECTOR row1 = M.r[1]; + const XMVECTOR row2 = M.r[2]; + const XMVECTOR row3 = M.r[3]; + + if (!(reinterpret_cast(pOutputStream) & 0xF) && !(OutputStride & 0xF)) + { + if (!(reinterpret_cast(pInputStream) & 0xF) && !(InputStride & 0xF)) + { + // Aligned input, aligned output + for (size_t i = 0; i < VectorCount; i++) + { + __m128 V = _mm_load_ps(reinterpret_cast(pInputVector)); + pInputVector += InputStride; + + XMVECTOR vTempX = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + XMVECTOR vTempY = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR vTempZ = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR vTempW = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + + vTempX = _mm_mul_ps(vTempX, row0); + vTempY = _mm_mul_ps(vTempY, row1); + vTempZ = XM_FMADD_PS(vTempZ, row2, vTempX); + vTempW = XM_FMADD_PS(vTempW, row3, vTempY); + vTempX = _mm_add_ps(vTempZ, vTempW); + + XM_STREAM_PS(reinterpret_cast(pOutputVector), vTempX); + pOutputVector += OutputStride; + } + } + else + { + // Unaligned input, aligned output + for (size_t i = 0; i < VectorCount; i++) + { + __m128 V = _mm_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += InputStride; + + XMVECTOR vTempX = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + XMVECTOR vTempY = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR vTempZ = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR vTempW = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + + vTempX = _mm_mul_ps(vTempX, row0); + vTempY = _mm_mul_ps(vTempY, row1); + vTempZ = XM_FMADD_PS(vTempZ, row2, vTempX); + vTempW = XM_FMADD_PS(vTempW, row3, vTempY); + vTempX = _mm_add_ps(vTempZ, vTempW); + + XM_STREAM_PS(reinterpret_cast(pOutputVector), vTempX); + pOutputVector += OutputStride; + } + } + } + 
else + { + if (!(reinterpret_cast(pInputStream) & 0xF) && !(InputStride & 0xF)) + { + // Aligned input, unaligned output + for (size_t i = 0; i < VectorCount; i++) + { + __m128 V = _mm_load_ps(reinterpret_cast(pInputVector)); + pInputVector += InputStride; + + XMVECTOR vTempX = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + XMVECTOR vTempY = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR vTempZ = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR vTempW = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + + vTempX = _mm_mul_ps(vTempX, row0); + vTempY = _mm_mul_ps(vTempY, row1); + vTempZ = XM_FMADD_PS(vTempZ, row2, vTempX); + vTempW = XM_FMADD_PS(vTempW, row3, vTempY); + vTempX = _mm_add_ps(vTempZ, vTempW); + + _mm_storeu_ps(reinterpret_cast(pOutputVector), vTempX); + pOutputVector += OutputStride; + } + } + else + { + // Unaligned input, unaligned output + for (size_t i = 0; i < VectorCount; i++) + { + __m128 V = _mm_loadu_ps(reinterpret_cast(pInputVector)); + pInputVector += InputStride; + + XMVECTOR vTempX = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); + XMVECTOR vTempY = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + XMVECTOR vTempZ = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + XMVECTOR vTempW = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + + vTempX = _mm_mul_ps(vTempX, row0); + vTempY = _mm_mul_ps(vTempY, row1); + vTempZ = XM_FMADD_PS(vTempZ, row2, vTempX); + vTempW = XM_FMADD_PS(vTempW, row3, vTempY); + vTempX = _mm_add_ps(vTempZ, vTempW); + + _mm_storeu_ps(reinterpret_cast(pOutputVector), vTempX); + pOutputVector += OutputStride; + } + } + } + + XM_SFENCE(); + + return pOutputStream; +#endif +} + +/**************************************************************************** + * + * XMVECTOR operators + * + ****************************************************************************/ + +#ifndef _XM_NO_XMVECTOR_OVERLOADS_ + + //------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV operator+ (FXMVECTOR V) 
noexcept +{ + return V; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV operator- (FXMVECTOR V) noexcept +{ + return XMVectorNegate(V); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR& XM_CALLCONV operator+= +( + XMVECTOR& V1, + FXMVECTOR V2 +) noexcept +{ + V1 = XMVectorAdd(V1, V2); + return V1; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR& XM_CALLCONV operator-= +( + XMVECTOR& V1, + FXMVECTOR V2 +) noexcept +{ + V1 = XMVectorSubtract(V1, V2); + return V1; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR& XM_CALLCONV operator*= +( + XMVECTOR& V1, + FXMVECTOR V2 +) noexcept +{ + V1 = XMVectorMultiply(V1, V2); + return V1; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR& XM_CALLCONV operator/= +( + XMVECTOR& V1, + FXMVECTOR V2 +) noexcept +{ + V1 = XMVectorDivide(V1, V2); + return V1; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR& operator*= +( + XMVECTOR& V, + const float S +) noexcept +{ + V = XMVectorScale(V, S); + return V; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR& operator/= +( + XMVECTOR& V, + const float S +) noexcept +{ + XMVECTOR vS = XMVectorReplicate(S); + V = XMVectorDivide(V, vS); + return V; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV operator+ +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ + return XMVectorAdd(V1, V2); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV operator- +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ + return XMVectorSubtract(V1, V2); +} + 
+//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV operator* +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ + return XMVectorMultiply(V1, V2); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV operator/ +( + FXMVECTOR V1, + FXMVECTOR V2 +) noexcept +{ + return XMVectorDivide(V1, V2); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV operator* +( + FXMVECTOR V, + const float S +) noexcept +{ + return XMVectorScale(V, S); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV operator/ +( + FXMVECTOR V, + const float S +) noexcept +{ + XMVECTOR vS = XMVectorReplicate(S); + return XMVectorDivide(V, vS); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XM_CALLCONV operator* +( + float S, + FXMVECTOR V +) noexcept +{ + return XMVectorScale(V, S); +} + +#endif /* !_XM_NO_XMVECTOR_OVERLOADS_ */ + +#if defined(_XM_NO_INTRINSICS_) +#undef XMISNAN +#undef XMISINF +#endif + +#if defined(_XM_SSE_INTRINSICS_) +#undef XM3UNPACK3INTO4 +#undef XM3PACK4INTO3 +#endif + diff --git a/Sdk/External/DirectXMath/Inc/DirectXPackedVector.h b/Sdk/External/DirectXMath/Inc/DirectXPackedVector.h new file mode 100644 index 0000000..f76a05e --- /dev/null +++ b/Sdk/External/DirectXMath/Inc/DirectXPackedVector.h @@ -0,0 +1,1216 @@ +//------------------------------------------------------------------------------------- +// DirectXPackedVector.h -- SIMD C++ Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +#include "DirectXMath.h" + +namespace DirectX +{ + + namespace PackedVector + { + +#pragma warning(push) +#pragma warning(disable:4201 4365 4324 4996) + // C4201: nonstandard extension used + // C4365: Off by default noise + // C4324: alignment padding warnings + // C4996: deprecation warnings + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wgnu-anonymous-struct" +#pragma clang diagnostic ignored "-Wnested-anon-types" +#endif + + //------------------------------------------------------------------------------ + // ARGB Color; 8-8-8-8 bit unsigned normalized integer components packed into + // a 32 bit integer. The normalized color is packed into 32 bits using 8 bit + // unsigned, normalized integers for the alpha, red, green, and blue components. + // The alpha component is stored in the most significant bits and the blue + // component in the least significant bits (A8R8G8B8): + // [32] aaaaaaaa rrrrrrrr gggggggg bbbbbbbb [0] + struct XMCOLOR + { + union + { + struct + { + uint8_t b; // Blue: 0/255 to 255/255 + uint8_t g; // Green: 0/255 to 255/255 + uint8_t r; // Red: 0/255 to 255/255 + uint8_t a; // Alpha: 0/255 to 255/255 + }; + uint32_t c; + }; + + XMCOLOR() = default; + + XMCOLOR(const XMCOLOR&) = default; + XMCOLOR& operator=(const XMCOLOR&) = default; + + XMCOLOR(XMCOLOR&&) = default; + XMCOLOR& operator=(XMCOLOR&&) = default; + + constexpr XMCOLOR(uint32_t Color) noexcept : c(Color) {} + XMCOLOR(float _r, float _g, float _b, float _a) noexcept; + explicit XMCOLOR(_In_reads_(4) const float* pArray) noexcept; + + operator uint32_t () const noexcept { return c; } + + XMCOLOR& operator= (const uint32_t Color) noexcept { c = Color; return *this; } + }; + + //------------------------------------------------------------------------------ + // 16 bit floating point 
number consisting of a sign bit, a 5 bit biased + // exponent, and a 10 bit mantissa + using HALF = uint16_t; + + //------------------------------------------------------------------------------ + // 2D Vector; 16 bit floating point components + struct XMHALF2 + { + union + { + struct + { + HALF x; + HALF y; + }; + uint32_t v; + }; + + XMHALF2() = default; + + XMHALF2(const XMHALF2&) = default; + XMHALF2& operator=(const XMHALF2&) = default; + + XMHALF2(XMHALF2&&) = default; + XMHALF2& operator=(XMHALF2&&) = default; + + explicit constexpr XMHALF2(uint32_t Packed) noexcept : v(Packed) {} + constexpr XMHALF2(HALF _x, HALF _y) noexcept : x(_x), y(_y) {} + explicit XMHALF2(_In_reads_(2) const HALF* pArray) noexcept : x(pArray[0]), y(pArray[1]) {} + XMHALF2(float _x, float _y) noexcept; + explicit XMHALF2(_In_reads_(2) const float* pArray) noexcept; + + XMHALF2& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + //------------------------------------------------------------------------------ + // 2D Vector; 16 bit signed normalized integer components + struct XMSHORTN2 + { + union + { + struct + { + int16_t x; + int16_t y; + }; + uint32_t v; + }; + + XMSHORTN2() = default; + + XMSHORTN2(const XMSHORTN2&) = default; + XMSHORTN2& operator=(const XMSHORTN2&) = default; + + XMSHORTN2(XMSHORTN2&&) = default; + XMSHORTN2& operator=(XMSHORTN2&&) = default; + + explicit constexpr XMSHORTN2(uint32_t Packed) noexcept : v(Packed) {} + constexpr XMSHORTN2(int16_t _x, int16_t _y) noexcept : x(_x), y(_y) {} + explicit XMSHORTN2(_In_reads_(2) const int16_t* pArray) noexcept : x(pArray[0]), y(pArray[1]) {} + XMSHORTN2(float _x, float _y) noexcept; + explicit XMSHORTN2(_In_reads_(2) const float* pArray) noexcept; + + XMSHORTN2& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + // 2D Vector; 16 bit signed integer components + struct XMSHORT2 + { + union + { + struct + { + int16_t x; + int16_t y; + }; + uint32_t v; + }; + + XMSHORT2() = 
default; + + XMSHORT2(const XMSHORT2&) = default; + XMSHORT2& operator=(const XMSHORT2&) = default; + + XMSHORT2(XMSHORT2&&) = default; + XMSHORT2& operator=(XMSHORT2&&) = default; + + explicit constexpr XMSHORT2(uint32_t Packed) noexcept : v(Packed) {} + constexpr XMSHORT2(int16_t _x, int16_t _y) noexcept : x(_x), y(_y) {} + explicit XMSHORT2(_In_reads_(2) const int16_t* pArray) noexcept : x(pArray[0]), y(pArray[1]) {} + XMSHORT2(float _x, float _y) noexcept; + explicit XMSHORT2(_In_reads_(2) const float* pArray) noexcept; + + XMSHORT2& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + // 2D Vector; 16 bit unsigned normalized integer components + struct XMUSHORTN2 + { + union + { + struct + { + uint16_t x; + uint16_t y; + }; + uint32_t v; + }; + + XMUSHORTN2() = default; + + XMUSHORTN2(const XMUSHORTN2&) = default; + XMUSHORTN2& operator=(const XMUSHORTN2&) = default; + + XMUSHORTN2(XMUSHORTN2&&) = default; + XMUSHORTN2& operator=(XMUSHORTN2&&) = default; + + explicit constexpr XMUSHORTN2(uint32_t Packed) noexcept : v(Packed) {} + constexpr XMUSHORTN2(uint16_t _x, uint16_t _y) noexcept : x(_x), y(_y) {} + explicit XMUSHORTN2(_In_reads_(2) const uint16_t* pArray) noexcept : x(pArray[0]), y(pArray[1]) {} + XMUSHORTN2(float _x, float _y) noexcept; + explicit XMUSHORTN2(_In_reads_(2) const float* pArray) noexcept; + + XMUSHORTN2& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + // 2D Vector; 16 bit unsigned integer components + struct XMUSHORT2 + { + union + { + struct + { + uint16_t x; + uint16_t y; + }; + uint32_t v; + }; + + XMUSHORT2() = default; + + XMUSHORT2(const XMUSHORT2&) = default; + XMUSHORT2& operator=(const XMUSHORT2&) = default; + + XMUSHORT2(XMUSHORT2&&) = default; + XMUSHORT2& operator=(XMUSHORT2&&) = default; + + explicit constexpr XMUSHORT2(uint32_t Packed) noexcept : v(Packed) {} + constexpr XMUSHORT2(uint16_t _x, uint16_t _y) noexcept : x(_x), y(_y) {} + explicit XMUSHORT2(_In_reads_(2) const 
uint16_t* pArray) noexcept : x(pArray[0]), y(pArray[1]) {} + XMUSHORT2(float _x, float _y) noexcept; + explicit XMUSHORT2(_In_reads_(2) const float* pArray) noexcept; + + XMUSHORT2& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + //------------------------------------------------------------------------------ + // 2D Vector; 8 bit signed normalized integer components + struct XMBYTEN2 + { + union + { + struct + { + int8_t x; + int8_t y; + }; + uint16_t v; + }; + + XMBYTEN2() = default; + + XMBYTEN2(const XMBYTEN2&) = default; + XMBYTEN2& operator=(const XMBYTEN2&) = default; + + XMBYTEN2(XMBYTEN2&&) = default; + XMBYTEN2& operator=(XMBYTEN2&&) = default; + + explicit constexpr XMBYTEN2(uint16_t Packed) noexcept : v(Packed) {} + constexpr XMBYTEN2(int8_t _x, int8_t _y) noexcept : x(_x), y(_y) {} + explicit XMBYTEN2(_In_reads_(2) const int8_t* pArray) noexcept : x(pArray[0]), y(pArray[1]) {} + XMBYTEN2(float _x, float _y) noexcept; + explicit XMBYTEN2(_In_reads_(2) const float* pArray) noexcept; + + XMBYTEN2& operator= (uint16_t Packed) noexcept { v = Packed; return *this; } + }; + + // 2D Vector; 8 bit signed integer components + struct XMBYTE2 + { + union + { + struct + { + int8_t x; + int8_t y; + }; + uint16_t v; + }; + + XMBYTE2() = default; + + XMBYTE2(const XMBYTE2&) = default; + XMBYTE2& operator=(const XMBYTE2&) = default; + + XMBYTE2(XMBYTE2&&) = default; + XMBYTE2& operator=(XMBYTE2&&) = default; + + explicit constexpr XMBYTE2(uint16_t Packed) noexcept : v(Packed) {} + constexpr XMBYTE2(int8_t _x, int8_t _y) noexcept : x(_x), y(_y) {} + explicit XMBYTE2(_In_reads_(2) const int8_t* pArray) noexcept : x(pArray[0]), y(pArray[1]) {} + XMBYTE2(float _x, float _y) noexcept; + explicit XMBYTE2(_In_reads_(2) const float* pArray) noexcept; + + XMBYTE2& operator= (uint16_t Packed) noexcept { v = Packed; return *this; } + }; + + // 2D Vector; 8 bit unsigned normalized integer components + struct XMUBYTEN2 + { + union + { + struct + { + 
uint8_t x; + uint8_t y; + }; + uint16_t v; + }; + + XMUBYTEN2() = default; + + XMUBYTEN2(const XMUBYTEN2&) = default; + XMUBYTEN2& operator=(const XMUBYTEN2&) = default; + + XMUBYTEN2(XMUBYTEN2&&) = default; + XMUBYTEN2& operator=(XMUBYTEN2&&) = default; + + explicit constexpr XMUBYTEN2(uint16_t Packed) noexcept : v(Packed) {} + constexpr XMUBYTEN2(uint8_t _x, uint8_t _y) noexcept : x(_x), y(_y) {} + explicit XMUBYTEN2(_In_reads_(2) const uint8_t* pArray) noexcept : x(pArray[0]), y(pArray[1]) {} + XMUBYTEN2(float _x, float _y) noexcept; + explicit XMUBYTEN2(_In_reads_(2) const float* pArray) noexcept; + + XMUBYTEN2& operator= (uint16_t Packed) noexcept { v = Packed; return *this; } + }; + + // 2D Vector; 8 bit unsigned integer components + struct XMUBYTE2 + { + union + { + struct + { + uint8_t x; + uint8_t y; + }; + uint16_t v; + }; + + XMUBYTE2() = default; + + XMUBYTE2(const XMUBYTE2&) = default; + XMUBYTE2& operator=(const XMUBYTE2&) = default; + + XMUBYTE2(XMUBYTE2&&) = default; + XMUBYTE2& operator=(XMUBYTE2&&) = default; + + explicit constexpr XMUBYTE2(uint16_t Packed) noexcept : v(Packed) {} + constexpr XMUBYTE2(uint8_t _x, uint8_t _y) noexcept : x(_x), y(_y) {} + explicit XMUBYTE2(_In_reads_(2) const uint8_t* pArray) noexcept : x(pArray[0]), y(pArray[1]) {} + XMUBYTE2(float _x, float _y) noexcept; + explicit XMUBYTE2(_In_reads_(2) const float* pArray) noexcept; + + XMUBYTE2& operator= (uint16_t Packed) noexcept { v = Packed; return *this; } + }; + + //------------------------------------------------------------------------------ + // 3D vector: 5/6/5 unsigned integer components + struct XMU565 + { + union + { + struct + { + uint16_t x : 5; // 0 to 31 + uint16_t y : 6; // 0 to 63 + uint16_t z : 5; // 0 to 31 + }; + uint16_t v; + }; + + XMU565() = default; + + XMU565(const XMU565&) = default; + XMU565& operator=(const XMU565&) = default; + + XMU565(XMU565&&) = default; + XMU565& operator=(XMU565&&) = default; + + explicit constexpr XMU565(uint16_t Packed) 
noexcept : v(Packed) {} + constexpr XMU565(uint8_t _x, uint8_t _y, uint8_t _z) noexcept : x(_x), y(_y), z(_z) {} + explicit XMU565(_In_reads_(3) const uint8_t* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]) {} + XMU565(float _x, float _y, float _z) noexcept; + explicit XMU565(_In_reads_(3) const float* pArray) noexcept; + + operator uint16_t () const noexcept { return v; } + + XMU565& operator= (uint16_t Packed) noexcept { v = Packed; return *this; } + }; + + //------------------------------------------------------------------------------ + // 3D vector: 11/11/10 floating-point components + // The 3D vector is packed into 32 bits as follows: a 5-bit biased exponent + // and 6-bit mantissa for x component, a 5-bit biased exponent and + // 6-bit mantissa for y component, a 5-bit biased exponent and a 5-bit + // mantissa for z. The z component is stored in the most significant bits + // and the x component in the least significant bits. No sign bits so + // all partial-precision numbers are positive. 
+ // (Z10Y11X11): [32] ZZZZZzzz zzzYYYYY yyyyyyXX XXXxxxxx [0] + struct XMFLOAT3PK + { + union + { + struct + { + uint32_t xm : 6; // x-mantissa + uint32_t xe : 5; // x-exponent + uint32_t ym : 6; // y-mantissa + uint32_t ye : 5; // y-exponent + uint32_t zm : 5; // z-mantissa + uint32_t ze : 5; // z-exponent + }; + uint32_t v; + }; + + XMFLOAT3PK() = default; + + XMFLOAT3PK(const XMFLOAT3PK&) = default; + XMFLOAT3PK& operator=(const XMFLOAT3PK&) = default; + + XMFLOAT3PK(XMFLOAT3PK&&) = default; + XMFLOAT3PK& operator=(XMFLOAT3PK&&) = default; + + explicit constexpr XMFLOAT3PK(uint32_t Packed) noexcept : v(Packed) {} + XMFLOAT3PK(float _x, float _y, float _z) noexcept; + explicit XMFLOAT3PK(_In_reads_(3) const float* pArray) noexcept; + + operator uint32_t () const noexcept { return v; } + + XMFLOAT3PK& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + //------------------------------------------------------------------------------ + // 3D vector: 9/9/9 floating-point components with shared 5-bit exponent + // The 3D vector is packed into 32 bits as follows: a 5-bit biased exponent + // with 9-bit mantissa for the x, y, and z component. The shared exponent + // is stored in the most significant bits and the x component mantissa is in + // the least significant bits. No sign bits so all partial-precision numbers + // are positive. 
+ // (E5Z9Y9X9): [32] EEEEEzzz zzzzzzyy yyyyyyyx xxxxxxxx [0] + struct XMFLOAT3SE + { + union + { + struct + { + uint32_t xm : 9; // x-mantissa + uint32_t ym : 9; // y-mantissa + uint32_t zm : 9; // z-mantissa + uint32_t e : 5; // shared exponent + }; + uint32_t v; + }; + + XMFLOAT3SE() = default; + + XMFLOAT3SE(const XMFLOAT3SE&) = default; + XMFLOAT3SE& operator=(const XMFLOAT3SE&) = default; + + XMFLOAT3SE(XMFLOAT3SE&&) = default; + XMFLOAT3SE& operator=(XMFLOAT3SE&&) = default; + + explicit constexpr XMFLOAT3SE(uint32_t Packed) noexcept : v(Packed) {} + XMFLOAT3SE(float _x, float _y, float _z) noexcept; + explicit XMFLOAT3SE(_In_reads_(3) const float* pArray) noexcept; + + operator uint32_t () const noexcept { return v; } + + XMFLOAT3SE& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + //------------------------------------------------------------------------------ + // 4D Vector; 16 bit floating point components + struct XMHALF4 + { + union + { + struct + { + HALF x; + HALF y; + HALF z; + HALF w; + }; + uint64_t v; + }; + + XMHALF4() = default; + + XMHALF4(const XMHALF4&) = default; + XMHALF4& operator=(const XMHALF4&) = default; + + XMHALF4(XMHALF4&&) = default; + XMHALF4& operator=(XMHALF4&&) = default; + + explicit constexpr XMHALF4(uint64_t Packed) noexcept : v(Packed) {} + constexpr XMHALF4(HALF _x, HALF _y, HALF _z, HALF _w) noexcept : x(_x), y(_y), z(_z), w(_w) {} + explicit XMHALF4(_In_reads_(4) const HALF* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} + XMHALF4(float _x, float _y, float _z, float _w) noexcept; + explicit XMHALF4(_In_reads_(4) const float* pArray) noexcept; + + XMHALF4& operator= (uint64_t Packed) noexcept { v = Packed; return *this; } + }; + + //------------------------------------------------------------------------------ + // 4D Vector; 16 bit signed normalized integer components + struct XMSHORTN4 + { + union + { + struct + { + int16_t x; + int16_t y; + int16_t z; + int16_t 
w; + }; + uint64_t v; + }; + + XMSHORTN4() = default; + + XMSHORTN4(const XMSHORTN4&) = default; + XMSHORTN4& operator=(const XMSHORTN4&) = default; + + XMSHORTN4(XMSHORTN4&&) = default; + XMSHORTN4& operator=(XMSHORTN4&&) = default; + + explicit constexpr XMSHORTN4(uint64_t Packed) noexcept : v(Packed) {} + constexpr XMSHORTN4(int16_t _x, int16_t _y, int16_t _z, int16_t _w) noexcept : x(_x), y(_y), z(_z), w(_w) {} + explicit XMSHORTN4(_In_reads_(4) const int16_t* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} + XMSHORTN4(float _x, float _y, float _z, float _w) noexcept; + explicit XMSHORTN4(_In_reads_(4) const float* pArray) noexcept; + + XMSHORTN4& operator= (uint64_t Packed) noexcept { v = Packed; return *this; } + }; + + // 4D Vector; 16 bit signed integer components + struct XMSHORT4 + { + union + { + struct + { + int16_t x; + int16_t y; + int16_t z; + int16_t w; + }; + uint64_t v; + }; + + XMSHORT4() = default; + + XMSHORT4(const XMSHORT4&) = default; + XMSHORT4& operator=(const XMSHORT4&) = default; + + XMSHORT4(XMSHORT4&&) = default; + XMSHORT4& operator=(XMSHORT4&&) = default; + + explicit constexpr XMSHORT4(uint64_t Packed) noexcept : v(Packed) {} + constexpr XMSHORT4(int16_t _x, int16_t _y, int16_t _z, int16_t _w) noexcept : x(_x), y(_y), z(_z), w(_w) {} + explicit XMSHORT4(_In_reads_(4) const int16_t* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} + XMSHORT4(float _x, float _y, float _z, float _w) noexcept; + explicit XMSHORT4(_In_reads_(4) const float* pArray) noexcept; + + XMSHORT4& operator= (uint64_t Packed) noexcept { v = Packed; return *this; } + }; + + // 4D Vector; 16 bit unsigned normalized integer components + struct XMUSHORTN4 + { + union + { + struct + { + uint16_t x; + uint16_t y; + uint16_t z; + uint16_t w; + }; + uint64_t v; + }; + + XMUSHORTN4() = default; + + XMUSHORTN4(const XMUSHORTN4&) = default; + XMUSHORTN4& operator=(const XMUSHORTN4&) = default; + + 
XMUSHORTN4(XMUSHORTN4&&) = default; + XMUSHORTN4& operator=(XMUSHORTN4&&) = default; + + explicit constexpr XMUSHORTN4(uint64_t Packed) noexcept : v(Packed) {} + constexpr XMUSHORTN4(uint16_t _x, uint16_t _y, uint16_t _z, uint16_t _w) noexcept : x(_x), y(_y), z(_z), w(_w) {} + explicit XMUSHORTN4(_In_reads_(4) const uint16_t* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} + XMUSHORTN4(float _x, float _y, float _z, float _w) noexcept; + explicit XMUSHORTN4(_In_reads_(4) const float* pArray) noexcept; + + XMUSHORTN4& operator= (uint64_t Packed) noexcept { v = Packed; return *this; } + }; + + // 4D Vector; 16 bit unsigned integer components + struct XMUSHORT4 + { + union + { + struct + { + uint16_t x; + uint16_t y; + uint16_t z; + uint16_t w; + }; + uint64_t v; + }; + + XMUSHORT4() = default; + + XMUSHORT4(const XMUSHORT4&) = default; + XMUSHORT4& operator=(const XMUSHORT4&) = default; + + XMUSHORT4(XMUSHORT4&&) = default; + XMUSHORT4& operator=(XMUSHORT4&&) = default; + + explicit constexpr XMUSHORT4(uint64_t Packed) noexcept : v(Packed) {} + constexpr XMUSHORT4(uint16_t _x, uint16_t _y, uint16_t _z, uint16_t _w) noexcept : x(_x), y(_y), z(_z), w(_w) {} + explicit XMUSHORT4(_In_reads_(4) const uint16_t* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} + XMUSHORT4(float _x, float _y, float _z, float _w) noexcept; + explicit XMUSHORT4(_In_reads_(4) const float* pArray) noexcept; + + XMUSHORT4& operator= (uint64_t Packed) noexcept { v = Packed; return *this; } + }; + + //------------------------------------------------------------------------------ + // 4D Vector; 10-10-10-2 bit normalized components packed into a 32 bit integer + // The normalized 4D Vector is packed into 32 bits as follows: a 2 bit unsigned, + // normalized integer for the w component and 10 bit signed, normalized + // integers for the z, y, and x components. 
The w component is stored in the + // most significant bits and the x component in the least significant bits + // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] + struct XMXDECN4 + { + union + { + struct + { + int32_t x : 10; // -511/511 to 511/511 + int32_t y : 10; // -511/511 to 511/511 + int32_t z : 10; // -511/511 to 511/511 + uint32_t w : 2; // 0/3 to 3/3 + }; + uint32_t v; + }; + + XMXDECN4() = default; + + XMXDECN4(const XMXDECN4&) = default; + XMXDECN4& operator=(const XMXDECN4&) = default; + + XMXDECN4(XMXDECN4&&) = default; + XMXDECN4& operator=(XMXDECN4&&) = default; + + explicit constexpr XMXDECN4(uint32_t Packed) noexcept : v(Packed) {} + XMXDECN4(float _x, float _y, float _z, float _w) noexcept; + explicit XMXDECN4(_In_reads_(4) const float* pArray) noexcept; + + operator uint32_t () const noexcept { return v; } + + XMXDECN4& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + // 4D Vector; 10-10-10-2 bit components packed into a 32 bit integer + // The 4D Vector is packed into 32 bits as follows: a 2 bit unsigned + // integer for the w component and 10 bit signed integers for the + // z, y, and x components. 
The w component is stored in the + // most significant bits and the x component in the least significant bits + // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] + struct XM_DEPRECATED XMXDEC4 + { + union + { + struct + { + int32_t x : 10; // -511 to 511 + int32_t y : 10; // -511 to 511 + int32_t z : 10; // -511 to 511 + uint32_t w : 2; // 0 to 3 + }; + uint32_t v; + }; + + XMXDEC4() = default; + + XMXDEC4(const XMXDEC4&) = default; + XMXDEC4& operator=(const XMXDEC4&) = default; + + XMXDEC4(XMXDEC4&&) = default; + XMXDEC4& operator=(XMXDEC4&&) = default; + + explicit constexpr XMXDEC4(uint32_t Packed) noexcept : v(Packed) {} + XMXDEC4(float _x, float _y, float _z, float _w) noexcept; + explicit XMXDEC4(_In_reads_(4) const float* pArray) noexcept; + + operator uint32_t () const noexcept { return v; } + + XMXDEC4& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + // 4D Vector; 10-10-10-2 bit normalized components packed into a 32 bit integer + // The normalized 4D Vector is packed into 32 bits as follows: a 2 bit signed, + // normalized integer for the w component and 10 bit signed, normalized + // integers for the z, y, and x components. 
The w component is stored in the + // most significant bits and the x component in the least significant bits + // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] + struct XM_DEPRECATED XMDECN4 + { + union + { + struct + { + int32_t x : 10; // -511/511 to 511/511 + int32_t y : 10; // -511/511 to 511/511 + int32_t z : 10; // -511/511 to 511/511 + int32_t w : 2; // -1/1 to 1/1 + }; + uint32_t v; + }; + + XMDECN4() = default; + + XMDECN4(const XMDECN4&) = default; + XMDECN4& operator=(const XMDECN4&) = default; + + XMDECN4(XMDECN4&&) = default; + XMDECN4& operator=(XMDECN4&&) = default; + + explicit constexpr XMDECN4(uint32_t Packed) noexcept : v(Packed) {} + XMDECN4(float _x, float _y, float _z, float _w) noexcept; + explicit XMDECN4(_In_reads_(4) const float* pArray) noexcept; + + operator uint32_t () const noexcept { return v; } + + XMDECN4& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + // 4D Vector; 10-10-10-2 bit components packed into a 32 bit integer + // The 4D Vector is packed into 32 bits as follows: a 2 bit signed, + // integer for the w component and 10 bit signed integers for the + // z, y, and x components. 
The w component is stored in the + // most significant bits and the x component in the least significant bits + // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] + struct XM_DEPRECATED XMDEC4 + { + union + { + struct + { + int32_t x : 10; // -511 to 511 + int32_t y : 10; // -511 to 511 + int32_t z : 10; // -511 to 511 + int32_t w : 2; // -1 to 1 + }; + uint32_t v; + }; + + XMDEC4() = default; + + XMDEC4(const XMDEC4&) = default; + XMDEC4& operator=(const XMDEC4&) = default; + + XMDEC4(XMDEC4&&) = default; + XMDEC4& operator=(XMDEC4&&) = default; + + explicit constexpr XMDEC4(uint32_t Packed) noexcept : v(Packed) {} + XMDEC4(float _x, float _y, float _z, float _w) noexcept; + explicit XMDEC4(_In_reads_(4) const float* pArray) noexcept; + + operator uint32_t () const noexcept { return v; } + + XMDEC4& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + // 4D Vector; 10-10-10-2 bit normalized components packed into a 32 bit integer + // The normalized 4D Vector is packed into 32 bits as follows: a 2 bit unsigned, + // normalized integer for the w component and 10 bit unsigned, normalized + // integers for the z, y, and x components. 
The w component is stored in the + // most significant bits and the x component in the least significant bits + // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] + struct XMUDECN4 + { + union + { + struct + { + uint32_t x : 10; // 0/1023 to 1023/1023 + uint32_t y : 10; // 0/1023 to 1023/1023 + uint32_t z : 10; // 0/1023 to 1023/1023 + uint32_t w : 2; // 0/3 to 3/3 + }; + uint32_t v; + }; + + XMUDECN4() = default; + + XMUDECN4(const XMUDECN4&) = default; + XMUDECN4& operator=(const XMUDECN4&) = default; + + XMUDECN4(XMUDECN4&&) = default; + XMUDECN4& operator=(XMUDECN4&&) = default; + + explicit constexpr XMUDECN4(uint32_t Packed) noexcept : v(Packed) {} + XMUDECN4(float _x, float _y, float _z, float _w) noexcept; + explicit XMUDECN4(_In_reads_(4) const float* pArray) noexcept; + + operator uint32_t () const noexcept { return v; } + + XMUDECN4& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + // 4D Vector; 10-10-10-2 bit components packed into a 32 bit integer + // The 4D Vector is packed into 32 bits as follows: a 2 bit unsigned, + // integer for the w component and 10 bit unsigned integers + // for the z, y, and x components. 
The w component is stored in the + // most significant bits and the x component in the least significant bits + // (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] + struct XMUDEC4 + { + union + { + struct + { + uint32_t x : 10; // 0 to 1023 + uint32_t y : 10; // 0 to 1023 + uint32_t z : 10; // 0 to 1023 + uint32_t w : 2; // 0 to 3 + }; + uint32_t v; + }; + + XMUDEC4() = default; + + XMUDEC4(const XMUDEC4&) = default; + XMUDEC4& operator=(const XMUDEC4&) = default; + + XMUDEC4(XMUDEC4&&) = default; + XMUDEC4& operator=(XMUDEC4&&) = default; + + explicit constexpr XMUDEC4(uint32_t Packed) noexcept : v(Packed) {} + XMUDEC4(float _x, float _y, float _z, float _w) noexcept; + explicit XMUDEC4(_In_reads_(4) const float* pArray) noexcept; + + operator uint32_t () const noexcept { return v; } + + XMUDEC4& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + //------------------------------------------------------------------------------ + // 4D Vector; 8 bit signed normalized integer components + struct XMBYTEN4 + { + union + { + struct + { + int8_t x; + int8_t y; + int8_t z; + int8_t w; + }; + uint32_t v; + }; + + XMBYTEN4() = default; + + XMBYTEN4(const XMBYTEN4&) = default; + XMBYTEN4& operator=(const XMBYTEN4&) = default; + + XMBYTEN4(XMBYTEN4&&) = default; + XMBYTEN4& operator=(XMBYTEN4&&) = default; + + constexpr XMBYTEN4(int8_t _x, int8_t _y, int8_t _z, int8_t _w) noexcept : x(_x), y(_y), z(_z), w(_w) {} + explicit constexpr XMBYTEN4(uint32_t Packed) noexcept : v(Packed) {} + explicit XMBYTEN4(_In_reads_(4) const int8_t* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} + XMBYTEN4(float _x, float _y, float _z, float _w) noexcept; + explicit XMBYTEN4(_In_reads_(4) const float* pArray) noexcept; + + XMBYTEN4& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + // 4D Vector; 8 bit signed integer components + struct XMBYTE4 + { + union + { + struct + { + int8_t x; + int8_t y; + int8_t 
z; + int8_t w; + }; + uint32_t v; + }; + + XMBYTE4() = default; + + XMBYTE4(const XMBYTE4&) = default; + XMBYTE4& operator=(const XMBYTE4&) = default; + + XMBYTE4(XMBYTE4&&) = default; + XMBYTE4& operator=(XMBYTE4&&) = default; + + constexpr XMBYTE4(int8_t _x, int8_t _y, int8_t _z, int8_t _w) noexcept : x(_x), y(_y), z(_z), w(_w) {} + explicit constexpr XMBYTE4(uint32_t Packed) noexcept : v(Packed) {} + explicit XMBYTE4(_In_reads_(4) const int8_t* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} + XMBYTE4(float _x, float _y, float _z, float _w) noexcept; + explicit XMBYTE4(_In_reads_(4) const float* pArray) noexcept; + + XMBYTE4& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + // 4D Vector; 8 bit unsigned normalized integer components + struct XMUBYTEN4 + { + union + { + struct + { + uint8_t x; + uint8_t y; + uint8_t z; + uint8_t w; + }; + uint32_t v; + }; + + XMUBYTEN4() = default; + + XMUBYTEN4(const XMUBYTEN4&) = default; + XMUBYTEN4& operator=(const XMUBYTEN4&) = default; + + XMUBYTEN4(XMUBYTEN4&&) = default; + XMUBYTEN4& operator=(XMUBYTEN4&&) = default; + + constexpr XMUBYTEN4(uint8_t _x, uint8_t _y, uint8_t _z, uint8_t _w) noexcept : x(_x), y(_y), z(_z), w(_w) {} + explicit constexpr XMUBYTEN4(uint32_t Packed) noexcept : v(Packed) {} + explicit XMUBYTEN4(_In_reads_(4) const uint8_t* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} + XMUBYTEN4(float _x, float _y, float _z, float _w) noexcept; + explicit XMUBYTEN4(_In_reads_(4) const float* pArray) noexcept; + + XMUBYTEN4& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + // 4D Vector; 8 bit unsigned integer components + struct XMUBYTE4 + { + union + { + struct + { + uint8_t x; + uint8_t y; + uint8_t z; + uint8_t w; + }; + uint32_t v; + }; + + XMUBYTE4() = default; + + XMUBYTE4(const XMUBYTE4&) = default; + XMUBYTE4& operator=(const XMUBYTE4&) = default; + + XMUBYTE4(XMUBYTE4&&) = default; + XMUBYTE4& 
operator=(XMUBYTE4&&) = default; + + constexpr XMUBYTE4(uint8_t _x, uint8_t _y, uint8_t _z, uint8_t _w) noexcept : x(_x), y(_y), z(_z), w(_w) {} + explicit constexpr XMUBYTE4(uint32_t Packed) noexcept : v(Packed) {} + explicit XMUBYTE4(_In_reads_(4) const uint8_t* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} + XMUBYTE4(float _x, float _y, float _z, float _w) noexcept; + explicit XMUBYTE4(_In_reads_(4) const float* pArray) noexcept; + + XMUBYTE4& operator= (uint32_t Packed) noexcept { v = Packed; return *this; } + }; + + //------------------------------------------------------------------------------ + // 4D vector; 4 bit unsigned integer components + struct XMUNIBBLE4 + { + union + { + struct + { + uint16_t x : 4; // 0 to 15 + uint16_t y : 4; // 0 to 15 + uint16_t z : 4; // 0 to 15 + uint16_t w : 4; // 0 to 15 + }; + uint16_t v; + }; + + XMUNIBBLE4() = default; + + XMUNIBBLE4(const XMUNIBBLE4&) = default; + XMUNIBBLE4& operator=(const XMUNIBBLE4&) = default; + + XMUNIBBLE4(XMUNIBBLE4&&) = default; + XMUNIBBLE4& operator=(XMUNIBBLE4&&) = default; + + explicit constexpr XMUNIBBLE4(uint16_t Packed) noexcept : v(Packed) {} + constexpr XMUNIBBLE4(uint8_t _x, uint8_t _y, uint8_t _z, uint8_t _w) noexcept : x(_x), y(_y), z(_z), w(_w) {} + explicit XMUNIBBLE4(_In_reads_(4) const uint8_t* pArray) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} + XMUNIBBLE4(float _x, float _y, float _z, float _w) noexcept; + explicit XMUNIBBLE4(_In_reads_(4) const float* pArray) noexcept; + + operator uint16_t () const noexcept { return v; } + + XMUNIBBLE4& operator= (uint16_t Packed) noexcept { v = Packed; return *this; } + }; + + //------------------------------------------------------------------------------ + // 4D vector: 5/5/5/1 unsigned integer components + struct XMU555 + { + union + { + struct + { + uint16_t x : 5; // 0 to 31 + uint16_t y : 5; // 0 to 31 + uint16_t z : 5; // 0 to 31 + uint16_t w : 1; // 0 or 1 + }; + uint16_t v; + 
}; + + XMU555() = default; + + XMU555(const XMU555&) = default; + XMU555& operator=(const XMU555&) = default; + + XMU555(XMU555&&) = default; + XMU555& operator=(XMU555&&) = default; + + explicit constexpr XMU555(uint16_t Packed) noexcept : v(Packed) {} + constexpr XMU555(uint8_t _x, uint8_t _y, uint8_t _z, bool _w) noexcept : x(_x), y(_y), z(_z), w(_w ? 0x1 : 0) {} + XMU555(_In_reads_(3) const uint8_t* pArray, _In_ bool _w) noexcept : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(_w ? 0x1 : 0) {} + XMU555(float _x, float _y, float _z, bool _w) noexcept; + XMU555(_In_reads_(3) const float* pArray, _In_ bool _w) noexcept; + + operator uint16_t () const noexcept { return v; } + + XMU555& operator= (uint16_t Packed) noexcept { v = Packed; return *this; } + }; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#pragma warning(pop) + + + /**************************************************************************** + * + * Data conversion operations + * + ****************************************************************************/ + + float XMConvertHalfToFloat(HALF Value) noexcept; + float* XMConvertHalfToFloatStream(_Out_writes_bytes_(sizeof(float) + OutputStride * (HalfCount - 1)) float* pOutputStream, + _In_ size_t OutputStride, + _In_reads_bytes_(sizeof(HALF) + InputStride * (HalfCount - 1)) const HALF* pInputStream, + _In_ size_t InputStride, _In_ size_t HalfCount) noexcept; + HALF XMConvertFloatToHalf(float Value) noexcept; + HALF* XMConvertFloatToHalfStream(_Out_writes_bytes_(sizeof(HALF) + OutputStride * (FloatCount - 1)) HALF* pOutputStream, + _In_ size_t OutputStride, + _In_reads_bytes_(sizeof(float) + InputStride * (FloatCount - 1)) const float* pInputStream, + _In_ size_t InputStride, _In_ size_t FloatCount) noexcept; + + /**************************************************************************** + * + * Load operations + * + ****************************************************************************/ + + XMVECTOR XM_CALLCONV XMLoadColor(_In_ 
const XMCOLOR* pSource) noexcept; + + XMVECTOR XM_CALLCONV XMLoadHalf2(_In_ const XMHALF2* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadShortN2(_In_ const XMSHORTN2* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadShort2(_In_ const XMSHORT2* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadUShortN2(_In_ const XMUSHORTN2* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadUShort2(_In_ const XMUSHORT2* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadByteN2(_In_ const XMBYTEN2* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadByte2(_In_ const XMBYTE2* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadUByteN2(_In_ const XMUBYTEN2* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadUByte2(_In_ const XMUBYTE2* pSource) noexcept; + + XMVECTOR XM_CALLCONV XMLoadU565(_In_ const XMU565* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadFloat3PK(_In_ const XMFLOAT3PK* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadFloat3SE(_In_ const XMFLOAT3SE* pSource) noexcept; + + XMVECTOR XM_CALLCONV XMLoadHalf4(_In_ const XMHALF4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadShortN4(_In_ const XMSHORTN4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadShort4(_In_ const XMSHORT4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadUShortN4(_In_ const XMUSHORTN4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadUShort4(_In_ const XMUSHORT4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadXDecN4(_In_ const XMXDECN4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadUDecN4(_In_ const XMUDECN4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadUDecN4_XR(_In_ const XMUDECN4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadUDec4(_In_ const XMUDEC4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadByteN4(_In_ const XMBYTEN4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadByte4(_In_ const XMBYTE4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadUByteN4(_In_ const XMUBYTEN4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadUByte4(_In_ const XMUBYTE4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadUNibble4(_In_ 
const XMUNIBBLE4* pSource) noexcept; + XMVECTOR XM_CALLCONV XMLoadU555(_In_ const XMU555* pSource) noexcept; + +#pragma warning(push) +#pragma warning(disable : 4996) + // C4996: ignore deprecation warning + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif + + XMVECTOR XM_DEPRECATED XM_CALLCONV XMLoadDecN4(_In_ const XMDECN4* pSource) noexcept; + XMVECTOR XM_DEPRECATED XM_CALLCONV XMLoadDec4(_In_ const XMDEC4* pSource) noexcept; + XMVECTOR XM_DEPRECATED XM_CALLCONV XMLoadXDec4(_In_ const XMXDEC4* pSource) noexcept; + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +#pragma warning(pop) + + /**************************************************************************** + * + * Store operations + * + ****************************************************************************/ + + void XM_CALLCONV XMStoreColor(_Out_ XMCOLOR* pDestination, _In_ FXMVECTOR V) noexcept; + + void XM_CALLCONV XMStoreHalf2(_Out_ XMHALF2* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreShortN2(_Out_ XMSHORTN2* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreShort2(_Out_ XMSHORT2* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreUShortN2(_Out_ XMUSHORTN2* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreUShort2(_Out_ XMUSHORT2* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreByteN2(_Out_ XMBYTEN2* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreByte2(_Out_ XMBYTE2* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreUByteN2(_Out_ XMUBYTEN2* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreUByte2(_Out_ XMUBYTE2* pDestination, _In_ FXMVECTOR V) noexcept; + + void XM_CALLCONV XMStoreU565(_Out_ XMU565* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreFloat3PK(_Out_ XMFLOAT3PK* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreFloat3SE(_Out_ 
XMFLOAT3SE* pDestination, _In_ FXMVECTOR V) noexcept; + + void XM_CALLCONV XMStoreHalf4(_Out_ XMHALF4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreShortN4(_Out_ XMSHORTN4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreShort4(_Out_ XMSHORT4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreUShortN4(_Out_ XMUSHORTN4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreUShort4(_Out_ XMUSHORT4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreXDecN4(_Out_ XMXDECN4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreUDecN4(_Out_ XMUDECN4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreUDecN4_XR(_Out_ XMUDECN4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreUDec4(_Out_ XMUDEC4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreByteN4(_Out_ XMBYTEN4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreByte4(_Out_ XMBYTE4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreUByteN4(_Out_ XMUBYTEN4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreUByte4(_Out_ XMUBYTE4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreUNibble4(_Out_ XMUNIBBLE4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_CALLCONV XMStoreU555(_Out_ XMU555* pDestination, _In_ FXMVECTOR V) noexcept; + +#pragma warning(push) +#pragma warning(disable : 4996) + // C4996: ignore deprecation warning + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif + + void XM_DEPRECATED XM_CALLCONV XMStoreDecN4(_Out_ XMDECN4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_DEPRECATED XM_CALLCONV XMStoreDec4(_Out_ XMDEC4* pDestination, _In_ FXMVECTOR V) noexcept; + void XM_DEPRECATED XM_CALLCONV XMStoreXDec4(_Out_ XMXDEC4* pDestination, _In_ FXMVECTOR V) noexcept; + +#ifdef __GNUC__ +#pragma GCC diagnostic pop 
+#endif + +#pragma warning(pop) + + /**************************************************************************** + * + * Implementation + * + ****************************************************************************/ + +#pragma warning(push) +#pragma warning(disable:4068 4214 4204 4365 4616 6001 6101) + // C4068/4616: ignore unknown pragmas + // C4214/4204: nonstandard extension used + // C4365: Off by default noise + // C6001/6101: False positives + +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes") +#pragma prefast(disable : 26495, "Union initialization confuses /analyze") +#endif + +#include "DirectXPackedVector.inl" + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + +#pragma warning(pop) + + } // namespace PackedVector + +} // namespace DirectX + diff --git a/Sdk/External/DirectXMath/Inc/DirectXPackedVector.inl b/Sdk/External/DirectXMath/Inc/DirectXPackedVector.inl new file mode 100644 index 0000000..fa1a660 --- /dev/null +++ b/Sdk/External/DirectXMath/Inc/DirectXPackedVector.inl @@ -0,0 +1,4438 @@ +//------------------------------------------------------------------------------------- +// DirectXPackedVector.inl -- SIMD C++ Math library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkID=615560 +//------------------------------------------------------------------------------------- + +#pragma once + +/**************************************************************************** + * + * Data conversion + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline float XMConvertHalfToFloat(HALF Value) noexcept +{ +#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + __m128i V1 = _mm_cvtsi32_si128(static_cast(Value)); + __m128 V2 = _mm_cvtph_ps(V1); + return _mm_cvtss_f32(V2); +#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__) && !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2)) + uint16x4_t vHalf = vdup_n_u16(Value); + float32x4_t vFloat = vcvt_f32_f16(vreinterpret_f16_u16(vHalf)); + return vgetq_lane_f32(vFloat, 0); +#else + auto Mantissa = static_cast(Value & 0x03FF); + + uint32_t Exponent = (Value & 0x7C00); + if (Exponent == 0x7C00) // INF/NAN + { + Exponent = 0x8f; + } + else if (Exponent != 0) // The value is normalized + { + Exponent = static_cast((static_cast(Value) >> 10) & 0x1F); + } + else if (Mantissa != 0) // The value is denormalized + { + // Normalize the value in the resulting float + Exponent = 1; + + do + { + Exponent--; + Mantissa <<= 1; + } while ((Mantissa & 0x0400) == 0); + + Mantissa &= 0x03FF; + } + else // The value is zero + { + Exponent = static_cast(-112); + } + + uint32_t Result = + ((static_cast(Value) & 0x8000) << 16) // Sign + | ((Exponent + 112) << 23) // Exponent + | (Mantissa << 13); // Mantissa + + return reinterpret_cast(&Result)[0]; +#endif // !_XM_F16C_INTRINSICS_ +} + +//------------------------------------------------------------------------------ +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma prefast(disable : 26015 26019, "PREfast noise: Esp:1307" ) 
+#endif + +_Use_decl_annotations_ +inline float* XMConvertHalfToFloatStream +( + float* pOutputStream, + size_t OutputStride, + const HALF* pInputStream, + size_t InputStride, + size_t HalfCount +) noexcept +{ + assert(pOutputStream); + assert(pInputStream); + + assert(InputStride >= sizeof(HALF)); + _Analysis_assume_(InputStride >= sizeof(HALF)); + + assert(OutputStride >= sizeof(float)); + _Analysis_assume_(OutputStride >= sizeof(float)); + +#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + auto pHalf = reinterpret_cast(pInputStream); + auto pFloat = reinterpret_cast(pOutputStream); + + size_t i = 0; + size_t four = HalfCount >> 2; + if (four > 0) + { + if (InputStride == sizeof(HALF)) + { + if (OutputStride == sizeof(float)) + { + if ((reinterpret_cast(pFloat) & 0xF) == 0) + { + // Packed input, aligned & packed output + for (size_t j = 0; j < four; ++j) + { + __m128i HV = _mm_loadl_epi64(reinterpret_cast(pHalf)); + pHalf += InputStride * 4; + + __m128 FV = _mm_cvtph_ps(HV); + + XM_STREAM_PS(reinterpret_cast(pFloat), FV); + pFloat += OutputStride * 4; + i += 4; + } + } + else + { + // Packed input, packed output + for (size_t j = 0; j < four; ++j) + { + __m128i HV = _mm_loadl_epi64(reinterpret_cast(pHalf)); + pHalf += InputStride * 4; + + __m128 FV = _mm_cvtph_ps(HV); + + _mm_storeu_ps(reinterpret_cast(pFloat), FV); + pFloat += OutputStride * 4; + i += 4; + } + } + } + else + { + // Packed input, scattered output + for (size_t j = 0; j < four; ++j) + { + __m128i HV = _mm_loadl_epi64(reinterpret_cast(pHalf)); + pHalf += InputStride * 4; + + __m128 FV = _mm_cvtph_ps(HV); + + _mm_store_ss(reinterpret_cast(pFloat), FV); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 1); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 2); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 3); + pFloat += OutputStride; + i += 4; + } + } + } + else if (OutputStride == sizeof(float)) + { + 
if ((reinterpret_cast(pFloat) & 0xF) == 0) + { + // Scattered input, aligned & packed output + for (size_t j = 0; j < four; ++j) + { + uint16_t H1 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H2 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H3 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H4 = *reinterpret_cast(pHalf); + pHalf += InputStride; + + __m128i HV = _mm_setzero_si128(); + HV = _mm_insert_epi16(HV, H1, 0); + HV = _mm_insert_epi16(HV, H2, 1); + HV = _mm_insert_epi16(HV, H3, 2); + HV = _mm_insert_epi16(HV, H4, 3); + __m128 FV = _mm_cvtph_ps(HV); + + XM_STREAM_PS(reinterpret_cast(pFloat), FV); + pFloat += OutputStride * 4; + i += 4; + } + } + else + { + // Scattered input, packed output + for (size_t j = 0; j < four; ++j) + { + uint16_t H1 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H2 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H3 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H4 = *reinterpret_cast(pHalf); + pHalf += InputStride; + + __m128i HV = _mm_setzero_si128(); + HV = _mm_insert_epi16(HV, H1, 0); + HV = _mm_insert_epi16(HV, H2, 1); + HV = _mm_insert_epi16(HV, H3, 2); + HV = _mm_insert_epi16(HV, H4, 3); + __m128 FV = _mm_cvtph_ps(HV); + + _mm_storeu_ps(reinterpret_cast(pFloat), FV); + pFloat += OutputStride * 4; + i += 4; + } + } + } + else + { + // Scattered input, scattered output + for (size_t j = 0; j < four; ++j) + { + uint16_t H1 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H2 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H3 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H4 = *reinterpret_cast(pHalf); + pHalf += InputStride; + + __m128i HV = _mm_setzero_si128(); + HV = _mm_insert_epi16(HV, H1, 0); + HV = _mm_insert_epi16(HV, H2, 1); + HV = _mm_insert_epi16(HV, H3, 2); + HV = _mm_insert_epi16(HV, H4, 3); + __m128 FV = _mm_cvtph_ps(HV); + + _mm_store_ss(reinterpret_cast(pFloat), FV); + pFloat 
+= OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 1); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 2); + pFloat += OutputStride; + *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 3); + pFloat += OutputStride; + i += 4; + } + } + } + + for (; i < HalfCount; ++i) + { + *reinterpret_cast(pFloat) = XMConvertHalfToFloat(reinterpret_cast(pHalf)[0]); + pHalf += InputStride; + pFloat += OutputStride; + } + + XM_SFENCE(); + + return pOutputStream; +#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__) && !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2)) + auto pHalf = reinterpret_cast(pInputStream); + auto pFloat = reinterpret_cast(pOutputStream); + + size_t i = 0; + size_t four = HalfCount >> 2; + if (four > 0) + { + if (InputStride == sizeof(HALF)) + { + if (OutputStride == sizeof(float)) + { + // Packed input, packed output + for (size_t j = 0; j < four; ++j) + { + uint16x4_t vHalf = vld1_u16(reinterpret_cast(pHalf)); + pHalf += InputStride * 4; + + float32x4_t vFloat = vcvt_f32_f16(vreinterpret_f16_u16(vHalf)); + + vst1q_f32(reinterpret_cast(pFloat), vFloat); + pFloat += OutputStride * 4; + i += 4; + } + } + else + { + // Packed input, scattered output + for (size_t j = 0; j < four; ++j) + { + uint16x4_t vHalf = vld1_u16(reinterpret_cast(pHalf)); + pHalf += InputStride * 4; + + float32x4_t vFloat = vcvt_f32_f16(vreinterpret_f16_u16(vHalf)); + + vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 0); + pFloat += OutputStride; + vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 1); + pFloat += OutputStride; + vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 2); + pFloat += OutputStride; + vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 3); + pFloat += OutputStride; + i += 4; + } + } + } + else if (OutputStride == sizeof(float)) + { + // Scattered input, packed output + for (size_t j = 0; j < four; ++j) + { + uint16_t H1 = *reinterpret_cast(pHalf); + pHalf 
+= InputStride; + uint16_t H2 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H3 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H4 = *reinterpret_cast(pHalf); + pHalf += InputStride; + + uint64_t iHalf = uint64_t(H1) | (uint64_t(H2) << 16) | (uint64_t(H3) << 32) | (uint64_t(H4) << 48); + uint16x4_t vHalf = vcreate_u16(iHalf); + + float32x4_t vFloat = vcvt_f32_f16(vreinterpret_f16_u16(vHalf)); + + vst1q_f32(reinterpret_cast(pFloat), vFloat); + pFloat += OutputStride * 4; + i += 4; + } + } + else + { + // Scattered input, scattered output + for (size_t j = 0; j < four; ++j) + { + uint16_t H1 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H2 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H3 = *reinterpret_cast(pHalf); + pHalf += InputStride; + uint16_t H4 = *reinterpret_cast(pHalf); + pHalf += InputStride; + + uint64_t iHalf = uint64_t(H1) | (uint64_t(H2) << 16) | (uint64_t(H3) << 32) | (uint64_t(H4) << 48); + uint16x4_t vHalf = vcreate_u16(iHalf); + + float32x4_t vFloat = vcvt_f32_f16(vreinterpret_f16_u16(vHalf)); + + vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 0); + pFloat += OutputStride; + vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 1); + pFloat += OutputStride; + vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 2); + pFloat += OutputStride; + vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 3); + pFloat += OutputStride; + i += 4; + } + } + } + + for (; i < HalfCount; ++i) + { + *reinterpret_cast(pFloat) = XMConvertHalfToFloat(reinterpret_cast(pHalf)[0]); + pHalf += InputStride; + pFloat += OutputStride; + } + + return pOutputStream; +#else + auto pHalf = reinterpret_cast(pInputStream); + auto pFloat = reinterpret_cast(pOutputStream); + + for (size_t i = 0; i < HalfCount; i++) + { + *reinterpret_cast(pFloat) = XMConvertHalfToFloat(reinterpret_cast(pHalf)[0]); + pHalf += InputStride; + pFloat += OutputStride; + } + + return pOutputStream; +#endif // !_XM_F16C_INTRINSICS_ +} + 
+//------------------------------------------------------------------------------ + +inline HALF XMConvertFloatToHalf(float Value) noexcept +{ +#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + __m128 V1 = _mm_set_ss(Value); + __m128i V2 = _mm_cvtps_ph(V1, _MM_FROUND_TO_NEAREST_INT); + return static_cast(_mm_extract_epi16(V2, 0)); +#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__) && !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2)) + float32x4_t vFloat = vdupq_n_f32(Value); + float16x4_t vHalf = vcvt_f16_f32(vFloat); + return vget_lane_u16(vreinterpret_u16_f16(vHalf), 0); +#else + uint32_t Result; + + auto IValue = reinterpret_cast(&Value)[0]; + uint32_t Sign = (IValue & 0x80000000U) >> 16U; + IValue = IValue & 0x7FFFFFFFU; // Hack off the sign + if (IValue >= 0x47800000 /*e+16*/) + { + // The number is too large to be represented as a half. Return infinity or NaN + Result = 0x7C00U | ((IValue > 0x7F800000) ? (0x200 | ((IValue >> 13U) & 0x3FFU)) : 0U); + } + else if (IValue <= 0x33000000U /*e-25*/) + { + Result = 0; + } + else if (IValue < 0x38800000U /*e-14*/) + { + // The number is too small to be represented as a normalized half. + // Convert it to a denormalized value. + uint32_t Shift = 125U - (IValue >> 23U); + IValue = 0x800000U | (IValue & 0x7FFFFFU); + Result = IValue >> (Shift + 1); + uint32_t s = (IValue & ((1U << Shift) - 1)) != 0; + Result += (Result | s) & ((IValue >> Shift) & 1U); + } + else + { + // Rebias the exponent to represent the value as a normalized half. 
+ IValue += 0xC8000000U; + Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U) & 0x7FFFU; + } + return static_cast(Result | Sign); +#endif // !_XM_F16C_INTRINSICS_ +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline HALF* XMConvertFloatToHalfStream +( + HALF* pOutputStream, + size_t OutputStride, + const float* pInputStream, + size_t InputStride, + size_t FloatCount +) noexcept +{ + assert(pOutputStream); + assert(pInputStream); + + assert(InputStride >= sizeof(float)); + _Analysis_assume_(InputStride >= sizeof(float)); + + assert(OutputStride >= sizeof(HALF)); + _Analysis_assume_(OutputStride >= sizeof(HALF)); + +#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + auto pFloat = reinterpret_cast(pInputStream); + auto pHalf = reinterpret_cast(pOutputStream); + + size_t i = 0; + size_t four = FloatCount >> 2; + if (four > 0) + { + if (InputStride == sizeof(float)) + { + if (OutputStride == sizeof(HALF)) + { + if ((reinterpret_cast(pFloat) & 0xF) == 0) + { + // Aligned and packed input, packed output + for (size_t j = 0; j < four; ++j) + { + __m128 FV = _mm_load_ps(reinterpret_cast(pFloat)); + pFloat += InputStride * 4; + + __m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT); + + _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV); + pHalf += OutputStride * 4; + i += 4; + } + } + else + { + // Packed input, packed output + for (size_t j = 0; j < four; ++j) + { + __m128 FV = _mm_loadu_ps(reinterpret_cast(pFloat)); + pFloat += InputStride * 4; + + __m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT); + + _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV); + pHalf += OutputStride * 4; + i += 4; + } + } + } + else + { + if ((reinterpret_cast(pFloat) & 0xF) == 0) + { + // Aligned & packed input, scattered output + for (size_t j = 0; j < four; ++j) + { + __m128 FV = _mm_load_ps(reinterpret_cast(pFloat)); + pFloat += InputStride * 4; + + __m128i HV = 
_mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT); + + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 0)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 1)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 2)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 3)); + pHalf += OutputStride; + i += 4; + } + } + else + { + // Packed input, scattered output + for (size_t j = 0; j < four; ++j) + { + __m128 FV = _mm_loadu_ps(reinterpret_cast(pFloat)); + pFloat += InputStride * 4; + + __m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT); + + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 0)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 1)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 2)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 3)); + pHalf += OutputStride; + i += 4; + } + } + } + } + else if (OutputStride == sizeof(HALF)) + { + // Scattered input, packed output + for (size_t j = 0; j < four; ++j) + { + __m128 FV1 = _mm_load_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV2 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV3 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV4 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV = _mm_blend_ps(FV1, FV2, 0x2); + __m128 FT = _mm_blend_ps(FV3, FV4, 0x8); + FV = _mm_blend_ps(FV, FT, 0xC); + + __m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT); + + _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV); + pHalf += OutputStride * 4; + i += 4; + } + } + else + { + // Scattered input, scattered output + for (size_t j = 0; j < four; ++j) + { + __m128 FV1 = _mm_load_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 
FV2 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV3 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV4 = _mm_broadcast_ss(reinterpret_cast(pFloat)); + pFloat += InputStride; + + __m128 FV = _mm_blend_ps(FV1, FV2, 0x2); + __m128 FT = _mm_blend_ps(FV3, FV4, 0x8); + FV = _mm_blend_ps(FV, FT, 0xC); + + __m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT); + + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 0)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 1)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 2)); + pHalf += OutputStride; + *reinterpret_cast(pHalf) = static_cast(_mm_extract_epi16(HV, 3)); + pHalf += OutputStride; + i += 4; + } + } + } + + for (; i < FloatCount; ++i) + { + *reinterpret_cast(pHalf) = XMConvertFloatToHalf(reinterpret_cast(pFloat)[0]); + pFloat += InputStride; + pHalf += OutputStride; + } + + return pOutputStream; +#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__) && !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2)) + auto pFloat = reinterpret_cast(pInputStream); + auto pHalf = reinterpret_cast(pOutputStream); + + size_t i = 0; + size_t four = FloatCount >> 2; + if (four > 0) + { + if (InputStride == sizeof(float)) + { + if (OutputStride == sizeof(HALF)) + { + // Packed input, packed output + for (size_t j = 0; j < four; ++j) + { + float32x4_t vFloat = vld1q_f32(reinterpret_cast(pFloat)); + pFloat += InputStride * 4; + + uint16x4_t vHalf = vreinterpret_u16_f16(vcvt_f16_f32(vFloat)); + + vst1_u16(reinterpret_cast(pHalf), vHalf); + pHalf += OutputStride * 4; + i += 4; + } + } + else + { + // Packed input, scattered output + for (size_t j = 0; j < four; ++j) + { + float32x4_t vFloat = vld1q_f32(reinterpret_cast(pFloat)); + pFloat += InputStride * 4; + + uint16x4_t vHalf = 
vreinterpret_u16_f16(vcvt_f16_f32(vFloat)); + + vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 0); + pHalf += OutputStride; + vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 1); + pHalf += OutputStride; + vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 2); + pHalf += OutputStride; + vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 3); + pHalf += OutputStride; + i += 4; + } + } + } + else if (OutputStride == sizeof(HALF)) + { + // Scattered input, packed output + for (size_t j = 0; j < four; ++j) + { + float32x4_t vFloat = vdupq_n_f32(0); + vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), vFloat, 0); + pFloat += InputStride; + + vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), vFloat, 1); + pFloat += InputStride; + + vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), vFloat, 2); + pFloat += InputStride; + + vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), vFloat, 3); + pFloat += InputStride; + + uint16x4_t vHalf = vreinterpret_u16_f16(vcvt_f16_f32(vFloat)); + + vst1_u16(reinterpret_cast(pHalf), vHalf); + pHalf += OutputStride * 4; + i += 4; + } + } + else + { + // Scattered input, scattered output + for (size_t j = 0; j < four; ++j) + { + float32x4_t vFloat = vdupq_n_f32(0); + vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), vFloat, 0); + pFloat += InputStride; + + vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), vFloat, 1); + pFloat += InputStride; + + vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), vFloat, 2); + pFloat += InputStride; + + vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), vFloat, 3); + pFloat += InputStride; + + uint16x4_t vHalf = vreinterpret_u16_f16(vcvt_f16_f32(vFloat)); + + vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 0); + pHalf += OutputStride; + vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 1); + pHalf += OutputStride; + vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 2); + pHalf += OutputStride; + vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 3); + pHalf += OutputStride; + i += 4; + } + } + } + + for (; i < FloatCount; ++i) + { + 
*reinterpret_cast(pHalf) = XMConvertFloatToHalf(reinterpret_cast(pFloat)[0]); + pFloat += InputStride; + pHalf += OutputStride; + } + + return pOutputStream; +#else + auto pFloat = reinterpret_cast(pInputStream); + auto pHalf = reinterpret_cast(pOutputStream); + + for (size_t i = 0; i < FloatCount; i++) + { + *reinterpret_cast(pHalf) = XMConvertFloatToHalf(reinterpret_cast(pFloat)[0]); + pFloat += InputStride; + pHalf += OutputStride; + } + return pOutputStream; +#endif // !_XM_F16C_INTRINSICS_ +} + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + +/**************************************************************************** + * + * Vector and matrix load operations + * + ****************************************************************************/ + +#ifdef _PREFAST_ +#pragma prefast(push) +#pragma prefast(disable:28931, "PREfast noise: Esp:1266") +#endif + +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadColor(const XMCOLOR* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + // int32_t -> Float conversions are done in one instruction. + // uint32_t -> Float calls a runtime function. 
Keep in int32_t + auto iColor = static_cast(pSource->c); + XMVECTORF32 vColor = { { { + static_cast((iColor >> 16) & 0xFF)* (1.0f / 255.0f), + static_cast((iColor >> 8) & 0xFF)* (1.0f / 255.0f), + static_cast(iColor & 0xFF)* (1.0f / 255.0f), + static_cast((iColor >> 24) & 0xFF)* (1.0f / 255.0f) + } } }; + return vColor.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32_t bgra = pSource->c; + uint32_t rgba = (bgra & 0xFF00FF00) | ((bgra >> 16) & 0xFF) | ((bgra << 16) & 0xFF0000); + uint32x2_t vInt8 = vdup_n_u32(rgba); + uint16x8_t vInt16 = vmovl_u8(vreinterpret_u8_u32(vInt8)); + uint32x4_t vInt = vmovl_u16(vget_low_u16(vInt16)); + float32x4_t R = vcvtq_f32_u32(vInt); + return vmulq_n_f32(R, 1.0f / 255.0f); +#elif defined(_XM_SSE_INTRINSICS_) + // Splat the color in all four entries + __m128i vInt = _mm_set1_epi32(static_cast(pSource->c)); + // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 + vInt = _mm_and_si128(vInt, g_XMMaskA8R8G8B8); + // a is unsigned! Flip the bit to convert the order to signed + vInt = _mm_xor_si128(vInt, g_XMFlipA8R8G8B8); + // Convert to floating point numbers + XMVECTOR vTemp = _mm_cvtepi32_ps(vInt); + // RGB + 0, A + 0x80000000.f to undo the signed order. 
+ vTemp = _mm_add_ps(vTemp, g_XMFixAA8R8G8B8); + // Convert 0-255 to 0.0f-1.0f + return _mm_mul_ps(vTemp, g_XMNormalizeA8R8G8B8); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadHalf2(const XMHALF2* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + __m128 V = _mm_load_ss(reinterpret_cast(pSource)); + return _mm_cvtph_ps(_mm_castps_si128(V)); +#else + XMVECTORF32 vResult = { { { + XMConvertHalfToFloat(pSource->x), + XMConvertHalfToFloat(pSource->y), + 0.0f, + 0.0f + } } }; + return vResult.v; +#endif // !_XM_F16C_INTRINSICS_ +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadShortN2(const XMSHORTN2* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + (pSource->x == -32768) ? -1.f : (static_cast(pSource->x)* (1.0f / 32767.0f)), + (pSource->y == -32768) ? -1.f : (static_cast(pSource->y)* (1.0f / 32767.0f)), + 0.0f, + 0.0f + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vInt16 = vld1_dup_u32(reinterpret_cast(pSource)); + int32x4_t vInt = vmovl_s16(vreinterpret_s16_u32(vInt16)); + vInt = vandq_s32(vInt, g_XMMaskXY); + float32x4_t R = vcvtq_f32_s32(vInt); + R = vmulq_n_f32(R, 1.0f / 32767.0f); + return vmaxq_f32(R, vdupq_n_f32(-1.f)); +#elif defined(_XM_SSE_INTRINSICS_) + // Splat the two shorts in all four entries (WORD alignment okay, + // DWORD alignment preferred) + __m128 vTemp = _mm_load_ps1(reinterpret_cast(&pSource->x)); + // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0 + vTemp = _mm_and_ps(vTemp, g_XMMaskX16Y16); + // x needs to be sign extended + vTemp = _mm_xor_ps(vTemp, g_XMFlipX16Y16); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // x - 0x8000 to undo the signed order. 
+ vTemp = _mm_add_ps(vTemp, g_XMFixX16Y16); + // Convert -1.0f - 1.0f + vTemp = _mm_mul_ps(vTemp, g_XMNormalizeX16Y16); + // Clamp result (for case of -32768) + return _mm_max_ps(vTemp, g_XMNegativeOne); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadShort2(const XMSHORT2* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + static_cast(pSource->x), + static_cast(pSource->y), + 0.f, + 0.f + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vInt16 = vld1_dup_u32(reinterpret_cast(pSource)); + int32x4_t vInt = vmovl_s16(vreinterpret_s16_u32(vInt16)); + vInt = vandq_s32(vInt, g_XMMaskXY); + return vcvtq_f32_s32(vInt); +#elif defined(_XM_SSE_INTRINSICS_) + // Splat the two shorts in all four entries (WORD alignment okay, + // DWORD alignment preferred) + __m128 vTemp = _mm_load_ps1(reinterpret_cast(&pSource->x)); + // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0 + vTemp = _mm_and_ps(vTemp, g_XMMaskX16Y16); + // x needs to be sign extended + vTemp = _mm_xor_ps(vTemp, g_XMFlipX16Y16); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // x - 0x8000 to undo the signed order. 
+ vTemp = _mm_add_ps(vTemp, g_XMFixX16Y16); + // Y is 65536 too large + return _mm_mul_ps(vTemp, g_XMFixupY16); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadUShortN2(const XMUSHORTN2* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + static_cast(pSource->x) / 65535.0f, + static_cast(pSource->y) / 65535.0f, + 0.f, + 0.f + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vInt16 = vld1_dup_u32(reinterpret_cast(pSource)); + uint32x4_t vInt = vmovl_u16(vreinterpret_u16_u32(vInt16)); + vInt = vandq_u32(vInt, g_XMMaskXY); + float32x4_t R = vcvtq_f32_u32(vInt); + R = vmulq_n_f32(R, 1.0f / 65535.0f); + return vmaxq_f32(R, vdupq_n_f32(-1.f)); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 FixupY16 = { { { 1.0f / 65535.0f, 1.0f / (65535.0f * 65536.0f), 0.0f, 0.0f } } }; + static const XMVECTORF32 FixaddY16 = { { { 0, 32768.0f * 65536.0f, 0, 0 } } }; + // Splat the two shorts in all four entries (WORD alignment okay, + // DWORD alignment preferred) + __m128 vTemp = _mm_load_ps1(reinterpret_cast(&pSource->x)); + // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0 + vTemp = _mm_and_ps(vTemp, g_XMMaskX16Y16); + // y needs to be sign flipped + vTemp = _mm_xor_ps(vTemp, g_XMFlipY); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // y + 0x8000 to undo the signed order. 
+ vTemp = _mm_add_ps(vTemp, FixaddY16); + // Y is 65536 times too large + vTemp = _mm_mul_ps(vTemp, FixupY16); + return vTemp; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadUShort2(const XMUSHORT2* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + static_cast(pSource->x), + static_cast(pSource->y), + 0.f, + 0.f + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vInt16 = vld1_dup_u32(reinterpret_cast(pSource)); + uint32x4_t vInt = vmovl_u16(vreinterpret_u16_u32(vInt16)); + vInt = vandq_u32(vInt, g_XMMaskXY); + return vcvtq_f32_u32(vInt); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 FixaddY16 = { { { 0, 32768.0f, 0, 0 } } }; + // Splat the two shorts in all four entries (WORD alignment okay, + // DWORD alignment preferred) + __m128 vTemp = _mm_load_ps1(reinterpret_cast(&pSource->x)); + // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0 + vTemp = _mm_and_ps(vTemp, g_XMMaskX16Y16); + // y needs to be sign flipped + vTemp = _mm_xor_ps(vTemp, g_XMFlipY); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // Y is 65536 times too large + vTemp = _mm_mul_ps(vTemp, g_XMFixupY16); + // y + 0x8000 to undo the signed order. + vTemp = _mm_add_ps(vTemp, FixaddY16); + return vTemp; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadByteN2(const XMBYTEN2* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + (pSource->x == -128) ? -1.f : (static_cast(pSource->x)* (1.0f / 127.0f)), + (pSource->y == -128) ? 
-1.f : (static_cast(pSource->y)* (1.0f / 127.0f)), + 0.0f, + 0.0f + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint16x4_t vInt8 = vld1_dup_u16(reinterpret_cast(pSource)); + int16x8_t vInt16 = vmovl_s8(vreinterpret_s8_u16(vInt8)); + int32x4_t vInt = vmovl_s16(vget_low_s16(vInt16)); + vInt = vandq_s32(vInt, g_XMMaskXY); + float32x4_t R = vcvtq_f32_s32(vInt); + R = vmulq_n_f32(R, 1.0f / 127.0f); + return vmaxq_f32(R, vdupq_n_f32(-1.f)); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 Scale = { { { 1.0f / 127.0f, 1.0f / (127.0f * 256.0f), 0, 0 } } }; + static const XMVECTORU32 Mask = { { { 0xFF, 0xFF00, 0, 0 } } }; + // Splat the color in all four entries (x,z,y,w) + XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast(&pSource->x)); + // Mask + vTemp = _mm_and_ps(vTemp, Mask); + // x,y and z are unsigned! Flip the bits to convert the order to signed + vTemp = _mm_xor_ps(vTemp, g_XMXorByte4); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // x, y and z - 0x80 to complete the conversion + vTemp = _mm_add_ps(vTemp, g_XMAddByte4); + // Fix y, z and w because they are too large + vTemp = _mm_mul_ps(vTemp, Scale); + // Clamp result (for case of -128) + return _mm_max_ps(vTemp, g_XMNegativeOne); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadByte2(const XMBYTE2* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + static_cast(pSource->x), + static_cast(pSource->y), + 0.0f, + 0.0f + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint16x4_t vInt8 = vld1_dup_u16(reinterpret_cast(pSource)); + int16x8_t vInt16 = vmovl_s8(vreinterpret_s8_u16(vInt8)); + int32x4_t vInt = vmovl_s16(vget_low_s16(vInt16)); + vInt = vandq_s32(vInt, g_XMMaskXY); + return vcvtq_f32_s32(vInt); +#elif defined(_XM_SSE_INTRINSICS_) + static const 
XMVECTORF32 Scale = { { { 1.0f, 1.0f / 256.0f, 1.0f / 65536.0f, 1.0f / (65536.0f * 256.0f) } } }; + static const XMVECTORU32 Mask = { { { 0xFF, 0xFF00, 0, 0 } } }; + // Splat the color in all four entries (x,z,y,w) + XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast(&pSource->x)); + // Mask + vTemp = _mm_and_ps(vTemp, Mask); + // x,y and z are unsigned! Flip the bits to convert the order to signed + vTemp = _mm_xor_ps(vTemp, g_XMXorByte4); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // x, y and z - 0x80 to complete the conversion + vTemp = _mm_add_ps(vTemp, g_XMAddByte4); + // Fix y, z and w because they are too large + return _mm_mul_ps(vTemp, Scale); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadUByteN2(const XMUBYTEN2* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + static_cast(pSource->x)* (1.0f / 255.0f), + static_cast(pSource->y)* (1.0f / 255.0f), + 0.0f, + 0.0f + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint16x4_t vInt8 = vld1_dup_u16(reinterpret_cast(pSource)); + uint16x8_t vInt16 = vmovl_u8(vreinterpret_u8_u16(vInt8)); + uint32x4_t vInt = vmovl_u16(vget_low_u16(vInt16)); + vInt = vandq_u32(vInt, g_XMMaskXY); + float32x4_t R = vcvtq_f32_u32(vInt); + return vmulq_n_f32(R, 1.0f / 255.0f); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 Scale = { { { 1.0f / 255.0f, 1.0f / (255.0f * 256.0f), 0, 0 } } }; + static const XMVECTORU32 Mask = { { { 0xFF, 0xFF00, 0, 0 } } }; + // Splat the color in all four entries (x,z,y,w) + XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast(&pSource->x)); + // Mask + vTemp = _mm_and_ps(vTemp, Mask); + // w is signed! 
Flip the bits to convert the order to unsigned + vTemp = _mm_xor_ps(vTemp, g_XMFlipW); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // w + 0x80 to complete the conversion + vTemp = _mm_add_ps(vTemp, g_XMAddUDec4); + // Fix y, z and w because they are too large + return _mm_mul_ps(vTemp, Scale); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadUByte2(const XMUBYTE2* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + static_cast(pSource->x), + static_cast(pSource->y), + 0.0f, + 0.0f + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint16x4_t vInt8 = vld1_dup_u16(reinterpret_cast(pSource)); + uint16x8_t vInt16 = vmovl_u8(vreinterpret_u8_u16(vInt8)); + uint32x4_t vInt = vmovl_u16(vget_low_u16(vInt16)); + vInt = vandq_u32(vInt, g_XMMaskXY); + return vcvtq_f32_u32(vInt); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 Scale = { { { 1.0f, 1.0f / 256.0f, 0, 0 } } }; + static const XMVECTORU32 Mask = { { { 0xFF, 0xFF00, 0, 0 } } }; + // Splat the color in all four entries (x,z,y,w) + XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast(&pSource->x)); + // Mask + vTemp = _mm_and_ps(vTemp, Mask); + // w is signed! 
Flip the bits to convert the order to unsigned + vTemp = _mm_xor_ps(vTemp, g_XMFlipW); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // w + 0x80 to complete the conversion + vTemp = _mm_add_ps(vTemp, g_XMAddUDec4); + // Fix y, z and w because they are too large + return _mm_mul_ps(vTemp, Scale); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadU565(const XMU565* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + float(pSource->v & 0x1F), + float((pSource->v >> 5) & 0x3F), + float((pSource->v >> 11) & 0x1F), + 0.f, + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORI32 U565And = { { { 0x1F, 0x3F << 5, 0x1F << 11, 0 } } }; + static const XMVECTORF32 U565Mul = { { { 1.0f, 1.0f / 32.0f, 1.0f / 2048.f, 0 } } }; + uint16x4_t vInt16 = vld1_dup_u16(reinterpret_cast(pSource)); + uint32x4_t vInt = vmovl_u16(vInt16); + vInt = vandq_u32(vInt, U565And); + float32x4_t R = vcvtq_f32_u32(vInt); + return vmulq_f32(R, U565Mul); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORI32 U565And = { { { 0x1F, 0x3F << 5, 0x1F << 11, 0 } } }; + static const XMVECTORF32 U565Mul = { { { 1.0f, 1.0f / 32.0f, 1.0f / 2048.f, 0 } } }; + // Get the 32 bit value and splat it + XMVECTOR vResult = _mm_load_ps1(reinterpret_cast(&pSource->v)); + // Mask off x, y and z + vResult = _mm_and_ps(vResult, U565And); + // Convert to float + vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); + // Normalize x, y, and z + vResult = _mm_mul_ps(vResult, U565Mul); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadFloat3PK(const XMFLOAT3PK* pSource) noexcept +{ + assert(pSource); + + XM_ALIGNED_DATA(16) uint32_t Result[4]; + uint32_t Mantissa; 
+ uint32_t Exponent; + + // X Channel (6-bit mantissa) + Mantissa = pSource->xm; + + if (pSource->xe == 0x1f) // INF or NAN + { + Result[0] = static_cast(0x7f800000 | (static_cast(pSource->xm) << 17)); + } + else + { + if (pSource->xe != 0) // The value is normalized + { + Exponent = pSource->xe; + } + else if (Mantissa != 0) // The value is denormalized + { + // Normalize the value in the resulting float + Exponent = 1; + + do + { + Exponent--; + Mantissa <<= 1; + } while ((Mantissa & 0x40) == 0); + + Mantissa &= 0x3F; + } + else // The value is zero + { + Exponent = static_cast(-112); + } + + Result[0] = ((Exponent + 112) << 23) | (Mantissa << 17); + } + + // Y Channel (6-bit mantissa) + Mantissa = pSource->ym; + + if (pSource->ye == 0x1f) // INF or NAN + { + Result[1] = static_cast(0x7f800000 | (static_cast(pSource->ym) << 17)); + } + else + { + if (pSource->ye != 0) // The value is normalized + { + Exponent = pSource->ye; + } + else if (Mantissa != 0) // The value is denormalized + { + // Normalize the value in the resulting float + Exponent = 1; + + do + { + Exponent--; + Mantissa <<= 1; + } while ((Mantissa & 0x40) == 0); + + Mantissa &= 0x3F; + } + else // The value is zero + { + Exponent = static_cast(-112); + } + + Result[1] = ((Exponent + 112) << 23) | (Mantissa << 17); + } + + // Z Channel (5-bit mantissa) + Mantissa = pSource->zm; + + if (pSource->ze == 0x1f) // INF or NAN + { + Result[2] = static_cast(0x7f800000 | (static_cast(pSource->zm) << 17)); + } + else + { + if (pSource->ze != 0) // The value is normalized + { + Exponent = pSource->ze; + } + else if (Mantissa != 0) // The value is denormalized + { + // Normalize the value in the resulting float + Exponent = 1; + + do + { + Exponent--; + Mantissa <<= 1; + } while ((Mantissa & 0x20) == 0); + + Mantissa &= 0x1F; + } + else // The value is zero + { + Exponent = static_cast(-112); + } + + Result[2] = ((Exponent + 112) << 23) | (Mantissa << 18); + } + + return XMLoadFloat3A(reinterpret_cast(&Result)); 
+} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadFloat3SE(const XMFLOAT3SE* pSource) noexcept +{ + assert(pSource); + + union { float f; int32_t i; } fi; + fi.i = 0x33800000 + (pSource->e << 23); + float Scale = fi.f; + + XMVECTORF32 v = { { { + Scale * float(pSource->xm), + Scale * float(pSource->ym), + Scale * float(pSource->zm), + 1.0f } } }; + return v; +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadHalf4(const XMHALF4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + __m128i V = _mm_loadl_epi64(reinterpret_cast(pSource)); + return _mm_cvtph_ps(V); +#else + XMVECTORF32 vResult = { { { + XMConvertHalfToFloat(pSource->x), + XMConvertHalfToFloat(pSource->y), + XMConvertHalfToFloat(pSource->z), + XMConvertHalfToFloat(pSource->w) + } } }; + return vResult.v; +#endif // !_XM_F16C_INTRINSICS_ +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadShortN4(const XMSHORTN4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + (pSource->x == -32768) ? -1.f : (static_cast(pSource->x)* (1.0f / 32767.0f)), + (pSource->y == -32768) ? -1.f : (static_cast(pSource->y)* (1.0f / 32767.0f)), + (pSource->z == -32768) ? -1.f : (static_cast(pSource->z)* (1.0f / 32767.0f)), + (pSource->w == -32768) ? 
-1.f : (static_cast(pSource->w)* (1.0f / 32767.0f)) + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + int16x4_t vInt = vld1_s16(reinterpret_cast(pSource)); + int32x4_t V = vmovl_s16(vInt); + float32x4_t vResult = vcvtq_f32_s32(V); + vResult = vmulq_n_f32(vResult, 1.0f / 32767.0f); + return vmaxq_f32(vResult, vdupq_n_f32(-1.f)); +#elif defined(_XM_SSE_INTRINSICS_) + // Splat the color in all four entries (x,z,y,w) + __m128d vIntd = _mm_load1_pd(reinterpret_cast(&pSource->x)); + // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000 + __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd), g_XMMaskX16Y16Z16W16); + // x and z are unsigned! Flip the bits to convert the order to signed + vTemp = _mm_xor_ps(vTemp, g_XMFlipX16Y16Z16W16); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // x and z - 0x8000 to complete the conversion + vTemp = _mm_add_ps(vTemp, g_XMFixX16Y16Z16W16); + // Convert to -1.0f - 1.0f + vTemp = _mm_mul_ps(vTemp, g_XMNormalizeX16Y16Z16W16); + // Very important! 
The entries are x,z,y,w, flip it to x,y,z,w + vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 1, 2, 0)); + // Clamp result (for case of -32768) + return _mm_max_ps(vTemp, g_XMNegativeOne); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadShort4(const XMSHORT4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + static_cast(pSource->x), + static_cast(pSource->y), + static_cast(pSource->z), + static_cast(pSource->w) + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + int16x4_t vInt = vld1_s16(reinterpret_cast(pSource)); + int32x4_t V = vmovl_s16(vInt); + return vcvtq_f32_s32(V); +#elif defined(_XM_SSE_INTRINSICS_) + // Splat the color in all four entries (x,z,y,w) + __m128d vIntd = _mm_load1_pd(reinterpret_cast(&pSource->x)); + // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000 + __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd), g_XMMaskX16Y16Z16W16); + // x and z are unsigned! Flip the bits to convert the order to signed + vTemp = _mm_xor_ps(vTemp, g_XMFlipX16Y16Z16W16); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // x and z - 0x8000 to complete the conversion + vTemp = _mm_add_ps(vTemp, g_XMFixX16Y16Z16W16); + // Fix y and w because they are 65536 too large + vTemp = _mm_mul_ps(vTemp, g_XMFixupY16W16); + // Very important! 
The entries are x,z,y,w, flip it to x,y,z,w + return XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 1, 2, 0)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadUShortN4(const XMUSHORTN4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + static_cast(pSource->x) / 65535.0f, + static_cast(pSource->y) / 65535.0f, + static_cast(pSource->z) / 65535.0f, + static_cast(pSource->w) / 65535.0f + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint16x4_t vInt = vld1_u16(reinterpret_cast(pSource)); + uint32x4_t V = vmovl_u16(vInt); + float32x4_t vResult = vcvtq_f32_u32(V); + return vmulq_n_f32(vResult, 1.0f / 65535.0f); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 FixupY16W16 = { { { 1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / (65535.0f * 65536.0f), 1.0f / (65535.0f * 65536.0f) } } }; + static const XMVECTORF32 FixaddY16W16 = { { { 0, 0, 32768.0f * 65536.0f, 32768.0f * 65536.0f } } }; + // Splat the color in all four entries (x,z,y,w) + __m128d vIntd = _mm_load1_pd(reinterpret_cast(&pSource->x)); + // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000 + __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd), g_XMMaskX16Y16Z16W16); + // y and w are signed! Flip the bits to convert the order to unsigned + vTemp = _mm_xor_ps(vTemp, g_XMFlipZW); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // y and w + 0x8000 to complete the conversion + vTemp = _mm_add_ps(vTemp, FixaddY16W16); + // Fix y and w because they are 65536 too large + vTemp = _mm_mul_ps(vTemp, FixupY16W16); + // Very important! 
The entries are x,z,y,w, flip it to x,y,z,w + return XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 1, 2, 0)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadUShort4(const XMUSHORT4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + static_cast(pSource->x), + static_cast(pSource->y), + static_cast(pSource->z), + static_cast(pSource->w) + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint16x4_t vInt = vld1_u16(reinterpret_cast(pSource)); + uint32x4_t V = vmovl_u16(vInt); + return vcvtq_f32_u32(V); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 FixaddY16W16 = { { { 0, 0, 32768.0f, 32768.0f } } }; + // Splat the color in all four entries (x,z,y,w) + __m128d vIntd = _mm_load1_pd(reinterpret_cast(&pSource->x)); + // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000 + __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd), g_XMMaskX16Y16Z16W16); + // y and w are signed! Flip the bits to convert the order to unsigned + vTemp = _mm_xor_ps(vTemp, g_XMFlipZW); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // Fix y and w because they are 65536 too large + vTemp = _mm_mul_ps(vTemp, g_XMFixupY16W16); + // y and w + 0x8000 to complete the conversion + vTemp = _mm_add_ps(vTemp, FixaddY16W16); + // Very important! 
The entries are x,z,y,w, flip it to x,y,z,w + return XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 1, 2, 0)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadXDecN4(const XMXDECN4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + static const uint32_t SignExtend[] = { 0x00000000, 0xFFFFFC00 }; + + uint32_t ElementX = pSource->v & 0x3FF; + uint32_t ElementY = (pSource->v >> 10) & 0x3FF; + uint32_t ElementZ = (pSource->v >> 20) & 0x3FF; + + XMVECTORF32 vResult = { { { + (ElementX == 0x200) ? -1.f : (static_cast(static_cast(ElementX | SignExtend[ElementX >> 9])) / 511.0f), + (ElementY == 0x200) ? -1.f : (static_cast(static_cast(ElementY | SignExtend[ElementY >> 9])) / 511.0f), + (ElementZ == 0x200) ? -1.f : (static_cast(static_cast(ElementZ | SignExtend[ElementZ >> 9])) / 511.0f), + static_cast(pSource->v >> 30) / 3.0f + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vInt = vld1q_dup_u32(reinterpret_cast(pSource)); + vInt = vandq_u32(vInt, g_XMMaskA2B10G10R10); + vInt = veorq_u32(vInt, g_XMFlipA2B10G10R10); + float32x4_t R = vcvtq_f32_s32(vreinterpretq_s32_u32(vInt)); + R = vaddq_f32(R, g_XMFixAA2B10G10R10); + R = vmulq_f32(R, g_XMNormalizeA2B10G10R10); + return vmaxq_f32(R, vdupq_n_f32(-1.0f)); +#elif defined(_XM_SSE_INTRINSICS_) + // Splat the color in all four entries + __m128 vTemp = _mm_load_ps1(reinterpret_cast(&pSource->v)); + // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 + vTemp = _mm_and_ps(vTemp, g_XMMaskA2B10G10R10); + // a is unsigned! Flip the bit to convert the order to signed + vTemp = _mm_xor_ps(vTemp, g_XMFlipA2B10G10R10); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // RGB + 0, A + 0x80000000.f to undo the signed order. 
+ vTemp = _mm_add_ps(vTemp, g_XMFixAA2B10G10R10); + // Convert 0-255 to 0.0f-1.0f + vTemp = _mm_mul_ps(vTemp, g_XMNormalizeA2B10G10R10); + // Clamp result (for case of -512) + return _mm_max_ps(vTemp, g_XMNegativeOne); +#endif +} + +//------------------------------------------------------------------------------ +#pragma warning(push) +#pragma warning(disable : 4996) +// C4996: ignore deprecation warning + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif + +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadXDec4(const XMXDEC4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + static const uint32_t SignExtend[] = { 0x00000000, 0xFFFFFC00 }; + + uint32_t ElementX = pSource->v & 0x3FF; + uint32_t ElementY = (pSource->v >> 10) & 0x3FF; + uint32_t ElementZ = (pSource->v >> 20) & 0x3FF; + + XMVECTORF32 vResult = { { { + static_cast(static_cast(ElementX | SignExtend[ElementX >> 9])), + static_cast(static_cast(ElementY | SignExtend[ElementY >> 9])), + static_cast(static_cast(ElementZ | SignExtend[ElementZ >> 9])), + static_cast(pSource->v >> 30) + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORU32 XDec4Xor = { { { 0x200, 0x200 << 10, 0x200 << 20, 0x80000000 } } }; + static const XMVECTORF32 XDec4Add = { { { -512.0f, -512.0f * 1024.0f, -512.0f * 1024.0f * 1024.0f, 32768 * 65536.0f } } }; + uint32x4_t vInt = vld1q_dup_u32(reinterpret_cast(pSource)); + vInt = vandq_u32(vInt, g_XMMaskDec4); + vInt = veorq_u32(vInt, XDec4Xor); + float32x4_t R = vcvtq_f32_s32(vreinterpretq_s32_u32(vInt)); + R = vaddq_f32(R, XDec4Add); + return vmulq_f32(R, g_XMMulDec4); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORU32 XDec4Xor = { { { 0x200, 0x200 << 10, 0x200 << 20, 0x80000000 } } }; + static const XMVECTORF32 XDec4Add = { { { -512.0f, -512.0f * 1024.0f, -512.0f * 1024.0f * 1024.0f, 32768 * 65536.0f } } }; + // Splat the color in all four 
entries + XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast(&pSource->v)); + // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 + vTemp = _mm_and_ps(vTemp, g_XMMaskDec4); + // a is unsigned! Flip the bit to convert the order to signed + vTemp = _mm_xor_ps(vTemp, XDec4Xor); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // RGB + 0, A + 0x80000000.f to undo the signed order. + vTemp = _mm_add_ps(vTemp, XDec4Add); + // Convert 0-255 to 0.0f-1.0f + vTemp = _mm_mul_ps(vTemp, g_XMMulDec4); + return vTemp; +#endif +} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +#pragma warning(pop) + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadUDecN4(const XMUDECN4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + + uint32_t ElementX = pSource->v & 0x3FF; + uint32_t ElementY = (pSource->v >> 10) & 0x3FF; + uint32_t ElementZ = (pSource->v >> 20) & 0x3FF; + + XMVECTORF32 vResult = { { { + static_cast(ElementX) / 1023.0f, + static_cast(ElementY) / 1023.0f, + static_cast(ElementZ) / 1023.0f, + static_cast(pSource->v >> 30) / 3.0f + } } }; + return vResult.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 UDecN4Mul = { { { 1.0f / 1023.0f, 1.0f / (1023.0f * 1024.0f), 1.0f / (1023.0f * 1024.0f * 1024.0f), 1.0f / (3.0f * 1024.0f * 1024.0f * 1024.0f) } } }; + uint32x4_t vInt = vld1q_dup_u32(reinterpret_cast(pSource)); + vInt = vandq_u32(vInt, g_XMMaskDec4); + float32x4_t R = vcvtq_f32_u32(vInt); + return vmulq_f32(R, UDecN4Mul); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 UDecN4Mul = { { { 1.0f / 1023.0f, 1.0f / (1023.0f * 1024.0f), 1.0f / (1023.0f * 1024.0f * 1024.0f), 1.0f / (3.0f * 1024.0f * 1024.0f * 1024.0f) } } }; + // Splat the color in all four entries + XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast(&pSource->v)); + // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 + 
vTemp = _mm_and_ps(vTemp, g_XMMaskDec4); + // a is unsigned! Flip the bit to convert the order to signed + vTemp = _mm_xor_ps(vTemp, g_XMFlipW); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // RGB + 0, A + 0x80000000.f to undo the signed order. + vTemp = _mm_add_ps(vTemp, g_XMAddUDec4); + // Convert 0-255 to 0.0f-1.0f + vTemp = _mm_mul_ps(vTemp, UDecN4Mul); + return vTemp; +#endif +} + + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadUDecN4_XR(const XMUDECN4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + + int32_t ElementX = pSource->v & 0x3FF; + int32_t ElementY = (pSource->v >> 10) & 0x3FF; + int32_t ElementZ = (pSource->v >> 20) & 0x3FF; + + XMVECTORF32 vResult = { { { + static_cast(ElementX - 0x180) / 510.0f, + static_cast(ElementY - 0x180) / 510.0f, + static_cast(ElementZ - 0x180) / 510.0f, + static_cast(pSource->v >> 30) / 3.0f + } } }; + + return vResult.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 XRMul = { { { 1.0f / 510.0f, 1.0f / (510.0f * 1024.0f), 1.0f / (510.0f * 1024.0f * 1024.0f), 1.0f / (3.0f * 1024.0f * 1024.0f * 1024.0f) } } }; + static const XMVECTORI32 XRBias = { { { 0x180, 0x180 * 1024, 0x180 * 1024 * 1024, 0 } } }; + uint32x4_t vInt = vld1q_dup_u32(reinterpret_cast(pSource)); + vInt = vandq_u32(vInt, g_XMMaskDec4); + int32x4_t vTemp = vsubq_s32(vreinterpretq_s32_u32(vInt), XRBias); + vTemp = veorq_s32(vTemp, g_XMFlipW); + float32x4_t R = vcvtq_f32_s32(vTemp); + R = vaddq_f32(R, g_XMAddUDec4); + return vmulq_f32(R, XRMul); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 XRMul = { { { 1.0f / 510.0f, 1.0f / (510.0f * 1024.0f), 1.0f / (510.0f * 1024.0f * 1024.0f), 1.0f / (3.0f * 1024.0f * 1024.0f * 1024.0f) } } }; + static const XMVECTORI32 XRBias = { { { 0x180, 0x180 * 1024, 0x180 * 1024 * 1024, 0 } } }; + // Splat the color in all 
four entries + XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast(&pSource->v)); + // Mask channels + vTemp = _mm_and_ps(vTemp, g_XMMaskDec4); + // Subtract bias + vTemp = _mm_castsi128_ps(_mm_sub_epi32(_mm_castps_si128(vTemp), XRBias)); + // a is unsigned! Flip the bit to convert the order to signed + vTemp = _mm_xor_ps(vTemp, g_XMFlipW); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // RGB + 0, A + 0x80000000.f to undo the signed order. + vTemp = _mm_add_ps(vTemp, g_XMAddUDec4); + // Convert to 0.0f-1.0f + return _mm_mul_ps(vTemp, XRMul); +#endif +} + + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadUDec4(const XMUDEC4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + uint32_t ElementX = pSource->v & 0x3FF; + uint32_t ElementY = (pSource->v >> 10) & 0x3FF; + uint32_t ElementZ = (pSource->v >> 20) & 0x3FF; + + XMVECTORF32 vResult = { { { + static_cast(ElementX), + static_cast(ElementY), + static_cast(ElementZ), + static_cast(pSource->v >> 30) + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vInt = vld1q_dup_u32(reinterpret_cast(pSource)); + vInt = vandq_u32(vInt, g_XMMaskDec4); + float32x4_t R = vcvtq_f32_u32(vInt); + return vmulq_f32(R, g_XMMulDec4); +#elif defined(_XM_SSE_INTRINSICS_) + // Splat the color in all four entries + XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast(&pSource->v)); + // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 + vTemp = _mm_and_ps(vTemp, g_XMMaskDec4); + // a is unsigned! Flip the bit to convert the order to signed + vTemp = _mm_xor_ps(vTemp, g_XMFlipW); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // RGB + 0, A + 0x80000000.f to undo the signed order. 
+ vTemp = _mm_add_ps(vTemp, g_XMAddUDec4); + // Convert 0-255 to 0.0f-1.0f + vTemp = _mm_mul_ps(vTemp, g_XMMulDec4); + return vTemp; +#endif +} + +//------------------------------------------------------------------------------ +#pragma warning(push) +#pragma warning(disable : 4996) +// C4996: ignore deprecation warning + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif + +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadDecN4(const XMDECN4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + static const uint32_t SignExtend[] = { 0x00000000, 0xFFFFFC00 }; + static const uint32_t SignExtendW[] = { 0x00000000, 0xFFFFFFFC }; + + uint32_t ElementX = pSource->v & 0x3FF; + uint32_t ElementY = (pSource->v >> 10) & 0x3FF; + uint32_t ElementZ = (pSource->v >> 20) & 0x3FF; + uint32_t ElementW = pSource->v >> 30; + + XMVECTORF32 vResult = { { { + (ElementX == 0x200) ? -1.f : (static_cast(static_cast(ElementX | SignExtend[ElementX >> 9])) / 511.0f), + (ElementY == 0x200) ? -1.f : (static_cast(static_cast(ElementY | SignExtend[ElementY >> 9])) / 511.0f), + (ElementZ == 0x200) ? -1.f : (static_cast(static_cast(ElementZ | SignExtend[ElementZ >> 9])) / 511.0f), + (ElementW == 0x2) ? 
-1.f : static_cast(static_cast(ElementW | SignExtendW[(ElementW >> 1) & 1])) + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 DecN4Mul = { { { 1.0f / 511.0f, 1.0f / (511.0f * 1024.0f), 1.0f / (511.0f * 1024.0f * 1024.0f), 1.0f / (1024.0f * 1024.0f * 1024.0f) } } }; + uint32x4_t vInt = vld1q_dup_u32(reinterpret_cast(pSource)); + vInt = vandq_u32(vInt, g_XMMaskDec4); + vInt = veorq_u32(vInt, g_XMXorDec4); + float32x4_t R = vcvtq_f32_s32(vreinterpretq_s32_u32(vInt)); + R = vaddq_f32(R, g_XMAddDec4); + R = vmulq_f32(R, DecN4Mul); + return vmaxq_f32(R, vdupq_n_f32(-1.0f)); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 DecN4Mul = { { { 1.0f / 511.0f, 1.0f / (511.0f * 1024.0f), 1.0f / (511.0f * 1024.0f * 1024.0f), 1.0f / (1024.0f * 1024.0f * 1024.0f) } } }; + // Splat the color in all four entries + XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast(&pSource->v)); + // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 + vTemp = _mm_and_ps(vTemp, g_XMMaskDec4); + // a is unsigned! Flip the bit to convert the order to signed + vTemp = _mm_xor_ps(vTemp, g_XMXorDec4); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // RGB + 0, A + 0x80000000.f to undo the signed order. 
+ vTemp = _mm_add_ps(vTemp, g_XMAddDec4); + // Convert 0-255 to 0.0f-1.0f + vTemp = _mm_mul_ps(vTemp, DecN4Mul); + // Clamp result (for case of -512/-1) + return _mm_max_ps(vTemp, g_XMNegativeOne); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadDec4(const XMDEC4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + static const uint32_t SignExtend[] = { 0x00000000, 0xFFFFFC00 }; + static const uint32_t SignExtendW[] = { 0x00000000, 0xFFFFFFFC }; + + uint32_t ElementX = pSource->v & 0x3FF; + uint32_t ElementY = (pSource->v >> 10) & 0x3FF; + uint32_t ElementZ = (pSource->v >> 20) & 0x3FF; + uint32_t ElementW = pSource->v >> 30; + + XMVECTORF32 vResult = { { { + static_cast(static_cast(ElementX | SignExtend[ElementX >> 9])), + static_cast(static_cast(ElementY | SignExtend[ElementY >> 9])), + static_cast(static_cast(ElementZ | SignExtend[ElementZ >> 9])), + static_cast(static_cast(ElementW | SignExtendW[ElementW >> 1])) + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x4_t vInt = vld1q_dup_u32(reinterpret_cast(pSource)); + vInt = vandq_u32(vInt, g_XMMaskDec4); + vInt = veorq_u32(vInt, g_XMXorDec4); + float32x4_t R = vcvtq_f32_s32(vreinterpretq_s32_u32(vInt)); + R = vaddq_f32(R, g_XMAddDec4); + return vmulq_f32(R, g_XMMulDec4); +#elif defined(_XM_SSE_INTRINSICS_) + // Splat the color in all four entries + XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast(&pSource->v)); + // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 + vTemp = _mm_and_ps(vTemp, g_XMMaskDec4); + // a is unsigned! Flip the bit to convert the order to signed + vTemp = _mm_xor_ps(vTemp, g_XMXorDec4); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // RGB + 0, A + 0x80000000.f to undo the signed order. 
+ vTemp = _mm_add_ps(vTemp, g_XMAddDec4); + // Convert 0-255 to 0.0f-1.0f + vTemp = _mm_mul_ps(vTemp, g_XMMulDec4); + return vTemp; +#endif +} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +#pragma warning(pop) + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadUByteN4(const XMUBYTEN4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + static_cast(pSource->x) / 255.0f, + static_cast(pSource->y) / 255.0f, + static_cast(pSource->z) / 255.0f, + static_cast(pSource->w) / 255.0f + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vInt8 = vld1_dup_u32(reinterpret_cast(pSource)); + uint16x8_t vInt16 = vmovl_u8(vreinterpret_u8_u32(vInt8)); + uint32x4_t vInt = vmovl_u16(vget_low_u16(vInt16)); + float32x4_t R = vcvtq_f32_u32(vInt); + return vmulq_n_f32(R, 1.0f / 255.0f); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 LoadUByteN4Mul = { { { 1.0f / 255.0f, 1.0f / (255.0f * 256.0f), 1.0f / (255.0f * 65536.0f), 1.0f / (255.0f * 65536.0f * 256.0f) } } }; + // Splat the color in all four entries (x,z,y,w) + XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast(&pSource->x)); + // Mask x&0ff,y&0xff00,z&0xff0000,w&0xff000000 + vTemp = _mm_and_ps(vTemp, g_XMMaskByte4); + // w is signed! 
Flip the bits to convert the order to unsigned + vTemp = _mm_xor_ps(vTemp, g_XMFlipW); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // w + 0x80 to complete the conversion + vTemp = _mm_add_ps(vTemp, g_XMAddUDec4); + // Fix y, z and w because they are too large + vTemp = _mm_mul_ps(vTemp, LoadUByteN4Mul); + return vTemp; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadUByte4(const XMUBYTE4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + static_cast(pSource->x), + static_cast(pSource->y), + static_cast(pSource->z), + static_cast(pSource->w) + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vInt8 = vld1_dup_u32(reinterpret_cast(pSource)); + uint16x8_t vInt16 = vmovl_u8(vreinterpret_u8_u32(vInt8)); + uint32x4_t vInt = vmovl_u16(vget_low_u16(vInt16)); + return vcvtq_f32_u32(vInt); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 LoadUByte4Mul = { { { 1.0f, 1.0f / 256.0f, 1.0f / 65536.0f, 1.0f / (65536.0f * 256.0f) } } }; + // Splat the color in all four entries (x,z,y,w) + XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast(&pSource->x)); + // Mask x&0ff,y&0xff00,z&0xff0000,w&0xff000000 + vTemp = _mm_and_ps(vTemp, g_XMMaskByte4); + // w is signed! 
Flip the bits to convert the order to unsigned + vTemp = _mm_xor_ps(vTemp, g_XMFlipW); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // w + 0x80 to complete the conversion + vTemp = _mm_add_ps(vTemp, g_XMAddUDec4); + // Fix y, z and w because they are too large + vTemp = _mm_mul_ps(vTemp, LoadUByte4Mul); + return vTemp; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadByteN4(const XMBYTEN4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + (pSource->x == -128) ? -1.f : (static_cast(pSource->x) / 127.0f), + (pSource->y == -128) ? -1.f : (static_cast(pSource->y) / 127.0f), + (pSource->z == -128) ? -1.f : (static_cast(pSource->z) / 127.0f), + (pSource->w == -128) ? -1.f : (static_cast(pSource->w) / 127.0f) + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vInt8 = vld1_dup_u32(reinterpret_cast(pSource)); + int16x8_t vInt16 = vmovl_s8(vreinterpret_s8_u32(vInt8)); + int32x4_t vInt = vmovl_s16(vget_low_s16(vInt16)); + float32x4_t R = vcvtq_f32_s32(vInt); + R = vmulq_n_f32(R, 1.0f / 127.0f); + return vmaxq_f32(R, vdupq_n_f32(-1.f)); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 LoadByteN4Mul = { { { 1.0f / 127.0f, 1.0f / (127.0f * 256.0f), 1.0f / (127.0f * 65536.0f), 1.0f / (127.0f * 65536.0f * 256.0f) } } }; + // Splat the color in all four entries (x,z,y,w) + XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast(&pSource->x)); + // Mask x&0ff,y&0xff00,z&0xff0000,w&0xff000000 + vTemp = _mm_and_ps(vTemp, g_XMMaskByte4); + // x,y and z are unsigned! 
Flip the bits to convert the order to signed + vTemp = _mm_xor_ps(vTemp, g_XMXorByte4); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // x, y and z - 0x80 to complete the conversion + vTemp = _mm_add_ps(vTemp, g_XMAddByte4); + // Fix y, z and w because they are too large + vTemp = _mm_mul_ps(vTemp, LoadByteN4Mul); + // Clamp result (for case of -128) + return _mm_max_ps(vTemp, g_XMNegativeOne); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadByte4(const XMBYTE4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + static_cast(pSource->x), + static_cast(pSource->y), + static_cast(pSource->z), + static_cast(pSource->w) + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + uint32x2_t vInt8 = vld1_dup_u32(reinterpret_cast(pSource)); + int16x8_t vInt16 = vmovl_s8(vreinterpret_s8_u32(vInt8)); + int32x4_t vInt = vmovl_s16(vget_low_s16(vInt16)); + return vcvtq_f32_s32(vInt); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 LoadByte4Mul = { { { 1.0f, 1.0f / 256.0f, 1.0f / 65536.0f, 1.0f / (65536.0f * 256.0f) } } }; + // Splat the color in all four entries (x,z,y,w) + XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast(&pSource->x)); + // Mask x&0ff,y&0xff00,z&0xff0000,w&0xff000000 + vTemp = _mm_and_ps(vTemp, g_XMMaskByte4); + // x,y and z are unsigned! 
Flip the bits to convert the order to signed + vTemp = _mm_xor_ps(vTemp, g_XMXorByte4); + // Convert to floating point numbers + vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); + // x, y and z - 0x80 to complete the conversion + vTemp = _mm_add_ps(vTemp, g_XMAddByte4); + // Fix y, z and w because they are too large + vTemp = _mm_mul_ps(vTemp, LoadByte4Mul); + return vTemp; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadUNibble4(const XMUNIBBLE4* pSource) noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + float(pSource->v & 0xF), + float((pSource->v >> 4) & 0xF), + float((pSource->v >> 8) & 0xF), + float((pSource->v >> 12) & 0xF) + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORI32 UNibble4And = { { { 0xF, 0xF0, 0xF00, 0xF000 } } }; + static const XMVECTORF32 UNibble4Mul = { { { 1.0f, 1.0f / 16.f, 1.0f / 256.f, 1.0f / 4096.f } } }; + uint16x4_t vInt16 = vld1_dup_u16(reinterpret_cast(pSource)); + uint32x4_t vInt = vmovl_u16(vInt16); + vInt = vandq_u32(vInt, UNibble4And); + float32x4_t R = vcvtq_f32_u32(vInt); + return vmulq_f32(R, UNibble4Mul); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORI32 UNibble4And = { { { 0xF, 0xF0, 0xF00, 0xF000 } } }; + static const XMVECTORF32 UNibble4Mul = { { { 1.0f, 1.0f / 16.f, 1.0f / 256.f, 1.0f / 4096.f } } }; + // Get the 32 bit value and splat it + XMVECTOR vResult = _mm_load_ps1(reinterpret_cast(&pSource->v)); + // Mask off x, y and z + vResult = _mm_and_ps(vResult, UNibble4And); + // Convert to float + vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); + // Normalize x, y, and z + vResult = _mm_mul_ps(vResult, UNibble4Mul); + return vResult; +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMVECTOR XM_CALLCONV XMLoadU555(const XMU555* pSource) 
noexcept +{ + assert(pSource); +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { { { + float(pSource->v & 0x1F), + float((pSource->v >> 5) & 0x1F), + float((pSource->v >> 10) & 0x1F), + float((pSource->v >> 15) & 0x1) + } } }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORI32 U555And = { { { 0x1F, 0x1F << 5, 0x1F << 10, 0x8000 } } }; + static const XMVECTORF32 U555Mul = { { { 1.0f, 1.0f / 32.f, 1.0f / 1024.f, 1.0f / 32768.f } } }; + uint16x4_t vInt16 = vld1_dup_u16(reinterpret_cast(pSource)); + uint32x4_t vInt = vmovl_u16(vInt16); + vInt = vandq_u32(vInt, U555And); + float32x4_t R = vcvtq_f32_u32(vInt); + return vmulq_f32(R, U555Mul); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORI32 U555And = { { { 0x1F, 0x1F << 5, 0x1F << 10, 0x8000 } } }; + static const XMVECTORF32 U555Mul = { { { 1.0f, 1.0f / 32.f, 1.0f / 1024.f, 1.0f / 32768.f } } }; + // Get the 32 bit value and splat it + XMVECTOR vResult = _mm_load_ps1(reinterpret_cast(&pSource->v)); + // Mask off x, y and z + vResult = _mm_and_ps(vResult, U555And); + // Convert to float + vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); + // Normalize x, y, and z + vResult = _mm_mul_ps(vResult, U555Mul); + return vResult; +#endif +} + +#ifdef _PREFAST_ +#pragma prefast(pop) +#endif + +/**************************************************************************** + * + * Vector and matrix store operations + * + ****************************************************************************/ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreColor +( + XMCOLOR* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorSaturate(V); + N = XMVectorMultiply(N, g_UByteMax); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->c = (static_cast(tmp.w) << 24) | + (static_cast(tmp.x) << 16) | + (static_cast(tmp.y) << 8) | + static_cast(tmp.z); + +#elif 
defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t R = vmaxq_f32(V, vdupq_n_f32(0)); + R = vminq_f32(R, vdupq_n_f32(1.0f)); + R = vmulq_n_f32(R, 255.0f); + R = XMVectorRound(R); + uint32x4_t vInt32 = vcvtq_u32_f32(R); + uint16x4_t vInt16 = vqmovn_u32(vInt32); + uint8x8_t vInt8 = vqmovn_u16(vcombine_u16(vInt16, vInt16)); + uint32_t rgba = vget_lane_u32(vreinterpret_u32_u8(vInt8), 0); + pDestination->c = (rgba & 0xFF00FF00) | ((rgba >> 16) & 0xFF) | ((rgba << 16) & 0xFF0000); +#elif defined(_XM_SSE_INTRINSICS_) + // Set <0 to 0 + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + // Set>1 to 1 + vResult = _mm_min_ps(vResult, g_XMOne); + // Convert to 0-255 + vResult = _mm_mul_ps(vResult, g_UByteMax); + // Shuffle RGBA to ARGB + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 0, 1, 2)); + // Convert to int + __m128i vInt = _mm_cvtps_epi32(vResult); + // Mash to shorts + vInt = _mm_packs_epi32(vInt, vInt); + // Mash to bytes + vInt = _mm_packus_epi16(vInt, vInt); + // Store the color + _mm_store_ss(reinterpret_cast(&pDestination->c), _mm_castsi128_ps(vInt)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreHalf2 +( + XMHALF2* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + __m128i V1 = _mm_cvtps_ph(V, _MM_FROUND_TO_NEAREST_INT); + _mm_store_ss(reinterpret_cast(pDestination), _mm_castsi128_ps(V1)); +#else + pDestination->x = XMConvertFloatToHalf(XMVectorGetX(V)); + pDestination->y = XMConvertFloatToHalf(XMVectorGetY(V)); +#endif // !_XM_F16C_INTRINSICS_ +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreShortN2 +( + XMSHORTN2* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v); + N = 
XMVectorMultiply(N, g_ShortMax); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t R = vmaxq_f32(V, vdupq_n_f32(-1.f)); + R = vminq_f32(R, vdupq_n_f32(1.0f)); + R = vmulq_n_f32(R, 32767.0f); + int32x4_t vInt32 = vcvtq_s32_f32(R); + int16x4_t vInt16 = vqmovn_s32(vInt32); + vst1_lane_u32(&pDestination->v, vreinterpret_u32_s16(vInt16), 0); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = _mm_max_ps(V, g_XMNegativeOne); + vResult = _mm_min_ps(vResult, g_XMOne); + vResult = _mm_mul_ps(vResult, g_ShortMax); + __m128i vResulti = _mm_cvtps_epi32(vResult); + vResulti = _mm_packs_epi32(vResulti, vResulti); + _mm_store_ss(reinterpret_cast(&pDestination->x), _mm_castsi128_ps(vResulti)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreShort2 +( + XMSHORT2* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, g_ShortMin, g_ShortMax); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t R = vmaxq_f32(V, vdupq_n_f32(-32767.f)); + R = vminq_f32(R, vdupq_n_f32(32767.0f)); + int32x4_t vInt32 = vcvtq_s32_f32(R); + int16x4_t vInt16 = vqmovn_s32(vInt32); + vst1_lane_u32(&pDestination->v, vreinterpret_u32_s16(vInt16), 0); +#elif defined(_XM_SSE_INTRINSICS_) + // Bounds check + XMVECTOR vResult = _mm_max_ps(V, g_ShortMin); + vResult = _mm_min_ps(vResult, g_ShortMax); + // Convert to int with rounding + __m128i vInt = _mm_cvtps_epi32(vResult); + // Pack the ints into shorts + vInt = _mm_packs_epi32(vInt, vInt); + _mm_store_ss(reinterpret_cast(&pDestination->x), _mm_castsi128_ps(vInt)); 
+#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreUShortN2 +( + XMUSHORTN2* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorSaturate(V); + N = XMVectorMultiplyAdd(N, g_UShortMax, g_XMOneHalf.v); + N = XMVectorTruncate(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t R = vmaxq_f32(V, vdupq_n_f32(0.f)); + R = vminq_f32(R, vdupq_n_f32(1.0f)); + R = vmulq_n_f32(R, 65535.0f); + R = vaddq_f32(R, g_XMOneHalf); + uint32x4_t vInt32 = vcvtq_u32_f32(R); + uint16x4_t vInt16 = vqmovn_u32(vInt32); + vst1_lane_u32(&pDestination->v, vreinterpret_u32_u16(vInt16), 0); +#elif defined(_XM_SSE_INTRINSICS_) + // Bounds check + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + vResult = _mm_min_ps(vResult, g_XMOne); + vResult = _mm_mul_ps(vResult, g_UShortMax); + vResult = _mm_add_ps(vResult, g_XMOneHalf); + // Convert to int + __m128i vInt = _mm_cvttps_epi32(vResult); + // Since the SSE pack instruction clamps using signed rules, + // manually extract the values to store them to memory + pDestination->x = static_cast(_mm_extract_epi16(vInt, 0)); + pDestination->y = static_cast(_mm_extract_epi16(vInt, 2)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreUShort2 +( + XMUSHORT2* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, XMVectorZero(), g_UShortMax); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t R = vmaxq_f32(V, vdupq_n_f32(0.f)); 
+ R = vminq_f32(R, vdupq_n_f32(65535.0f)); + uint32x4_t vInt32 = vcvtq_u32_f32(R); + uint16x4_t vInt16 = vqmovn_u32(vInt32); + vst1_lane_u32(&pDestination->v, vreinterpret_u32_u16(vInt16), 0); +#elif defined(_XM_SSE_INTRINSICS_) + // Bounds check + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + vResult = _mm_min_ps(vResult, g_UShortMax); + // Convert to int with rounding + __m128i vInt = _mm_cvtps_epi32(vResult); + // Since the SSE pack instruction clamps using signed rules, + // manually extract the values to store them to memory + pDestination->x = static_cast(_mm_extract_epi16(vInt, 0)); + pDestination->y = static_cast(_mm_extract_epi16(vInt, 2)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreByteN2 +( + XMBYTEN2* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v); + N = XMVectorMultiply(N, g_ByteMax); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t R = vmaxq_f32(V, vdupq_n_f32(-1.f)); + R = vminq_f32(R, vdupq_n_f32(1.0f)); + R = vmulq_n_f32(R, 127.0f); + int32x4_t vInt32 = vcvtq_s32_f32(R); + int16x4_t vInt16 = vqmovn_s32(vInt32); + int8x8_t vInt8 = vqmovn_s16(vcombine_s16(vInt16, vInt16)); + vst1_lane_u16(reinterpret_cast(pDestination), vreinterpret_u16_s8(vInt8), 0); +#elif defined(_XM_SSE_INTRINSICS_) + // Clamp to bounds + XMVECTOR vResult = _mm_max_ps(V, g_XMNegativeOne); + vResult = _mm_min_ps(vResult, g_XMOne); + // Scale by multiplication + vResult = _mm_mul_ps(vResult, g_ByteMax); + // Convert to int by rounding + __m128i vInt = _mm_cvtps_epi32(vResult); + // No SSE operations will write to 16-bit values, so we have to extract them manually + auto x = 
static_cast(_mm_extract_epi16(vInt, 0)); + auto y = static_cast(_mm_extract_epi16(vInt, 2)); + pDestination->v = static_cast(((static_cast(y) & 0xFF) << 8) | (static_cast(x) & 0xFF)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreByte2 +( + XMBYTE2* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, g_ByteMin, g_ByteMax); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t R = vmaxq_f32(V, vdupq_n_f32(-127.f)); + R = vminq_f32(R, vdupq_n_f32(127.0f)); + int32x4_t vInt32 = vcvtq_s32_f32(R); + int16x4_t vInt16 = vqmovn_s32(vInt32); + int8x8_t vInt8 = vqmovn_s16(vcombine_s16(vInt16, vInt16)); + vst1_lane_u16(reinterpret_cast(pDestination), vreinterpret_u16_s8(vInt8), 0); +#elif defined(_XM_SSE_INTRINSICS_) + // Clamp to bounds + XMVECTOR vResult = _mm_max_ps(V, g_ByteMin); + vResult = _mm_min_ps(vResult, g_ByteMax); + // Convert to int by rounding + __m128i vInt = _mm_cvtps_epi32(vResult); + // No SSE operations will write to 16-bit values, so we have to extract them manually + auto x = static_cast(_mm_extract_epi16(vInt, 0)); + auto y = static_cast(_mm_extract_epi16(vInt, 2)); + pDestination->v = static_cast(((static_cast(y) & 0xFF) << 8) | (static_cast(x) & 0xFF)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreUByteN2 +( + XMUBYTEN2* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorSaturate(V); + N = XMVectorMultiplyAdd(N, g_UByteMax, g_XMOneHalf.v); + N = XMVectorTruncate(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = 
static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t R = vmaxq_f32(V, vdupq_n_f32(0.f)); + R = vminq_f32(R, vdupq_n_f32(1.0f)); + R = vmulq_n_f32(R, 255.0f); + R = vaddq_f32(R, g_XMOneHalf); + uint32x4_t vInt32 = vcvtq_u32_f32(R); + uint16x4_t vInt16 = vqmovn_u32(vInt32); + uint8x8_t vInt8 = vqmovn_u16(vcombine_u16(vInt16, vInt16)); + vst1_lane_u16(reinterpret_cast(pDestination), vreinterpret_u16_u8(vInt8), 0); +#elif defined(_XM_SSE_INTRINSICS_) + // Clamp to bounds + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + vResult = _mm_min_ps(vResult, g_XMOne); + // Scale by multiplication + vResult = _mm_mul_ps(vResult, g_UByteMax); + vResult = _mm_add_ps(vResult, g_XMOneHalf); + // Convert to int + __m128i vInt = _mm_cvttps_epi32(vResult); + // No SSE operations will write to 16-bit values, so we have to extract them manually + auto x = static_cast(_mm_extract_epi16(vInt, 0)); + auto y = static_cast(_mm_extract_epi16(vInt, 2)); + pDestination->v = static_cast(((static_cast(y) & 0xFF) << 8) | (static_cast(x) & 0xFF)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreUByte2 +( + XMUBYTE2* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, XMVectorZero(), g_UByteMax); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t R = vmaxq_f32(V, vdupq_n_f32(0.f)); + R = vminq_f32(R, vdupq_n_f32(255.0f)); + uint32x4_t vInt32 = vcvtq_u32_f32(R); + uint16x4_t vInt16 = vqmovn_u32(vInt32); + uint8x8_t vInt8 = vqmovn_u16(vcombine_u16(vInt16, vInt16)); + vst1_lane_u16(reinterpret_cast(pDestination), vreinterpret_u16_u8(vInt8), 0); +#elif defined(_XM_SSE_INTRINSICS_) + // Clamp to bounds + 
XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + vResult = _mm_min_ps(vResult, g_UByteMax); + // Convert to int by rounding + __m128i vInt = _mm_cvtps_epi32(vResult); + // No SSE operations will write to 16-bit values, so we have to extract them manually + auto x = static_cast(_mm_extract_epi16(vInt, 0)); + auto y = static_cast(_mm_extract_epi16(vInt, 2)); + pDestination->v = static_cast(((static_cast(y) & 0xFF) << 8) | (static_cast(x) & 0xFF)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreU565 +( + XMU565* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + static const XMVECTORF32 Max = { { { 31.0f, 63.0f, 31.0f, 0.0f } } }; + +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max.v); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->v = static_cast( + ((static_cast(tmp.z) & 0x1F) << 11) + | ((static_cast(tmp.y) & 0x3F) << 5) + | ((static_cast(tmp.x) & 0x1F))); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 Scale = { { { 1.0f, 32.f, 32.f * 64.f, 0.f } } }; + static const XMVECTORU32 Mask = { { { 0x1F, 0x3F << 5, 0x1F << 11, 0 } } }; + float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0)); + vResult = vminq_f32(vResult, Max); + vResult = vmulq_f32(vResult, Scale); + uint32x4_t vResulti = vcvtq_u32_f32(vResult); + vResulti = vandq_u32(vResulti, Mask); + // Do a horizontal or of 4 entries + uint32x2_t vTemp = vget_low_u32(vResulti); + uint32x2_t vhi = vget_high_u32(vResulti); + vTemp = vorr_u32(vTemp, vhi); + vTemp = vpadd_u32(vTemp, vTemp); + vst1_lane_u16(&pDestination->v, vreinterpret_u16_u32(vTemp), 0); +#elif defined(_XM_SSE_INTRINSICS_) + // Bounds check + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + vResult = _mm_min_ps(vResult, Max); + // Convert to int with rounding + __m128i vInt = _mm_cvtps_epi32(vResult); + // No SSE operations will write to 16-bit 
values, so we have to extract them manually + auto x = static_cast(_mm_extract_epi16(vInt, 0)); + auto y = static_cast(_mm_extract_epi16(vInt, 2)); + auto z = static_cast(_mm_extract_epi16(vInt, 4)); + pDestination->v = static_cast( + ((static_cast(z) & 0x1F) << 11) + | ((static_cast(y) & 0x3F) << 5) + | ((static_cast(x) & 0x1F))); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat3PK +( + XMFLOAT3PK* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + + XM_ALIGNED_DATA(16) uint32_t IValue[4]; + XMStoreFloat3A(reinterpret_cast(&IValue), V); + + uint32_t Result[3]; + + // X & Y Channels (5-bit exponent, 6-bit mantissa) + for (uint32_t j = 0; j < 2; ++j) + { + uint32_t Sign = IValue[j] & 0x80000000; + uint32_t I = IValue[j] & 0x7FFFFFFF; + + if ((I & 0x7F800000) == 0x7F800000) + { + // INF or NAN + Result[j] = 0x7C0U; + if ((I & 0x7FFFFF) != 0) + { + Result[j] = 0x7FFU; + } + else if (Sign) + { + // -INF is clamped to 0 since 3PK is positive only + Result[j] = 0; + } + } + else if (Sign || I < 0x35800000) + { + // 3PK is positive only, so clamp to zero + Result[j] = 0; + } + else if (I > 0x477E0000U) + { + // The number is too large to be represented as a float11, set to max + Result[j] = 0x7BFU; + } + else + { + if (I < 0x38800000U) + { + // The number is too small to be represented as a normalized float11 + // Convert it to a denormalized value. 
+ uint32_t Shift = 113U - (I >> 23U); + I = (0x800000U | (I & 0x7FFFFFU)) >> Shift; + } + else + { + // Rebias the exponent to represent the value as a normalized float11 + I += 0xC8000000U; + } + + Result[j] = ((I + 0xFFFFU + ((I >> 17U) & 1U)) >> 17U) & 0x7ffU; + } + } + + // Z Channel (5-bit exponent, 5-bit mantissa) + uint32_t Sign = IValue[2] & 0x80000000; + uint32_t I = IValue[2] & 0x7FFFFFFF; + + if ((I & 0x7F800000) == 0x7F800000) + { + // INF or NAN + Result[2] = 0x3E0U; + if (I & 0x7FFFFF) + { + Result[2] = 0x3FFU; + } + else if (Sign || I < 0x36000000) + { + // -INF is clamped to 0 since 3PK is positive only + Result[2] = 0; + } + } + else if (Sign) + { + // 3PK is positive only, so clamp to zero + Result[2] = 0; + } + else if (I > 0x477C0000U) + { + // The number is too large to be represented as a float10, set to max + Result[2] = 0x3DFU; + } + else + { + if (I < 0x38800000U) + { + // The number is too small to be represented as a normalized float10 + // Convert it to a denormalized value. + uint32_t Shift = 113U - (I >> 23U); + I = (0x800000U | (I & 0x7FFFFFU)) >> Shift; + } + else + { + // Rebias the exponent to represent the value as a normalized float10 + I += 0xC8000000U; + } + + Result[2] = ((I + 0x1FFFFU + ((I >> 18U) & 1U)) >> 18U) & 0x3ffU; + } + + // Pack Result into memory + pDestination->v = (Result[0] & 0x7ff) + | ((Result[1] & 0x7ff) << 11) + | ((Result[2] & 0x3ff) << 22); +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreFloat3SE +( + XMFLOAT3SE* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + + XMFLOAT3A tmp; + XMStoreFloat3A(&tmp, V); + + static constexpr float maxf9 = float(0x1FF << 7); + static constexpr float minf9 = float(1.f / (1 << 16)); + + float x = (tmp.x >= 0.f) ? ((tmp.x > maxf9) ? maxf9 : tmp.x) : 0.f; + float y = (tmp.y >= 0.f) ? ((tmp.y > maxf9) ? maxf9 : tmp.y) : 0.f; + float z = (tmp.z >= 0.f) ? 
((tmp.z > maxf9) ? maxf9 : tmp.z) : 0.f; + + const float max_xy = (x > y) ? x : y; + const float max_xyz = (max_xy > z) ? max_xy : z; + + const float maxColor = (max_xyz > minf9) ? max_xyz : minf9; + + union { float f; int32_t i; } fi; + fi.f = maxColor; + fi.i += 0x00004000; // round up leaving 9 bits in fraction (including assumed 1) + + auto exp = static_cast(fi.i) >> 23; + pDestination->e = exp - 0x6f; + + fi.i = static_cast(0x83000000 - (exp << 23)); + float ScaleR = fi.f; + + pDestination->xm = static_cast(Internal::round_to_nearest(x * ScaleR)); + pDestination->ym = static_cast(Internal::round_to_nearest(y * ScaleR)); + pDestination->zm = static_cast(Internal::round_to_nearest(z * ScaleR)); +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreHalf4 +( + XMHALF4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) + __m128i V1 = _mm_cvtps_ph(V, _MM_FROUND_TO_NEAREST_INT); + _mm_storel_epi64(reinterpret_cast<__m128i*>(pDestination), V1); +#else + XMFLOAT4A t; + XMStoreFloat4A(&t, V); + + pDestination->x = XMConvertFloatToHalf(t.x); + pDestination->y = XMConvertFloatToHalf(t.y); + pDestination->z = XMConvertFloatToHalf(t.z); + pDestination->w = XMConvertFloatToHalf(t.w); +#endif // !_XM_F16C_INTRINSICS_ +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreShortN4 +( + XMSHORTN4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v); + N = XMVectorMultiply(N, g_ShortMax); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + pDestination->z = static_cast(tmp.z); + pDestination->w = 
static_cast(tmp.w); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(-1.f)); + vResult = vminq_f32(vResult, vdupq_n_f32(1.0f)); + vResult = vmulq_n_f32(vResult, 32767.0f); + int16x4_t vInt = vmovn_s32(vcvtq_s32_f32(vResult)); + vst1_s16(reinterpret_cast(pDestination), vInt); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vResult = _mm_max_ps(V, g_XMNegativeOne); + vResult = _mm_min_ps(vResult, g_XMOne); + vResult = _mm_mul_ps(vResult, g_ShortMax); + __m128i vResulti = _mm_cvtps_epi32(vResult); + vResulti = _mm_packs_epi32(vResulti, vResulti); + _mm_store_sd(reinterpret_cast(&pDestination->x), _mm_castsi128_pd(vResulti)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreShort4 +( + XMSHORT4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, g_ShortMin, g_ShortMax); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + pDestination->z = static_cast(tmp.z); + pDestination->w = static_cast(tmp.w); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t vResult = vmaxq_f32(V, g_ShortMin); + vResult = vminq_f32(vResult, g_ShortMax); + int16x4_t vInt = vmovn_s32(vcvtq_s32_f32(vResult)); + vst1_s16(reinterpret_cast(pDestination), vInt); +#elif defined(_XM_SSE_INTRINSICS_) + // Bounds check + XMVECTOR vResult = _mm_max_ps(V, g_ShortMin); + vResult = _mm_min_ps(vResult, g_ShortMax); + // Convert to int with rounding + __m128i vInt = _mm_cvtps_epi32(vResult); + // Pack the ints into shorts + vInt = _mm_packs_epi32(vInt, vInt); + _mm_store_sd(reinterpret_cast(&pDestination->x), _mm_castsi128_pd(vInt)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV 
XMStoreUShortN4 +( + XMUSHORTN4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorSaturate(V); + N = XMVectorMultiplyAdd(N, g_UShortMax, g_XMOneHalf.v); + N = XMVectorTruncate(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + pDestination->z = static_cast(tmp.z); + pDestination->w = static_cast(tmp.w); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0)); + vResult = vminq_f32(vResult, vdupq_n_f32(1.0f)); + vResult = vmulq_n_f32(vResult, 65535.0f); + vResult = vaddq_f32(vResult, g_XMOneHalf); + uint16x4_t vInt = vmovn_u32(vcvtq_u32_f32(vResult)); + vst1_u16(reinterpret_cast(pDestination), vInt); +#elif defined(_XM_SSE_INTRINSICS_) + // Bounds check + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + vResult = _mm_min_ps(vResult, g_XMOne); + vResult = _mm_mul_ps(vResult, g_UShortMax); + vResult = _mm_add_ps(vResult, g_XMOneHalf); + // Convert to int + __m128i vInt = _mm_cvttps_epi32(vResult); + // Since the SSE pack instruction clamps using signed rules, + // manually extract the values to store them to memory + pDestination->x = static_cast(_mm_extract_epi16(vInt, 0)); + pDestination->y = static_cast(_mm_extract_epi16(vInt, 2)); + pDestination->z = static_cast(_mm_extract_epi16(vInt, 4)); + pDestination->w = static_cast(_mm_extract_epi16(vInt, 6)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreUShort4 +( + XMUSHORT4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, XMVectorZero(), g_UShortMax); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + pDestination->z = static_cast(tmp.z); 
+ pDestination->w = static_cast(tmp.w); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0)); + vResult = vminq_f32(vResult, g_UShortMax); + uint16x4_t vInt = vmovn_u32(vcvtq_u32_f32(vResult)); + vst1_u16(reinterpret_cast(pDestination), vInt); +#elif defined(_XM_SSE_INTRINSICS_) + // Bounds check + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + vResult = _mm_min_ps(vResult, g_UShortMax); + // Convert to int with rounding + __m128i vInt = _mm_cvtps_epi32(vResult); + // Since the SSE pack instruction clamps using signed rules, + // manually extract the values to store them to memory + pDestination->x = static_cast(_mm_extract_epi16(vInt, 0)); + pDestination->y = static_cast(_mm_extract_epi16(vInt, 2)); + pDestination->z = static_cast(_mm_extract_epi16(vInt, 4)); + pDestination->w = static_cast(_mm_extract_epi16(vInt, 6)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreXDecN4 +( + XMXDECN4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + static const XMVECTORF32 Min = { { { -1.0f, -1.0f, -1.0f, 0.0f } } }; + +#if defined(_XM_NO_INTRINSICS_) + + static const XMVECTORF32 Scale = { { { 511.0f, 511.0f, 511.0f, 3.0f } } }; + + XMVECTOR N = XMVectorClamp(V, Min.v, g_XMOne.v); + N = XMVectorMultiply(N, Scale.v); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->v = static_cast( + (static_cast(tmp.w) << 30) + | ((static_cast(tmp.z) & 0x3FF) << 20) + | ((static_cast(tmp.y) & 0x3FF) << 10) + | (static_cast(tmp.x) & 0x3FF)); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 Scale = { { { 511.0f, 511.0f * 1024.0f, 511.0f * 1048576.0f, 3.0f * 536870912.0f } } }; + static const XMVECTORI32 ScaleMask = { { { 0x3FF, 0x3FF << 10, 0x3FF << 20, 0x3 << 29 } } }; + float32x4_t vResult = vmaxq_f32(V, Min); + vResult = vminq_f32(vResult, vdupq_n_f32(1.0f)); + vResult = 
vmulq_f32(vResult, Scale); + int32x4_t vResulti = vcvtq_s32_f32(vResult); + vResulti = vandq_s32(vResulti, ScaleMask); + int32x4_t vResultw = vandq_s32(vResulti, g_XMMaskW); + vResulti = vaddq_s32(vResulti, vResultw); + // Do a horizontal or of all 4 entries + uint32x2_t vTemp = vget_low_u32(vreinterpretq_u32_s32(vResulti)); + uint32x2_t vhi = vget_high_u32(vreinterpretq_u32_s32(vResulti)); + vTemp = vorr_u32(vTemp, vhi); + vTemp = vpadd_u32(vTemp, vTemp); + vst1_lane_u32(&pDestination->v, vTemp, 0); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 Scale = { { { 511.0f, 511.0f * 1024.0f, 511.0f * 1048576.0f, 3.0f * 536870912.0f } } }; + static const XMVECTORI32 ScaleMask = { { { 0x3FF, 0x3FF << 10, 0x3FF << 20, 0x3 << 29 } } }; + XMVECTOR vResult = _mm_max_ps(V, Min); + vResult = _mm_min_ps(vResult, g_XMOne); + // Scale by multiplication + vResult = _mm_mul_ps(vResult, Scale); + // Convert to int (W is unsigned) + __m128i vResulti = _mm_cvtps_epi32(vResult); + // Mask off any fraction + vResulti = _mm_and_si128(vResulti, ScaleMask); + // To fix W, add itself to shift it up to <<30 instead of <<29 + __m128i vResultw = _mm_and_si128(vResulti, g_XMMaskW); + vResulti = _mm_add_epi32(vResulti, vResultw); + // Do a horizontal or of all 4 entries + vResult = XM_PERMUTE_PS(_mm_castsi128_ps(vResulti), _MM_SHUFFLE(0, 3, 2, 1)); + vResulti = _mm_or_si128(vResulti, _mm_castps_si128(vResult)); + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(0, 3, 2, 1)); + vResulti = _mm_or_si128(vResulti, _mm_castps_si128(vResult)); + vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(0, 3, 2, 1)); + vResulti = _mm_or_si128(vResulti, _mm_castps_si128(vResult)); + _mm_store_ss(reinterpret_cast(&pDestination->v), _mm_castsi128_ps(vResulti)); +#endif +} + +//------------------------------------------------------------------------------ +#pragma warning(push) +#pragma warning(disable : 4996) +// C4996: ignore deprecation warning + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC 
diagnostic ignored "-Wdeprecated-declarations" +#endif + +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreXDec4 +( + XMXDEC4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + static const XMVECTORF32 MinXDec4 = { { { -511.0f, -511.0f, -511.0f, 0.0f } } }; + static const XMVECTORF32 MaxXDec4 = { { { 511.0f, 511.0f, 511.0f, 3.0f } } }; + +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, MinXDec4, MaxXDec4); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->v = static_cast( + (static_cast(tmp.w) << 30) + | ((static_cast(tmp.z) & 0x3FF) << 20) + | ((static_cast(tmp.y) & 0x3FF) << 10) + | ((static_cast(tmp.x) & 0x3FF))); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 ScaleXDec4 = { { { 1.0f, 1024.0f / 2.0f, 1024.0f * 1024.0f, 1024.0f * 1024.0f * 1024.0f / 2.0f } } }; + static const XMVECTORI32 MaskXDec4 = { { { 0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1) } } }; + float32x4_t vResult = vmaxq_f32(V, MinXDec4); + vResult = vminq_f32(vResult, MaxXDec4); + vResult = vmulq_f32(vResult, ScaleXDec4); + int32x4_t vResulti = vcvtq_s32_f32(vResult); + vResulti = vandq_s32(vResulti, MaskXDec4); + // Do a horizontal or of 4 entries + uint32x2_t vTemp = vget_low_u32(vreinterpretq_u32_s32(vResulti)); + uint32x2_t vTemp2 = vget_high_u32(vreinterpretq_u32_s32(vResulti)); + vTemp = vorr_u32(vTemp, vTemp2); + // Perform a single bit left shift on y|w + vTemp2 = vdup_lane_u32(vTemp, 1); + vTemp2 = vadd_u32(vTemp2, vTemp2); + vTemp = vorr_u32(vTemp, vTemp2); + vst1_lane_u32(&pDestination->v, vTemp, 0); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 ScaleXDec4 = { { { 1.0f, 1024.0f / 2.0f, 1024.0f * 1024.0f, 1024.0f * 1024.0f * 1024.0f / 2.0f } } }; + static const XMVECTORI32 MaskXDec4 = { { { 0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1) } } }; + // Clamp to bounds + XMVECTOR vResult = _mm_max_ps(V, MinXDec4); + vResult = _mm_min_ps(vResult, MaxXDec4); + // Scale by 
multiplication + vResult = _mm_mul_ps(vResult, ScaleXDec4); + // Convert to int + __m128i vResulti = _mm_cvttps_epi32(vResult); + // Mask off any fraction + vResulti = _mm_and_si128(vResulti, MaskXDec4); + // Do a horizontal or of 4 entries + __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); + // x = x|z, y = y|w + vResulti = _mm_or_si128(vResulti, vResulti2); + // Move Z to the x position + vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); + // Perform a single bit left shift on y|w + vResulti2 = _mm_add_epi32(vResulti2, vResulti2); + // i = x|y|z|w + vResulti = _mm_or_si128(vResulti, vResulti2); + _mm_store_ss(reinterpret_cast(&pDestination->v), _mm_castsi128_ps(vResulti)); +#endif +} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +#pragma warning(pop) + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreUDecN4 +( + XMUDECN4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + static const XMVECTORF32 Scale = { { { 1023.0f, 1023.0f, 1023.0f, 3.0f } } }; + + XMVECTOR N = XMVectorSaturate(V); + N = XMVectorMultiply(N, Scale.v); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->v = static_cast( + (static_cast(tmp.w) << 30) + | ((static_cast(tmp.z) & 0x3FF) << 20) + | ((static_cast(tmp.y) & 0x3FF) << 10) + | ((static_cast(tmp.x) & 0x3FF))); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 ScaleUDecN4 = { { { 1023.0f, 1023.0f * 1024.0f * 0.5f, 1023.0f * 1024.0f * 1024.0f, 3.0f * 1024.0f * 1024.0f * 1024.0f * 0.5f } } }; + static const XMVECTORI32 MaskUDecN4 = { { { 0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1) } } }; + float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0.f)); + vResult = vminq_f32(vResult, vdupq_n_f32(1.f)); + vResult = vmulq_f32(vResult, ScaleUDecN4); + uint32x4_t vResulti = vcvtq_u32_f32(vResult); + vResulti = 
vandq_u32(vResulti, MaskUDecN4); + // Do a horizontal or of 4 entries + uint32x2_t vTemp = vget_low_u32(vResulti); + uint32x2_t vTemp2 = vget_high_u32(vResulti); + vTemp = vorr_u32(vTemp, vTemp2); + // Perform a single bit left shift on y|w + vTemp2 = vdup_lane_u32(vTemp, 1); + vTemp2 = vadd_u32(vTemp2, vTemp2); + vTemp = vorr_u32(vTemp, vTemp2); + vst1_lane_u32(&pDestination->v, vTemp, 0); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 ScaleUDecN4 = { { { 1023.0f, 1023.0f * 1024.0f * 0.5f, 1023.0f * 1024.0f * 1024.0f, 3.0f * 1024.0f * 1024.0f * 1024.0f * 0.5f } } }; + static const XMVECTORI32 MaskUDecN4 = { { { 0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1) } } }; + // Clamp to bounds + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + vResult = _mm_min_ps(vResult, g_XMOne); + // Scale by multiplication + vResult = _mm_mul_ps(vResult, ScaleUDecN4); + // Convert to int + __m128i vResulti = _mm_cvttps_epi32(vResult); + // Mask off any fraction + vResulti = _mm_and_si128(vResulti, MaskUDecN4); + // Do a horizontal or of 4 entries + __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); + // x = x|z, y = y|w + vResulti = _mm_or_si128(vResulti, vResulti2); + // Move Z to the x position + vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); + // Perform a left shift by one bit on y|w + vResulti2 = _mm_add_epi32(vResulti2, vResulti2); + // i = x|y|z|w + vResulti = _mm_or_si128(vResulti, vResulti2); + _mm_store_ss(reinterpret_cast(&pDestination->v), _mm_castsi128_ps(vResulti)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreUDecN4_XR +( + XMUDECN4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + static const XMVECTORF32 Scale = { { { 510.0f, 510.0f, 510.0f, 3.0f } } }; + static const XMVECTORF32 Bias = { { { 384.0f, 384.0f, 384.0f, 0.0f } } }; + static const XMVECTORF32 C = { { { 1023.f, 1023.f, 
1023.f, 3.f } } }; + +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorMultiplyAdd(V, Scale, Bias); + N = XMVectorClamp(N, g_XMZero, C); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->v = static_cast( + (static_cast(tmp.w) << 30) + | ((static_cast(tmp.z) & 0x3FF) << 20) + | ((static_cast(tmp.y) & 0x3FF) << 10) + | ((static_cast(tmp.x) & 0x3FF))); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 Shift = { { { 1.0f, 1024.0f * 0.5f, 1024.0f * 1024.0f, 1024.0f * 1024.0f * 1024.0f * 0.5f } } }; + static const XMVECTORU32 MaskUDecN4 = { { { 0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1) } } }; + float32x4_t vResult = vmlaq_f32(Bias, V, Scale); + vResult = vmaxq_f32(vResult, vdupq_n_f32(0.f)); + vResult = vminq_f32(vResult, C); + vResult = vmulq_f32(vResult, Shift); + uint32x4_t vResulti = vcvtq_u32_f32(vResult); + vResulti = vandq_u32(vResulti, MaskUDecN4); + // Do a horizontal or of 4 entries + uint32x2_t vTemp = vget_low_u32(vResulti); + uint32x2_t vTemp2 = vget_high_u32(vResulti); + vTemp = vorr_u32(vTemp, vTemp2); + // Perform a single bit left shift on y|w + vTemp2 = vdup_lane_u32(vTemp, 1); + vTemp2 = vadd_u32(vTemp2, vTemp2); + vTemp = vorr_u32(vTemp, vTemp2); + vst1_lane_u32(&pDestination->v, vTemp, 0); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 Shift = { { { 1.0f, 1024.0f * 0.5f, 1024.0f * 1024.0f, 1024.0f * 1024.0f * 1024.0f * 0.5f } } }; + static const XMVECTORU32 MaskUDecN4 = { { { 0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1) } } }; + // Scale & bias + XMVECTOR vResult = XM_FMADD_PS(V, Scale, Bias); + // Clamp to bounds + vResult = _mm_max_ps(vResult, g_XMZero); + vResult = _mm_min_ps(vResult, C); + // Scale by shift values + vResult = _mm_mul_ps(vResult, Shift); + // Convert to int + __m128i vResulti = _mm_cvttps_epi32(vResult); + // Mask off any fraction + vResulti = _mm_and_si128(vResulti, MaskUDecN4); + // Do a horizontal or of 4 entries + __m128i vResulti2 = 
_mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); + // x = x|z, y = y|w + vResulti = _mm_or_si128(vResulti, vResulti2); + // Move Z to the x position + vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); + // Perform a left shift by one bit on y|w + vResulti2 = _mm_add_epi32(vResulti2, vResulti2); + // i = x|y|z|w + vResulti = _mm_or_si128(vResulti, vResulti2); + _mm_store_ss(reinterpret_cast(&pDestination->v), _mm_castsi128_ps(vResulti)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreUDec4 +( + XMUDEC4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + static const XMVECTORF32 MaxUDec4 = { { { 1023.0f, 1023.0f, 1023.0f, 3.0f } } }; + +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, XMVectorZero(), MaxUDec4); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->v = static_cast( + (static_cast(tmp.w) << 30) + | ((static_cast(tmp.z) & 0x3FF) << 20) + | ((static_cast(tmp.y) & 0x3FF) << 10) + | ((static_cast(tmp.x) & 0x3FF))); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 ScaleUDec4 = { { { 1.0f, 1024.0f / 2.0f, 1024.0f * 1024.0f, 1024.0f * 1024.0f * 1024.0f / 2.0f } } }; + static const XMVECTORI32 MaskUDec4 = { { { 0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1) } } }; + float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0.f)); + vResult = vminq_f32(vResult, MaxUDec4); + vResult = vmulq_f32(vResult, ScaleUDec4); + uint32x4_t vResulti = vcvtq_u32_f32(vResult); + vResulti = vandq_u32(vResulti, MaskUDec4); + // Do a horizontal or of 4 entries + uint32x2_t vTemp = vget_low_u32(vResulti); + uint32x2_t vTemp2 = vget_high_u32(vResulti); + vTemp = vorr_u32(vTemp, vTemp2); + // Perform a single bit left shift on y|w + vTemp2 = vdup_lane_u32(vTemp, 1); + vTemp2 = vadd_u32(vTemp2, vTemp2); + vTemp = vorr_u32(vTemp, vTemp2); + vst1_lane_u32(&pDestination->v, vTemp, 0); +#elif 
defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 ScaleUDec4 = { { { 1.0f, 1024.0f / 2.0f, 1024.0f * 1024.0f, 1024.0f * 1024.0f * 1024.0f / 2.0f } } }; + static const XMVECTORI32 MaskUDec4 = { { { 0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1) } } }; + // Clamp to bounds + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + vResult = _mm_min_ps(vResult, MaxUDec4); + // Scale by multiplication + vResult = _mm_mul_ps(vResult, ScaleUDec4); + // Convert to int + __m128i vResulti = _mm_cvttps_epi32(vResult); + // Mask off any fraction + vResulti = _mm_and_si128(vResulti, MaskUDec4); + // Do a horizontal or of 4 entries + __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); + // x = x|z, y = y|w + vResulti = _mm_or_si128(vResulti, vResulti2); + // Move Z to the x position + vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); + // Perform a left shift by one bit on y|w + vResulti2 = _mm_add_epi32(vResulti2, vResulti2); + // i = x|y|z|w + vResulti = _mm_or_si128(vResulti, vResulti2); + _mm_store_ss(reinterpret_cast(&pDestination->v), _mm_castsi128_ps(vResulti)); +#endif +} + +//------------------------------------------------------------------------------ +#pragma warning(push) +#pragma warning(disable : 4996) +// C4996: ignore deprecation warning + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif + +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreDecN4 +( + XMDECN4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + static const XMVECTORF32 Scale = { { { 511.0f, 511.0f, 511.0f, 1.0f } } }; + + XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v); + N = XMVectorMultiply(N, Scale.v); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->v = static_cast( + (static_cast(tmp.w) << 30) + | ((static_cast(tmp.z) & 0x3FF) << 20) + | ((static_cast(tmp.y) & 0x3FF) << 10) + | ((static_cast(tmp.x) & 
0x3FF))); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 ScaleDecN4 = { { { 511.0f, 511.0f * 1024.0f, 511.0f * 1024.0f * 1024.0f, 1.0f * 1024.0f * 1024.0f * 1024.0f } } }; + float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(-1.f)); + vResult = vminq_f32(vResult, vdupq_n_f32(1.f)); + vResult = vmulq_f32(vResult, ScaleDecN4); + int32x4_t vResulti = vcvtq_s32_f32(vResult); + vResulti = vandq_s32(vResulti, g_XMMaskDec4); + // Do a horizontal or of 4 entries + uint32x2_t vTemp = vget_low_u32(vreinterpretq_u32_s32(vResulti)); + uint32x2_t vhi = vget_high_u32(vreinterpretq_u32_s32(vResulti)); + vTemp = vorr_u32(vTemp, vhi); + vTemp = vpadd_u32(vTemp, vTemp); + vst1_lane_u32(&pDestination->v, vTemp, 0); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 ScaleDecN4 = { { { 511.0f, 511.0f * 1024.0f, 511.0f * 1024.0f * 1024.0f, 1.0f * 1024.0f * 1024.0f * 1024.0f } } }; + // Clamp to bounds + XMVECTOR vResult = _mm_max_ps(V, g_XMNegativeOne); + vResult = _mm_min_ps(vResult, g_XMOne); + // Scale by multiplication + vResult = _mm_mul_ps(vResult, ScaleDecN4); + // Convert to int + __m128i vResulti = _mm_cvttps_epi32(vResult); + // Mask off any fraction + vResulti = _mm_and_si128(vResulti, g_XMMaskDec4); + // Do a horizontal or of 4 entries + __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); + // x = x|z, y = y|w + vResulti = _mm_or_si128(vResulti, vResulti2); + // Move Z to the x position + vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); + // i = x|y|z|w + vResulti = _mm_or_si128(vResulti, vResulti2); + _mm_store_ss(reinterpret_cast(&pDestination->v), _mm_castsi128_ps(vResulti)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreDec4 +( + XMDEC4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + static const XMVECTORF32 MinDec4 = { { { -511.0f, -511.0f, -511.0f, -1.0f } } }; + static const 
XMVECTORF32 MaxDec4 = { { { 511.0f, 511.0f, 511.0f, 1.0f } } }; + +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, MinDec4, MaxDec4); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->v = static_cast( + (static_cast(tmp.w) << 30) + | ((static_cast(tmp.z) & 0x3FF) << 20) + | ((static_cast(tmp.y) & 0x3FF) << 10) + | ((static_cast(tmp.x) & 0x3FF))); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 ScaleDec4 = { { { 1.0f, 1024.0f, 1024.0f * 1024.0f, 1024.0f * 1024.0f * 1024.0f } } }; + float32x4_t vResult = vmaxq_f32(V, MinDec4); + vResult = vminq_f32(vResult, MaxDec4); + vResult = vmulq_f32(vResult, ScaleDec4); + int32x4_t vResulti = vcvtq_s32_f32(vResult); + vResulti = vandq_s32(vResulti, g_XMMaskDec4); + // Do a horizontal or of all 4 entries + uint32x2_t vTemp = vget_low_u32(vreinterpretq_u32_s32(vResulti)); + uint32x2_t vhi = vget_high_u32(vreinterpretq_u32_s32(vResulti)); + vTemp = vorr_u32(vTemp, vhi); + vTemp = vpadd_u32(vTemp, vTemp); + vst1_lane_u32(&pDestination->v, vTemp, 0); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 ScaleDec4 = { { { 1.0f, 1024.0f, 1024.0f * 1024.0f, 1024.0f * 1024.0f * 1024.0f } } }; + // Clamp to bounds + XMVECTOR vResult = _mm_max_ps(V, MinDec4); + vResult = _mm_min_ps(vResult, MaxDec4); + // Scale by multiplication + vResult = _mm_mul_ps(vResult, ScaleDec4); + // Convert to int + __m128i vResulti = _mm_cvttps_epi32(vResult); + // Mask off any fraction + vResulti = _mm_and_si128(vResulti, g_XMMaskDec4); + // Do a horizontal or of 4 entries + __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); + // x = x|z, y = y|w + vResulti = _mm_or_si128(vResulti, vResulti2); + // Move Z to the x position + vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); + // i = x|y|z|w + vResulti = _mm_or_si128(vResulti, vResulti2); + _mm_store_ss(reinterpret_cast(&pDestination->v), _mm_castsi128_ps(vResulti)); +#endif +} + +#ifdef __GNUC__ +#pragma GCC 
diagnostic pop +#endif + +#pragma warning(pop) + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreUByteN4 +( + XMUBYTEN4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorSaturate(V); + N = XMVectorMultiply(N, g_UByteMax); + N = XMVectorTruncate(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + pDestination->z = static_cast(tmp.z); + pDestination->w = static_cast(tmp.w); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t R = vmaxq_f32(V, vdupq_n_f32(0)); + R = vminq_f32(R, vdupq_n_f32(1.0f)); + R = vmulq_n_f32(R, 255.0f); + uint32x4_t vInt32 = vcvtq_u32_f32(R); + uint16x4_t vInt16 = vqmovn_u32(vInt32); + uint8x8_t vInt8 = vqmovn_u16(vcombine_u16(vInt16, vInt16)); + vst1_lane_u32(&pDestination->v, vreinterpret_u32_u8(vInt8), 0); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 ScaleUByteN4 = { { { 255.0f, 255.0f * 256.0f * 0.5f, 255.0f * 256.0f * 256.0f, 255.0f * 256.0f * 256.0f * 256.0f * 0.5f } } }; + static const XMVECTORI32 MaskUByteN4 = { { { 0xFF, 0xFF << (8 - 1), 0xFF << 16, 0xFF << (24 - 1) } } }; + // Clamp to bounds + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + vResult = _mm_min_ps(vResult, g_XMOne); + // Scale by multiplication + vResult = _mm_mul_ps(vResult, ScaleUByteN4); + // Convert to int + __m128i vResulti = _mm_cvttps_epi32(vResult); + // Mask off any fraction + vResulti = _mm_and_si128(vResulti, MaskUByteN4); + // Do a horizontal or of 4 entries + __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); + // x = x|z, y = y|w + vResulti = _mm_or_si128(vResulti, vResulti2); + // Move Z to the x position + vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); + // Perform a single bit left shift to fix y|w + vResulti2 = _mm_add_epi32(vResulti2, vResulti2); 
+ // i = x|y|z|w + vResulti = _mm_or_si128(vResulti, vResulti2); + _mm_store_ss(reinterpret_cast(&pDestination->v), _mm_castsi128_ps(vResulti)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreUByte4 +( + XMUBYTE4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, XMVectorZero(), g_UByteMax); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + pDestination->z = static_cast(tmp.z); + pDestination->w = static_cast(tmp.w); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t R = vmaxq_f32(V, vdupq_n_f32(0)); + R = vminq_f32(R, vdupq_n_f32(255.0f)); + uint32x4_t vInt32 = vcvtq_u32_f32(R); + uint16x4_t vInt16 = vqmovn_u32(vInt32); + uint8x8_t vInt8 = vqmovn_u16(vcombine_u16(vInt16, vInt16)); + vst1_lane_u32(&pDestination->v, vreinterpret_u32_u8(vInt8), 0); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 ScaleUByte4 = { { { 1.0f, 256.0f * 0.5f, 256.0f * 256.0f, 256.0f * 256.0f * 256.0f * 0.5f } } }; + static const XMVECTORI32 MaskUByte4 = { { { 0xFF, 0xFF << (8 - 1), 0xFF << 16, 0xFF << (24 - 1) } } }; + // Clamp to bounds + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + vResult = _mm_min_ps(vResult, g_UByteMax); + // Scale by multiplication + vResult = _mm_mul_ps(vResult, ScaleUByte4); + // Convert to int by rounding + __m128i vResulti = _mm_cvtps_epi32(vResult); + // Mask off any fraction + vResulti = _mm_and_si128(vResulti, MaskUByte4); + // Do a horizontal or of 4 entries + __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); + // x = x|z, y = y|w + vResulti = _mm_or_si128(vResulti, vResulti2); + // Move Z to the x position + vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); + // Perform a single bit left shift to fix y|w + 
vResulti2 = _mm_add_epi32(vResulti2, vResulti2); + // i = x|y|z|w + vResulti = _mm_or_si128(vResulti, vResulti2); + _mm_store_ss(reinterpret_cast(&pDestination->v), _mm_castsi128_ps(vResulti)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreByteN4 +( + XMBYTEN4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v); + N = XMVectorMultiply(N, g_ByteMax); + N = XMVectorTruncate(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + pDestination->z = static_cast(tmp.z); + pDestination->w = static_cast(tmp.w); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t R = vmaxq_f32(V, vdupq_n_f32(-1.f)); + R = vminq_f32(R, vdupq_n_f32(1.0f)); + R = vmulq_n_f32(R, 127.0f); + int32x4_t vInt32 = vcvtq_s32_f32(R); + int16x4_t vInt16 = vqmovn_s32(vInt32); + int8x8_t vInt8 = vqmovn_s16(vcombine_s16(vInt16, vInt16)); + vst1_lane_u32(&pDestination->v, vreinterpret_u32_s8(vInt8), 0); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 ScaleByteN4 = { { { 127.0f, 127.0f * 256.0f, 127.0f * 256.0f * 256.0f, 127.0f * 256.0f * 256.0f * 256.0f } } }; + static const XMVECTORI32 MaskByteN4 = { { { 0xFF, 0xFF << 8, 0xFF << 16, static_cast(0xFF000000) } } }; + // Clamp to bounds + XMVECTOR vResult = _mm_max_ps(V, g_XMNegativeOne); + vResult = _mm_min_ps(vResult, g_XMOne); + // Scale by multiplication + vResult = _mm_mul_ps(vResult, ScaleByteN4); + // Convert to int + __m128i vResulti = _mm_cvttps_epi32(vResult); + // Mask off any fraction + vResulti = _mm_and_si128(vResulti, MaskByteN4); + // Do a horizontal or of 4 entries + __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); + // x = x|z, y = y|w + vResulti = _mm_or_si128(vResulti, vResulti2); + // Move Z to the x 
position + vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); + // i = x|y|z|w + vResulti = _mm_or_si128(vResulti, vResulti2); + _mm_store_ss(reinterpret_cast(&pDestination->v), _mm_castsi128_ps(vResulti)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreByte4 +( + XMBYTE4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, g_ByteMin, g_ByteMax); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->x = static_cast(tmp.x); + pDestination->y = static_cast(tmp.y); + pDestination->z = static_cast(tmp.z); + pDestination->w = static_cast(tmp.w); + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + float32x4_t R = vmaxq_f32(V, vdupq_n_f32(-127.f)); + R = vminq_f32(R, vdupq_n_f32(127.f)); + int32x4_t vInt32 = vcvtq_s32_f32(R); + int16x4_t vInt16 = vqmovn_s32(vInt32); + int8x8_t vInt8 = vqmovn_s16(vcombine_s16(vInt16, vInt16)); + vst1_lane_u32(&pDestination->v, vreinterpret_u32_s8(vInt8), 0); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 ScaleByte4 = { { { 1.0f, 256.0f, 256.0f * 256.0f, 256.0f * 256.0f * 256.0f } } }; + static const XMVECTORI32 MaskByte4 = { { { 0xFF, 0xFF << 8, 0xFF << 16, static_cast(0xFF000000) } } }; + // Clamp to bounds + XMVECTOR vResult = _mm_max_ps(V, g_ByteMin); + vResult = _mm_min_ps(vResult, g_ByteMax); + // Scale by multiplication + vResult = _mm_mul_ps(vResult, ScaleByte4); + // Convert to int by rounding + __m128i vResulti = _mm_cvtps_epi32(vResult); + // Mask off any fraction + vResulti = _mm_and_si128(vResulti, MaskByte4); + // Do a horizontal or of 4 entries + __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); + // x = x|z, y = y|w + vResulti = _mm_or_si128(vResulti, vResulti2); + // Move Z to the x position + vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); + 
// i = x|y|z|w + vResulti = _mm_or_si128(vResulti, vResulti2); + _mm_store_ss(reinterpret_cast(&pDestination->v), _mm_castsi128_ps(vResulti)); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreUNibble4 +( + XMUNIBBLE4* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + static const XMVECTORF32 Max = { { { 15.0f, 15.0f, 15.0f, 15.0f } } }; +#if defined(_XM_NO_INTRINSICS_) + + XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max.v); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->v = static_cast( + ((static_cast(tmp.w) & 0xF) << 12) + | ((static_cast(tmp.z) & 0xF) << 8) + | ((static_cast(tmp.y) & 0xF) << 4) + | (static_cast(tmp.x) & 0xF)); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 Scale = { { { 1.0f, 16.f, 16.f * 16.f, 16.f * 16.f * 16.f } } }; + static const XMVECTORU32 Mask = { { { 0xF, 0xF << 4, 0xF << 8, 0xF << 12 } } }; + float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0)); + vResult = vminq_f32(vResult, Max); + vResult = vmulq_f32(vResult, Scale); + uint32x4_t vResulti = vcvtq_u32_f32(vResult); + vResulti = vandq_u32(vResulti, Mask); + // Do a horizontal or of 4 entries + uint32x2_t vTemp = vget_low_u32(vResulti); + uint32x2_t vhi = vget_high_u32(vResulti); + vTemp = vorr_u32(vTemp, vhi); + vTemp = vpadd_u32(vTemp, vTemp); + vst1_lane_u16(&pDestination->v, vreinterpret_u16_u32(vTemp), 0); +#elif defined(_XM_SSE_INTRINSICS_) + // Bounds check + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + vResult = _mm_min_ps(vResult, Max); + // Convert to int with rounding + __m128i vInt = _mm_cvtps_epi32(vResult); + // No SSE operations will write to 16-bit values, so we have to extract them manually + auto x = static_cast(_mm_extract_epi16(vInt, 0)); + auto y = static_cast(_mm_extract_epi16(vInt, 2)); + auto z = static_cast(_mm_extract_epi16(vInt, 4)); + auto w = 
static_cast(_mm_extract_epi16(vInt, 6)); + pDestination->v = static_cast( + ((static_cast(w) & 0xF) << 12) + | ((static_cast(z) & 0xF) << 8) + | ((static_cast(y) & 0xF) << 4) + | ((static_cast(x) & 0xF))); +#endif +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XM_CALLCONV XMStoreU555 +( + XMU555* pDestination, + FXMVECTOR V +) noexcept +{ + assert(pDestination); + static const XMVECTORF32 Max = { { { 31.0f, 31.0f, 31.0f, 1.0f } } }; + +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max.v); + N = XMVectorRound(N); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N); + + pDestination->v = static_cast( + ((tmp.w > 0.f) ? 0x8000 : 0) + | ((static_cast(tmp.z) & 0x1F) << 10) + | ((static_cast(tmp.y) & 0x1F) << 5) + | (static_cast(tmp.x) & 0x1F)); +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 Scale = { { { 1.0f, 32.f / 2.f, 32.f * 32.f, 32.f * 32.f * 32.f / 2.f } } }; + static const XMVECTORU32 Mask = { { { 0x1F, 0x1F << (5 - 1), 0x1F << 10, 0x1 << (15 - 1) } } }; + float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0)); + vResult = vminq_f32(vResult, Max); + vResult = vmulq_f32(vResult, Scale); + uint32x4_t vResulti = vcvtq_u32_f32(vResult); + vResulti = vandq_u32(vResulti, Mask); + // Do a horizontal or of 4 entries + uint32x2_t vTemp = vget_low_u32(vResulti); + uint32x2_t vTemp2 = vget_high_u32(vResulti); + vTemp = vorr_u32(vTemp, vTemp2); + // Perform a single bit left shift on y|w + vTemp2 = vdup_lane_u32(vTemp, 1); + vTemp2 = vadd_u32(vTemp2, vTemp2); + vTemp = vorr_u32(vTemp, vTemp2); + vst1_lane_u16(&pDestination->v, vreinterpret_u16_u32(vTemp), 0); +#elif defined(_XM_SSE_INTRINSICS_) + // Bounds check + XMVECTOR vResult = _mm_max_ps(V, g_XMZero); + vResult = _mm_min_ps(vResult, Max); + // Convert to int with rounding + __m128i vInt = _mm_cvtps_epi32(vResult); + // No SSE operations will write to 16-bit values, so we have to extract them 
manually + auto x = static_cast(_mm_extract_epi16(vInt, 0)); + auto y = static_cast(_mm_extract_epi16(vInt, 2)); + auto z = static_cast(_mm_extract_epi16(vInt, 4)); + auto w = static_cast(_mm_extract_epi16(vInt, 6)); + pDestination->v = static_cast( + (static_cast(w) ? 0x8000 : 0) + | ((static_cast(z) & 0x1F) << 10) + | ((static_cast(y) & 0x1F) << 5) + | ((static_cast(x) & 0x1F))); +#endif +} + + +/**************************************************************************** + * + * XMCOLOR operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMCOLOR::XMCOLOR +( + float _r, + float _g, + float _b, + float _a +) noexcept +{ + XMStoreColor(this, XMVectorSet(_r, _g, _b, _a)); +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMCOLOR::XMCOLOR(const float* pArray) noexcept +{ + XMStoreColor(this, XMLoadFloat4(reinterpret_cast(pArray))); +} + +/**************************************************************************** + * + * XMHALF2 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMHALF2::XMHALF2 +( + float _x, + float _y +) noexcept +{ + x = XMConvertFloatToHalf(_x); + y = XMConvertFloatToHalf(_y); +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMHALF2::XMHALF2(const float* pArray) noexcept +{ + assert(pArray != nullptr); + x = XMConvertFloatToHalf(pArray[0]); + y = XMConvertFloatToHalf(pArray[1]); +} + +/**************************************************************************** + * + * XMSHORTN2 operators + * + ****************************************************************************/ + + 
//------------------------------------------------------------------------------ + +inline XMSHORTN2::XMSHORTN2 +( + float _x, + float _y +) noexcept +{ + XMStoreShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least two floats (x, y). +_Use_decl_annotations_ +inline XMSHORTN2::XMSHORTN2(const float* pArray) noexcept +{ + XMStoreShortN2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); +} + +/**************************************************************************** + * + * XMSHORT2 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMSHORT2::XMSHORT2 +( + float _x, + float _y +) noexcept +{ + XMStoreShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least two floats (x, y). +_Use_decl_annotations_ +inline XMSHORT2::XMSHORT2(const float* pArray) noexcept +{ + XMStoreShort2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); +} + +/**************************************************************************** + * + * XMUSHORTN2 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMUSHORTN2::XMUSHORTN2 +( + float _x, + float _y +) noexcept +{ + XMStoreUShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least two floats (x, y). +_Use_decl_annotations_ +inline XMUSHORTN2::XMUSHORTN2(const float* pArray) noexcept +{ + XMStoreUShortN2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); +} + +/**************************************************************************** + * + * XMUSHORT2 operators + * + ****************************************************************************/ + + 
//------------------------------------------------------------------------------ + +inline XMUSHORT2::XMUSHORT2 +( + float _x, + float _y +) noexcept +{ + XMStoreUShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least two floats (x, y). +_Use_decl_annotations_ +inline XMUSHORT2::XMUSHORT2(const float* pArray) noexcept +{ + XMStoreUShort2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); +} + +/**************************************************************************** + * + * XMBYTEN2 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMBYTEN2::XMBYTEN2 +( + float _x, + float _y +) noexcept +{ + XMStoreByteN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least two floats (x, y). +_Use_decl_annotations_ +inline XMBYTEN2::XMBYTEN2(const float* pArray) noexcept +{ + XMStoreByteN2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); +} + +/**************************************************************************** + * + * XMBYTE2 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMBYTE2::XMBYTE2 +( + float _x, + float _y +) noexcept +{ + XMStoreByte2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least two floats (x, y). +_Use_decl_annotations_ +inline XMBYTE2::XMBYTE2(const float* pArray) noexcept +{ + XMStoreByte2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); +} + +/**************************************************************************** + * + * XMUBYTEN2 operators + * + ****************************************************************************/ + + 
//------------------------------------------------------------------------------ + +inline XMUBYTEN2::XMUBYTEN2 +( + float _x, + float _y +) noexcept +{ + XMStoreUByteN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least two floats (x, y). +_Use_decl_annotations_ +inline XMUBYTEN2::XMUBYTEN2(const float* pArray) noexcept +{ + XMStoreUByteN2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); +} + +/**************************************************************************** + * + * XMUBYTE2 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMUBYTE2::XMUBYTE2 +( + float _x, + float _y +) noexcept +{ + XMStoreUByte2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least two floats (x, y). +_Use_decl_annotations_ +inline XMUBYTE2::XMUBYTE2(const float* pArray) noexcept +{ + XMStoreUByte2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); +} + +/**************************************************************************** + * + * XMU565 operators + * + ****************************************************************************/ + +inline XMU565::XMU565 +( + float _x, + float _y, + float _z +) noexcept +{ + XMStoreU565(this, XMVectorSet(_x, _y, _z, 0.0f)); +} + +// pArray must point to at least three floats (x, y, z). +_Use_decl_annotations_ +inline XMU565::XMU565(const float* pArray) noexcept +{ + XMStoreU565(this, XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(pArray))); +} + +/**************************************************************************** + * + * XMFLOAT3PK operators + * + ****************************************************************************/ + +inline XMFLOAT3PK::XMFLOAT3PK +( + float _x, + float _y, + float _z +) noexcept +{ + XMStoreFloat3PK(this, XMVectorSet(_x, _y, _z, 0.0f)); +} + +// pArray must point to at least three floats (x, y, z). +_Use_decl_annotations_ +inline XMFLOAT3PK::XMFLOAT3PK(const float* pArray) noexcept +{ + 
XMStoreFloat3PK(this, XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(pArray))); +} + +/**************************************************************************** + * + * XMFLOAT3SE operators + * + ****************************************************************************/ + +inline XMFLOAT3SE::XMFLOAT3SE +( + float _x, + float _y, + float _z +) noexcept +{ + XMStoreFloat3SE(this, XMVectorSet(_x, _y, _z, 0.0f)); +} + +// pArray must point to at least three floats (x, y, z). +_Use_decl_annotations_ +inline XMFLOAT3SE::XMFLOAT3SE(const float* pArray) noexcept +{ + XMStoreFloat3SE(this, XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(pArray))); +} + +/**************************************************************************** + * + * XMHALF4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMHALF4::XMHALF4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + x = XMConvertFloatToHalf(_x); + y = XMConvertFloatToHalf(_y); + z = XMConvertFloatToHalf(_z); + w = XMConvertFloatToHalf(_w); +} + +//------------------------------------------------------------------------------ + +// Converts four consecutive floats from pArray into the four HALF members starting at x. +_Use_decl_annotations_ +inline XMHALF4::XMHALF4(const float* pArray) noexcept +{ + XMConvertFloatToHalfStream(&x, sizeof(HALF), pArray, sizeof(float), 4); +} + +/**************************************************************************** + * + * XMSHORTN4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMSHORTN4::XMSHORTN4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + XMStoreShortN4(this, XMVectorSet(_x, _y, _z, _w)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least four floats (x, y, z, w). +_Use_decl_annotations_ +inline XMSHORTN4::XMSHORTN4(const float* pArray) noexcept +{ + XMStoreShortN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); +} + 
+/**************************************************************************** + * + * XMSHORT4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMSHORT4::XMSHORT4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + XMStoreShort4(this, XMVectorSet(_x, _y, _z, _w)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least four floats (x, y, z, w). +_Use_decl_annotations_ +inline XMSHORT4::XMSHORT4(const float* pArray) noexcept +{ + XMStoreShort4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); +} + +/**************************************************************************** + * + * XMUSHORTN4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMUSHORTN4::XMUSHORTN4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + XMStoreUShortN4(this, XMVectorSet(_x, _y, _z, _w)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least four floats (x, y, z, w). +_Use_decl_annotations_ +inline XMUSHORTN4::XMUSHORTN4(const float* pArray) noexcept +{ + XMStoreUShortN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); +} + +/**************************************************************************** + * + * XMUSHORT4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMUSHORT4::XMUSHORT4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + XMStoreUShort4(this, XMVectorSet(_x, _y, _z, _w)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least four floats (x, y, z, w). +_Use_decl_annotations_ +inline XMUSHORT4::XMUSHORT4(const float* pArray) noexcept +{ + XMStoreUShort4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); +} 
+ +/**************************************************************************** + * + * XMXDECN4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMXDECN4::XMXDECN4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + XMStoreXDecN4(this, XMVectorSet(_x, _y, _z, _w)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least four floats (x, y, z, w). +_Use_decl_annotations_ +inline XMXDECN4::XMXDECN4(const float* pArray) noexcept +{ + XMStoreXDecN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); +} + +/**************************************************************************** + * + * XMXDEC4 operators + * + ****************************************************************************/ + +#pragma warning(push) +#pragma warning(disable : 4996) + // C4996: ignore deprecation warning + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif + + //------------------------------------------------------------------------------ + +inline XMXDEC4::XMXDEC4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + XMStoreXDec4(this, XMVectorSet(_x, _y, _z, _w)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least four floats (x, y, z, w). +_Use_decl_annotations_ +inline XMXDEC4::XMXDEC4(const float* pArray) noexcept +{ + XMStoreXDec4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); +} + +/**************************************************************************** + * + * XMDECN4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMDECN4::XMDECN4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + XMStoreDecN4(this, XMVectorSet(_x, _y, _z, _w)); +} + 
+//------------------------------------------------------------------------------ +// pArray must point to at least four floats (x, y, z, w). +_Use_decl_annotations_ +inline XMDECN4::XMDECN4(const float* pArray) noexcept +{ + XMStoreDecN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); +} + +/**************************************************************************** + * + * XMDEC4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMDEC4::XMDEC4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + XMStoreDec4(this, XMVectorSet(_x, _y, _z, _w)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least four floats (x, y, z, w). +_Use_decl_annotations_ +inline XMDEC4::XMDEC4(const float* pArray) noexcept +{ + XMStoreDec4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); +} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +#pragma warning(pop) + +/**************************************************************************** + * + * XMUDECN4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMUDECN4::XMUDECN4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + XMStoreUDecN4(this, XMVectorSet(_x, _y, _z, _w)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least four floats (x, y, z, w). +_Use_decl_annotations_ +inline XMUDECN4::XMUDECN4(const float* pArray) noexcept +{ + XMStoreUDecN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); +} + +/**************************************************************************** + * + * XMUDEC4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMUDEC4::XMUDEC4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + 
XMStoreUDec4(this, XMVectorSet(_x, _y, _z, _w)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least four floats (x, y, z, w). +_Use_decl_annotations_ +inline XMUDEC4::XMUDEC4(const float* pArray) noexcept +{ + XMStoreUDec4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); +} + +/**************************************************************************** + * + * XMBYTEN4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMBYTEN4::XMBYTEN4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + XMStoreByteN4(this, XMVectorSet(_x, _y, _z, _w)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least four floats (x, y, z, w). +_Use_decl_annotations_ +inline XMBYTEN4::XMBYTEN4(const float* pArray) noexcept +{ + XMStoreByteN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); +} + +/**************************************************************************** + * + * XMBYTE4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMBYTE4::XMBYTE4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + XMStoreByte4(this, XMVectorSet(_x, _y, _z, _w)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least four floats (x, y, z, w). +_Use_decl_annotations_ +inline XMBYTE4::XMBYTE4(const float* pArray) noexcept +{ + XMStoreByte4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); +} + +/**************************************************************************** + * + * XMUBYTEN4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMUBYTEN4::XMUBYTEN4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + XMStoreUByteN4(this, 
XMVectorSet(_x, _y, _z, _w)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least four floats (x, y, z, w). +_Use_decl_annotations_ +inline XMUBYTEN4::XMUBYTEN4(const float* pArray) noexcept +{ + XMStoreUByteN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); +} + +/**************************************************************************** + * + * XMUBYTE4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMUBYTE4::XMUBYTE4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + XMStoreUByte4(this, XMVectorSet(_x, _y, _z, _w)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least four floats (x, y, z, w). +_Use_decl_annotations_ +inline XMUBYTE4::XMUBYTE4(const float* pArray) noexcept +{ + XMStoreUByte4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); +} + +/**************************************************************************** + * + * XMUNIBBLE4 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMUNIBBLE4::XMUNIBBLE4 +( + float _x, + float _y, + float _z, + float _w +) noexcept +{ + XMStoreUNibble4(this, XMVectorSet(_x, _y, _z, _w)); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least four floats (x, y, z, w). +_Use_decl_annotations_ +inline XMUNIBBLE4::XMUNIBBLE4(const float* pArray) noexcept +{ + XMStoreUNibble4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); +} + +/**************************************************************************** + * + * XMU555 operators + * + ****************************************************************************/ + + //------------------------------------------------------------------------------ + +inline XMU555::XMU555 +( + float _x, + float _y, + float _z, + bool _w +) noexcept +{ + XMStoreU555(this, 
XMVectorSet(_x, _y, _z, ((_w) ? 1.0f : 0.0f))); +} + +//------------------------------------------------------------------------------ +// pArray must point to at least three floats (x, y, z); w is taken from _w (true -> 1.0f, false -> 0.0f). +_Use_decl_annotations_ +inline XMU555::XMU555 +( + const float* pArray, + bool _w +) noexcept +{ + XMVECTOR V = XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(pArray)); + XMStoreU555(this, XMVectorSetW(V, ((_w) ? 1.0f : 0.0f))); +} + diff --git a/Sdk/External/DirectXMath/LICENSE b/Sdk/External/DirectXMath/LICENSE new file mode 100644 index 0000000..a1df24b --- /dev/null +++ b/Sdk/External/DirectXMath/LICENSE @@ -0,0 +1,21 @@ + The MIT License (MIT) + +Copyright (c) 2011-2020 Microsoft Corp + +Permission is hereby granted, free of charge, to any person obtaining a copy of this +software and associated documentation files (the "Software"), to deal in the Software +without restriction, including without limitation the rights to use, copy, modify, +merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be included in all copies +or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF +CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE +OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ diff --git a/Sdk/External/DirectXMath/README.md b/Sdk/External/DirectXMath/README.md new file mode 100644 index 0000000..304e93d --- /dev/null +++ b/Sdk/External/DirectXMath/README.md @@ -0,0 +1,78 @@ +![DirectX Logo](https://github.com/Microsoft/DirectXMath/wiki/X_jpg.jpg) + +# DirectXMath + +https://github.com/Microsoft/DirectXMath + +Copyright (c) Microsoft Corporation. All rights reserved. + +**August 2020** + +This package contains the DirectXMath library, an all inline SIMD C++ linear algebra library for use in games and graphics apps. + +This code is designed to build with Visual Studio 2017, Visual Studio 2019, or clang for Windows. It is recommended that you make use of the latest updates (VS 2017 15.9 update, or VS 2019 Update 4 or later). + +These components are designed to work without requiring any content from the legacy DirectX SDK. For details, see [Where is the DirectX SDK?](https://aka.ms/dxsdk). + +## Directory Layout + +* ``Inc\`` + + + DirectXMath Files (in the DirectX C++ namespace) + + * DirectXMath.h - Core library + * DirectXPackedVector.h - Load/Store functions and types for working with various compressed GPU formats + * DirectXColors.h - .NET-style Color defines in sRGB color space + * DirectXCollision.h - Bounding volume collision library + +* ``Extensions\`` + + + Advanced instruction set variants for guarded codepaths + + * DirectXMathSSE3.h - SSE3 + * DirectXMathBE.h - Supplemental SSE3 (SSSE3) + * DirectXMathSSE4.h - SSE4.1 + * DirectXMathAVX.h - Advanced Vector Extensions (AVX) + * DirectXMathAVX2.h - Advanced Vector Extensions 2 (AVX2) + * DirectXMathF16C.h - Half-precision conversions (F16C) + * DirectXMathFMA3.h - Fused multiply-accumulate (FMA3) + * DirectXMathFMA4.h - Fused multiply-accumulate (FMA4) + +* ``SHMath\`` + + + Spherical Harmonics math functions + + * DirectXSH.h - Header for SHMath functions + * DirectXSH.cpp, DirectXSHD3D11.cpp, DirectXSHD3D12.cpp - Implementation + +* ``XDSP\`` + + + XDSP.h - Digital Signal 
Processing helper functions + +## Documentation + +Documentation is available on the [Microsoft Docs](https://docs.microsoft.com/en-us/windows/desktop/dxmath/directxmath-portal). Additional information can be found on the [project wiki](https://github.com/microsoft/DirectXMath/wiki). + +## Compiler support + +Officially the library is supported with Microsoft Visual C++ and clang/LLVM. It should also compile with the Intel C++ compiler, GCC, and MinGW compilers. + +To build for non-Windows platforms, you need to provide a ``sal.h`` header in your include path. You can obtain an open source version from [GitHub](https://github.com/dotnet/corert/blob/master/src/Native/inc/unix/sal.h). + +## Notices + +All content and source code for this package are subject to the terms of the [MIT License](http://opensource.org/licenses/MIT). + +For the latest version of DirectXMath, bug reports, etc. please visit the project site on [GitHub](https://github.com/microsoft/DirectXMath). + +## Contributing + +This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. + +When you submit a pull request, a CLA bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA. + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 
+ +## Trademarks + +This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party's policies. diff --git a/Sdk/External/DirectXMath/SECURITY.md b/Sdk/External/DirectXMath/SECURITY.md new file mode 100644 index 0000000..f7b8998 --- /dev/null +++ b/Sdk/External/DirectXMath/SECURITY.md @@ -0,0 +1,41 @@ + + +## Security + +Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). + +If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. + +## Reporting Security Issues + +**Please do not report security vulnerabilities through public GitHub issues.** + +Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). + +If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). 
If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). + +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). + +Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: + + * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) + * Full paths of source file(s) related to the manifestation of the issue + * The location of the affected source code (tag/branch/commit or direct URL) + * Any special configuration required to reproduce the issue + * Step-by-step instructions to reproduce the issue + * Proof-of-concept or exploit code (if possible) + * Impact of the issue, including how an attacker might exploit the issue + +This information will help us triage your report more quickly. + +If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. + +## Preferred Languages + +We prefer all communications to be in English. + +## Policy + +Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 
+ + \ No newline at end of file diff --git a/Sdk/External/DirectXMath/SHMath/DirectXSH.cpp b/Sdk/External/DirectXMath/SHMath/DirectXSH.cpp new file mode 100644 index 0000000..7c55710 --- /dev/null +++ b/Sdk/External/DirectXMath/SHMath/DirectXSH.cpp @@ -0,0 +1,4905 @@ +//----------------------------------------------------------------------------------- +// DirectXSH.cpp -- C++ Spherical Harmonics Math Library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/p/?LinkId=262885 +//------------------------------------------------------------------------------------- + +#pragma warning( disable : 4619 4456 ) +// C4619 #pragma warning warnings +// C4456 declaration hides previous local declaration + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wshadow" +#pragma clang diagnostic ignored "-Wunused-const-variable" +#pragma clang diagnostic ignored "-Wunused-function" +#endif + +#include "DirectXSH.h" +#include + +using namespace DirectX; + +namespace +{ +#ifdef _PREFAST_ +#pragma prefast(disable:246, "generated code by maple (nested const variable definitions)") +#endif + + // Entry l equals 2*sqrt(pi/(2l+1)); the l=4 entry, 2/3*sqrt(pi), is the + // simplified form of 2/9*sqrt(9*pi). + const float fExtraNormFac[XM_SH_MAXORDER] = { 2.0f*sqrtf(XM_PI), 2.0f / 3.0f*sqrtf(3.0f*XM_PI), 2.0f / 5.0f*sqrtf(5.0f*XM_PI), 2.0f / 7.0f*sqrtf(7.0f*XM_PI), 2.0f / 3.0f*sqrtf(XM_PI), 2.0f / 11.0f*sqrtf(11.0f*XM_PI) }; + + // computes the integral of a constant function over a solid angular + // extent. No error checking - only used internally. This function + // only returns the Yl0 coefficients, since the rest are zero for + // circularly symmetric functions. 
+ const float ComputeCapInt_t1 = sqrtf(0.3141593E1f); + const float ComputeCapInt_t5 = sqrtf(3.0f); + const float ComputeCapInt_t11 = sqrtf(5.0f); + const float ComputeCapInt_t18 = sqrtf(7.0f); + const float ComputeCapInt_t32 = sqrtf(11.0f); + + /* Always writes pR[0] and pR[1]; pR[k] for k = 2..5 is written only when order > k. */ + inline void ComputeCapInt(const size_t order, float angle, float *pR) + { + const float t2 = cosf(angle); + const float t3 = ComputeCapInt_t1*t2; + const float t7 = sinf(angle); + const float t8 = t7*t7; + + + pR[0] = -t3 + ComputeCapInt_t1; + pR[1] = ComputeCapInt_t5*ComputeCapInt_t1*t8 / 2.0f; + + if (order > 2) + { + const float t13 = t2*t2; + + pR[2] = -ComputeCapInt_t11*ComputeCapInt_t1*t2*(t13 - 1.0f) / 2.0f; + if (order > 3) + { + const float t19 = ComputeCapInt_t18*ComputeCapInt_t1; + const float t20 = t13*t13; + + pR[3] = -5.0f / 8.0f*t19*t20 + 3.0f / 4.0f*t19*t13 - t19 / 8.0f; + if (order > 4) + { + + + pR[4] = -3.0f / 8.0f*t3*(7.0f*t20 - 10.0f*t13 + 3.0f); + if (order > 5) + { + const float t33 = ComputeCapInt_t32*ComputeCapInt_t1; + pR[5] = -21.0f / 16.0f*t33*t20*t13 + 35.0f / 16.0f*t33*t20 - 15.0f / 16.0f*t33*t13 + t33 / 16.0f; + } + } + } + } + } + + // input pF only consists of Yl0 values, normalizes coefficients for directional + // lights. + // Returns fCW0 + fCW1, adding fCW2 when order > 2 and fCW4 when order > 4. + // NOTE(review): this function takes no pF parameter; the "input pF" wording above + // appears to be left over from an earlier signature - confirm. + inline float CosWtInt(const size_t order) + { + const float fCW0 = 0.25f; + const float fCW1 = 0.5f; + const float fCW2 = 5.0f / 16.0f; + //const float fCW3 = 0.0f; + const float fCW4 = -3.0f / 32.0f; + //const float fCW5 = 0.0f; + + // order has to be at least linear... + + float fRet = fCW0 + fCW1; + + if (order > 2) fRet += fCW2; + if (order > 4) fRet += fCW4; + + // odd degrees >= 3 evaluate to zero integrated against cosine... 
+ + return fRet; + } + + const float SHEvalHemisphereLight_fSqrtPi = sqrtf(XM_PI); + const float SHEvalHemisphereLight_fSqrtPi3 = sqrtf(XM_PI / 3.0f); + + typedef float REAL; +#define CONSTANT(x) (x ## f) + + // routine generated programmatically for evaluating SH basis for degree 1 + // inputs (x,y,z) are a point on the sphere (i.e., must be unit length) + // output is vector b with SH basis evaluated at (x,y,z). + // Coefficient (l, m) is written to b[l*(l+1) + m]. + // + inline void sh_eval_basis_1(REAL x, REAL y, REAL z, REAL b[4]) + { + /* m=0 */ + + // l=0 + const REAL p_0_0 = CONSTANT(0.282094791773878140); + b[0] = p_0_0; // l=0,m=0 + // l=1 + const REAL p_1_0 = CONSTANT(0.488602511902919920)*z; + b[2] = p_1_0; // l=1,m=0 + + + /* m=1 */ + + const REAL s1 = y; + const REAL c1 = x; + + // l=1 + const REAL p_1_1 = CONSTANT(-0.488602511902919920); + b[1] = p_1_1*s1; // l=1,m=-1 + b[3] = p_1_1*c1; // l=1,m=+1 + } + + // routine generated programmatically for evaluating SH basis for degree 2 + // inputs (x,y,z) are a point on the sphere (i.e., must be unit length) + // output is vector b with SH basis evaluated at (x,y,z). 
+ // + // Coefficient (l, m) is written to b[l*(l+1) + m]; all of b[0..8] are set. + inline void sh_eval_basis_2(REAL x, REAL y, REAL z, REAL b[9]) + { + const REAL z2 = z*z; + + + /* m=0 */ + + // l=0 + const REAL p_0_0 = CONSTANT(0.282094791773878140); + b[0] = p_0_0; // l=0,m=0 + // l=1 + const REAL p_1_0 = CONSTANT(0.488602511902919920)*z; + b[2] = p_1_0; // l=1,m=0 + // l=2 + const REAL p_2_0 = CONSTANT(0.946174695757560080)*z2 + CONSTANT(-0.315391565252520050); + b[6] = p_2_0; // l=2,m=0 + + + /* m=1 */ + + const REAL s1 = y; + const REAL c1 = x; + + // l=1 + const REAL p_1_1 = CONSTANT(-0.488602511902919920); + b[1] = p_1_1*s1; // l=1,m=-1 + b[3] = p_1_1*c1; // l=1,m=+1 + // l=2 + const REAL p_2_1 = CONSTANT(-1.092548430592079200)*z; + b[5] = p_2_1*s1; // l=2,m=-1 + b[7] = p_2_1*c1; // l=2,m=+1 + + + /* m=2 */ + /* c2 + i*s2 == (c1 + i*s1)*(x + i*y), split into real and imaginary parts */ + + const REAL s2 = x*s1 + y*c1; + const REAL c2 = x*c1 - y*s1; + + // l=2 + const REAL p_2_2 = CONSTANT(0.546274215296039590); + b[4] = p_2_2*s2; // l=2,m=-2 + b[8] = p_2_2*c2; // l=2,m=+2 + } + + // routine generated programmatically for evaluating SH basis for degree 3 + // inputs (x,y,z) are a point on the sphere (i.e., must be unit length) + // output is vector b with SH basis evaluated at (x,y,z). 
+ // + void sh_eval_basis_3(REAL x, REAL y, REAL z, REAL b[16]) + { + const REAL z2 = z*z; + + + /* m=0 */ + + // l=0 + const REAL p_0_0 = CONSTANT(0.282094791773878140); + b[0] = p_0_0; // l=0,m=0 + // l=1 + const REAL p_1_0 = CONSTANT(0.488602511902919920)*z; + b[2] = p_1_0; // l=1,m=0 + // l=2 + const REAL p_2_0 = CONSTANT(0.946174695757560080)*z2 + CONSTANT(-0.315391565252520050); + b[6] = p_2_0; // l=2,m=0 + // l=3 + const REAL p_3_0 = z*(CONSTANT(1.865881662950577000)*z2 + CONSTANT(-1.119528997770346200)); + b[12] = p_3_0; // l=3,m=0 + + + /* m=1 */ + + const REAL s1 = y; + const REAL c1 = x; + + // l=1 + const REAL p_1_1 = CONSTANT(-0.488602511902919920); + b[1] = p_1_1*s1; // l=1,m=-1 + b[3] = p_1_1*c1; // l=1,m=+1 + // l=2 + const REAL p_2_1 = CONSTANT(-1.092548430592079200)*z; + b[5] = p_2_1*s1; // l=2,m=-1 + b[7] = p_2_1*c1; // l=2,m=+1 + // l=3 + const REAL p_3_1 = CONSTANT(-2.285228997322328800)*z2 + CONSTANT(0.457045799464465770); + b[11] = p_3_1*s1; // l=3,m=-1 + b[13] = p_3_1*c1; // l=3,m=+1 + + + /* m=2 */ + + const REAL s2 = x*s1 + y*c1; + const REAL c2 = x*c1 - y*s1; + + // l=2 + const REAL p_2_2 = CONSTANT(0.546274215296039590); + b[4] = p_2_2*s2; // l=2,m=-2 + b[8] = p_2_2*c2; // l=2,m=+2 + // l=3 + const REAL p_3_2 = CONSTANT(1.445305721320277100)*z; + b[10] = p_3_2*s2; // l=3,m=-2 + b[14] = p_3_2*c2; // l=3,m=+2 + + + /* m=3 */ + + const REAL s3 = x*s2 + y*c2; + const REAL c3 = x*c2 - y*s2; + + // l=3 + const REAL p_3_3 = CONSTANT(-0.590043589926643520); + b[9] = p_3_3*s3; // l=3,m=-3 + b[15] = p_3_3*c3; // l=3,m=+3 + } + + // routine generated programmatically for evaluating SH basis for degree 4 + // inputs (x,y,z) are a point on the sphere (i.e., must be unit length) + // output is vector b with SH basis evaluated at (x,y,z). 
+ // + void sh_eval_basis_4(REAL x, REAL y, REAL z, REAL b[25]) + { + const REAL z2 = z*z; + + + /* m=0 */ + + // l=0 + const REAL p_0_0 = CONSTANT(0.282094791773878140); + b[0] = p_0_0; // l=0,m=0 + // l=1 + const REAL p_1_0 = CONSTANT(0.488602511902919920)*z; + b[2] = p_1_0; // l=1,m=0 + // l=2 + const REAL p_2_0 = CONSTANT(0.946174695757560080)*z2 + CONSTANT(-0.315391565252520050); + b[6] = p_2_0; // l=2,m=0 + // l=3 + const REAL p_3_0 = z*(CONSTANT(1.865881662950577000)*z2 + CONSTANT(-1.119528997770346200)); + b[12] = p_3_0; // l=3,m=0 + // l=4 + const REAL p_4_0 = CONSTANT(1.984313483298443000)*z*p_3_0 + CONSTANT(-1.006230589874905300)*p_2_0; + b[20] = p_4_0; // l=4,m=0 + + + /* m=1 */ + + const REAL s1 = y; + const REAL c1 = x; + + // l=1 + const REAL p_1_1 = CONSTANT(-0.488602511902919920); + b[1] = p_1_1*s1; // l=1,m=-1 + b[3] = p_1_1*c1; // l=1,m=+1 + // l=2 + const REAL p_2_1 = CONSTANT(-1.092548430592079200)*z; + b[5] = p_2_1*s1; // l=2,m=-1 + b[7] = p_2_1*c1; // l=2,m=+1 + // l=3 + const REAL p_3_1 = CONSTANT(-2.285228997322328800)*z2 + CONSTANT(0.457045799464465770); + b[11] = p_3_1*s1; // l=3,m=-1 + b[13] = p_3_1*c1; // l=3,m=+1 + // l=4 + const REAL p_4_1 = z*(CONSTANT(-4.683325804901024000)*z2 + CONSTANT(2.007139630671867200)); + b[19] = p_4_1*s1; // l=4,m=-1 + b[21] = p_4_1*c1; // l=4,m=+1 + + + /* m=2 */ + + const REAL s2 = x*s1 + y*c1; + const REAL c2 = x*c1 - y*s1; + + // l=2 + const REAL p_2_2 = CONSTANT(0.546274215296039590); + b[4] = p_2_2*s2; // l=2,m=-2 + b[8] = p_2_2*c2; // l=2,m=+2 + // l=3 + const REAL p_3_2 = CONSTANT(1.445305721320277100)*z; + b[10] = p_3_2*s2; // l=3,m=-2 + b[14] = p_3_2*c2; // l=3,m=+2 + // l=4 + const REAL p_4_2 = CONSTANT(3.311611435151459800)*z2 + CONSTANT(-0.473087347878779980); + b[18] = p_4_2*s2; // l=4,m=-2 + b[22] = p_4_2*c2; // l=4,m=+2 + + + /* m=3 */ + + const REAL s3 = x*s2 + y*c2; + const REAL c3 = x*c2 - y*s2; + + // l=3 + const REAL p_3_3 = CONSTANT(-0.590043589926643520); + b[9] = p_3_3*s3; // 
l=3,m=-3 + b[15] = p_3_3*c3; // l=3,m=+3 + // l=4 + const REAL p_4_3 = CONSTANT(-1.770130769779930200)*z; + b[17] = p_4_3*s3; // l=4,m=-3 + b[23] = p_4_3*c3; // l=4,m=+3 + + + /* m=4 */ + + const REAL s4 = x*s3 + y*c3; + const REAL c4 = x*c3 - y*s3; + + // l=4 + const REAL p_4_4 = CONSTANT(0.625835735449176030); + b[16] = p_4_4*s4; // l=4,m=-4 + b[24] = p_4_4*c4; // l=4,m=+4 + } + + // routine generated programmatically for evaluating SH basis for degree 5 + // inputs (x,y,z) are a point on the sphere (i.e., must be unit length) + // output is vector b with SH basis evaluated at (x,y,z). + // + void sh_eval_basis_5(REAL x, REAL y, REAL z, REAL b[36]) + { + const REAL z2 = z*z; + + + /* m=0 */ + + // l=0 + const REAL p_0_0 = CONSTANT(0.282094791773878140); + b[0] = p_0_0; // l=0,m=0 + // l=1 + const REAL p_1_0 = CONSTANT(0.488602511902919920)*z; + b[2] = p_1_0; // l=1,m=0 + // l=2 + const REAL p_2_0 = CONSTANT(0.946174695757560080)*z2 + CONSTANT(-0.315391565252520050); + b[6] = p_2_0; // l=2,m=0 + // l=3 + const REAL p_3_0 = z*(CONSTANT(1.865881662950577000)*z2 + CONSTANT(-1.119528997770346200)); + b[12] = p_3_0; // l=3,m=0 + // l=4 + const REAL p_4_0 = CONSTANT(1.984313483298443000)*z*p_3_0 + CONSTANT(-1.006230589874905300)*p_2_0; + b[20] = p_4_0; // l=4,m=0 + // l=5 + const REAL p_5_0 = CONSTANT(1.989974874213239700)*z*p_4_0 + CONSTANT(-1.002853072844814000)*p_3_0; + b[30] = p_5_0; // l=5,m=0 + + + /* m=1 */ + + const REAL s1 = y; + const REAL c1 = x; + + // l=1 + const REAL p_1_1 = CONSTANT(-0.488602511902919920); + b[1] = p_1_1*s1; // l=1,m=-1 + b[3] = p_1_1*c1; // l=1,m=+1 + // l=2 + const REAL p_2_1 = CONSTANT(-1.092548430592079200)*z; + b[5] = p_2_1*s1; // l=2,m=-1 + b[7] = p_2_1*c1; // l=2,m=+1 + // l=3 + const REAL p_3_1 = CONSTANT(-2.285228997322328800)*z2 + CONSTANT(0.457045799464465770); + b[11] = p_3_1*s1; // l=3,m=-1 + b[13] = p_3_1*c1; // l=3,m=+1 + // l=4 + const REAL p_4_1 = z*(CONSTANT(-4.683325804901024000)*z2 + CONSTANT(2.007139630671867200)); + 
b[19] = p_4_1*s1; // l=4,m=-1 + b[21] = p_4_1*c1; // l=4,m=+1 + // l=5 + const REAL p_5_1 = CONSTANT(2.031009601158990200)*z*p_4_1 + CONSTANT(-0.991031208965114650)*p_3_1; + b[29] = p_5_1*s1; // l=5,m=-1 + b[31] = p_5_1*c1; // l=5,m=+1 + + + /* m=2 */ + + const REAL s2 = x*s1 + y*c1; + const REAL c2 = x*c1 - y*s1; + + // l=2 + const REAL p_2_2 = CONSTANT(0.546274215296039590); + b[4] = p_2_2*s2; // l=2,m=-2 + b[8] = p_2_2*c2; // l=2,m=+2 + // l=3 + const REAL p_3_2 = CONSTANT(1.445305721320277100)*z; + b[10] = p_3_2*s2; // l=3,m=-2 + b[14] = p_3_2*c2; // l=3,m=+2 + // l=4 + const REAL p_4_2 = CONSTANT(3.311611435151459800)*z2 + CONSTANT(-0.473087347878779980); + b[18] = p_4_2*s2; // l=4,m=-2 + b[22] = p_4_2*c2; // l=4,m=+2 + // l=5 + const REAL p_5_2 = z*(CONSTANT(7.190305177459987500)*z2 + CONSTANT(-2.396768392486662100)); + b[28] = p_5_2*s2; // l=5,m=-2 + b[32] = p_5_2*c2; // l=5,m=+2 + + + /* m=3 */ + + const REAL s3 = x*s2 + y*c2; + const REAL c3 = x*c2 - y*s2; + + // l=3 + const REAL p_3_3 = CONSTANT(-0.590043589926643520); + b[9] = p_3_3*s3; // l=3,m=-3 + b[15] = p_3_3*c3; // l=3,m=+3 + // l=4 + const REAL p_4_3 = CONSTANT(-1.770130769779930200)*z; + b[17] = p_4_3*s3; // l=4,m=-3 + b[23] = p_4_3*c3; // l=4,m=+3 + // l=5 + const REAL p_5_3 = CONSTANT(-4.403144694917253700)*z2 + CONSTANT(0.489238299435250430); + b[27] = p_5_3*s3; // l=5,m=-3 + b[33] = p_5_3*c3; // l=5,m=+3 + + + /* m=4 */ + + const REAL s4 = x*s3 + y*c3; + const REAL c4 = x*c3 - y*s3; + + // l=4 + const REAL p_4_4 = CONSTANT(0.625835735449176030); + b[16] = p_4_4*s4; // l=4,m=-4 + b[24] = p_4_4*c4; // l=4,m=+4 + // l=5 + const REAL p_5_4 = CONSTANT(2.075662314881041100)*z; + b[26] = p_5_4*s4; // l=5,m=-4 + b[34] = p_5_4*c4; // l=5,m=+4 + + + /* m=5 */ + + const REAL s5 = x*s4 + y*c4; + const REAL c5 = x*c4 - y*s4; + + // l=5 + const REAL p_5_5 = CONSTANT(-0.656382056840170150); + b[25] = p_5_5*s5; // l=5,m=-5 + b[35] = p_5_5*c5; // l=5,m=+5 + } + + const REAL M_PIjs = (REAL)(4.0*atan(1.0)); + 
const REAL maxang = (REAL)(M_PIjs / 2); + const int NSH0 = 1; + const int NSH1 = 4; + const int NSH2 = 9; + const int NSH3 = 16; + const int NSH4 = 25; + const int NSH5 = 36; + const int NSH6 = 49; + const int NSH7 = 64; + const int NSH8 = 81; + const int NSH9 = 100; + const int NL0 = 1; + const int NL1 = 3; + const int NL2 = 5; + const int NL3 = 7; + const int NL4 = 9; + const int NL5 = 11; + const int NL6 = 13; + const int NL7 = 15; + const int NL8 = 17; + const int NL9 = 19; + + inline void rot(REAL ct, REAL st, REAL x, REAL y, REAL &xout, REAL &yout) + { + xout = x*ct - y*st; + yout = y*ct + x*st; + } + + inline void rot_inv(REAL ct, REAL st, REAL x, REAL y, REAL &xout, REAL &yout) + { + xout = x*ct + y*st; + yout = y*ct - x*st; + } + + inline void rot_1(REAL ct, REAL st, REAL ctm[1], REAL stm[1]) + { + ctm[0] = ct; + stm[0] = st; + } + + inline void rot_2(REAL ct, REAL st, REAL ctm[2], REAL stm[2]) + { + REAL ct2 = CONSTANT(2.0)*ct; + ctm[0] = ct; + stm[0] = st; + ctm[1] = ct2*ct - CONSTANT(1.0); + stm[1] = ct2*st; + } + + inline void rot_3(REAL ct, REAL st, REAL ctm[3], REAL stm[3]) + { + REAL ct2 = CONSTANT(2.0)*ct; + ctm[0] = ct; + stm[0] = st; + ctm[1] = ct2*ct - CONSTANT(1.0); + stm[1] = ct2*st; + ctm[2] = ct2*ctm[1] - ct; + stm[2] = ct2*stm[1] - st; + } + + inline void rot_4(REAL ct, REAL st, REAL ctm[4], REAL stm[4]) + { + REAL ct2 = CONSTANT(2.0)*ct; + ctm[0] = ct; + stm[0] = st; + ctm[1] = ct2*ct - CONSTANT(1.0); + stm[1] = ct2*st; + ctm[2] = ct2*ctm[1] - ct; + stm[2] = ct2*stm[1] - st; + ctm[3] = ct2*ctm[2] - ctm[1]; + stm[3] = ct2*stm[2] - stm[1]; + } + + inline void rot_5(REAL ct, REAL st, REAL ctm[5], REAL stm[5]) + { + REAL ct2 = CONSTANT(2.0)*ct; + ctm[0] = ct; + stm[0] = st; + ctm[1] = ct2*ct - CONSTANT(1.0); + stm[1] = ct2*st; + ctm[2] = ct2*ctm[1] - ct; + stm[2] = ct2*stm[1] - st; + ctm[3] = ct2*ctm[2] - ctm[1]; + stm[3] = ct2*stm[2] - stm[1]; + ctm[4] = ct2*ctm[3] - ctm[2]; + stm[4] = ct2*stm[3] - stm[2]; + } + + inline void sh_rotz_1(REAL 
ctm[1], REAL stm[1], REAL y[NL1], REAL yr[NL1]) + { + yr[1] = y[1]; + rot_inv(ctm[0], stm[0], y[0], y[2], yr[0], yr[2]); + } + + inline void sh_rotz_2(REAL ctm[2], REAL stm[2], REAL y[NL2], REAL yr[NL2]) + { + yr[2] = y[2]; + rot_inv(ctm[0], stm[0], y[1], y[3], yr[1], yr[3]); + rot_inv(ctm[1], stm[1], y[0], y[4], yr[0], yr[4]); + } + + inline void sh_rotz_3(REAL ctm[3], REAL stm[3], REAL y[NL3], REAL yr[NL3]) + { + yr[3] = y[3]; + rot_inv(ctm[0], stm[0], y[2], y[4], yr[2], yr[4]); + rot_inv(ctm[1], stm[1], y[1], y[5], yr[1], yr[5]); + rot_inv(ctm[2], stm[2], y[0], y[6], yr[0], yr[6]); + } + + inline void sh_rotz_4(REAL ctm[4], REAL stm[4], REAL y[NL4], REAL yr[NL4]) + { + yr[4] = y[4]; + rot_inv(ctm[0], stm[0], y[3], y[5], yr[3], yr[5]); + rot_inv(ctm[1], stm[1], y[2], y[6], yr[2], yr[6]); + rot_inv(ctm[2], stm[2], y[1], y[7], yr[1], yr[7]); + rot_inv(ctm[3], stm[3], y[0], y[8], yr[0], yr[8]); + } + + inline void sh_rotz_5(REAL ctm[5], REAL stm[5], REAL y[NL5], REAL yr[NL5]) + { + yr[5] = y[5]; + rot_inv(ctm[0], stm[0], y[4], y[6], yr[4], yr[6]); + rot_inv(ctm[1], stm[1], y[3], y[7], yr[3], yr[7]); + rot_inv(ctm[2], stm[2], y[2], y[8], yr[2], yr[8]); + rot_inv(ctm[3], stm[3], y[1], y[9], yr[1], yr[9]); + rot_inv(ctm[4], stm[4], y[0], y[10], yr[0], yr[10]); + } + + // rotation code generated programmatically by rotatex (2000x4000 samples, eps=1e-008) + + const REAL fx_1_001 = (REAL)(sqrt(1.0) / 1.0); // 1 + const REAL fx_1_002 = (REAL)(-sqrt(1.0) / 1.0); // -1.00000030843 + + inline void sh_rotx90_1(REAL y[], REAL yr[]) + { + yr[0] = fx_1_001*y[1]; + yr[1] = fx_1_002*y[0]; + yr[2] = fx_1_001*y[2]; + }; + + inline void sh_rotx90_inv_1(REAL y[], REAL yr[]) + { + yr[0] = fx_1_002*y[1]; + yr[1] = fx_1_001*y[0]; + yr[2] = fx_1_001*y[2]; + } + + const REAL fx_2_001 = (REAL)(sqrt(4.0) / 2.0); // 1 + const REAL fx_2_002 = (REAL)(-sqrt(4.0) / 2.0); // -1 + const REAL fx_2_003 = (REAL)(-sqrt(1.0) / 2.0); // -0.500000257021 + const REAL fx_2_004 = (REAL)(-sqrt(3.0) / 2.0); // 
-0.866025848959 + const REAL fx_2_005 = (REAL)(sqrt(1.0) / 2.0); // 0.5 + + inline void sh_rotx90_2(REAL y[], REAL yr[]) + { + yr[0] = fx_2_001*y[3]; + yr[1] = fx_2_002*y[1]; + yr[2] = fx_2_003*y[2] + fx_2_004*y[4]; + yr[3] = fx_2_002*y[0]; + yr[4] = fx_2_004*y[2] + fx_2_005*y[4]; + }; + + inline void sh_rotx90_inv_2(REAL y[], REAL yr[]) + { + yr[0] = fx_2_002*y[3]; + yr[1] = fx_2_002*y[1]; + yr[2] = fx_2_003*y[2] + fx_2_004*y[4]; + yr[3] = fx_2_001*y[0]; + yr[4] = fx_2_004*y[2] + fx_2_005*y[4]; + } + + const REAL fx_3_001 = (REAL)(-sqrt(10.0) / 4.0); // -0.790569415042 + const REAL fx_3_002 = (REAL)(sqrt(6.0) / 4.0); // 0.612372435696 + const REAL fx_3_003 = (REAL)(-sqrt(16.0) / 4.0); // -1 + const REAL fx_3_004 = (REAL)(-sqrt(6.0) / 4.0); // -0.612372435695 + const REAL fx_3_005 = (REAL)(-sqrt(1.0) / 4.0); // -0.25 + const REAL fx_3_006 = (REAL)(-sqrt(15.0) / 4.0); // -0.968245836551 + const REAL fx_3_007 = (REAL)(sqrt(1.0) / 4.0); // 0.25 + const REAL fx_3_008 = (REAL)(sqrt(10.0) / 4.0); // 0.790569983984 + + inline void sh_rotx90_3(REAL y[], REAL yr[]) + { + yr[0] = fx_3_001*y[3] + fx_3_002*y[5]; + yr[1] = fx_3_003*y[1]; + yr[2] = fx_3_004*y[3] + fx_3_001*y[5]; + yr[3] = fx_3_008*y[0] + fx_3_002*y[2]; + yr[4] = fx_3_005*y[4] + fx_3_006*y[6]; + yr[5] = fx_3_004*y[0] - fx_3_001*y[2]; + yr[6] = fx_3_006*y[4] + fx_3_007*y[6]; + }; + + inline void sh_rotx90_inv_3(REAL y[], REAL yr[]) + { + yr[0] = fx_3_008*y[3] + fx_3_004*y[5]; + yr[1] = fx_3_003*y[1]; + yr[2] = fx_3_002*y[3] - fx_3_001*y[5]; + yr[3] = fx_3_001*y[0] + fx_3_004*y[2]; + yr[4] = fx_3_005*y[4] + fx_3_006*y[6]; + yr[5] = fx_3_002*y[0] + fx_3_001*y[2]; + yr[6] = fx_3_006*y[4] + fx_3_007*y[6]; + } + + const REAL fx_4_001 = (REAL)(-sqrt(56.0) / 8.0); // -0.935414346694 + const REAL fx_4_002 = (REAL)(sqrt(8.0) / 8.0); // 0.353553390593 + const REAL fx_4_003 = (REAL)(-sqrt(36.0) / 8.0); // -0.75 + const REAL fx_4_004 = (REAL)(sqrt(28.0) / 8.0); // 0.661437827766 + const REAL fx_4_005 = (REAL)(-sqrt(8.0) / 
8.0); // -0.353553390593 + const REAL fx_4_006 = (REAL)(sqrt(36.0) / 8.0); // 0.749999999999 + const REAL fx_4_007 = (REAL)(sqrt(9.0) / 8.0); // 0.37500034698 + const REAL fx_4_008 = (REAL)(sqrt(20.0) / 8.0); // 0.559017511622 + const REAL fx_4_009 = (REAL)(sqrt(35.0) / 8.0); // 0.739510657141 + const REAL fx_4_010 = (REAL)(sqrt(16.0) / 8.0); // 0.5 + const REAL fx_4_011 = (REAL)(-sqrt(28.0) / 8.0); // -0.661437827766 + const REAL fx_4_012 = (REAL)(sqrt(1.0) / 8.0); // 0.125 + const REAL fx_4_013 = (REAL)(sqrt(56.0) / 8.0); // 0.935414346692 + + inline void sh_rotx90_4(REAL y[], REAL yr[]) + { + yr[0] = fx_4_001*y[5] + fx_4_002*y[7]; + yr[1] = fx_4_003*y[1] + fx_4_004*y[3]; + yr[2] = fx_4_005*y[5] + fx_4_001*y[7]; + yr[3] = fx_4_004*y[1] + fx_4_006*y[3]; + yr[4] = fx_4_007*y[4] + fx_4_008*y[6] + fx_4_009*y[8]; + yr[5] = fx_4_013*y[0] + fx_4_002*y[2]; + yr[6] = fx_4_008*y[4] + fx_4_010*y[6] + fx_4_011*y[8]; + yr[7] = fx_4_005*y[0] - fx_4_001*y[2]; + yr[8] = fx_4_009*y[4] + fx_4_011*y[6] + fx_4_012*y[8]; + }; + + inline void sh_rotx90_inv_4(REAL y[], REAL yr[]) + { + yr[0] = fx_4_013*y[5] + fx_4_005*y[7]; + yr[1] = fx_4_003*y[1] + fx_4_004*y[3]; + yr[2] = fx_4_002*y[5] - fx_4_001*y[7]; + yr[3] = fx_4_004*y[1] + fx_4_006*y[3]; + yr[4] = fx_4_007*y[4] + fx_4_008*y[6] + fx_4_009*y[8]; + yr[5] = fx_4_001*y[0] + fx_4_005*y[2]; + yr[6] = fx_4_008*y[4] + fx_4_010*y[6] + fx_4_011*y[8]; + yr[7] = fx_4_002*y[0] + fx_4_001*y[2]; + yr[8] = fx_4_009*y[4] + fx_4_011*y[6] + fx_4_012*y[8]; + } + + const REAL fx_5_001 = (REAL)(sqrt(126.0) / 16.0); // 0.70156076002 + const REAL fx_5_002 = (REAL)(-sqrt(120.0) / 16.0); // -0.684653196882 + const REAL fx_5_003 = (REAL)(sqrt(10.0) / 16.0); // 0.197642353761 + const REAL fx_5_004 = (REAL)(-sqrt(64.0) / 16.0); // -0.5 + const REAL fx_5_005 = (REAL)(sqrt(192.0) / 16.0); // 0.866025403784 + const REAL fx_5_006 = (REAL)(sqrt(70.0) / 16.0); // 0.522912516584 + const REAL fx_5_007 = (REAL)(sqrt(24.0) / 16.0); // 0.306186217848 + const REAL 
fx_5_008 = (REAL)(-sqrt(162.0) / 16.0); // -0.795495128835 + const REAL fx_5_009 = (REAL)(sqrt(64.0) / 16.0); // 0.5 + const REAL fx_5_010 = (REAL)(sqrt(60.0) / 16.0); // 0.484122918274 + const REAL fx_5_011 = (REAL)(sqrt(112.0) / 16.0); // 0.661437827763 + const REAL fx_5_012 = (REAL)(sqrt(84.0) / 16.0); // 0.572821961867 + const REAL fx_5_013 = (REAL)(sqrt(4.0) / 16.0); // 0.125 + const REAL fx_5_014 = (REAL)(sqrt(42.0) / 16.0); // 0.405046293649 + const REAL fx_5_015 = (REAL)(sqrt(210.0) / 16.0); // 0.905711046633 + const REAL fx_5_016 = (REAL)(sqrt(169.0) / 16.0); // 0.8125 + const REAL fx_5_017 = (REAL)(-sqrt(45.0) / 16.0); // -0.419262745781 + const REAL fx_5_018 = (REAL)(sqrt(1.0) / 16.0); // 0.0625 + const REAL fx_5_019 = (REAL)(-sqrt(126.0) / 16.0); // -0.701561553415 + const REAL fx_5_020 = (REAL)(sqrt(120.0) / 16.0); // 0.684653196881 + const REAL fx_5_021 = (REAL)(-sqrt(10.0) / 16.0); // -0.197642353761 + const REAL fx_5_022 = (REAL)(-sqrt(70.0) / 16.0); // -0.522913107945 + const REAL fx_5_023 = (REAL)(-sqrt(60.0) / 16.0); // -0.48412346577 + + inline void sh_rotx90_5(REAL y[], REAL yr[]) + { + yr[0] = fx_5_001*y[5] + fx_5_002*y[7] + fx_5_003*y[9]; + yr[1] = fx_5_004*y[1] + fx_5_005*y[3]; + yr[2] = fx_5_006*y[5] + fx_5_007*y[7] + fx_5_008*y[9]; + yr[3] = fx_5_005*y[1] + fx_5_009*y[3]; + yr[4] = fx_5_010*y[5] + fx_5_011*y[7] + fx_5_012*y[9]; + yr[5] = fx_5_019*y[0] + fx_5_022*y[2] + fx_5_023*y[4]; + yr[6] = fx_5_013*y[6] + fx_5_014*y[8] + fx_5_015*y[10]; + yr[7] = fx_5_020*y[0] - fx_5_007*y[2] - fx_5_011*y[4]; + yr[8] = fx_5_014*y[6] + fx_5_016*y[8] + fx_5_017*y[10]; + yr[9] = fx_5_021*y[0] - fx_5_008*y[2] - fx_5_012*y[4]; + yr[10] = fx_5_015*y[6] + fx_5_017*y[8] + fx_5_018*y[10]; + }; + + inline void sh_rotx90_inv_5(REAL y[], REAL yr[]) + { + yr[0] = fx_5_019*y[5] + fx_5_020*y[7] + fx_5_021*y[9]; + yr[1] = fx_5_004*y[1] + fx_5_005*y[3]; + yr[2] = fx_5_022*y[5] - fx_5_007*y[7] - fx_5_008*y[9]; + yr[3] = fx_5_005*y[1] + fx_5_009*y[3]; + yr[4] = 
fx_5_023*y[5] - fx_5_011*y[7] - fx_5_012*y[9]; + yr[5] = fx_5_001*y[0] + fx_5_006*y[2] + fx_5_010*y[4]; + yr[6] = fx_5_013*y[6] + fx_5_014*y[8] + fx_5_015*y[10]; + yr[7] = fx_5_002*y[0] + fx_5_007*y[2] + fx_5_011*y[4]; + yr[8] = fx_5_014*y[6] + fx_5_016*y[8] + fx_5_017*y[10]; + yr[9] = fx_5_003*y[0] + fx_5_008*y[2] + fx_5_012*y[4]; + yr[10] = fx_5_015*y[6] + fx_5_017*y[8] + fx_5_018*y[10]; + } + + inline void sh_rot_1(REAL m[3 * 3], REAL y[NL1], REAL yr[NL1]) + { + REAL yr0 = m[4] * y[0] - m[5] * y[1] + m[3] * y[2]; + REAL yr1 = m[8] * y[1] - m[7] * y[0] - m[6] * y[2]; + REAL yr2 = m[1] * y[0] - m[2] * y[1] + m[0] * y[2]; + + yr[0] = yr0; + yr[1] = yr1; + yr[2] = yr2; + } + + inline void sh_roty_1(REAL ctm[1], REAL stm[1], REAL y[NL1], REAL yr[NL1]) + { + yr[0] = y[0]; + rot_inv(ctm[0], stm[0], y[1], y[2], yr[1], yr[2]); + } + + inline void sh_roty_2(REAL ctm[2], REAL stm[2], REAL y[NL2], REAL yr[NL2]) + { + REAL ytmp[NL2]; + sh_rotx90_2(y, yr); + sh_rotz_2(ctm, stm, yr, ytmp); + sh_rotx90_inv_2(ytmp, yr); + } + + inline void sh_roty_3(REAL ctm[3], REAL stm[3], REAL y[NL3], REAL yr[NL3]) + { + REAL ytmp[NL3]; + sh_rotx90_3(y, yr); + sh_rotz_3(ctm, stm, yr, ytmp); + sh_rotx90_inv_3(ytmp, yr); + } + + inline void sh_roty_4(REAL ctm[4], REAL stm[4], REAL y[NL4], REAL yr[NL4]) + { + REAL ytmp[NL4]; + sh_rotx90_4(y, yr); + sh_rotz_4(ctm, stm, yr, ytmp); + sh_rotx90_inv_4(ytmp, yr); + } + + inline void sh_roty_5(REAL ctm[5], REAL stm[5], REAL y[NL5], REAL yr[NL5]) + { + REAL ytmp[NL5]; + sh_rotx90_5(y, yr); + sh_rotz_5(ctm, stm, yr, ytmp); + sh_rotx90_inv_5(ytmp, yr); + } + +#define ROT_TOL CONSTANT(1e-4) + + /* + Finds cosine,sine pairs for zyz rotation (i.e. rotation R_z2 R_y R_z1 v). + The rotation is one which maps mx to (1,0,0) and mz to (0,0,1). 
+ */ + inline void zyz(REAL m[3 * 3], REAL &zc1, REAL &zs1, REAL &yc, REAL &ys, REAL &zc2, REAL &zs2) + { + REAL cz = m[8]; + + // rotate so that (cx,cy,0) aligns to (1,0,0) + REAL cxylen = (REAL)sqrtf(1.0f - cz*cz); + if (cxylen >= ROT_TOL) + { + // if above is a NaN, will do the correct thing + yc = cz; + ys = cxylen; + REAL len67inv = 1.0f / sqrtf(m[6] * m[6] + m[7] * m[7]); + zc1 = -m[6] * len67inv; + zs1 = m[7] * len67inv; + REAL len25inv = 1.0f / sqrtf(m[2] * m[2] + m[5] * m[5]); + zc2 = m[2] * len25inv; + zs2 = m[5] * len25inv; + } + else { // m[6],m[7],m[8] already aligned to (0,0,1) + zc1 = 1.0; zs1 = 0.0; // identity + yc = cz; ys = 0.0; // identity + zc2 = m[0] * cz; zs2 = -m[1]; // align x axis (mx[0],mx[1],0) to (1,0,0) + } + } + + inline void sh_rotzyz_2(REAL zc1m[2], REAL zs1m[2], REAL ycm[2], REAL ysm[2], REAL zc2m[2], REAL zs2m[2], REAL y[NL2], REAL yr[NL2]) + { + REAL ytmp[NL2]; + sh_rotz_2(zc1m, zs1m, y, yr); + sh_roty_2(ycm, ysm, yr, ytmp); + sh_rotz_2(zc2m, zs2m, ytmp, yr); + } + + inline void sh_rotzyz_3(REAL zc1m[3], REAL zs1m[3], REAL ycm[3], REAL ysm[3], REAL zc2m[3], REAL zs2m[3], REAL y[NL3], REAL yr[NL3]) + { + REAL ytmp[NL3]; + sh_rotz_3(zc1m, zs1m, y, yr); + sh_roty_3(ycm, ysm, yr, ytmp); + sh_rotz_3(zc2m, zs2m, ytmp, yr); + } + + inline void sh_rotzyz_4(REAL zc1m[4], REAL zs1m[4], REAL ycm[4], REAL ysm[4], REAL zc2m[4], REAL zs2m[4], REAL y[NL4], REAL yr[NL4]) + { + REAL ytmp[NL4]; + sh_rotz_4(zc1m, zs1m, y, yr); + sh_roty_4(ycm, ysm, yr, ytmp); + sh_rotz_4(zc2m, zs2m, ytmp, yr); + } + + inline void sh_rotzyz_5(REAL zc1m[5], REAL zs1m[5], REAL ycm[5], REAL ysm[5], REAL zc2m[5], REAL zs2m[5], REAL y[NL5], REAL yr[NL5]) + { + REAL ytmp[NL5]; + sh_rotz_5(zc1m, zs1m, y, yr); + sh_roty_5(ycm, ysm, yr, ytmp); + sh_rotz_5(zc2m, zs2m, ytmp, yr); + } + + inline void sh3_rot(REAL m[3 * 3], REAL zc1, REAL zs1, REAL yc, REAL ys, REAL zc2, REAL zs2, REAL y[NSH3], REAL yr[NSH3]) + { + REAL zc1m[3], zs1m[3]; + rot_3(zc1, zs1, zc1m, zs1m); + REAL 
ycm[3], ysm[3]; + rot_3(yc, ys, ycm, ysm); + REAL zc2m[3], zs2m[3]; + rot_3(zc2, zs2, zc2m, zs2m); + + yr[0] = y[0]; + sh_rot_1(m, y + NSH0, yr + NSH0); + sh_rotzyz_2(zc1m, zs1m, ycm, ysm, zc2m, zs2m, y + NSH1, yr + NSH1); + sh_rotzyz_3(zc1m, zs1m, ycm, ysm, zc2m, zs2m, y + NSH2, yr + NSH2); + } + + inline void sh4_rot(REAL m[3 * 3], REAL zc1, REAL zs1, REAL yc, REAL ys, REAL zc2, REAL zs2, REAL y[NSH4], REAL yr[NSH4]) + { + REAL zc1m[4], zs1m[4]; + rot_4(zc1, zs1, zc1m, zs1m); + REAL ycm[4], ysm[4]; + rot_4(yc, ys, ycm, ysm); + REAL zc2m[4], zs2m[4]; + rot_4(zc2, zs2, zc2m, zs2m); + + yr[0] = y[0]; + sh_rot_1(m, y + NSH0, yr + NSH0); + sh_rotzyz_2(zc1m, zs1m, ycm, ysm, zc2m, zs2m, y + NSH1, yr + NSH1); + sh_rotzyz_3(zc1m, zs1m, ycm, ysm, zc2m, zs2m, y + NSH2, yr + NSH2); + sh_rotzyz_4(zc1m, zs1m, ycm, ysm, zc2m, zs2m, y + NSH3, yr + NSH3); + } + + inline void sh5_rot(REAL m[3 * 3], REAL zc1, REAL zs1, REAL yc, REAL ys, REAL zc2, REAL zs2, REAL y[NSH5], REAL yr[NSH5]) + { + REAL zc1m[5], zs1m[5]; + rot_5(zc1, zs1, zc1m, zs1m); + REAL ycm[5], ysm[5]; + rot_5(yc, ys, ycm, ysm); + REAL zc2m[5], zs2m[5]; + rot_5(zc2, zs2, zc2m, zs2m); + + yr[0] = y[0]; + sh_rot_1(m, y + NSH0, yr + NSH0); + sh_rotzyz_2(zc1m, zs1m, ycm, ysm, zc2m, zs2m, y + NSH1, yr + NSH1); + sh_rotzyz_3(zc1m, zs1m, ycm, ysm, zc2m, zs2m, y + NSH2, yr + NSH2); + sh_rotzyz_4(zc1m, zs1m, ycm, ysm, zc2m, zs2m, y + NSH3, yr + NSH3); + sh_rotzyz_5(zc1m, zs1m, ycm, ysm, zc2m, zs2m, y + NSH4, yr + NSH4); + } + + inline void sh1_rot(REAL m[3 * 3], REAL y[NSH1], REAL yr[NSH1]) + { + yr[0] = y[0]; + sh_rot_1(m, y + NSH0, yr + NSH0); + } + + inline void sh3_rot(REAL m[3 * 3], REAL y[NSH3], REAL yr[NSH3]) + { + REAL zc1, zs1, yc, ys, zc2, zs2; + zyz(m, zc1, zs1, yc, ys, zc2, zs2); + sh3_rot(m, zc1, zs1, yc, ys, zc2, zs2, y, yr); + } + + inline void sh4_rot(REAL m[3 * 3], REAL y[NSH4], REAL yr[NSH4]) + { + REAL zc1, zs1, yc, ys, zc2, zs2; + zyz(m, zc1, zs1, yc, ys, zc2, zs2); + sh4_rot(m, zc1, zs1, yc, ys, zc2, zs2, 
y, yr); + } + + inline void sh5_rot(REAL m[3 * 3], REAL y[NSH5], REAL yr[NSH5]) + { + REAL zc1, zs1, yc, ys, zc2, zs2; + zyz(m, zc1, zs1, yc, ys, zc2, zs2); + sh5_rot(m, zc1, zs1, yc, ys, zc2, zs2, y, yr); + } + + // simple matrix vector multiply for a square matrix (only used by ZRotation) + inline void SimpMatMul(size_t dim, const float *matrix, const float *input, float *result) + { + for (size_t iR = 0; iR < dim; ++iR) + { + result[iR + 0] = matrix[iR*dim + 0] * input[0]; + for (size_t iC = 1; iC < dim; ++iC) + { + result[iR] += matrix[iR*dim + iC] * input[iC]; + } + } + } + +}; // anonymous namespace + + +//------------------------------------------------------------------------------------- +// Evaluates the Spherical Harmonic basis functions +// +// http://msdn.microsoft.com/en-us/library/windows/desktop/bb205448.aspx +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +float* XM_CALLCONV DirectX::XMSHEvalDirection( + float *result, + size_t order, + FXMVECTOR dir) noexcept +{ + if (!result) + return nullptr; + + XMFLOAT4A dv; + XMStoreFloat4A(&dv, dir); + + const float fX = dv.x; + const float fY = dv.y; + const float fZ = dv.z; + + switch (order) + { + case 2: + sh_eval_basis_1(fX, fY, fZ, result); + break; + + case 3: + sh_eval_basis_2(fX, fY, fZ, result); + break; + + case 4: + sh_eval_basis_3(fX, fY, fZ, result); + break; + + case 5: + sh_eval_basis_4(fX, fY, fZ, result); + break; + + case 6: + sh_eval_basis_5(fX, fY, fZ, result); + break; + + default: + assert(order < XM_SH_MINORDER || order > XM_SH_MAXORDER); + return nullptr; + } + + return result; +} + + +//------------------------------------------------------------------------------------- +// Rotates SH vector by a rotation matrix +// +// http://msdn.microsoft.com/en-us/library/windows/desktop/bb204992.aspx +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +float* 
XM_CALLCONV DirectX::XMSHRotate( + float *result, + size_t order, + FXMMATRIX rotMatrix, + const float *input) noexcept +{ + if (!result || !input) + return nullptr; + + if (result == input) + return nullptr; + + XMFLOAT3X3 mat; + XMStoreFloat3x3(&mat, rotMatrix); + + float mRot[3 * 3]; + const float r00 = mRot[0 * 3 + 0] = mat._11; + const float r10 = mRot[1 * 3 + 0] = mat._12; + const float r20 = mRot[2 * 3 + 0] = mat._13; + + const float r01 = mRot[0 * 3 + 1] = mat._21; + const float r11 = mRot[1 * 3 + 1] = mat._22; + const float r21 = mRot[2 * 3 + 1] = mat._23; + + const float r02 = mRot[0 * 3 + 2] = mat._31; + const float r12 = mRot[1 * 3 + 2] = mat._32; + const float r22 = mRot[2 * 3 + 2] = mat._33; + + result[0] = input[0]; // rotate the constant term + + switch (order) + { + case 2: + { + // do linear by hand... + + result[1] = r11*input[1] - r12*input[2] + r10*input[3]; + result[2] = -r21*input[1] + r22*input[2] - r20*input[3]; + result[3] = r01*input[1] - r02*input[2] + r00*input[3]; + } + break; + + case 3: + { + float R[25]; + // do linear by hand... 
+ + result[1] = r11*input[1] - r12*input[2] + r10*input[3]; + result[2] = -r21*input[1] + r22*input[2] - r20*input[3]; + result[3] = r01*input[1] - r02*input[2] + r00*input[3]; + + // direct code for quadratics is faster than ZYZ recurrence relations + + const float t41 = r01 * r00; + const float t43 = r11 * r10; + const float t48 = r11 * r12; + const float t50 = r01 * r02; + const float t55 = r02 * r02; + const float t57 = r22 * r22; + const float t58 = r12 * r12; + const float t61 = r00 * r02; + const float t63 = r10 * r12; + const float t68 = r10 * r10; + const float t70 = r01 * r01; + const float t72 = r11 * r11; + const float t74 = r00 * r00; + const float t76 = r21 * r21; + const float t78 = r20 * r20; + + const float v173 = 0.1732050808e1f; + const float v577 = 0.5773502693e0f; + const float v115 = 0.1154700539e1f; + const float v288 = 0.2886751347e0f; + const float v866 = 0.8660254040e0f; + + R[0] = r11 * r00 + r01 * r10; + R[1] = -r01 * r12 - r11 * r02; + R[2] = v173 * r02 * r12; + R[3] = -r10 * r02 - r00 * r12; + R[4] = r00 * r10 - r01 * r11; + R[5] = -r11 * r20 - r21 * r10; + R[6] = r11 * r22 + r21 * r12; + R[7] = -v173 * r22 * r12; + R[8] = r20 * r12 + r10 * r22; + R[9] = -r10 * r20 + r11 * r21; + R[10] = -v577* (t41 + t43) + v115 * r21 * r20; + R[11] = v577* (t48 + t50) - v115 * r21 * r22; + R[12] = -0.5000000000e0f * (t55 + t58) + t57; + R[13] = v577 * (t61 + t63) - v115 * r20 * r22; + R[14] = v288 * (t70 - t68 + t72 - t74) - v577 * (t76 - t78); + R[15] = -r01 * r20 - r21 * r00; + R[16] = r01 * r22 + r21 * r02; + R[17] = -v173 * r22 * r02; + R[18] = r00 * r22 + r20 * r02; + R[19] = -r00 * r20 + r01 * r21; + R[20] = t41 - t43; + R[21] = -t50 + t48; + R[22] = v866 * (t55 - t58); + R[23] = t63 - t61; + R[24] = 0.5000000000e0f *(t74 - t68 - t70 + t72); + + // blow the matrix multiply out by hand, looping is inefficient on a P4...
+ for (unsigned int iR = 0; iR < 5; iR++) + { + const unsigned int uBase = iR * 5; + result[4 + iR] = R[uBase + 0] * input[4] + R[uBase + 1] * input[5] + R[uBase + 2] * input[6] + R[uBase + 3] * input[7] + R[uBase + 4] * input[8]; + } + } + break; + + case 4: + sh3_rot(mRot, const_cast<float*>(input), result); + break; + + case 5: + sh4_rot(mRot, const_cast<float*>(input), result); + break; + + case 6: + sh5_rot(mRot, const_cast<float*>(input), result); + break; + + default: + assert(order < XM_SH_MINORDER || order > XM_SH_MAXORDER); + return nullptr; + } + + return result; +} + + +//------------------------------------------------------------------------------------- +// Rotates the SH vector in the Z axis by an angle +// +// http://msdn.microsoft.com/en-us/library/windows/desktop/bb205461.aspx +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +float* DirectX::XMSHRotateZ( + float *result, + size_t order, + float angle, + const float *input) noexcept +{ + if (!result || !input) + return nullptr; + + if (result == input) + return nullptr; + + if (order < XM_SH_MINORDER || order > XM_SH_MAXORDER) + return nullptr; + + float R[(2 * (XM_SH_MAXORDER - 1) + 1)*(2 * (XM_SH_MAXORDER - 1) + 1)]; // used to store rotation matrices... + + // these are actually very sparse matrices, most of the entries are zeros...
+ + const float ca = cosf(angle); + const float sa = sinf(angle); + + const float t1 = ca; + const float t2 = sa; + R[0] = t1; + R[1] = 0.0f; + R[2] = t2; + R[3] = 0.0f; + R[4] = 1.0f; + R[5] = 0.0f; + R[6] = -t2; + R[7] = 0.0f; + R[8] = t1; + + result[0] = input[0]; + SimpMatMul(3, R, input + 1, result + 1); + + if (order > 2) + { + for (int j = 0; j < 5 * 5; j++) R[j] = 0.0f; + const float t1 = sa; + const float t2 = t1*t1; + const float t3 = ca; + const float t4 = t3*t3; + const float t5 = -t2 + t4; + const float t7 = 2.0f*t3*t1; + R[0] = t5; + R[4] = t7; + R[6] = t3; + R[8] = t1; + R[12] = 1.0f; + R[16] = -t1; + R[18] = t3; + R[20] = -t7; + R[24] = t5; + + SimpMatMul(5, R, input + 4, result + 4); // un-roll matrix/vector multiply + if (order > 3) + { + for (int j = 0; j < 7 * 7; j++) R[j] = 0.0f; + const float t1 = ca; + const float t2 = t1*t1; + const float t4 = sa; + const float t5 = t4*t4; + const float t8 = t2*t1 - 3.0f*t1*t5; + const float t12 = 3.0f*t4*t2 - t5*t4; + const float t13 = -t5 + t2; + const float t15 = 2.0f*t1*t4; + R[0] = t8; + R[6] = t12; + R[8] = t13; + R[12] = t15; + R[16] = t1; + R[18] = t4; + R[24] = 1.0f; + R[30] = -t4; + R[32] = t1; + R[36] = -t15; + R[40] = t13; + R[42] = -t12; + R[48] = t8; + SimpMatMul(7, R, input + 9, result + 9); + if (order > 4) + { + for (int j = 0; j < 9 * 9; j++) R[j] = 0.0f; // clear exactly the 9x9 block, matching the other bands + const float t1 = ca; + const float t2 = t1*t1; + const float t3 = t2*t2; + const float t4 = sa; + const float t5 = t4*t4; + const float t6 = t5*t5; + const float t9 = t3 + t6 - 6.0f*t5*t2; + const float t10 = t5*t4; + const float t12 = t2*t1; + const float t14 = -t10*t1 + t4*t12; + const float t17 = t12 - 3.0f*t1*t5; + const float t20 = 3.0f*t4*t2 - t10; + const float t21 = -t5 + t2; + const float t23 = 2.0f*t1*t4; + R[0] = t9; + R[8] = 4.0f*t14; + R[10] = t17; + R[16] = t20; + R[20] = t21; + R[24] = t23; + R[30] = t1; + R[32] = t4; + R[40] = 1.0f; + R[48] = -t4; + R[50] = t1; + R[56] = -t23; + R[60] = t21; + R[64] = -t20; + R[70] =
t17; + R[72] = -4.0f*t14; + R[80] = t9; + + SimpMatMul(9, R, input + 16, result + 16); + if (order > 5) + { + for (int j = 0; j < 11 * 11; j++) R[j] = 0.0f; + const float t1 = ca; + const float t2 = sa; + const float t3 = t2*t2; + const float t4 = t3*t3; + const float t7 = t1*t1; + const float t8 = t7*t1; + const float t11 = t7*t7; + const float t13 = 5.0f*t1*t4 - 10.0f*t3*t8 + t11*t1; + const float t14 = t3*t2; + const float t20 = -10.0f*t14*t7 + 5.0f*t2*t11 + t4*t2; + const float t23 = t11 + t4 - 6.0f*t3*t7; + const float t26 = -t14*t1 + t2*t8; + const float t29 = t8 - 3.0f*t1*t3; + const float t32 = 3.0f*t2*t7 - t14; + const float t33 = -t3 + t7; + const float t35 = 2.0f*t1*t2; + R[0] = t13; + R[10] = t20; + R[12] = t23; + R[20] = 4.0f*t26; + R[24] = t29; + R[30] = t32; + R[36] = t33; + R[40] = t35; + R[48] = t1; + R[50] = t2; + R[60] = 1.0f; + R[70] = -t2; + R[72] = t1; + R[80] = -t35; + R[84] = t33; + R[90] = -t32; + R[96] = t29; + R[100] = -4.0f*t26; + R[108] = t23; + R[110] = -t20; + R[120] = t13; + SimpMatMul(11, R, input + 25, result + 25); + } + } + } + } + + return result; +} + + +//------------------------------------------------------------------------------------- +// Adds two SH vectors, result[i] = inputA[i] + inputB[i]; +// +// http://msdn.microsoft.com/en-us/library/windows/desktop/bb205438.aspx +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +float* DirectX::XMSHAdd( + float *result, + size_t order, + const float *inputA, + const float *inputB) noexcept +{ + if (!result || !inputA || !inputB) + return nullptr; + + const size_t numcoeff = order*order; + + for (size_t i = 0; i < numcoeff; ++i) + { + result[i] = inputA[i] + inputB[i]; + } + + return result; +} + + +//------------------------------------------------------------------------------------- +// Scales a SH vector, result[i] = input[i] * scale; +// +// http://msdn.microsoft.com/en-us/library/windows/desktop/bb204994.aspx 
+//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +float* DirectX::XMSHScale( + float *result, + size_t order, + const float *input, + float scale) noexcept +{ + if (!result || !input) + return nullptr; + + const size_t numcoeff = order*order; + + for (size_t i = 0; i < numcoeff; ++i) + { + result[i] = scale * input[i]; + } + + return result; +} + + +//------------------------------------------------------------------------------------- +// Computes the dot product of two SH vectors +// +// http://msdn.microsoft.com/en-us/library/windows/desktop/bb205446.aspx +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +float DirectX::XMSHDot( + size_t order, + const float *inputA, + const float *inputB) noexcept +{ + if (!inputA || !inputB) + return 0.f; + + float result = inputA[0] * inputB[0]; + + const size_t numcoeff = order*order; + + for (size_t i = 1; i < numcoeff; ++i) + { + result += inputA[i] * inputB[i]; + } + + return result; +} + + +//------------------------------------------------------------------------------------- +// Computes the product of two functions represented using SH (f and g), where: +// result[i] = int(y_i(s) * f(s) * g(s)), where y_i(s) is the ith SH basis +// function, f(s) and g(s) are SH functions (sum_i(y_i(s)*c_i)). The order O +// determines the lengths of the arrays, where there should always be O^2 +// coefficients. In general the product of two SH functions of order O generates +// and SH function of order 2*O - 1, but we truncate the result. This means +// that the product commutes (f*g == g*f) but doesn't associate +// (f*(g*h) != (f*g)*h. 
+//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +float* DirectX::XMSHMultiply( + float *result, + size_t order, + const float *inputF, + const float *inputG) noexcept +{ + switch (order) + { + case 2: + return XMSHMultiply2(result, inputF, inputG); + + case 3: + return XMSHMultiply3(result, inputF, inputG); + + case 4: + return XMSHMultiply4(result, inputF, inputG); + + case 5: + return XMSHMultiply5(result, inputF, inputG); + + case 6: + return XMSHMultiply6(result, inputF, inputG); + + default: + assert(order < XM_SH_MINORDER || order > XM_SH_MAXORDER); + return nullptr; + } +} + + +//------------------------------------------------------------------------------------- +// http://msdn.microsoft.com/en-us/library/windows/desktop/bb205454.aspx +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +float* DirectX::XMSHMultiply2( + float *y, + const float *f, + const float *g) noexcept +{ + if (!y || !f || !g) + return nullptr; + + REAL tf, tg, t; + // [0,0]: 0, + y[0] = CONSTANT(0.282094792935999980)*f[0] * g[0]; + + // [1,1]: 0, + tf = CONSTANT(0.282094791773000010)*f[0]; + tg = CONSTANT(0.282094791773000010)*g[0]; + y[1] = tf*g[1] + tg*f[1]; + t = f[1] * g[1]; + y[0] += CONSTANT(0.282094791773000010)*t; + + // [2,2]: 0, + tf = CONSTANT(0.282094795249000000)*f[0]; + tg = CONSTANT(0.282094795249000000)*g[0]; + y[2] = tf*g[2] + tg*f[2]; + t = f[2] * g[2]; + y[0] += CONSTANT(0.282094795249000000)*t; + + // [3,3]: 0, + tf = CONSTANT(0.282094791773000010)*f[0]; + tg = CONSTANT(0.282094791773000010)*g[0]; + y[3] = tf*g[3] + tg*f[3]; + t = f[3] * g[3]; + y[0] += CONSTANT(0.282094791773000010)*t; + + // multiply count=20 + + return y; +} + + +//------------------------------------------------------------------------------------- +// http://msdn.microsoft.com/en-us/library/windows/desktop/bb232906.aspx 
//-------------------------------------------------------------------------------------
_Use_decl_annotations_
float* DirectX::XMSHMultiply3(
    float *y,
    const float *f,
    const float *g) noexcept
{
    if (!y || !f || !g)
        return nullptr;

    // Order-3 specialization of the SH product: f, g and y are indexed 0..8
    // below (3*3 coefficients each). Returns y on success, nullptr when any
    // pointer is null.
    //
    // How to read the sections: "// [i,j]: k, ..." handles the input index pair
    // (i,j) together with every listed index k, each k carrying its own constant
    // c_k:
    //   tf = sum_k c_k*f[k],  tg = sum_k c_k*g[k]
    //   y[i] receives tf*g[j] + tg*f[j]  (and y[j] receives tf*g[i] + tg*f[i] when i != j)
    //   t = f[i]*g[j] + f[j]*g[i]        (just f[i]*g[i] when i == j)
    //   y[k] receives c_k*t
    // The first statement that touches an output index uses '=', every later one
    // uses '+=', so the sections rely on the order they are written in.
    //
    // NOTE(review): y is written while f and g are still being read (e.g. y[6] is
    // assigned in [1,1] and f[6]/g[6] are read again in [2,2]), so y presumably
    // must not alias f or g - confirm against the declared contract.
    REAL tf, tg, t;
    // [0,0]: 0,
    y[0] = CONSTANT(0.282094792935999980)*f[0] * g[0];

    // [1,1]: 0,6,8,
    tf = CONSTANT(0.282094791773000010)*f[0] + CONSTANT(-0.126156626101000010)*f[6] + CONSTANT(-0.218509686119999990)*f[8];
    tg = CONSTANT(0.282094791773000010)*g[0] + CONSTANT(-0.126156626101000010)*g[6] + CONSTANT(-0.218509686119999990)*g[8];
    y[1] = tf*g[1] + tg*f[1];
    t = f[1] * g[1];
    y[0] += CONSTANT(0.282094791773000010)*t;
    y[6] = CONSTANT(-0.126156626101000010)*t;
    y[8] = CONSTANT(-0.218509686119999990)*t;

    // [1,2]: 5,
    tf = CONSTANT(0.218509686118000010)*f[5];
    tg = CONSTANT(0.218509686118000010)*g[5];
    y[1] += tf*g[2] + tg*f[2];
    y[2] = tf*g[1] + tg*f[1];
    t = f[1] * g[2] + f[2] * g[1];
    y[5] = CONSTANT(0.218509686118000010)*t;

    // [1,3]: 4,
    tf = CONSTANT(0.218509686114999990)*f[4];
    tg = CONSTANT(0.218509686114999990)*g[4];
    y[1] += tf*g[3] + tg*f[3];
    y[3] = tf*g[1] + tg*f[1];
    t = f[1] * g[3] + f[3] * g[1];
    y[4] = CONSTANT(0.218509686114999990)*t;

    // [2,2]: 0,6,
    tf = CONSTANT(0.282094795249000000)*f[0] + CONSTANT(0.252313259986999990)*f[6];
    tg = CONSTANT(0.282094795249000000)*g[0] + CONSTANT(0.252313259986999990)*g[6];
    y[2] += tf*g[2] + tg*f[2];
    t = f[2] * g[2];
    y[0] += CONSTANT(0.282094795249000000)*t;
    y[6] += CONSTANT(0.252313259986999990)*t;

    // [2,3]: 7,
    tf = CONSTANT(0.218509686118000010)*f[7];
    tg = CONSTANT(0.218509686118000010)*g[7];
    y[2] += tf*g[3] + tg*f[3];
    y[3] += tf*g[2] + tg*f[2];
    t = f[2] * g[3] + f[3] * g[2];
    y[7] = CONSTANT(0.218509686118000010)*t;

    // [3,3]: 0,6,8,
    tf = CONSTANT(0.282094791773000010)*f[0] + CONSTANT(-0.126156626101000010)*f[6] + CONSTANT(0.218509686119999990)*f[8];
    tg = CONSTANT(0.282094791773000010)*g[0] + CONSTANT(-0.126156626101000010)*g[6] + CONSTANT(0.218509686119999990)*g[8];
    y[3] += tf*g[3] + tg*f[3];
    t = f[3] * g[3];
    y[0] += CONSTANT(0.282094791773000010)*t;
    y[6] += CONSTANT(-0.126156626101000010)*t;
    y[8] += CONSTANT(0.218509686119999990)*t;

    // [4,4]: 0,6,
    tf = CONSTANT(0.282094791770000020)*f[0] + CONSTANT(-0.180223751576000010)*f[6];
    tg = CONSTANT(0.282094791770000020)*g[0] + CONSTANT(-0.180223751576000010)*g[6];
    y[4] += tf*g[4] + tg*f[4];
    t = f[4] * g[4];
    y[0] += CONSTANT(0.282094791770000020)*t;
    y[6] += CONSTANT(-0.180223751576000010)*t;

    // [4,5]: 7,
    tf = CONSTANT(0.156078347226000000)*f[7];
    tg = CONSTANT(0.156078347226000000)*g[7];
    y[4] += tf*g[5] + tg*f[5];
    y[5] += tf*g[4] + tg*f[4];
    t = f[4] * g[5] + f[5] * g[4];
    y[7] += CONSTANT(0.156078347226000000)*t;

    // [5,5]: 0,6,8,
    tf = CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.090111875786499998)*f[6] + CONSTANT(-0.156078347227999990)*f[8];
    tg = CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.090111875786499998)*g[6] + CONSTANT(-0.156078347227999990)*g[8];
    y[5] += tf*g[5] + tg*f[5];
    t = f[5] * g[5];
    y[0] += CONSTANT(0.282094791773999990)*t;
    y[6] += CONSTANT(0.090111875786499998)*t;
    y[8] += CONSTANT(-0.156078347227999990)*t;

    // [6,6]: 0,6,
    // Here the listed index 6 equals i and j, so tf/tg carry only the 0 term and
    // the 6 term is applied through the y[6] update on t.
    tf = CONSTANT(0.282094797560000000)*f[0];
    tg = CONSTANT(0.282094797560000000)*g[0];
    y[6] += tf*g[6] + tg*f[6];
    t = f[6] * g[6];
    y[0] += CONSTANT(0.282094797560000000)*t;
    y[6] += CONSTANT(0.180223764527000010)*t;

    // [7,7]: 0,6,8,
    tf = CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.090111875786499998)*f[6] + CONSTANT(0.156078347227999990)*f[8];
    tg = CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.090111875786499998)*g[6] + CONSTANT(0.156078347227999990)*g[8];
    y[7] += tf*g[7] + tg*f[7];
    t = f[7] * g[7];
    y[0] += CONSTANT(0.282094791773999990)*t;
    y[6] += CONSTANT(0.090111875786499998)*t;
    y[8] += CONSTANT(0.156078347227999990)*t;

    // [8,8]: 0,6,
    tf = CONSTANT(0.282094791770000020)*f[0] + CONSTANT(-0.180223751576000010)*f[6];
    tg = CONSTANT(0.282094791770000020)*g[0] + CONSTANT(-0.180223751576000010)*g[6];
    y[8] += tf*g[8] + tg*f[8];
    t = f[8] * g[8];
    y[0] += CONSTANT(0.282094791770000020)*t;
    y[6] += CONSTANT(-0.180223751576000010)*t;

    // multiply count=120

    return y;
}


//-------------------------------------------------------------------------------------
// Order-4 specialization of the SH product: indices 0..15 of y, f and g are used
// (4*4 coefficients each). Returns y, or nullptr when any pointer is null.
// The "[i,j]: k, ..." section comments follow the same layout as in
// XMSHMultiply3; '=' marks the first write to an output index, '+=' every later
// one, so the sections rely on their order.
//
// http://msdn.microsoft.com/en-us/library/windows/desktop/bb232907.aspx
//-------------------------------------------------------------------------------------
_Use_decl_annotations_
float* DirectX::XMSHMultiply4(
    float *y,
    const float *f,
    const float *g) noexcept
{
    if (!y || !f || !g)
        return nullptr;

    REAL tf, tg, t;
    // [0,0]: 0,
    y[0] = CONSTANT(0.282094792935999980)*f[0] * g[0];

    // [1,1]: 0,6,8,
    tf = CONSTANT(0.282094791773000010)*f[0] + CONSTANT(-0.126156626101000010)*f[6] + CONSTANT(-0.218509686119999990)*f[8];
    tg = CONSTANT(0.282094791773000010)*g[0] + CONSTANT(-0.126156626101000010)*g[6] + CONSTANT(-0.218509686119999990)*g[8];
    y[1] = tf*g[1] + tg*f[1];
    t = f[1] * g[1];
    y[0] += CONSTANT(0.282094791773000010)*t;
    y[6] = CONSTANT(-0.126156626101000010)*t;
    y[8] = CONSTANT(-0.218509686119999990)*t;

    // [1,4]: 3,13,15,
    tf = CONSTANT(0.218509686114999990)*f[3] + CONSTANT(-0.058399170082300000)*f[13] + CONSTANT(-0.226179013157999990)*f[15];
    tg = CONSTANT(0.218509686114999990)*g[3] + CONSTANT(-0.058399170082300000)*g[13] + CONSTANT(-0.226179013157999990)*g[15];
    y[1] += tf*g[4] + tg*f[4];
    y[4] = tf*g[1] + tg*f[1];
    t = f[1] * g[4] + f[4] * g[1];
    y[3] = CONSTANT(0.218509686114999990)*t;
    y[13] = CONSTANT(-0.058399170082300000)*t;
    y[15] = CONSTANT(-0.226179013157999990)*t;

    // [1,5]: 2,12,14,
    tf = CONSTANT(0.218509686118000010)*f[2] + CONSTANT(-0.143048168103000000)*f[12] + CONSTANT(-0.184674390923000000)*f[14];
    tg = CONSTANT(0.218509686118000010)*g[2] + CONSTANT(-0.143048168103000000)*g[12] + CONSTANT(-0.184674390923000000)*g[14];
    y[1] += tf*g[5] + tg*f[5];
    y[5] = tf*g[1] + tg*f[1];
    t = f[1] * g[5] + f[5] * g[1];
    y[2] = CONSTANT(0.218509686118000010)*t;
    y[12] = CONSTANT(-0.143048168103000000)*t;
    y[14] = CONSTANT(-0.184674390923000000)*t;

    // [1,6]: 11,
    tf = CONSTANT(0.202300659402999990)*f[11];
    tg = CONSTANT(0.202300659402999990)*g[11];
    y[1] += tf*g[6] + tg*f[6];
    y[6] += tf*g[1] + tg*f[1];
    t = f[1] * g[6] + f[6] * g[1];
    y[11] = CONSTANT(0.202300659402999990)*t;

    // [1,8]: 9,11,
    tf = CONSTANT(0.226179013155000000)*f[9] + CONSTANT(0.058399170081799998)*f[11];
    tg = CONSTANT(0.226179013155000000)*g[9] + CONSTANT(0.058399170081799998)*g[11];
    y[1] += tf*g[8] + tg*f[8];
    y[8] += tf*g[1] + tg*f[1];
    t = f[1] * g[8] + f[8] * g[1];
    y[9] = CONSTANT(0.226179013155000000)*t;
    y[11] += CONSTANT(0.058399170081799998)*t;

    // [2,2]: 0,6,
    tf = CONSTANT(0.282094795249000000)*f[0] + CONSTANT(0.252313259986999990)*f[6];
    tg = CONSTANT(0.282094795249000000)*g[0] + CONSTANT(0.252313259986999990)*g[6];
    y[2] += tf*g[2] + tg*f[2];
    t = f[2] * g[2];
    y[0] += CONSTANT(0.282094795249000000)*t;
    y[6] += CONSTANT(0.252313259986999990)*t;

    // [2,6]: 12,
    tf = CONSTANT(0.247766706973999990)*f[12];
    tg = CONSTANT(0.247766706973999990)*g[12];
    y[2] += tf*g[6] + tg*f[6];
    y[6] += tf*g[2] + tg*f[2];
    t = f[2] * g[6] + f[6] * g[2];
    y[12] += CONSTANT(0.247766706973999990)*t;

    // [3,3]: 0,6,8,
    tf = CONSTANT(0.282094791773000010)*f[0] + CONSTANT(-0.126156626101000010)*f[6] + CONSTANT(0.218509686119999990)*f[8];
    tg = CONSTANT(0.282094791773000010)*g[0] + CONSTANT(-0.126156626101000010)*g[6] + CONSTANT(0.218509686119999990)*g[8];
    y[3] += tf*g[3] + tg*f[3];
    t = f[3] * g[3];
    y[0] += CONSTANT(0.282094791773000010)*t;
    y[6] += CONSTANT(-0.126156626101000010)*t;
    y[8] += CONSTANT(0.218509686119999990)*t;

    // [3,6]: 13,
    tf = CONSTANT(0.202300659402999990)*f[13];
    tg = CONSTANT(0.202300659402999990)*g[13];
    y[3] += tf*g[6] + tg*f[6];
    y[6] += tf*g[3] + tg*f[3];
    t = f[3] * g[6] + f[6] * g[3];
    y[13] += CONSTANT(0.202300659402999990)*t;

    // [3,7]: 2,12,14,
    tf = CONSTANT(0.218509686118000010)*f[2] + CONSTANT(-0.143048168103000000)*f[12] + CONSTANT(0.184674390923000000)*f[14];
    tg = CONSTANT(0.218509686118000010)*g[2] + CONSTANT(-0.143048168103000000)*g[12] + CONSTANT(0.184674390923000000)*g[14];
    y[3] += tf*g[7] + tg*f[7];
    y[7] = tf*g[3] + tg*f[3];
    t = f[3] * g[7] + f[7] * g[3];
    y[2] += CONSTANT(0.218509686118000010)*t;
    y[12] += CONSTANT(-0.143048168103000000)*t;
    y[14] += CONSTANT(0.184674390923000000)*t;

    // [3,8]: 13,15,
    tf = CONSTANT(-0.058399170081799998)*f[13] + CONSTANT(0.226179013155000000)*f[15];
    tg = CONSTANT(-0.058399170081799998)*g[13] + CONSTANT(0.226179013155000000)*g[15];
    y[3] += tf*g[8] + tg*f[8];
    y[8] += tf*g[3] + tg*f[3];
    t = f[3] * g[8] + f[8] * g[3];
    y[13] += CONSTANT(-0.058399170081799998)*t;
    y[15] += CONSTANT(0.226179013155000000)*t;

    // [4,4]: 0,6,
    tf = CONSTANT(0.282094791770000020)*f[0] + CONSTANT(-0.180223751576000010)*f[6];
    tg = CONSTANT(0.282094791770000020)*g[0] + CONSTANT(-0.180223751576000010)*g[6];
    y[4] += tf*g[4] + tg*f[4];
    t = f[4] * g[4];
    y[0] += CONSTANT(0.282094791770000020)*t;
    y[6] += CONSTANT(-0.180223751576000010)*t;

    // [4,5]: 7,
    tf = CONSTANT(0.156078347226000000)*f[7];
    tg = CONSTANT(0.156078347226000000)*g[7];
    y[4] += tf*g[5] + tg*f[5];
    y[5] += tf*g[4] + tg*f[4];
    t = f[4] * g[5] + f[5] * g[4];
    y[7] += CONSTANT(0.156078347226000000)*t;

    // [4,9]: 3,13,
    tf = CONSTANT(0.226179013157999990)*f[3] + CONSTANT(-0.094031597258400004)*f[13];
    tg = CONSTANT(0.226179013157999990)*g[3] + CONSTANT(-0.094031597258400004)*g[13];
    y[4] += tf*g[9] + tg*f[9];
    y[9] += tf*g[4] + tg*f[4];
    t = f[4] * g[9] + f[9] * g[4];
    y[3] += CONSTANT(0.226179013157999990)*t;
    y[13] += CONSTANT(-0.094031597258400004)*t;

    // [4,10]: 2,12,
    tf = CONSTANT(0.184674390919999990)*f[2] + CONSTANT(-0.188063194517999990)*f[12];
    tg = CONSTANT(0.184674390919999990)*g[2] + CONSTANT(-0.188063194517999990)*g[12];
    y[4] += tf*g[10] + tg*f[10];
    y[10] = tf*g[4] + tg*f[4];
    t = f[4] * g[10] + f[10] * g[4];
    y[2] += CONSTANT(0.184674390919999990)*t;
    y[12] += CONSTANT(-0.188063194517999990)*t;

    // [4,11]: 3,13,15,
    tf = CONSTANT(-0.058399170082300000)*f[3] + CONSTANT(0.145673124078000010)*f[13] + CONSTANT(0.094031597258400004)*f[15];
    tg = CONSTANT(-0.058399170082300000)*g[3] + CONSTANT(0.145673124078000010)*g[13] + CONSTANT(0.094031597258400004)*g[15];
    y[4] += tf*g[11] + tg*f[11];
    y[11] += tf*g[4] + tg*f[4];
    t = f[4] * g[11] + f[11] * g[4];
    y[3] += CONSTANT(-0.058399170082300000)*t;
    y[13] += CONSTANT(0.145673124078000010)*t;
    y[15] += CONSTANT(0.094031597258400004)*t;

    // [5,5]: 0,6,8,
    tf = CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.090111875786499998)*f[6] + CONSTANT(-0.156078347227999990)*f[8];
    tg = CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.090111875786499998)*g[6] + CONSTANT(-0.156078347227999990)*g[8];
    y[5] += tf*g[5] + tg*f[5];
    t = f[5] * g[5];
    y[0] += CONSTANT(0.282094791773999990)*t;
    y[6] += CONSTANT(0.090111875786499998)*t;
    y[8] += CONSTANT(-0.156078347227999990)*t;

    // [5,9]: 14,
    tf = CONSTANT(0.148677009677999990)*f[14];
    tg = CONSTANT(0.148677009677999990)*g[14];
    y[5] += tf*g[9] + tg*f[9];
    y[9] += tf*g[5] + tg*f[5];
    t = f[5] * g[9] + f[9] * g[5];
    y[14] += CONSTANT(0.148677009677999990)*t;

    // [5,10]: 3,13,15,
    tf = CONSTANT(0.184674390919999990)*f[3] + CONSTANT(0.115164716490000000)*f[13] + CONSTANT(-0.148677009678999990)*f[15];
    tg = CONSTANT(0.184674390919999990)*g[3] + CONSTANT(0.115164716490000000)*g[13] + CONSTANT(-0.148677009678999990)*g[15];
    y[5] += tf*g[10] + tg*f[10];
    y[10] += tf*g[5] + tg*f[5];
    t = f[5] * g[10] + f[10] * g[5];
    y[3] += CONSTANT(0.184674390919999990)*t;
    y[13] += CONSTANT(0.115164716490000000)*t;
    y[15] += CONSTANT(-0.148677009678999990)*t;

    // [5,11]: 2,12,14,
    tf = CONSTANT(0.233596680327000010)*f[2] + CONSTANT(0.059470803871800003)*f[12] + CONSTANT(-0.115164716491000000)*f[14];
    tg = CONSTANT(0.233596680327000010)*g[2] + CONSTANT(0.059470803871800003)*g[12] + CONSTANT(-0.115164716491000000)*g[14];
    y[5] += tf*g[11] + tg*f[11];
    y[11] += tf*g[5] + tg*f[5];
    t = f[5] * g[11] + f[11] * g[5];
    y[2] += CONSTANT(0.233596680327000010)*t;
    y[12] += CONSTANT(0.059470803871800003)*t;
    y[14] += CONSTANT(-0.115164716491000000)*t;

    // [6,6]: 0,6,
    // Here the listed index 6 equals i and j, so tf/tg carry only the 0 term and
    // the 6 term is applied through the y[6] update on t.
    tf = CONSTANT(0.282094797560000000)*f[0];
    tg = CONSTANT(0.282094797560000000)*g[0];
    y[6] += tf*g[6] + tg*f[6];
    t = f[6] * g[6];
    y[0] += CONSTANT(0.282094797560000000)*t;
    y[6] += CONSTANT(0.180223764527000010)*t;

    // [7,7]: 6,0,8,
    tf = CONSTANT(0.090111875786499998)*f[6] + CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.156078347227999990)*f[8];
    tg = CONSTANT(0.090111875786499998)*g[6] + CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.156078347227999990)*g[8];
    y[7] += tf*g[7] + tg*f[7];
    t = f[7] * g[7];
    y[6] += CONSTANT(0.090111875786499998)*t;
    y[0] += CONSTANT(0.282094791773999990)*t;
    y[8] += CONSTANT(0.156078347227999990)*t;

    // [7,10]: 9,1,11,
    tf = CONSTANT(0.148677009678999990)*f[9] + CONSTANT(0.184674390919999990)*f[1] + CONSTANT(0.115164716490000000)*f[11];
    tg = CONSTANT(0.148677009678999990)*g[9] + CONSTANT(0.184674390919999990)*g[1] + CONSTANT(0.115164716490000000)*g[11];
    y[7] += tf*g[10] + tg*f[10];
    y[10] += tf*g[7] + tg*f[7];
    t = f[7] * g[10] + f[10] * g[7];
    y[9] += CONSTANT(0.148677009678999990)*t;
    y[1] += CONSTANT(0.184674390919999990)*t;
    y[11] += CONSTANT(0.115164716490000000)*t;

    // [7,13]: 12,2,14,
    tf = CONSTANT(0.059470803871800003)*f[12] + CONSTANT(0.233596680327000010)*f[2] + CONSTANT(0.115164716491000000)*f[14];
    tg = CONSTANT(0.059470803871800003)*g[12] + CONSTANT(0.233596680327000010)*g[2] + CONSTANT(0.115164716491000000)*g[14];
    y[7] += tf*g[13] + tg*f[13];
    y[13] += tf*g[7] + tg*f[7];
    t = f[7] * g[13] + f[13] * g[7];
    y[12] += CONSTANT(0.059470803871800003)*t;
    y[2] += CONSTANT(0.233596680327000010)*t;
    y[14] += CONSTANT(0.115164716491000000)*t;

    // [7,14]: 15,
    tf = CONSTANT(0.148677009677999990)*f[15];
    tg = CONSTANT(0.148677009677999990)*g[15];
    y[7] += tf*g[14] + tg*f[14];
    y[14] += tf*g[7] + tg*f[7];
    t = f[7] * g[14] + f[14] * g[7];
    y[15] += CONSTANT(0.148677009677999990)*t;

    // [8,8]: 0,6,
    tf = CONSTANT(0.282094791770000020)*f[0] + CONSTANT(-0.180223751576000010)*f[6];
    tg = CONSTANT(0.282094791770000020)*g[0] + CONSTANT(-0.180223751576000010)*g[6];
    y[8] += tf*g[8] + tg*f[8];
    t = f[8] * g[8];
    y[0] += CONSTANT(0.282094791770000020)*t;
    y[6] += CONSTANT(-0.180223751576000010)*t;

    // [8,9]: 11,
    tf = CONSTANT(-0.094031597259499999)*f[11];
    tg = CONSTANT(-0.094031597259499999)*g[11];
    y[8] += tf*g[9] + tg*f[9];
    y[9] += tf*g[8] + tg*f[8];
    t = f[8] * g[9] + f[9] * g[8];
    y[11] += CONSTANT(-0.094031597259499999)*t;

    // [8,13]: 15,
    tf = CONSTANT(-0.094031597259499999)*f[15];
    tg = CONSTANT(-0.094031597259499999)*g[15];
    y[8] += tf*g[13] + tg*f[13];
    y[13] += tf*g[8] + tg*f[8];
    t = f[8] * g[13] + f[13] * g[8];
    y[15] += CONSTANT(-0.094031597259499999)*t;

    // [8,14]: 2,12,
    tf = CONSTANT(0.184674390919999990)*f[2] + CONSTANT(-0.188063194517999990)*f[12];
    tg = CONSTANT(0.184674390919999990)*g[2] + CONSTANT(-0.188063194517999990)*g[12];
    y[8] += tf*g[14] + tg*f[14];
    y[14] += tf*g[8] + tg*f[8];
    t = f[8] * g[14] + f[14] * g[8];
    y[2] += CONSTANT(0.184674390919999990)*t;
    y[12] += CONSTANT(-0.188063194517999990)*t;

    // [9,9]: 6,0,
    tf = CONSTANT(-0.210261043508000010)*f[6] + CONSTANT(0.282094791766999970)*f[0];
    tg = CONSTANT(-0.210261043508000010)*g[6] + CONSTANT(0.282094791766999970)*g[0];
    y[9] += tf*g[9] + tg*f[9];
    t = f[9] * g[9];
    y[6] += CONSTANT(-0.210261043508000010)*t;
    y[0] += CONSTANT(0.282094791766999970)*t;

    // [10,10]: 0,
    tf = CONSTANT(0.282094791771999980)*f[0];
    tg = CONSTANT(0.282094791771999980)*g[0];
    y[10] += tf*g[10] + tg*f[10];
    t = f[10] * g[10];
    y[0] += CONSTANT(0.282094791771999980)*t;

    // [11,11]: 0,6,8,
    tf = CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.126156626101000010)*f[6] + CONSTANT(-0.145673124078999990)*f[8];
    tg = CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.126156626101000010)*g[6] + CONSTANT(-0.145673124078999990)*g[8];
    y[11] += tf*g[11] + tg*f[11];
    t = f[11] * g[11];
    y[0] += CONSTANT(0.282094791773999990)*t;
    y[6] += CONSTANT(0.126156626101000010)*t;
    y[8] += CONSTANT(-0.145673124078999990)*t;

    // [12,12]: 0,6,
    tf = CONSTANT(0.282094799871999980)*f[0] + CONSTANT(0.168208852954000010)*f[6];
    tg = CONSTANT(0.282094799871999980)*g[0] + CONSTANT(0.168208852954000010)*g[6];
    y[12] += tf*g[12] + tg*f[12];
    t = f[12] * g[12];
    y[0] += CONSTANT(0.282094799871999980)*t;
    y[6] += CONSTANT(0.168208852954000010)*t;

    // [13,13]: 0,8,6,
    tf = CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.145673124078999990)*f[8] + CONSTANT(0.126156626101000010)*f[6];
    tg = CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.145673124078999990)*g[8] + CONSTANT(0.126156626101000010)*g[6];
    y[13] += tf*g[13] + tg*f[13];
    t = f[13] * g[13];
    y[0] += CONSTANT(0.282094791773999990)*t;
    y[8] += CONSTANT(0.145673124078999990)*t;
    y[6] += CONSTANT(0.126156626101000010)*t;

    // [14,14]: 0,
    tf = CONSTANT(0.282094791771999980)*f[0];
    tg = CONSTANT(0.282094791771999980)*g[0];
    y[14] += tf*g[14] + tg*f[14];
    t = f[14] * g[14];
    y[0] += CONSTANT(0.282094791771999980)*t;

    // [15,15]: 0,6,
    tf = CONSTANT(0.282094791766999970)*f[0] + CONSTANT(-0.210261043508000010)*f[6];
    tg = CONSTANT(0.282094791766999970)*g[0] + CONSTANT(-0.210261043508000010)*g[6];
    y[15] += tf*g[15] + tg*f[15];
    t = f[15] * g[15];
    y[0] += CONSTANT(0.282094791766999970)*t;
    y[6] += CONSTANT(-0.210261043508000010)*t;

    // multiply count=399

    return y;
}

+ +//------------------------------------------------------------------------------------- +// http://msdn.microsoft.com/en-us/library/windows/desktop/bb232908.aspx +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +float* DirectX::XMSHMultiply5( + float *y, + const float *f, + const float *g) noexcept +{ + if (!y || !f || !g) + return nullptr; + + REAL tf, tg, t; + // [0,0]: 0, + y[0] = CONSTANT(0.282094792935999980)*f[0] * g[0]; + + // [1,1]: 0,6,8, + tf = CONSTANT(0.282094791773000010)*f[0] + CONSTANT(-0.126156626101000010)*f[6] + CONSTANT(-0.218509686119999990)*f[8]; + tg = CONSTANT(0.282094791773000010)*g[0] + CONSTANT(-0.126156626101000010)*g[6] + CONSTANT(-0.218509686119999990)*g[8]; + y[1] = tf*g[1] + tg*f[1]; + t = f[1] * g[1]; + y[0] += CONSTANT(0.282094791773000010)*t; + y[6] = CONSTANT(-0.126156626101000010)*t; + y[8] = CONSTANT(-0.218509686119999990)*t; + + // [1,4]: 3,13,15, + tf = CONSTANT(0.218509686114999990)*f[3] + CONSTANT(-0.058399170082300000)*f[13] + CONSTANT(-0.226179013157999990)*f[15]; + tg = CONSTANT(0.218509686114999990)*g[3] + CONSTANT(-0.058399170082300000)*g[13] + CONSTANT(-0.226179013157999990)*g[15]; + y[1] += tf*g[4] + tg*f[4]; + y[4] = tf*g[1] + tg*f[1]; + t = f[1] * g[4] + f[4] * g[1]; + y[3] = CONSTANT(0.218509686114999990)*t; + y[13] = CONSTANT(-0.058399170082300000)*t; + y[15] = CONSTANT(-0.226179013157999990)*t; + + // [1,5]: 2,12,14, + tf = CONSTANT(0.218509686118000010)*f[2] + CONSTANT(-0.143048168103000000)*f[12] + CONSTANT(-0.184674390923000000)*f[14]; + tg = CONSTANT(0.218509686118000010)*g[2] + CONSTANT(-0.143048168103000000)*g[12] + CONSTANT(-0.184674390923000000)*g[14]; + y[1] += tf*g[5] + tg*f[5]; + y[5] = tf*g[1] + tg*f[1]; + t = f[1] * g[5] + f[5] * g[1]; + y[2] = CONSTANT(0.218509686118000010)*t; + y[12] = CONSTANT(-0.143048168103000000)*t; + y[14] = CONSTANT(-0.184674390923000000)*t; + + // [1,9]: 8,22,24, + tf = CONSTANT(0.226179013155000000)*f[8] + 
CONSTANT(-0.043528171378199997)*f[22] + CONSTANT(-0.230329432978999990)*f[24]; + tg = CONSTANT(0.226179013155000000)*g[8] + CONSTANT(-0.043528171378199997)*g[22] + CONSTANT(-0.230329432978999990)*g[24]; + y[1] += tf*g[9] + tg*f[9]; + y[9] = tf*g[1] + tg*f[1]; + t = f[1] * g[9] + f[9] * g[1]; + y[8] += CONSTANT(0.226179013155000000)*t; + y[22] = CONSTANT(-0.043528171378199997)*t; + y[24] = CONSTANT(-0.230329432978999990)*t; + + // [1,10]: 7,21,23, + tf = CONSTANT(0.184674390919999990)*f[7] + CONSTANT(-0.075393004386799994)*f[21] + CONSTANT(-0.199471140200000010)*f[23]; + tg = CONSTANT(0.184674390919999990)*g[7] + CONSTANT(-0.075393004386799994)*g[21] + CONSTANT(-0.199471140200000010)*g[23]; + y[1] += tf*g[10] + tg*f[10]; + y[10] = tf*g[1] + tg*f[1]; + t = f[1] * g[10] + f[10] * g[1]; + y[7] = CONSTANT(0.184674390919999990)*t; + y[21] = CONSTANT(-0.075393004386799994)*t; + y[23] = CONSTANT(-0.199471140200000010)*t; + + // [1,11]: 6,8,20,22, + tf = CONSTANT(0.202300659402999990)*f[6] + CONSTANT(0.058399170081799998)*f[8] + CONSTANT(-0.150786008773000000)*f[20] + CONSTANT(-0.168583882836999990)*f[22]; + tg = CONSTANT(0.202300659402999990)*g[6] + CONSTANT(0.058399170081799998)*g[8] + CONSTANT(-0.150786008773000000)*g[20] + CONSTANT(-0.168583882836999990)*g[22]; + y[1] += tf*g[11] + tg*f[11]; + y[11] = tf*g[1] + tg*f[1]; + t = f[1] * g[11] + f[11] * g[1]; + y[6] += CONSTANT(0.202300659402999990)*t; + y[8] += CONSTANT(0.058399170081799998)*t; + y[20] = CONSTANT(-0.150786008773000000)*t; + y[22] += CONSTANT(-0.168583882836999990)*t; + + // [1,12]: 19, + tf = CONSTANT(0.194663900273000010)*f[19]; + tg = CONSTANT(0.194663900273000010)*g[19]; + y[1] += tf*g[12] + tg*f[12]; + y[12] += tf*g[1] + tg*f[1]; + t = f[1] * g[12] + f[12] * g[1]; + y[19] = CONSTANT(0.194663900273000010)*t; + + // [1,13]: 18, + tf = CONSTANT(0.168583882834000000)*f[18]; + tg = CONSTANT(0.168583882834000000)*g[18]; + y[1] += tf*g[13] + tg*f[13]; + y[13] += tf*g[1] + tg*f[1]; + t = f[1] * g[13] + f[13] * 
g[1]; + y[18] = CONSTANT(0.168583882834000000)*t; + + // [1,14]: 17,19, + tf = CONSTANT(0.199471140196999990)*f[17] + CONSTANT(0.075393004386399995)*f[19]; + tg = CONSTANT(0.199471140196999990)*g[17] + CONSTANT(0.075393004386399995)*g[19]; + y[1] += tf*g[14] + tg*f[14]; + y[14] += tf*g[1] + tg*f[1]; + t = f[1] * g[14] + f[14] * g[1]; + y[17] = CONSTANT(0.199471140196999990)*t; + y[19] += CONSTANT(0.075393004386399995)*t; + + // [1,15]: 16,18, + tf = CONSTANT(0.230329432973999990)*f[16] + CONSTANT(0.043528171377799997)*f[18]; + tg = CONSTANT(0.230329432973999990)*g[16] + CONSTANT(0.043528171377799997)*g[18]; + y[1] += tf*g[15] + tg*f[15]; + y[15] += tf*g[1] + tg*f[1]; + t = f[1] * g[15] + f[15] * g[1]; + y[16] = CONSTANT(0.230329432973999990)*t; + y[18] += CONSTANT(0.043528171377799997)*t; + + // [2,2]: 0,6, + tf = CONSTANT(0.282094795249000000)*f[0] + CONSTANT(0.252313259986999990)*f[6]; + tg = CONSTANT(0.282094795249000000)*g[0] + CONSTANT(0.252313259986999990)*g[6]; + y[2] += tf*g[2] + tg*f[2]; + t = f[2] * g[2]; + y[0] += CONSTANT(0.282094795249000000)*t; + y[6] += CONSTANT(0.252313259986999990)*t; + + // [2,10]: 4,18, + tf = CONSTANT(0.184674390919999990)*f[4] + CONSTANT(0.213243618621000000)*f[18]; + tg = CONSTANT(0.184674390919999990)*g[4] + CONSTANT(0.213243618621000000)*g[18]; + y[2] += tf*g[10] + tg*f[10]; + y[10] += tf*g[2] + tg*f[2]; + t = f[2] * g[10] + f[10] * g[2]; + y[4] += CONSTANT(0.184674390919999990)*t; + y[18] += CONSTANT(0.213243618621000000)*t; + + // [2,12]: 6,20, + tf = CONSTANT(0.247766706973999990)*f[6] + CONSTANT(0.246232537174000010)*f[20]; + tg = CONSTANT(0.247766706973999990)*g[6] + CONSTANT(0.246232537174000010)*g[20]; + y[2] += tf*g[12] + tg*f[12]; + y[12] += tf*g[2] + tg*f[2]; + t = f[2] * g[12] + f[12] * g[2]; + y[6] += CONSTANT(0.247766706973999990)*t; + y[20] += CONSTANT(0.246232537174000010)*t; + + // [2,14]: 8,22, + tf = CONSTANT(0.184674390919999990)*f[8] + CONSTANT(0.213243618621000000)*f[22]; + tg = 
CONSTANT(0.184674390919999990)*g[8] + CONSTANT(0.213243618621000000)*g[22]; + y[2] += tf*g[14] + tg*f[14]; + y[14] += tf*g[2] + tg*f[2]; + t = f[2] * g[14] + f[14] * g[2]; + y[8] += CONSTANT(0.184674390919999990)*t; + y[22] += CONSTANT(0.213243618621000000)*t; + + // [3,3]: 0,6,8, + tf = CONSTANT(0.282094791773000010)*f[0] + CONSTANT(-0.126156626101000010)*f[6] + CONSTANT(0.218509686119999990)*f[8]; + tg = CONSTANT(0.282094791773000010)*g[0] + CONSTANT(-0.126156626101000010)*g[6] + CONSTANT(0.218509686119999990)*g[8]; + y[3] += tf*g[3] + tg*f[3]; + t = f[3] * g[3]; + y[0] += CONSTANT(0.282094791773000010)*t; + y[6] += CONSTANT(-0.126156626101000010)*t; + y[8] += CONSTANT(0.218509686119999990)*t; + + // [3,7]: 2,12,14, + tf = CONSTANT(0.218509686118000010)*f[2] + CONSTANT(-0.143048168103000000)*f[12] + CONSTANT(0.184674390923000000)*f[14]; + tg = CONSTANT(0.218509686118000010)*g[2] + CONSTANT(-0.143048168103000000)*g[12] + CONSTANT(0.184674390923000000)*g[14]; + y[3] += tf*g[7] + tg*f[7]; + y[7] += tf*g[3] + tg*f[3]; + t = f[3] * g[7] + f[7] * g[3]; + y[2] += CONSTANT(0.218509686118000010)*t; + y[12] += CONSTANT(-0.143048168103000000)*t; + y[14] += CONSTANT(0.184674390923000000)*t; + + // [3,9]: 4,16,18, + tf = CONSTANT(0.226179013157999990)*f[4] + CONSTANT(0.230329432973999990)*f[16] + CONSTANT(-0.043528171377799997)*f[18]; + tg = CONSTANT(0.226179013157999990)*g[4] + CONSTANT(0.230329432973999990)*g[16] + CONSTANT(-0.043528171377799997)*g[18]; + y[3] += tf*g[9] + tg*f[9]; + y[9] += tf*g[3] + tg*f[3]; + t = f[3] * g[9] + f[9] * g[3]; + y[4] += CONSTANT(0.226179013157999990)*t; + y[16] += CONSTANT(0.230329432973999990)*t; + y[18] += CONSTANT(-0.043528171377799997)*t; + + // [3,10]: 5,17,19, + tf = CONSTANT(0.184674390919999990)*f[5] + CONSTANT(0.199471140200000010)*f[17] + CONSTANT(-0.075393004386799994)*f[19]; + tg = CONSTANT(0.184674390919999990)*g[5] + CONSTANT(0.199471140200000010)*g[17] + CONSTANT(-0.075393004386799994)*g[19]; + y[3] += tf*g[10] + tg*f[10]; + 
y[10] += tf*g[3] + tg*f[3]; + t = f[3] * g[10] + f[10] * g[3]; + y[5] += CONSTANT(0.184674390919999990)*t; + y[17] += CONSTANT(0.199471140200000010)*t; + y[19] += CONSTANT(-0.075393004386799994)*t; + + // [3,12]: 21, + tf = CONSTANT(0.194663900273000010)*f[21]; + tg = CONSTANT(0.194663900273000010)*g[21]; + y[3] += tf*g[12] + tg*f[12]; + y[12] += tf*g[3] + tg*f[3]; + t = f[3] * g[12] + f[12] * g[3]; + y[21] += CONSTANT(0.194663900273000010)*t; + + // [3,13]: 8,6,20,22, + tf = CONSTANT(-0.058399170081799998)*f[8] + CONSTANT(0.202300659402999990)*f[6] + CONSTANT(-0.150786008773000000)*f[20] + CONSTANT(0.168583882836999990)*f[22]; + tg = CONSTANT(-0.058399170081799998)*g[8] + CONSTANT(0.202300659402999990)*g[6] + CONSTANT(-0.150786008773000000)*g[20] + CONSTANT(0.168583882836999990)*g[22]; + y[3] += tf*g[13] + tg*f[13]; + y[13] += tf*g[3] + tg*f[3]; + t = f[3] * g[13] + f[13] * g[3]; + y[8] += CONSTANT(-0.058399170081799998)*t; + y[6] += CONSTANT(0.202300659402999990)*t; + y[20] += CONSTANT(-0.150786008773000000)*t; + y[22] += CONSTANT(0.168583882836999990)*t; + + // [3,14]: 21,23, + tf = CONSTANT(-0.075393004386399995)*f[21] + CONSTANT(0.199471140196999990)*f[23]; + tg = CONSTANT(-0.075393004386399995)*g[21] + CONSTANT(0.199471140196999990)*g[23]; + y[3] += tf*g[14] + tg*f[14]; + y[14] += tf*g[3] + tg*f[3]; + t = f[3] * g[14] + f[14] * g[3]; + y[21] += CONSTANT(-0.075393004386399995)*t; + y[23] += CONSTANT(0.199471140196999990)*t; + + // [3,15]: 8,22,24, + tf = CONSTANT(0.226179013155000000)*f[8] + CONSTANT(-0.043528171378199997)*f[22] + CONSTANT(0.230329432978999990)*f[24]; + tg = CONSTANT(0.226179013155000000)*g[8] + CONSTANT(-0.043528171378199997)*g[22] + CONSTANT(0.230329432978999990)*g[24]; + y[3] += tf*g[15] + tg*f[15]; + y[15] += tf*g[3] + tg*f[3]; + t = f[3] * g[15] + f[15] * g[3]; + y[8] += CONSTANT(0.226179013155000000)*t; + y[22] += CONSTANT(-0.043528171378199997)*t; + y[24] += CONSTANT(0.230329432978999990)*t; + + // [4,4]: 0,6,20,24, + tf = 
CONSTANT(0.282094791770000020)*f[0] + CONSTANT(-0.180223751576000010)*f[6] + CONSTANT(0.040299255967500003)*f[20] + CONSTANT(-0.238413613505999990)*f[24]; + tg = CONSTANT(0.282094791770000020)*g[0] + CONSTANT(-0.180223751576000010)*g[6] + CONSTANT(0.040299255967500003)*g[20] + CONSTANT(-0.238413613505999990)*g[24]; + y[4] += tf*g[4] + tg*f[4]; + t = f[4] * g[4]; + y[0] += CONSTANT(0.282094791770000020)*t; + y[6] += CONSTANT(-0.180223751576000010)*t; + y[20] += CONSTANT(0.040299255967500003)*t; + y[24] += CONSTANT(-0.238413613505999990)*t; + + // [4,5]: 7,21,23, + tf = CONSTANT(0.156078347226000000)*f[7] + CONSTANT(-0.063718718434399996)*f[21] + CONSTANT(-0.168583882835000000)*f[23]; + tg = CONSTANT(0.156078347226000000)*g[7] + CONSTANT(-0.063718718434399996)*g[21] + CONSTANT(-0.168583882835000000)*g[23]; + y[4] += tf*g[5] + tg*f[5]; + y[5] += tf*g[4] + tg*f[4]; + t = f[4] * g[5] + f[5] * g[4]; + y[7] += CONSTANT(0.156078347226000000)*t; + y[21] += CONSTANT(-0.063718718434399996)*t; + y[23] += CONSTANT(-0.168583882835000000)*t; + + // [4,11]: 3,13,15, + tf = CONSTANT(-0.058399170082300000)*f[3] + CONSTANT(0.145673124078000010)*f[13] + CONSTANT(0.094031597258400004)*f[15]; + tg = CONSTANT(-0.058399170082300000)*g[3] + CONSTANT(0.145673124078000010)*g[13] + CONSTANT(0.094031597258400004)*g[15]; + y[4] += tf*g[11] + tg*f[11]; + y[11] += tf*g[4] + tg*f[4]; + t = f[4] * g[11] + f[11] * g[4]; + y[3] += CONSTANT(-0.058399170082300000)*t; + y[13] += CONSTANT(0.145673124078000010)*t; + y[15] += CONSTANT(0.094031597258400004)*t; + + // [4,16]: 8,22, + tf = CONSTANT(0.238413613494000000)*f[8] + CONSTANT(-0.075080816693699995)*f[22]; + tg = CONSTANT(0.238413613494000000)*g[8] + CONSTANT(-0.075080816693699995)*g[22]; + y[4] += tf*g[16] + tg*f[16]; + y[16] += tf*g[4] + tg*f[4]; + t = f[4] * g[16] + f[16] * g[4]; + y[8] += CONSTANT(0.238413613494000000)*t; + y[22] += CONSTANT(-0.075080816693699995)*t; + + // [4,18]: 6,20,24, + tf = CONSTANT(0.156078347226000000)*f[6] + 
CONSTANT(-0.190364615029000010)*f[20] + CONSTANT(0.075080816691500005)*f[24]; + tg = CONSTANT(0.156078347226000000)*g[6] + CONSTANT(-0.190364615029000010)*g[20] + CONSTANT(0.075080816691500005)*g[24]; + y[4] += tf*g[18] + tg*f[18]; + y[18] += tf*g[4] + tg*f[4]; + t = f[4] * g[18] + f[18] * g[4]; + y[6] += CONSTANT(0.156078347226000000)*t; + y[20] += CONSTANT(-0.190364615029000010)*t; + y[24] += CONSTANT(0.075080816691500005)*t; + + // [4,19]: 7,21,23, + tf = CONSTANT(-0.063718718434399996)*f[7] + CONSTANT(0.141889406569999990)*f[21] + CONSTANT(0.112621225039000000)*f[23]; + tg = CONSTANT(-0.063718718434399996)*g[7] + CONSTANT(0.141889406569999990)*g[21] + CONSTANT(0.112621225039000000)*g[23]; + y[4] += tf*g[19] + tg*f[19]; + y[19] += tf*g[4] + tg*f[4]; + t = f[4] * g[19] + f[19] * g[4]; + y[7] += CONSTANT(-0.063718718434399996)*t; + y[21] += CONSTANT(0.141889406569999990)*t; + y[23] += CONSTANT(0.112621225039000000)*t; + + // [5,5]: 0,6,8,20,22, + tf = CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.090111875786499998)*f[6] + CONSTANT(-0.156078347227999990)*f[8] + CONSTANT(-0.161197023870999990)*f[20] + CONSTANT(-0.180223751574000000)*f[22]; + tg = CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.090111875786499998)*g[6] + CONSTANT(-0.156078347227999990)*g[8] + CONSTANT(-0.161197023870999990)*g[20] + CONSTANT(-0.180223751574000000)*g[22]; + y[5] += tf*g[5] + tg*f[5]; + t = f[5] * g[5]; + y[0] += CONSTANT(0.282094791773999990)*t; + y[6] += CONSTANT(0.090111875786499998)*t; + y[8] += CONSTANT(-0.156078347227999990)*t; + y[20] += CONSTANT(-0.161197023870999990)*t; + y[22] += CONSTANT(-0.180223751574000000)*t; + + // [5,11]: 2,12,14, + tf = CONSTANT(0.233596680327000010)*f[2] + CONSTANT(0.059470803871800003)*f[12] + CONSTANT(-0.115164716491000000)*f[14]; + tg = CONSTANT(0.233596680327000010)*g[2] + CONSTANT(0.059470803871800003)*g[12] + CONSTANT(-0.115164716491000000)*g[14]; + y[5] += tf*g[11] + tg*f[11]; + y[11] += tf*g[5] + tg*f[5]; + t = f[5] * g[11] + f[11] * 
g[5]; + y[2] += CONSTANT(0.233596680327000010)*t; + y[12] += CONSTANT(0.059470803871800003)*t; + y[14] += CONSTANT(-0.115164716491000000)*t; + + // [5,17]: 8,22,24, + tf = CONSTANT(0.168583882832999990)*f[8] + CONSTANT(0.132725386548000010)*f[22] + CONSTANT(-0.140463346189000000)*f[24]; + tg = CONSTANT(0.168583882832999990)*g[8] + CONSTANT(0.132725386548000010)*g[22] + CONSTANT(-0.140463346189000000)*g[24]; + y[5] += tf*g[17] + tg*f[17]; + y[17] += tf*g[5] + tg*f[5]; + t = f[5] * g[17] + f[17] * g[5]; + y[8] += CONSTANT(0.168583882832999990)*t; + y[22] += CONSTANT(0.132725386548000010)*t; + y[24] += CONSTANT(-0.140463346189000000)*t; + + // [5,18]: 7,21,23, + tf = CONSTANT(0.180223751571000010)*f[7] + CONSTANT(0.090297865407399994)*f[21] + CONSTANT(-0.132725386549000010)*f[23]; + tg = CONSTANT(0.180223751571000010)*g[7] + CONSTANT(0.090297865407399994)*g[21] + CONSTANT(-0.132725386549000010)*g[23]; + y[5] += tf*g[18] + tg*f[18]; + y[18] += tf*g[5] + tg*f[5]; + t = f[5] * g[18] + f[18] * g[5]; + y[7] += CONSTANT(0.180223751571000010)*t; + y[21] += CONSTANT(0.090297865407399994)*t; + y[23] += CONSTANT(-0.132725386549000010)*t; + + // [5,19]: 6,8,20,22, + tf = CONSTANT(0.220728115440999990)*f[6] + CONSTANT(0.063718718433900007)*f[8] + CONSTANT(0.044869370061299998)*f[20] + CONSTANT(-0.090297865408399999)*f[22]; + tg = CONSTANT(0.220728115440999990)*g[6] + CONSTANT(0.063718718433900007)*g[8] + CONSTANT(0.044869370061299998)*g[20] + CONSTANT(-0.090297865408399999)*g[22]; + y[5] += tf*g[19] + tg*f[19]; + y[19] += tf*g[5] + tg*f[5]; + t = f[5] * g[19] + f[19] * g[5]; + y[6] += CONSTANT(0.220728115440999990)*t; + y[8] += CONSTANT(0.063718718433900007)*t; + y[20] += CONSTANT(0.044869370061299998)*t; + y[22] += CONSTANT(-0.090297865408399999)*t; + + // [6,6]: 0,6,20, + tf = CONSTANT(0.282094797560000000)*f[0] + CONSTANT(0.241795553185999990)*f[20]; + tg = CONSTANT(0.282094797560000000)*g[0] + CONSTANT(0.241795553185999990)*g[20]; + y[6] += tf*g[6] + tg*f[6]; + t = f[6] * 
g[6]; + y[0] += CONSTANT(0.282094797560000000)*t; + y[6] += CONSTANT(0.180223764527000010)*t; + y[20] += CONSTANT(0.241795553185999990)*t; + + // [7,7]: 6,0,8,20,22, + tf = CONSTANT(0.090111875786499998)*f[6] + CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.156078347227999990)*f[8] + CONSTANT(-0.161197023870999990)*f[20] + CONSTANT(0.180223751574000000)*f[22]; + tg = CONSTANT(0.090111875786499998)*g[6] + CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.156078347227999990)*g[8] + CONSTANT(-0.161197023870999990)*g[20] + CONSTANT(0.180223751574000000)*g[22]; + y[7] += tf*g[7] + tg*f[7]; + t = f[7] * g[7]; + y[6] += CONSTANT(0.090111875786499998)*t; + y[0] += CONSTANT(0.282094791773999990)*t; + y[8] += CONSTANT(0.156078347227999990)*t; + y[20] += CONSTANT(-0.161197023870999990)*t; + y[22] += CONSTANT(0.180223751574000000)*t; + + // [7,13]: 12,2,14, + tf = CONSTANT(0.059470803871800003)*f[12] + CONSTANT(0.233596680327000010)*f[2] + CONSTANT(0.115164716491000000)*f[14]; + tg = CONSTANT(0.059470803871800003)*g[12] + CONSTANT(0.233596680327000010)*g[2] + CONSTANT(0.115164716491000000)*g[14]; + y[7] += tf*g[13] + tg*f[13]; + y[13] += tf*g[7] + tg*f[7]; + t = f[7] * g[13] + f[13] * g[7]; + y[12] += CONSTANT(0.059470803871800003)*t; + y[2] += CONSTANT(0.233596680327000010)*t; + y[14] += CONSTANT(0.115164716491000000)*t; + + // [7,17]: 16,4,18, + tf = CONSTANT(0.140463346187999990)*f[16] + CONSTANT(0.168583882835000000)*f[4] + CONSTANT(0.132725386549000010)*f[18]; + tg = CONSTANT(0.140463346187999990)*g[16] + CONSTANT(0.168583882835000000)*g[4] + CONSTANT(0.132725386549000010)*g[18]; + y[7] += tf*g[17] + tg*f[17]; + y[17] += tf*g[7] + tg*f[7]; + t = f[7] * g[17] + f[17] * g[7]; + y[16] += CONSTANT(0.140463346187999990)*t; + y[4] += CONSTANT(0.168583882835000000)*t; + y[18] += CONSTANT(0.132725386549000010)*t; + + // [7,21]: 8,20,6,22, + tf = CONSTANT(-0.063718718433900007)*f[8] + CONSTANT(0.044869370061299998)*f[20] + CONSTANT(0.220728115440999990)*f[6] + 
CONSTANT(0.090297865408399999)*f[22]; + tg = CONSTANT(-0.063718718433900007)*g[8] + CONSTANT(0.044869370061299998)*g[20] + CONSTANT(0.220728115440999990)*g[6] + CONSTANT(0.090297865408399999)*g[22]; + y[7] += tf*g[21] + tg*f[21]; + y[21] += tf*g[7] + tg*f[7]; + t = f[7] * g[21] + f[21] * g[7]; + y[8] += CONSTANT(-0.063718718433900007)*t; + y[20] += CONSTANT(0.044869370061299998)*t; + y[6] += CONSTANT(0.220728115440999990)*t; + y[22] += CONSTANT(0.090297865408399999)*t; + + // [7,23]: 8,22,24, + tf = CONSTANT(0.168583882832999990)*f[8] + CONSTANT(0.132725386548000010)*f[22] + CONSTANT(0.140463346189000000)*f[24]; + tg = CONSTANT(0.168583882832999990)*g[8] + CONSTANT(0.132725386548000010)*g[22] + CONSTANT(0.140463346189000000)*g[24]; + y[7] += tf*g[23] + tg*f[23]; + y[23] += tf*g[7] + tg*f[7]; + t = f[7] * g[23] + f[23] * g[7]; + y[8] += CONSTANT(0.168583882832999990)*t; + y[22] += CONSTANT(0.132725386548000010)*t; + y[24] += CONSTANT(0.140463346189000000)*t; + + // [8,8]: 0,6,20,24, + tf = CONSTANT(0.282094791770000020)*f[0] + CONSTANT(-0.180223751576000010)*f[6] + CONSTANT(0.040299255967500003)*f[20] + CONSTANT(0.238413613505999990)*f[24]; + tg = CONSTANT(0.282094791770000020)*g[0] + CONSTANT(-0.180223751576000010)*g[6] + CONSTANT(0.040299255967500003)*g[20] + CONSTANT(0.238413613505999990)*g[24]; + y[8] += tf*g[8] + tg*f[8]; + t = f[8] * g[8]; + y[0] += CONSTANT(0.282094791770000020)*t; + y[6] += CONSTANT(-0.180223751576000010)*t; + y[20] += CONSTANT(0.040299255967500003)*t; + y[24] += CONSTANT(0.238413613505999990)*t; + + // [8,22]: 6,20,24, + tf = CONSTANT(0.156078347226000000)*f[6] + CONSTANT(-0.190364615029000010)*f[20] + CONSTANT(-0.075080816691500005)*f[24]; + tg = CONSTANT(0.156078347226000000)*g[6] + CONSTANT(-0.190364615029000010)*g[20] + CONSTANT(-0.075080816691500005)*g[24]; + y[8] += tf*g[22] + tg*f[22]; + y[22] += tf*g[8] + tg*f[8]; + t = f[8] * g[22] + f[22] * g[8]; + y[6] += CONSTANT(0.156078347226000000)*t; + y[20] += 
CONSTANT(-0.190364615029000010)*t; + y[24] += CONSTANT(-0.075080816691500005)*t; + + // [9,9]: 6,0,20, + tf = CONSTANT(-0.210261043508000010)*f[6] + CONSTANT(0.282094791766999970)*f[0] + CONSTANT(0.076934943209800002)*f[20]; + tg = CONSTANT(-0.210261043508000010)*g[6] + CONSTANT(0.282094791766999970)*g[0] + CONSTANT(0.076934943209800002)*g[20]; + y[9] += tf*g[9] + tg*f[9]; + t = f[9] * g[9]; + y[6] += CONSTANT(-0.210261043508000010)*t; + y[0] += CONSTANT(0.282094791766999970)*t; + y[20] += CONSTANT(0.076934943209800002)*t; + + // [9,10]: 7,21, + tf = CONSTANT(0.148677009678999990)*f[7] + CONSTANT(-0.099322584599600000)*f[21]; + tg = CONSTANT(0.148677009678999990)*g[7] + CONSTANT(-0.099322584599600000)*g[21]; + y[9] += tf*g[10] + tg*f[10]; + y[10] += tf*g[9] + tg*f[9]; + t = f[9] * g[10] + f[10] * g[9]; + y[7] += CONSTANT(0.148677009678999990)*t; + y[21] += CONSTANT(-0.099322584599600000)*t; + + // [9,11]: 8,22,24, + tf = CONSTANT(-0.094031597259499999)*f[8] + CONSTANT(0.133255230518000010)*f[22] + CONSTANT(0.117520066950999990)*f[24]; + tg = CONSTANT(-0.094031597259499999)*g[8] + CONSTANT(0.133255230518000010)*g[22] + CONSTANT(0.117520066950999990)*g[24]; + y[9] += tf*g[11] + tg*f[11]; + y[11] += tf*g[9] + tg*f[9]; + t = f[9] * g[11] + f[11] * g[9]; + y[8] += CONSTANT(-0.094031597259499999)*t; + y[22] += CONSTANT(0.133255230518000010)*t; + y[24] += CONSTANT(0.117520066950999990)*t; + + // [9,13]: 4,16,18, + tf = CONSTANT(-0.094031597258400004)*f[4] + CONSTANT(-0.117520066953000000)*f[16] + CONSTANT(0.133255230519000010)*f[18]; + tg = CONSTANT(-0.094031597258400004)*g[4] + CONSTANT(-0.117520066953000000)*g[16] + CONSTANT(0.133255230519000010)*g[18]; + y[9] += tf*g[13] + tg*f[13]; + y[13] += tf*g[9] + tg*f[9]; + t = f[9] * g[13] + f[13] * g[9]; + y[4] += CONSTANT(-0.094031597258400004)*t; + y[16] += CONSTANT(-0.117520066953000000)*t; + y[18] += CONSTANT(0.133255230519000010)*t; + + // [9,14]: 5,19, + tf = CONSTANT(0.148677009677999990)*f[5] + 
CONSTANT(-0.099322584600699995)*f[19]; + tg = CONSTANT(0.148677009677999990)*g[5] + CONSTANT(-0.099322584600699995)*g[19]; + y[9] += tf*g[14] + tg*f[14]; + y[14] += tf*g[9] + tg*f[9]; + t = f[9] * g[14] + f[14] * g[9]; + y[5] += CONSTANT(0.148677009677999990)*t; + y[19] += CONSTANT(-0.099322584600699995)*t; + + // [9,17]: 2,12, + tf = CONSTANT(0.162867503964999990)*f[2] + CONSTANT(-0.203550726872999990)*f[12]; + tg = CONSTANT(0.162867503964999990)*g[2] + CONSTANT(-0.203550726872999990)*g[12]; + y[9] += tf*g[17] + tg*f[17]; + y[17] += tf*g[9] + tg*f[9]; + t = f[9] * g[17] + f[17] * g[9]; + y[2] += CONSTANT(0.162867503964999990)*t; + y[12] += CONSTANT(-0.203550726872999990)*t; + + // [10,10]: 0,20,24, + tf = CONSTANT(0.282094791771999980)*f[0] + CONSTANT(-0.179514867494000000)*f[20] + CONSTANT(-0.151717754049000010)*f[24]; + tg = CONSTANT(0.282094791771999980)*g[0] + CONSTANT(-0.179514867494000000)*g[20] + CONSTANT(-0.151717754049000010)*g[24]; + y[10] += tf*g[10] + tg*f[10]; + t = f[10] * g[10]; + y[0] += CONSTANT(0.282094791771999980)*t; + y[20] += CONSTANT(-0.179514867494000000)*t; + y[24] += CONSTANT(-0.151717754049000010)*t; + + // [10,11]: 7,21,23, + tf = CONSTANT(0.115164716490000000)*f[7] + CONSTANT(0.102579924281000000)*f[21] + CONSTANT(-0.067850242288900006)*f[23]; + tg = CONSTANT(0.115164716490000000)*g[7] + CONSTANT(0.102579924281000000)*g[21] + CONSTANT(-0.067850242288900006)*g[23]; + y[10] += tf*g[11] + tg*f[11]; + y[11] += tf*g[10] + tg*f[10]; + t = f[10] * g[11] + f[11] * g[10]; + y[7] += CONSTANT(0.115164716490000000)*t; + y[21] += CONSTANT(0.102579924281000000)*t; + y[23] += CONSTANT(-0.067850242288900006)*t; + + // [10,12]: 4,18, + tf = CONSTANT(-0.188063194517999990)*f[4] + CONSTANT(-0.044418410173299998)*f[18]; + tg = CONSTANT(-0.188063194517999990)*g[4] + CONSTANT(-0.044418410173299998)*g[18]; + y[10] += tf*g[12] + tg*f[12]; + y[12] += tf*g[10] + tg*f[10]; + t = f[10] * g[12] + f[12] * g[10]; + y[4] += CONSTANT(-0.188063194517999990)*t; + y[18] 
+= CONSTANT(-0.044418410173299998)*t; + + // [10,13]: 5,17,19, + tf = CONSTANT(0.115164716490000000)*f[5] + CONSTANT(0.067850242288900006)*f[17] + CONSTANT(0.102579924281000000)*f[19]; + tg = CONSTANT(0.115164716490000000)*g[5] + CONSTANT(0.067850242288900006)*g[17] + CONSTANT(0.102579924281000000)*g[19]; + y[10] += tf*g[13] + tg*f[13]; + y[13] += tf*g[10] + tg*f[10]; + t = f[10] * g[13] + f[13] * g[10]; + y[5] += CONSTANT(0.115164716490000000)*t; + y[17] += CONSTANT(0.067850242288900006)*t; + y[19] += CONSTANT(0.102579924281000000)*t; + + // [10,14]: 16, + tf = CONSTANT(0.151717754044999990)*f[16]; + tg = CONSTANT(0.151717754044999990)*g[16]; + y[10] += tf*g[14] + tg*f[14]; + y[14] += tf*g[10] + tg*f[10]; + t = f[10] * g[14] + f[14] * g[10]; + y[16] += CONSTANT(0.151717754044999990)*t; + + // [10,15]: 5,19, + tf = CONSTANT(-0.148677009678999990)*f[5] + CONSTANT(0.099322584599600000)*f[19]; + tg = CONSTANT(-0.148677009678999990)*g[5] + CONSTANT(0.099322584599600000)*g[19]; + y[10] += tf*g[15] + tg*f[15]; + y[15] += tf*g[10] + tg*f[10]; + t = f[10] * g[15] + f[15] * g[10]; + y[5] += CONSTANT(-0.148677009678999990)*t; + y[19] += CONSTANT(0.099322584599600000)*t; + + // [11,11]: 0,6,8,20,22, + tf = CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.126156626101000010)*f[6] + CONSTANT(-0.145673124078999990)*f[8] + CONSTANT(0.025644981070299999)*f[20] + CONSTANT(-0.114687841910000000)*f[22]; + tg = CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.126156626101000010)*g[6] + CONSTANT(-0.145673124078999990)*g[8] + CONSTANT(0.025644981070299999)*g[20] + CONSTANT(-0.114687841910000000)*g[22]; + y[11] += tf*g[11] + tg*f[11]; + t = f[11] * g[11]; + y[0] += CONSTANT(0.282094791773999990)*t; + y[6] += CONSTANT(0.126156626101000010)*t; + y[8] += CONSTANT(-0.145673124078999990)*t; + y[20] += CONSTANT(0.025644981070299999)*t; + y[22] += CONSTANT(-0.114687841910000000)*t; + + // [11,14]: 17, + tf = CONSTANT(0.067850242288500007)*f[17]; + tg = CONSTANT(0.067850242288500007)*g[17]; + 
y[11] += tf*g[14] + tg*f[14]; + y[14] += tf*g[11] + tg*f[11]; + t = f[11] * g[14] + f[14] * g[11]; + y[17] += CONSTANT(0.067850242288500007)*t; + + // [11,15]: 16, + tf = CONSTANT(-0.117520066953000000)*f[16]; + tg = CONSTANT(-0.117520066953000000)*g[16]; + y[11] += tf*g[15] + tg*f[15]; + y[15] += tf*g[11] + tg*f[11]; + t = f[11] * g[15] + f[15] * g[11]; + y[16] += CONSTANT(-0.117520066953000000)*t; + + // [11,18]: 3,13,15, + tf = CONSTANT(0.168583882834000000)*f[3] + CONSTANT(0.114687841909000000)*f[13] + CONSTANT(-0.133255230519000010)*f[15]; + tg = CONSTANT(0.168583882834000000)*g[3] + CONSTANT(0.114687841909000000)*g[13] + CONSTANT(-0.133255230519000010)*g[15]; + y[11] += tf*g[18] + tg*f[18]; + y[18] += tf*g[11] + tg*f[11]; + t = f[11] * g[18] + f[18] * g[11]; + y[3] += CONSTANT(0.168583882834000000)*t; + y[13] += CONSTANT(0.114687841909000000)*t; + y[15] += CONSTANT(-0.133255230519000010)*t; + + // [11,19]: 2,14,12, + tf = CONSTANT(0.238413613504000000)*f[2] + CONSTANT(-0.102579924282000000)*f[14] + CONSTANT(0.099322584599300004)*f[12]; + tg = CONSTANT(0.238413613504000000)*g[2] + CONSTANT(-0.102579924282000000)*g[14] + CONSTANT(0.099322584599300004)*g[12]; + y[11] += tf*g[19] + tg*f[19]; + y[19] += tf*g[11] + tg*f[11]; + t = f[11] * g[19] + f[19] * g[11]; + y[2] += CONSTANT(0.238413613504000000)*t; + y[14] += CONSTANT(-0.102579924282000000)*t; + y[12] += CONSTANT(0.099322584599300004)*t; + + // [12,12]: 0,6,20, + tf = CONSTANT(0.282094799871999980)*f[0] + CONSTANT(0.168208852954000010)*f[6] + CONSTANT(0.153869910786000010)*f[20]; + tg = CONSTANT(0.282094799871999980)*g[0] + CONSTANT(0.168208852954000010)*g[6] + CONSTANT(0.153869910786000010)*g[20]; + y[12] += tf*g[12] + tg*f[12]; + t = f[12] * g[12]; + y[0] += CONSTANT(0.282094799871999980)*t; + y[6] += CONSTANT(0.168208852954000010)*t; + y[20] += CONSTANT(0.153869910786000010)*t; + + // [12,14]: 8,22, + tf = CONSTANT(-0.188063194517999990)*f[8] + CONSTANT(-0.044418410173299998)*f[22]; + tg = 
CONSTANT(-0.188063194517999990)*g[8] + CONSTANT(-0.044418410173299998)*g[22]; + y[12] += tf*g[14] + tg*f[14]; + y[14] += tf*g[12] + tg*f[12]; + t = f[12] * g[14] + f[14] * g[12]; + y[8] += CONSTANT(-0.188063194517999990)*t; + y[22] += CONSTANT(-0.044418410173299998)*t; + + // [13,13]: 0,8,6,20,22, + tf = CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.145673124078999990)*f[8] + CONSTANT(0.126156626101000010)*f[6] + CONSTANT(0.025644981070299999)*f[20] + CONSTANT(0.114687841910000000)*f[22]; + tg = CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.145673124078999990)*g[8] + CONSTANT(0.126156626101000010)*g[6] + CONSTANT(0.025644981070299999)*g[20] + CONSTANT(0.114687841910000000)*g[22]; + y[13] += tf*g[13] + tg*f[13]; + t = f[13] * g[13]; + y[0] += CONSTANT(0.282094791773999990)*t; + y[8] += CONSTANT(0.145673124078999990)*t; + y[6] += CONSTANT(0.126156626101000010)*t; + y[20] += CONSTANT(0.025644981070299999)*t; + y[22] += CONSTANT(0.114687841910000000)*t; + + // [13,14]: 23, + tf = CONSTANT(0.067850242288500007)*f[23]; + tg = CONSTANT(0.067850242288500007)*g[23]; + y[13] += tf*g[14] + tg*f[14]; + y[14] += tf*g[13] + tg*f[13]; + t = f[13] * g[14] + f[14] * g[13]; + y[23] += CONSTANT(0.067850242288500007)*t; + + // [13,15]: 8,22,24, + tf = CONSTANT(-0.094031597259499999)*f[8] + CONSTANT(0.133255230518000010)*f[22] + CONSTANT(-0.117520066950999990)*f[24]; + tg = CONSTANT(-0.094031597259499999)*g[8] + CONSTANT(0.133255230518000010)*g[22] + CONSTANT(-0.117520066950999990)*g[24]; + y[13] += tf*g[15] + tg*f[15]; + y[15] += tf*g[13] + tg*f[13]; + t = f[13] * g[15] + f[15] * g[13]; + y[8] += CONSTANT(-0.094031597259499999)*t; + y[22] += CONSTANT(0.133255230518000010)*t; + y[24] += CONSTANT(-0.117520066950999990)*t; + + // [13,21]: 2,12,14, + tf = CONSTANT(0.238413613504000000)*f[2] + CONSTANT(0.099322584599300004)*f[12] + CONSTANT(0.102579924282000000)*f[14]; + tg = CONSTANT(0.238413613504000000)*g[2] + CONSTANT(0.099322584599300004)*g[12] + 
CONSTANT(0.102579924282000000)*g[14]; + y[13] += tf*g[21] + tg*f[21]; + y[21] += tf*g[13] + tg*f[13]; + t = f[13] * g[21] + f[21] * g[13]; + y[2] += CONSTANT(0.238413613504000000)*t; + y[12] += CONSTANT(0.099322584599300004)*t; + y[14] += CONSTANT(0.102579924282000000)*t; + + // [14,14]: 0,20,24, + tf = CONSTANT(0.282094791771999980)*f[0] + CONSTANT(-0.179514867494000000)*f[20] + CONSTANT(0.151717754049000010)*f[24]; + tg = CONSTANT(0.282094791771999980)*g[0] + CONSTANT(-0.179514867494000000)*g[20] + CONSTANT(0.151717754049000010)*g[24]; + y[14] += tf*g[14] + tg*f[14]; + t = f[14] * g[14]; + y[0] += CONSTANT(0.282094791771999980)*t; + y[20] += CONSTANT(-0.179514867494000000)*t; + y[24] += CONSTANT(0.151717754049000010)*t; + + // [14,15]: 7,21, + tf = CONSTANT(0.148677009677999990)*f[7] + CONSTANT(-0.099322584600699995)*f[21]; + tg = CONSTANT(0.148677009677999990)*g[7] + CONSTANT(-0.099322584600699995)*g[21]; + y[14] += tf*g[15] + tg*f[15]; + y[15] += tf*g[14] + tg*f[14]; + t = f[14] * g[15] + f[15] * g[14]; + y[7] += CONSTANT(0.148677009677999990)*t; + y[21] += CONSTANT(-0.099322584600699995)*t; + + // [15,15]: 0,6,20, + tf = CONSTANT(0.282094791766999970)*f[0] + CONSTANT(-0.210261043508000010)*f[6] + CONSTANT(0.076934943209800002)*f[20]; + tg = CONSTANT(0.282094791766999970)*g[0] + CONSTANT(-0.210261043508000010)*g[6] + CONSTANT(0.076934943209800002)*g[20]; + y[15] += tf*g[15] + tg*f[15]; + t = f[15] * g[15]; + y[0] += CONSTANT(0.282094791766999970)*t; + y[6] += CONSTANT(-0.210261043508000010)*t; + y[20] += CONSTANT(0.076934943209800002)*t; + + // [15,23]: 12,2, + tf = CONSTANT(-0.203550726872999990)*f[12] + CONSTANT(0.162867503964999990)*f[2]; + tg = CONSTANT(-0.203550726872999990)*g[12] + CONSTANT(0.162867503964999990)*g[2]; + y[15] += tf*g[23] + tg*f[23]; + y[23] += tf*g[15] + tg*f[15]; + t = f[15] * g[23] + f[23] * g[15]; + y[12] += CONSTANT(-0.203550726872999990)*t; + y[2] += CONSTANT(0.162867503964999990)*t; + + // [16,16]: 0,6,20, + tf = 
CONSTANT(0.282094791763999990)*f[0] + CONSTANT(-0.229375683829000000)*f[6] + CONSTANT(0.106525305981000000)*f[20]; + tg = CONSTANT(0.282094791763999990)*g[0] + CONSTANT(-0.229375683829000000)*g[6] + CONSTANT(0.106525305981000000)*g[20]; + y[16] += tf*g[16] + tg*f[16]; + t = f[16] * g[16]; + y[0] += CONSTANT(0.282094791763999990)*t; + y[6] += CONSTANT(-0.229375683829000000)*t; + y[20] += CONSTANT(0.106525305981000000)*t; + + // [16,18]: 8,22, + tf = CONSTANT(-0.075080816693699995)*f[8] + CONSTANT(0.135045473380000000)*f[22]; + tg = CONSTANT(-0.075080816693699995)*g[8] + CONSTANT(0.135045473380000000)*g[22]; + y[16] += tf*g[18] + tg*f[18]; + y[18] += tf*g[16] + tg*f[16]; + t = f[16] * g[18] + f[18] * g[16]; + y[8] += CONSTANT(-0.075080816693699995)*t; + y[22] += CONSTANT(0.135045473380000000)*t; + + // [16,23]: 19,5, + tf = CONSTANT(-0.119098912754999990)*f[19] + CONSTANT(0.140463346187999990)*f[5]; + tg = CONSTANT(-0.119098912754999990)*g[19] + CONSTANT(0.140463346187999990)*g[5]; + y[16] += tf*g[23] + tg*f[23]; + y[23] += tf*g[16] + tg*f[16]; + t = f[16] * g[23] + f[23] * g[16]; + y[19] += CONSTANT(-0.119098912754999990)*t; + y[5] += CONSTANT(0.140463346187999990)*t; + + // [17,17]: 0,6,20, + tf = CONSTANT(0.282094791768999990)*f[0] + CONSTANT(-0.057343920955899998)*f[6] + CONSTANT(-0.159787958979000000)*f[20]; + tg = CONSTANT(0.282094791768999990)*g[0] + CONSTANT(-0.057343920955899998)*g[6] + CONSTANT(-0.159787958979000000)*g[20]; + y[17] += tf*g[17] + tg*f[17]; + t = f[17] * g[17]; + y[0] += CONSTANT(0.282094791768999990)*t; + y[6] += CONSTANT(-0.057343920955899998)*t; + y[20] += CONSTANT(-0.159787958979000000)*t; + + // [17,19]: 8,22,24, + tf = CONSTANT(-0.112621225039000000)*f[8] + CONSTANT(0.045015157794100001)*f[22] + CONSTANT(0.119098912753000000)*f[24]; + tg = CONSTANT(-0.112621225039000000)*g[8] + CONSTANT(0.045015157794100001)*g[22] + CONSTANT(0.119098912753000000)*g[24]; + y[17] += tf*g[19] + tg*f[19]; + y[19] += tf*g[17] + tg*f[17]; + t = f[17] * g[19] 
+ f[19] * g[17]; + y[8] += CONSTANT(-0.112621225039000000)*t; + y[22] += CONSTANT(0.045015157794100001)*t; + y[24] += CONSTANT(0.119098912753000000)*t; + + // [17,21]: 16,4,18, + tf = CONSTANT(-0.119098912754999990)*f[16] + CONSTANT(-0.112621225039000000)*f[4] + CONSTANT(0.045015157794399997)*f[18]; + tg = CONSTANT(-0.119098912754999990)*g[16] + CONSTANT(-0.112621225039000000)*g[4] + CONSTANT(0.045015157794399997)*g[18]; + y[17] += tf*g[21] + tg*f[21]; + y[21] += tf*g[17] + tg*f[17]; + t = f[17] * g[21] + f[21] * g[17]; + y[16] += CONSTANT(-0.119098912754999990)*t; + y[4] += CONSTANT(-0.112621225039000000)*t; + y[18] += CONSTANT(0.045015157794399997)*t; + + // [18,18]: 6,0,20,24, + tf = CONSTANT(0.065535909662600006)*f[6] + CONSTANT(0.282094791771999980)*f[0] + CONSTANT(-0.083698454702400005)*f[20] + CONSTANT(-0.135045473384000000)*f[24]; + tg = CONSTANT(0.065535909662600006)*g[6] + CONSTANT(0.282094791771999980)*g[0] + CONSTANT(-0.083698454702400005)*g[20] + CONSTANT(-0.135045473384000000)*g[24]; + y[18] += tf*g[18] + tg*f[18]; + t = f[18] * g[18]; + y[6] += CONSTANT(0.065535909662600006)*t; + y[0] += CONSTANT(0.282094791771999980)*t; + y[20] += CONSTANT(-0.083698454702400005)*t; + y[24] += CONSTANT(-0.135045473384000000)*t; + + // [18,19]: 7,21,23, + tf = CONSTANT(0.090297865407399994)*f[7] + CONSTANT(0.102084782359000000)*f[21] + CONSTANT(-0.045015157794399997)*f[23]; + tg = CONSTANT(0.090297865407399994)*g[7] + CONSTANT(0.102084782359000000)*g[21] + CONSTANT(-0.045015157794399997)*g[23]; + y[18] += tf*g[19] + tg*f[19]; + y[19] += tf*g[18] + tg*f[18]; + t = f[18] * g[19] + f[19] * g[18]; + y[7] += CONSTANT(0.090297865407399994)*t; + y[21] += CONSTANT(0.102084782359000000)*t; + y[23] += CONSTANT(-0.045015157794399997)*t; + + // [19,19]: 6,8,0,20,22, + tf = CONSTANT(0.139263808033999990)*f[6] + CONSTANT(-0.141889406570999990)*f[8] + CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.068480553847200004)*f[20] + CONSTANT(-0.102084782360000000)*f[22]; + tg = 
CONSTANT(0.139263808033999990)*g[6] + CONSTANT(-0.141889406570999990)*g[8] + CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.068480553847200004)*g[20] + CONSTANT(-0.102084782360000000)*g[22]; + y[19] += tf*g[19] + tg*f[19]; + t = f[19] * g[19]; + y[6] += CONSTANT(0.139263808033999990)*t; + y[8] += CONSTANT(-0.141889406570999990)*t; + y[0] += CONSTANT(0.282094791773999990)*t; + y[20] += CONSTANT(0.068480553847200004)*t; + y[22] += CONSTANT(-0.102084782360000000)*t; + + // [20,20]: 6,0,20, + tf = CONSTANT(0.163839797503000010)*f[6] + CONSTANT(0.282094802232000010)*f[0]; + tg = CONSTANT(0.163839797503000010)*g[6] + CONSTANT(0.282094802232000010)*g[0]; + y[20] += tf*g[20] + tg*f[20]; + t = f[20] * g[20]; + y[6] += CONSTANT(0.163839797503000010)*t; + y[0] += CONSTANT(0.282094802232000010)*t; + y[20] += CONSTANT(0.136961139005999990)*t; + + // [21,21]: 6,20,0,8,22, + tf = CONSTANT(0.139263808033999990)*f[6] + CONSTANT(0.068480553847200004)*f[20] + CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.141889406570999990)*f[8] + CONSTANT(0.102084782360000000)*f[22]; + tg = CONSTANT(0.139263808033999990)*g[6] + CONSTANT(0.068480553847200004)*g[20] + CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.141889406570999990)*g[8] + CONSTANT(0.102084782360000000)*g[22]; + y[21] += tf*g[21] + tg*f[21]; + t = f[21] * g[21]; + y[6] += CONSTANT(0.139263808033999990)*t; + y[20] += CONSTANT(0.068480553847200004)*t; + y[0] += CONSTANT(0.282094791773999990)*t; + y[8] += CONSTANT(0.141889406570999990)*t; + y[22] += CONSTANT(0.102084782360000000)*t; + + // [21,23]: 8,22,24, + tf = CONSTANT(-0.112621225039000000)*f[8] + CONSTANT(0.045015157794100001)*f[22] + CONSTANT(-0.119098912753000000)*f[24]; + tg = CONSTANT(-0.112621225039000000)*g[8] + CONSTANT(0.045015157794100001)*g[22] + CONSTANT(-0.119098912753000000)*g[24]; + y[21] += tf*g[23] + tg*f[23]; + y[23] += tf*g[21] + tg*f[21]; + t = f[21] * g[23] + f[23] * g[21]; + y[8] += CONSTANT(-0.112621225039000000)*t; + y[22] += 
CONSTANT(0.045015157794100001)*t; + y[24] += CONSTANT(-0.119098912753000000)*t; + + // [22,22]: 6,20,0,24, + tf = CONSTANT(0.065535909662600006)*f[6] + CONSTANT(-0.083698454702400005)*f[20] + CONSTANT(0.282094791771999980)*f[0] + CONSTANT(0.135045473384000000)*f[24]; + tg = CONSTANT(0.065535909662600006)*g[6] + CONSTANT(-0.083698454702400005)*g[20] + CONSTANT(0.282094791771999980)*g[0] + CONSTANT(0.135045473384000000)*g[24]; + y[22] += tf*g[22] + tg*f[22]; + t = f[22] * g[22]; + y[6] += CONSTANT(0.065535909662600006)*t; + y[20] += CONSTANT(-0.083698454702400005)*t; + y[0] += CONSTANT(0.282094791771999980)*t; + y[24] += CONSTANT(0.135045473384000000)*t; + + // [23,23]: 6,20,0, + tf = CONSTANT(-0.057343920955899998)*f[6] + CONSTANT(-0.159787958979000000)*f[20] + CONSTANT(0.282094791768999990)*f[0]; + tg = CONSTANT(-0.057343920955899998)*g[6] + CONSTANT(-0.159787958979000000)*g[20] + CONSTANT(0.282094791768999990)*g[0]; + y[23] += tf*g[23] + tg*f[23]; + t = f[23] * g[23]; + y[6] += CONSTANT(-0.057343920955899998)*t; + y[20] += CONSTANT(-0.159787958979000000)*t; + y[0] += CONSTANT(0.282094791768999990)*t; + + // [24,24]: 6,0,20, + tf = CONSTANT(-0.229375683829000000)*f[6] + CONSTANT(0.282094791763999990)*f[0] + CONSTANT(0.106525305981000000)*f[20]; + tg = CONSTANT(-0.229375683829000000)*g[6] + CONSTANT(0.282094791763999990)*g[0] + CONSTANT(0.106525305981000000)*g[20]; + y[24] += tf*g[24] + tg*f[24]; + t = f[24] * g[24]; + y[6] += CONSTANT(-0.229375683829000000)*t; + y[0] += CONSTANT(0.282094791763999990)*t; + y[20] += CONSTANT(0.106525305981000000)*t; + + // multiply count=1135 + + return y; +} + + +//------------------------------------------------------------------------------------- +// http://msdn.microsoft.com/en-us/library/windows/desktop/bb232909.aspx +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +float* DirectX::XMSHMultiply6( + float *y, + const float *f, + const float *g) noexcept +{ + if (!y 
|| !f || !g) + return nullptr; + + REAL tf, tg, t; + // [0,0]: 0, + y[0] = CONSTANT(0.282094792935999980)*f[0] * g[0]; + + // [1,1]: 0,6,8, + tf = CONSTANT(0.282094791773000010)*f[0] + CONSTANT(-0.126156626101000010)*f[6] + CONSTANT(-0.218509686119999990)*f[8]; + tg = CONSTANT(0.282094791773000010)*g[0] + CONSTANT(-0.126156626101000010)*g[6] + CONSTANT(-0.218509686119999990)*g[8]; + y[1] = tf*g[1] + tg*f[1]; + t = f[1] * g[1]; + y[0] += CONSTANT(0.282094791773000010)*t; + y[6] = CONSTANT(-0.126156626101000010)*t; + y[8] = CONSTANT(-0.218509686119999990)*t; + + // [1,4]: 3,13,15, + tf = CONSTANT(0.218509686114999990)*f[3] + CONSTANT(-0.058399170082300000)*f[13] + CONSTANT(-0.226179013157999990)*f[15]; + tg = CONSTANT(0.218509686114999990)*g[3] + CONSTANT(-0.058399170082300000)*g[13] + CONSTANT(-0.226179013157999990)*g[15]; + y[1] += tf*g[4] + tg*f[4]; + y[4] = tf*g[1] + tg*f[1]; + t = f[1] * g[4] + f[4] * g[1]; + y[3] = CONSTANT(0.218509686114999990)*t; + y[13] = CONSTANT(-0.058399170082300000)*t; + y[15] = CONSTANT(-0.226179013157999990)*t; + + // [1,5]: 2,12, + tf = CONSTANT(0.218509686118000010)*f[2] + CONSTANT(-0.143048168103000000)*f[12]; + tg = CONSTANT(0.218509686118000010)*g[2] + CONSTANT(-0.143048168103000000)*g[12]; + y[1] += tf*g[5] + tg*f[5]; + y[5] = tf*g[1] + tg*f[1]; + t = f[1] * g[5] + f[5] * g[1]; + y[2] = CONSTANT(0.218509686118000010)*t; + y[12] = CONSTANT(-0.143048168103000000)*t; + + // [1,11]: 6,8,20,22, + tf = CONSTANT(0.202300659402999990)*f[6] + CONSTANT(0.058399170081799998)*f[8] + CONSTANT(-0.150786008773000000)*f[20] + CONSTANT(-0.168583882836999990)*f[22]; + tg = CONSTANT(0.202300659402999990)*g[6] + CONSTANT(0.058399170081799998)*g[8] + CONSTANT(-0.150786008773000000)*g[20] + CONSTANT(-0.168583882836999990)*g[22]; + y[1] += tf*g[11] + tg*f[11]; + y[11] = tf*g[1] + tg*f[1]; + t = f[1] * g[11] + f[11] * g[1]; + y[6] += CONSTANT(0.202300659402999990)*t; + y[8] += CONSTANT(0.058399170081799998)*t; + y[20] = 
CONSTANT(-0.150786008773000000)*t; + y[22] = CONSTANT(-0.168583882836999990)*t; + + // [1,16]: 15,33,35, + tf = CONSTANT(0.230329432973999990)*f[15] + CONSTANT(-0.034723468517399998)*f[33] + CONSTANT(-0.232932108051999990)*f[35]; + tg = CONSTANT(0.230329432973999990)*g[15] + CONSTANT(-0.034723468517399998)*g[33] + CONSTANT(-0.232932108051999990)*g[35]; + y[1] += tf*g[16] + tg*f[16]; + y[16] = tf*g[1] + tg*f[1]; + t = f[1] * g[16] + f[16] * g[1]; + y[15] += CONSTANT(0.230329432973999990)*t; + y[33] = CONSTANT(-0.034723468517399998)*t; + y[35] = CONSTANT(-0.232932108051999990)*t; + + // [1,18]: 15,13,31,33, + tf = CONSTANT(0.043528171377799997)*f[15] + CONSTANT(0.168583882834000000)*f[13] + CONSTANT(-0.085054779966799998)*f[31] + CONSTANT(-0.183739324705999990)*f[33]; + tg = CONSTANT(0.043528171377799997)*g[15] + CONSTANT(0.168583882834000000)*g[13] + CONSTANT(-0.085054779966799998)*g[31] + CONSTANT(-0.183739324705999990)*g[33]; + y[1] += tf*g[18] + tg*f[18]; + y[18] = tf*g[1] + tg*f[1]; + t = f[1] * g[18] + f[18] * g[1]; + y[15] += CONSTANT(0.043528171377799997)*t; + y[13] += CONSTANT(0.168583882834000000)*t; + y[31] = CONSTANT(-0.085054779966799998)*t; + y[33] += CONSTANT(-0.183739324705999990)*t; + + // [1,19]: 14,12,30,32, + tf = CONSTANT(0.075393004386399995)*f[14] + CONSTANT(0.194663900273000010)*f[12] + CONSTANT(-0.155288072037000010)*f[30] + CONSTANT(-0.159122922869999990)*f[32]; + tg = CONSTANT(0.075393004386399995)*g[14] + CONSTANT(0.194663900273000010)*g[12] + CONSTANT(-0.155288072037000010)*g[30] + CONSTANT(-0.159122922869999990)*g[32]; + y[1] += tf*g[19] + tg*f[19]; + y[19] = tf*g[1] + tg*f[1]; + t = f[1] * g[19] + f[19] * g[1]; + y[14] = CONSTANT(0.075393004386399995)*t; + y[12] += CONSTANT(0.194663900273000010)*t; + y[30] = CONSTANT(-0.155288072037000010)*t; + y[32] = CONSTANT(-0.159122922869999990)*t; + + // [1,24]: 9,25,27, + tf = CONSTANT(-0.230329432978999990)*f[9] + CONSTANT(0.232932108049000000)*f[25] + CONSTANT(0.034723468517100002)*f[27]; + tg 
= CONSTANT(-0.230329432978999990)*g[9] + CONSTANT(0.232932108049000000)*g[25] + CONSTANT(0.034723468517100002)*g[27]; + y[1] += tf*g[24] + tg*f[24]; + y[24] = tf*g[1] + tg*f[1]; + t = f[1] * g[24] + f[24] * g[1]; + y[9] = CONSTANT(-0.230329432978999990)*t; + y[25] = CONSTANT(0.232932108049000000)*t; + y[27] = CONSTANT(0.034723468517100002)*t; + + // [1,29]: 22,20, + tf = CONSTANT(0.085054779965999999)*f[22] + CONSTANT(0.190188269815000010)*f[20]; + tg = CONSTANT(0.085054779965999999)*g[22] + CONSTANT(0.190188269815000010)*g[20]; + y[1] += tf*g[29] + tg*f[29]; + y[29] = tf*g[1] + tg*f[1]; + t = f[1] * g[29] + f[29] * g[1]; + y[22] += CONSTANT(0.085054779965999999)*t; + y[20] += CONSTANT(0.190188269815000010)*t; + + // [2,2]: 0,6, + tf = CONSTANT(0.282094795249000000)*f[0] + CONSTANT(0.252313259986999990)*f[6]; + tg = CONSTANT(0.282094795249000000)*g[0] + CONSTANT(0.252313259986999990)*g[6]; + y[2] += tf*g[2] + tg*f[2]; + t = f[2] * g[2]; + y[0] += CONSTANT(0.282094795249000000)*t; + y[6] += CONSTANT(0.252313259986999990)*t; + + // [2,12]: 6,20, + tf = CONSTANT(0.247766706973999990)*f[6] + CONSTANT(0.246232537174000010)*f[20]; + tg = CONSTANT(0.247766706973999990)*g[6] + CONSTANT(0.246232537174000010)*g[20]; + y[2] += tf*g[12] + tg*f[12]; + y[12] += tf*g[2] + tg*f[2]; + t = f[2] * g[12] + f[12] * g[2]; + y[6] += CONSTANT(0.247766706973999990)*t; + y[20] += CONSTANT(0.246232537174000010)*t; + + // [2,20]: 30, + tf = CONSTANT(0.245532020560000010)*f[30]; + tg = CONSTANT(0.245532020560000010)*g[30]; + y[2] += tf*g[20] + tg*f[20]; + y[20] += tf*g[2] + tg*f[2]; + t = f[2] * g[20] + f[20] * g[2]; + y[30] += CONSTANT(0.245532020560000010)*t; + + // [3,3]: 0,6,8, + tf = CONSTANT(0.282094791773000010)*f[0] + CONSTANT(-0.126156626101000010)*f[6] + CONSTANT(0.218509686119999990)*f[8]; + tg = CONSTANT(0.282094791773000010)*g[0] + CONSTANT(-0.126156626101000010)*g[6] + CONSTANT(0.218509686119999990)*g[8]; + y[3] += tf*g[3] + tg*f[3]; + t = f[3] * g[3]; + y[0] += 
CONSTANT(0.282094791773000010)*t; + y[6] += CONSTANT(-0.126156626101000010)*t; + y[8] += CONSTANT(0.218509686119999990)*t; + + // [3,7]: 2,12, + tf = CONSTANT(0.218509686118000010)*f[2] + CONSTANT(-0.143048168103000000)*f[12]; + tg = CONSTANT(0.218509686118000010)*g[2] + CONSTANT(-0.143048168103000000)*g[12]; + y[3] += tf*g[7] + tg*f[7]; + y[7] = tf*g[3] + tg*f[3]; + t = f[3] * g[7] + f[7] * g[3]; + y[2] += CONSTANT(0.218509686118000010)*t; + y[12] += CONSTANT(-0.143048168103000000)*t; + + // [3,13]: 8,6,20,22, + tf = CONSTANT(-0.058399170081799998)*f[8] + CONSTANT(0.202300659402999990)*f[6] + CONSTANT(-0.150786008773000000)*f[20] + CONSTANT(0.168583882836999990)*f[22]; + tg = CONSTANT(-0.058399170081799998)*g[8] + CONSTANT(0.202300659402999990)*g[6] + CONSTANT(-0.150786008773000000)*g[20] + CONSTANT(0.168583882836999990)*g[22]; + y[3] += tf*g[13] + tg*f[13]; + y[13] += tf*g[3] + tg*f[3]; + t = f[3] * g[13] + f[13] * g[3]; + y[8] += CONSTANT(-0.058399170081799998)*t; + y[6] += CONSTANT(0.202300659402999990)*t; + y[20] += CONSTANT(-0.150786008773000000)*t; + y[22] += CONSTANT(0.168583882836999990)*t; + + // [3,16]: 9,25,27, + tf = CONSTANT(0.230329432973999990)*f[9] + CONSTANT(0.232932108051999990)*f[25] + CONSTANT(-0.034723468517399998)*f[27]; + tg = CONSTANT(0.230329432973999990)*g[9] + CONSTANT(0.232932108051999990)*g[25] + CONSTANT(-0.034723468517399998)*g[27]; + y[3] += tf*g[16] + tg*f[16]; + y[16] += tf*g[3] + tg*f[3]; + t = f[3] * g[16] + f[16] * g[3]; + y[9] += CONSTANT(0.230329432973999990)*t; + y[25] += CONSTANT(0.232932108051999990)*t; + y[27] += CONSTANT(-0.034723468517399998)*t; + + // [3,21]: 12,14,30,32, + tf = CONSTANT(0.194663900273000010)*f[12] + CONSTANT(-0.075393004386399995)*f[14] + CONSTANT(-0.155288072037000010)*f[30] + CONSTANT(0.159122922869999990)*f[32]; + tg = CONSTANT(0.194663900273000010)*g[12] + CONSTANT(-0.075393004386399995)*g[14] + CONSTANT(-0.155288072037000010)*g[30] + CONSTANT(0.159122922869999990)*g[32]; + y[3] += tf*g[21] + 
tg*f[21]; + y[21] = tf*g[3] + tg*f[3]; + t = f[3] * g[21] + f[21] * g[3]; + y[12] += CONSTANT(0.194663900273000010)*t; + y[14] += CONSTANT(-0.075393004386399995)*t; + y[30] += CONSTANT(-0.155288072037000010)*t; + y[32] += CONSTANT(0.159122922869999990)*t; + + // [3,24]: 15,33,35, + tf = CONSTANT(0.230329432978999990)*f[15] + CONSTANT(-0.034723468517100002)*f[33] + CONSTANT(0.232932108049000000)*f[35]; + tg = CONSTANT(0.230329432978999990)*g[15] + CONSTANT(-0.034723468517100002)*g[33] + CONSTANT(0.232932108049000000)*g[35]; + y[3] += tf*g[24] + tg*f[24]; + y[24] += tf*g[3] + tg*f[3]; + t = f[3] * g[24] + f[24] * g[3]; + y[15] += CONSTANT(0.230329432978999990)*t; + y[33] += CONSTANT(-0.034723468517100002)*t; + y[35] += CONSTANT(0.232932108049000000)*t; + + // [3,31]: 20,22, + tf = CONSTANT(0.190188269815000010)*f[20] + CONSTANT(-0.085054779965999999)*f[22]; + tg = CONSTANT(0.190188269815000010)*g[20] + CONSTANT(-0.085054779965999999)*g[22]; + y[3] += tf*g[31] + tg*f[31]; + y[31] += tf*g[3] + tg*f[3]; + t = f[3] * g[31] + f[31] * g[3]; + y[20] += CONSTANT(0.190188269815000010)*t; + y[22] += CONSTANT(-0.085054779965999999)*t; + + // [4,4]: 0,6,20,24, + tf = CONSTANT(0.282094791770000020)*f[0] + CONSTANT(-0.180223751576000010)*f[6] + CONSTANT(0.040299255967500003)*f[20] + CONSTANT(-0.238413613505999990)*f[24]; + tg = CONSTANT(0.282094791770000020)*g[0] + CONSTANT(-0.180223751576000010)*g[6] + CONSTANT(0.040299255967500003)*g[20] + CONSTANT(-0.238413613505999990)*g[24]; + y[4] += tf*g[4] + tg*f[4]; + t = f[4] * g[4]; + y[0] += CONSTANT(0.282094791770000020)*t; + y[6] += CONSTANT(-0.180223751576000010)*t; + y[20] += CONSTANT(0.040299255967500003)*t; + y[24] += CONSTANT(-0.238413613505999990)*t; + + // [4,5]: 7,21,23, + tf = CONSTANT(0.156078347226000000)*f[7] + CONSTANT(-0.063718718434399996)*f[21] + CONSTANT(-0.168583882835000000)*f[23]; + tg = CONSTANT(0.156078347226000000)*g[7] + CONSTANT(-0.063718718434399996)*g[21] + CONSTANT(-0.168583882835000000)*g[23]; + y[4] += 
tf*g[5] + tg*f[5]; + y[5] += tf*g[4] + tg*f[4]; + t = f[4] * g[5] + f[5] * g[4]; + y[7] += CONSTANT(0.156078347226000000)*t; + y[21] += CONSTANT(-0.063718718434399996)*t; + y[23] = CONSTANT(-0.168583882835000000)*t; + + // [4,9]: 3,13,31,35, + tf = CONSTANT(0.226179013157999990)*f[3] + CONSTANT(-0.094031597258400004)*f[13] + CONSTANT(0.016943317729299998)*f[31] + CONSTANT(-0.245532000542000000)*f[35]; + tg = CONSTANT(0.226179013157999990)*g[3] + CONSTANT(-0.094031597258400004)*g[13] + CONSTANT(0.016943317729299998)*g[31] + CONSTANT(-0.245532000542000000)*g[35]; + y[4] += tf*g[9] + tg*f[9]; + y[9] += tf*g[4] + tg*f[4]; + t = f[4] * g[9] + f[9] * g[4]; + y[3] += CONSTANT(0.226179013157999990)*t; + y[13] += CONSTANT(-0.094031597258400004)*t; + y[31] += CONSTANT(0.016943317729299998)*t; + y[35] += CONSTANT(-0.245532000542000000)*t; + + // [4,10]: 2,12,30,34, + tf = CONSTANT(0.184674390919999990)*f[2] + CONSTANT(-0.188063194517999990)*f[12] + CONSTANT(0.053579475144400000)*f[30] + CONSTANT(-0.190188269816000010)*f[34]; + tg = CONSTANT(0.184674390919999990)*g[2] + CONSTANT(-0.188063194517999990)*g[12] + CONSTANT(0.053579475144400000)*g[30] + CONSTANT(-0.190188269816000010)*g[34]; + y[4] += tf*g[10] + tg*f[10]; + y[10] = tf*g[4] + tg*f[4]; + t = f[4] * g[10] + f[10] * g[4]; + y[2] += CONSTANT(0.184674390919999990)*t; + y[12] += CONSTANT(-0.188063194517999990)*t; + y[30] += CONSTANT(0.053579475144400000)*t; + y[34] = CONSTANT(-0.190188269816000010)*t; + + // [4,11]: 3,13,15,31,33, + tf = CONSTANT(-0.058399170082300000)*f[3] + CONSTANT(0.145673124078000010)*f[13] + CONSTANT(0.094031597258400004)*f[15] + CONSTANT(-0.065621187395699998)*f[31] + CONSTANT(-0.141757966610000010)*f[33]; + tg = CONSTANT(-0.058399170082300000)*g[3] + CONSTANT(0.145673124078000010)*g[13] + CONSTANT(0.094031597258400004)*g[15] + CONSTANT(-0.065621187395699998)*g[31] + CONSTANT(-0.141757966610000010)*g[33]; + y[4] += tf*g[11] + tg*f[11]; + y[11] += tf*g[4] + tg*f[4]; + t = f[4] * g[11] + f[11] * g[4]; 
+ y[3] += CONSTANT(-0.058399170082300000)*t; + y[13] += CONSTANT(0.145673124078000010)*t; + y[15] += CONSTANT(0.094031597258400004)*t; + y[31] += CONSTANT(-0.065621187395699998)*t; + y[33] += CONSTANT(-0.141757966610000010)*t; + + // [4,16]: 8,22, + tf = CONSTANT(0.238413613494000000)*f[8] + CONSTANT(-0.075080816693699995)*f[22]; + tg = CONSTANT(0.238413613494000000)*g[8] + CONSTANT(-0.075080816693699995)*g[22]; + y[4] += tf*g[16] + tg*f[16]; + y[16] += tf*g[4] + tg*f[4]; + t = f[4] * g[16] + f[16] * g[4]; + y[8] += CONSTANT(0.238413613494000000)*t; + y[22] += CONSTANT(-0.075080816693699995)*t; + + // [4,18]: 6,20,24, + tf = CONSTANT(0.156078347226000000)*f[6] + CONSTANT(-0.190364615029000010)*f[20] + CONSTANT(0.075080816691500005)*f[24]; + tg = CONSTANT(0.156078347226000000)*g[6] + CONSTANT(-0.190364615029000010)*g[20] + CONSTANT(0.075080816691500005)*g[24]; + y[4] += tf*g[18] + tg*f[18]; + y[18] += tf*g[4] + tg*f[4]; + t = f[4] * g[18] + f[18] * g[4]; + y[6] += CONSTANT(0.156078347226000000)*t; + y[20] += CONSTANT(-0.190364615029000010)*t; + y[24] += CONSTANT(0.075080816691500005)*t; + + // [4,19]: 7,21,23, + tf = CONSTANT(-0.063718718434399996)*f[7] + CONSTANT(0.141889406569999990)*f[21] + CONSTANT(0.112621225039000000)*f[23]; + tg = CONSTANT(-0.063718718434399996)*g[7] + CONSTANT(0.141889406569999990)*g[21] + CONSTANT(0.112621225039000000)*g[23]; + y[4] += tf*g[19] + tg*f[19]; + y[19] += tf*g[4] + tg*f[4]; + t = f[4] * g[19] + f[19] * g[4]; + y[7] += CONSTANT(-0.063718718434399996)*t; + y[21] += CONSTANT(0.141889406569999990)*t; + y[23] += CONSTANT(0.112621225039000000)*t; + + // [4,25]: 15,33, + tf = CONSTANT(0.245532000542000000)*f[15] + CONSTANT(-0.062641347680800000)*f[33]; + tg = CONSTANT(0.245532000542000000)*g[15] + CONSTANT(-0.062641347680800000)*g[33]; + y[4] += tf*g[25] + tg*f[25]; + y[25] += tf*g[4] + tg*f[4]; + t = f[4] * g[25] + f[25] * g[4]; + y[15] += CONSTANT(0.245532000542000000)*t; + y[33] += CONSTANT(-0.062641347680800000)*t; + + // [4,26]: 
14,32, + tf = CONSTANT(0.190188269806999990)*f[14] + CONSTANT(-0.097043558542400002)*f[32]; + tg = CONSTANT(0.190188269806999990)*g[14] + CONSTANT(-0.097043558542400002)*g[32]; + y[4] += tf*g[26] + tg*f[26]; + y[26] = tf*g[4] + tg*f[4]; + t = f[4] * g[26] + f[26] * g[4]; + y[14] += CONSTANT(0.190188269806999990)*t; + y[32] += CONSTANT(-0.097043558542400002)*t; + + // [4,27]: 13,31,35, + tf = CONSTANT(0.141757966610000010)*f[13] + CONSTANT(-0.121034582549000000)*f[31] + CONSTANT(0.062641347680800000)*f[35]; + tg = CONSTANT(0.141757966610000010)*g[13] + CONSTANT(-0.121034582549000000)*g[31] + CONSTANT(0.062641347680800000)*g[35]; + y[4] += tf*g[27] + tg*f[27]; + y[27] += tf*g[4] + tg*f[4]; + t = f[4] * g[27] + f[27] * g[4]; + y[13] += CONSTANT(0.141757966610000010)*t; + y[31] += CONSTANT(-0.121034582549000000)*t; + y[35] += CONSTANT(0.062641347680800000)*t; + + // [4,28]: 12,30,34, + tf = CONSTANT(0.141757966609000000)*f[12] + CONSTANT(-0.191372478254000000)*f[30] + CONSTANT(0.097043558538899996)*f[34]; + tg = CONSTANT(0.141757966609000000)*g[12] + CONSTANT(-0.191372478254000000)*g[30] + CONSTANT(0.097043558538899996)*g[34]; + y[4] += tf*g[28] + tg*f[28]; + y[28] = tf*g[4] + tg*f[4]; + t = f[4] * g[28] + f[28] * g[4]; + y[12] += CONSTANT(0.141757966609000000)*t; + y[30] += CONSTANT(-0.191372478254000000)*t; + y[34] += CONSTANT(0.097043558538899996)*t; + + // [4,29]: 13,15,31,33, + tf = CONSTANT(-0.065621187395699998)*f[13] + CONSTANT(-0.016943317729299998)*f[15] + CONSTANT(0.140070311613999990)*f[31] + CONSTANT(0.121034582549000000)*f[33]; + tg = CONSTANT(-0.065621187395699998)*g[13] + CONSTANT(-0.016943317729299998)*g[15] + CONSTANT(0.140070311613999990)*g[31] + CONSTANT(0.121034582549000000)*g[33]; + y[4] += tf*g[29] + tg*f[29]; + y[29] += tf*g[4] + tg*f[4]; + t = f[4] * g[29] + f[29] * g[4]; + y[13] += CONSTANT(-0.065621187395699998)*t; + y[15] += CONSTANT(-0.016943317729299998)*t; + y[31] += CONSTANT(0.140070311613999990)*t; + y[33] += 
CONSTANT(0.121034582549000000)*t; + + // [5,5]: 0,6,8,20,22, + tf = CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.090111875786499998)*f[6] + CONSTANT(-0.156078347227999990)*f[8] + CONSTANT(-0.161197023870999990)*f[20] + CONSTANT(-0.180223751574000000)*f[22]; + tg = CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.090111875786499998)*g[6] + CONSTANT(-0.156078347227999990)*g[8] + CONSTANT(-0.161197023870999990)*g[20] + CONSTANT(-0.180223751574000000)*g[22]; + y[5] += tf*g[5] + tg*f[5]; + t = f[5] * g[5]; + y[0] += CONSTANT(0.282094791773999990)*t; + y[6] += CONSTANT(0.090111875786499998)*t; + y[8] += CONSTANT(-0.156078347227999990)*t; + y[20] += CONSTANT(-0.161197023870999990)*t; + y[22] += CONSTANT(-0.180223751574000000)*t; + + // [5,10]: 3,13,15,31,33, + tf = CONSTANT(0.184674390919999990)*f[3] + CONSTANT(0.115164716490000000)*f[13] + CONSTANT(-0.148677009678999990)*f[15] + CONSTANT(-0.083004965974099995)*f[31] + CONSTANT(-0.179311220383999990)*f[33]; + tg = CONSTANT(0.184674390919999990)*g[3] + CONSTANT(0.115164716490000000)*g[13] + CONSTANT(-0.148677009678999990)*g[15] + CONSTANT(-0.083004965974099995)*g[31] + CONSTANT(-0.179311220383999990)*g[33]; + y[5] += tf*g[10] + tg*f[10]; + y[10] += tf*g[5] + tg*f[5]; + t = f[5] * g[10] + f[10] * g[5]; + y[3] += CONSTANT(0.184674390919999990)*t; + y[13] += CONSTANT(0.115164716490000000)*t; + y[15] += CONSTANT(-0.148677009678999990)*t; + y[31] += CONSTANT(-0.083004965974099995)*t; + y[33] += CONSTANT(-0.179311220383999990)*t; + + // [5,11]: 2,12,14,30,32, + tf = CONSTANT(0.233596680327000010)*f[2] + CONSTANT(0.059470803871800003)*f[12] + CONSTANT(-0.115164716491000000)*f[14] + CONSTANT(-0.169433177294000010)*f[30] + CONSTANT(-0.173617342585000000)*f[32]; + tg = CONSTANT(0.233596680327000010)*g[2] + CONSTANT(0.059470803871800003)*g[12] + CONSTANT(-0.115164716491000000)*g[14] + CONSTANT(-0.169433177294000010)*g[30] + CONSTANT(-0.173617342585000000)*g[32]; + y[5] += tf*g[11] + tg*f[11]; + y[11] += tf*g[5] + tg*f[5]; + t = 
f[5] * g[11] + f[11] * g[5]; + y[2] += CONSTANT(0.233596680327000010)*t; + y[12] += CONSTANT(0.059470803871800003)*t; + y[14] += CONSTANT(-0.115164716491000000)*t; + y[30] += CONSTANT(-0.169433177294000010)*t; + y[32] += CONSTANT(-0.173617342585000000)*t; + + // [5,14]: 9,1,27,29, + tf = CONSTANT(0.148677009677999990)*f[9] + CONSTANT(-0.184674390923000000)*f[1] + CONSTANT(0.179311220382000010)*f[27] + CONSTANT(0.083004965973399999)*f[29]; + tg = CONSTANT(0.148677009677999990)*g[9] + CONSTANT(-0.184674390923000000)*g[1] + CONSTANT(0.179311220382000010)*g[27] + CONSTANT(0.083004965973399999)*g[29]; + y[5] += tf*g[14] + tg*f[14]; + y[14] += tf*g[5] + tg*f[5]; + t = f[5] * g[14] + f[14] * g[5]; + y[9] += CONSTANT(0.148677009677999990)*t; + y[1] += CONSTANT(-0.184674390923000000)*t; + y[27] += CONSTANT(0.179311220382000010)*t; + y[29] += CONSTANT(0.083004965973399999)*t; + + // [5,17]: 8,22,24, + tf = CONSTANT(0.168583882832999990)*f[8] + CONSTANT(0.132725386548000010)*f[22] + CONSTANT(-0.140463346189000000)*f[24]; + tg = CONSTANT(0.168583882832999990)*g[8] + CONSTANT(0.132725386548000010)*g[22] + CONSTANT(-0.140463346189000000)*g[24]; + y[5] += tf*g[17] + tg*f[17]; + y[17] = tf*g[5] + tg*f[5]; + t = f[5] * g[17] + f[17] * g[5]; + y[8] += CONSTANT(0.168583882832999990)*t; + y[22] += CONSTANT(0.132725386548000010)*t; + y[24] += CONSTANT(-0.140463346189000000)*t; + + // [5,18]: 7,21,23, + tf = CONSTANT(0.180223751571000010)*f[7] + CONSTANT(0.090297865407399994)*f[21] + CONSTANT(-0.132725386549000010)*f[23]; + tg = CONSTANT(0.180223751571000010)*g[7] + CONSTANT(0.090297865407399994)*g[21] + CONSTANT(-0.132725386549000010)*g[23]; + y[5] += tf*g[18] + tg*f[18]; + y[18] += tf*g[5] + tg*f[5]; + t = f[5] * g[18] + f[18] * g[5]; + y[7] += CONSTANT(0.180223751571000010)*t; + y[21] += CONSTANT(0.090297865407399994)*t; + y[23] += CONSTANT(-0.132725386549000010)*t; + + // [5,19]: 6,8,20,22, + tf = CONSTANT(0.220728115440999990)*f[6] + CONSTANT(0.063718718433900007)*f[8] + 
CONSTANT(0.044869370061299998)*f[20] + CONSTANT(-0.090297865408399999)*f[22]; + tg = CONSTANT(0.220728115440999990)*g[6] + CONSTANT(0.063718718433900007)*g[8] + CONSTANT(0.044869370061299998)*g[20] + CONSTANT(-0.090297865408399999)*g[22]; + y[5] += tf*g[19] + tg*f[19]; + y[19] += tf*g[5] + tg*f[5]; + t = f[5] * g[19] + f[19] * g[5]; + y[6] += CONSTANT(0.220728115440999990)*t; + y[8] += CONSTANT(0.063718718433900007)*t; + y[20] += CONSTANT(0.044869370061299998)*t; + y[22] += CONSTANT(-0.090297865408399999)*t; + + // [5,26]: 15,33,35, + tf = CONSTANT(0.155288072035000000)*f[15] + CONSTANT(0.138662534056999990)*f[33] + CONSTANT(-0.132882365179999990)*f[35]; + tg = CONSTANT(0.155288072035000000)*g[15] + CONSTANT(0.138662534056999990)*g[33] + CONSTANT(-0.132882365179999990)*g[35]; + y[5] += tf*g[26] + tg*f[26]; + y[26] += tf*g[5] + tg*f[5]; + t = f[5] * g[26] + f[26] * g[5]; + y[15] += CONSTANT(0.155288072035000000)*t; + y[33] += CONSTANT(0.138662534056999990)*t; + y[35] += CONSTANT(-0.132882365179999990)*t; + + // [5,28]: 15,13,31,33, + tf = CONSTANT(0.044827805096399997)*f[15] + CONSTANT(0.173617342584000000)*f[13] + CONSTANT(0.074118242118699995)*f[31] + CONSTANT(-0.114366930522000000)*f[33]; + tg = CONSTANT(0.044827805096399997)*g[15] + CONSTANT(0.173617342584000000)*g[13] + CONSTANT(0.074118242118699995)*g[31] + CONSTANT(-0.114366930522000000)*g[33]; + y[5] += tf*g[28] + tg*f[28]; + y[28] += tf*g[5] + tg*f[5]; + t = f[5] * g[28] + f[28] * g[5]; + y[15] += CONSTANT(0.044827805096399997)*t; + y[13] += CONSTANT(0.173617342584000000)*t; + y[31] += CONSTANT(0.074118242118699995)*t; + y[33] += CONSTANT(-0.114366930522000000)*t; + + // [5,29]: 12,30,32, + tf = CONSTANT(0.214317900578999990)*f[12] + CONSTANT(0.036165998945399999)*f[30] + CONSTANT(-0.074118242119099995)*f[32]; + tg = CONSTANT(0.214317900578999990)*g[12] + CONSTANT(0.036165998945399999)*g[30] + CONSTANT(-0.074118242119099995)*g[32]; + y[5] += tf*g[29] + tg*f[29]; + y[29] += tf*g[5] + tg*f[5]; + t = f[5] * 
g[29] + f[29] * g[5]; + y[12] += CONSTANT(0.214317900578999990)*t; + y[30] += CONSTANT(0.036165998945399999)*t; + y[32] += CONSTANT(-0.074118242119099995)*t; + + // [5,32]: 9,27, + tf = CONSTANT(-0.044827805096799997)*f[9] + CONSTANT(0.114366930522000000)*f[27]; + tg = CONSTANT(-0.044827805096799997)*g[9] + CONSTANT(0.114366930522000000)*g[27]; + y[5] += tf*g[32] + tg*f[32]; + y[32] += tf*g[5] + tg*f[5]; + t = f[5] * g[32] + f[32] * g[5]; + y[9] += CONSTANT(-0.044827805096799997)*t; + y[27] += CONSTANT(0.114366930522000000)*t; + + // [5,34]: 9,27,25, + tf = CONSTANT(-0.155288072036000010)*f[9] + CONSTANT(-0.138662534059000000)*f[27] + CONSTANT(0.132882365179000010)*f[25]; + tg = CONSTANT(-0.155288072036000010)*g[9] + CONSTANT(-0.138662534059000000)*g[27] + CONSTANT(0.132882365179000010)*g[25]; + y[5] += tf*g[34] + tg*f[34]; + y[34] += tf*g[5] + tg*f[5]; + t = f[5] * g[34] + f[34] * g[5]; + y[9] += CONSTANT(-0.155288072036000010)*t; + y[27] += CONSTANT(-0.138662534059000000)*t; + y[25] += CONSTANT(0.132882365179000010)*t; + + // [6,6]: 0,6,20, + tf = CONSTANT(0.282094797560000000)*f[0] + CONSTANT(0.241795553185999990)*f[20]; + tg = CONSTANT(0.282094797560000000)*g[0] + CONSTANT(0.241795553185999990)*g[20]; + y[6] += tf*g[6] + tg*f[6]; + t = f[6] * g[6]; + y[0] += CONSTANT(0.282094797560000000)*t; + y[6] += CONSTANT(0.180223764527000010)*t; + y[20] += CONSTANT(0.241795553185999990)*t; + + // [7,7]: 6,0,8,20,22, + tf = CONSTANT(0.090111875786499998)*f[6] + CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.156078347227999990)*f[8] + CONSTANT(-0.161197023870999990)*f[20] + CONSTANT(0.180223751574000000)*f[22]; + tg = CONSTANT(0.090111875786499998)*g[6] + CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.156078347227999990)*g[8] + CONSTANT(-0.161197023870999990)*g[20] + CONSTANT(0.180223751574000000)*g[22]; + y[7] += tf*g[7] + tg*f[7]; + t = f[7] * g[7]; + y[6] += CONSTANT(0.090111875786499998)*t; + y[0] += CONSTANT(0.282094791773999990)*t; + y[8] += 
CONSTANT(0.156078347227999990)*t; + y[20] += CONSTANT(-0.161197023870999990)*t; + y[22] += CONSTANT(0.180223751574000000)*t; + + // [7,10]: 9,1,11,27,29, + tf = CONSTANT(0.148677009678999990)*f[9] + CONSTANT(0.184674390919999990)*f[1] + CONSTANT(0.115164716490000000)*f[11] + CONSTANT(0.179311220383999990)*f[27] + CONSTANT(-0.083004965974099995)*f[29]; + tg = CONSTANT(0.148677009678999990)*g[9] + CONSTANT(0.184674390919999990)*g[1] + CONSTANT(0.115164716490000000)*g[11] + CONSTANT(0.179311220383999990)*g[27] + CONSTANT(-0.083004965974099995)*g[29]; + y[7] += tf*g[10] + tg*f[10]; + y[10] += tf*g[7] + tg*f[7]; + t = f[7] * g[10] + f[10] * g[7]; + y[9] += CONSTANT(0.148677009678999990)*t; + y[1] += CONSTANT(0.184674390919999990)*t; + y[11] += CONSTANT(0.115164716490000000)*t; + y[27] += CONSTANT(0.179311220383999990)*t; + y[29] += CONSTANT(-0.083004965974099995)*t; + + // [7,13]: 12,2,14,30,32, + tf = CONSTANT(0.059470803871800003)*f[12] + CONSTANT(0.233596680327000010)*f[2] + CONSTANT(0.115164716491000000)*f[14] + CONSTANT(-0.169433177294000010)*f[30] + CONSTANT(0.173617342585000000)*f[32]; + tg = CONSTANT(0.059470803871800003)*g[12] + CONSTANT(0.233596680327000010)*g[2] + CONSTANT(0.115164716491000000)*g[14] + CONSTANT(-0.169433177294000010)*g[30] + CONSTANT(0.173617342585000000)*g[32]; + y[7] += tf*g[13] + tg*f[13]; + y[13] += tf*g[7] + tg*f[7]; + t = f[7] * g[13] + f[13] * g[7]; + y[12] += CONSTANT(0.059470803871800003)*t; + y[2] += CONSTANT(0.233596680327000010)*t; + y[14] += CONSTANT(0.115164716491000000)*t; + y[30] += CONSTANT(-0.169433177294000010)*t; + y[32] += CONSTANT(0.173617342585000000)*t; + + // [7,14]: 3,15,31,33, + tf = CONSTANT(0.184674390923000000)*f[3] + CONSTANT(0.148677009677999990)*f[15] + CONSTANT(-0.083004965973399999)*f[31] + CONSTANT(0.179311220382000010)*f[33]; + tg = CONSTANT(0.184674390923000000)*g[3] + CONSTANT(0.148677009677999990)*g[15] + CONSTANT(-0.083004965973399999)*g[31] + CONSTANT(0.179311220382000010)*g[33]; + y[7] += tf*g[14] + 
tg*f[14]; + y[14] += tf*g[7] + tg*f[7]; + t = f[7] * g[14] + f[14] * g[7]; + y[3] += CONSTANT(0.184674390923000000)*t; + y[15] += CONSTANT(0.148677009677999990)*t; + y[31] += CONSTANT(-0.083004965973399999)*t; + y[33] += CONSTANT(0.179311220382000010)*t; + + // [7,17]: 16,4,18, + tf = CONSTANT(0.140463346187999990)*f[16] + CONSTANT(0.168583882835000000)*f[4] + CONSTANT(0.132725386549000010)*f[18]; + tg = CONSTANT(0.140463346187999990)*g[16] + CONSTANT(0.168583882835000000)*g[4] + CONSTANT(0.132725386549000010)*g[18]; + y[7] += tf*g[17] + tg*f[17]; + y[17] += tf*g[7] + tg*f[7]; + t = f[7] * g[17] + f[17] * g[7]; + y[16] += CONSTANT(0.140463346187999990)*t; + y[4] += CONSTANT(0.168583882835000000)*t; + y[18] += CONSTANT(0.132725386549000010)*t; + + // [7,21]: 8,20,6,22, + tf = CONSTANT(-0.063718718433900007)*f[8] + CONSTANT(0.044869370061299998)*f[20] + CONSTANT(0.220728115440999990)*f[6] + CONSTANT(0.090297865408399999)*f[22]; + tg = CONSTANT(-0.063718718433900007)*g[8] + CONSTANT(0.044869370061299998)*g[20] + CONSTANT(0.220728115440999990)*g[6] + CONSTANT(0.090297865408399999)*g[22]; + y[7] += tf*g[21] + tg*f[21]; + y[21] += tf*g[7] + tg*f[7]; + t = f[7] * g[21] + f[21] * g[7]; + y[8] += CONSTANT(-0.063718718433900007)*t; + y[20] += CONSTANT(0.044869370061299998)*t; + y[6] += CONSTANT(0.220728115440999990)*t; + y[22] += CONSTANT(0.090297865408399999)*t; + + // [7,23]: 8,22,24, + tf = CONSTANT(0.168583882832999990)*f[8] + CONSTANT(0.132725386548000010)*f[22] + CONSTANT(0.140463346189000000)*f[24]; + tg = CONSTANT(0.168583882832999990)*g[8] + CONSTANT(0.132725386548000010)*g[22] + CONSTANT(0.140463346189000000)*g[24]; + y[7] += tf*g[23] + tg*f[23]; + y[23] += tf*g[7] + tg*f[7]; + t = f[7] * g[23] + f[23] * g[7]; + y[8] += CONSTANT(0.168583882832999990)*t; + y[22] += CONSTANT(0.132725386548000010)*t; + y[24] += CONSTANT(0.140463346189000000)*t; + + // [7,26]: 9,25,27, + tf = CONSTANT(0.155288072035000000)*f[9] + CONSTANT(0.132882365179999990)*f[25] + 
CONSTANT(0.138662534056999990)*f[27]; + tg = CONSTANT(0.155288072035000000)*g[9] + CONSTANT(0.132882365179999990)*g[25] + CONSTANT(0.138662534056999990)*g[27]; + y[7] += tf*g[26] + tg*f[26]; + y[26] += tf*g[7] + tg*f[7]; + t = f[7] * g[26] + f[26] * g[7]; + y[9] += CONSTANT(0.155288072035000000)*t; + y[25] += CONSTANT(0.132882365179999990)*t; + y[27] += CONSTANT(0.138662534056999990)*t; + + // [7,28]: 27,11,9,29, + tf = CONSTANT(0.114366930522000000)*f[27] + CONSTANT(0.173617342584000000)*f[11] + CONSTANT(-0.044827805096399997)*f[9] + CONSTANT(0.074118242118699995)*f[29]; + tg = CONSTANT(0.114366930522000000)*g[27] + CONSTANT(0.173617342584000000)*g[11] + CONSTANT(-0.044827805096399997)*g[9] + CONSTANT(0.074118242118699995)*g[29]; + y[7] += tf*g[28] + tg*f[28]; + y[28] += tf*g[7] + tg*f[7]; + t = f[7] * g[28] + f[28] * g[7]; + y[27] += CONSTANT(0.114366930522000000)*t; + y[11] += CONSTANT(0.173617342584000000)*t; + y[9] += CONSTANT(-0.044827805096399997)*t; + y[29] += CONSTANT(0.074118242118699995)*t; + + // [7,31]: 30,12,32, + tf = CONSTANT(0.036165998945399999)*f[30] + CONSTANT(0.214317900578999990)*f[12] + CONSTANT(0.074118242119099995)*f[32]; + tg = CONSTANT(0.036165998945399999)*g[30] + CONSTANT(0.214317900578999990)*g[12] + CONSTANT(0.074118242119099995)*g[32]; + y[7] += tf*g[31] + tg*f[31]; + y[31] += tf*g[7] + tg*f[7]; + t = f[7] * g[31] + f[31] * g[7]; + y[30] += CONSTANT(0.036165998945399999)*t; + y[12] += CONSTANT(0.214317900578999990)*t; + y[32] += CONSTANT(0.074118242119099995)*t; + + // [7,32]: 15,33, + tf = CONSTANT(-0.044827805096799997)*f[15] + CONSTANT(0.114366930522000000)*f[33]; + tg = CONSTANT(-0.044827805096799997)*g[15] + CONSTANT(0.114366930522000000)*g[33]; + y[7] += tf*g[32] + tg*f[32]; + y[32] += tf*g[7] + tg*f[7]; + t = f[7] * g[32] + f[32] * g[7]; + y[15] += CONSTANT(-0.044827805096799997)*t; + y[33] += CONSTANT(0.114366930522000000)*t; + + // [7,34]: 15,33,35, + tf = CONSTANT(0.155288072036000010)*f[15] + 
CONSTANT(0.138662534059000000)*f[33] + CONSTANT(0.132882365179000010)*f[35]; + tg = CONSTANT(0.155288072036000010)*g[15] + CONSTANT(0.138662534059000000)*g[33] + CONSTANT(0.132882365179000010)*g[35]; + y[7] += tf*g[34] + tg*f[34]; + y[34] += tf*g[7] + tg*f[7]; + t = f[7] * g[34] + f[34] * g[7]; + y[15] += CONSTANT(0.155288072036000010)*t; + y[33] += CONSTANT(0.138662534059000000)*t; + y[35] += CONSTANT(0.132882365179000010)*t; + + // [8,8]: 0,6,20,24, + tf = CONSTANT(0.282094791770000020)*f[0] + CONSTANT(-0.180223751576000010)*f[6] + CONSTANT(0.040299255967500003)*f[20] + CONSTANT(0.238413613505999990)*f[24]; + tg = CONSTANT(0.282094791770000020)*g[0] + CONSTANT(-0.180223751576000010)*g[6] + CONSTANT(0.040299255967500003)*g[20] + CONSTANT(0.238413613505999990)*g[24]; + y[8] += tf*g[8] + tg*f[8]; + t = f[8] * g[8]; + y[0] += CONSTANT(0.282094791770000020)*t; + y[6] += CONSTANT(-0.180223751576000010)*t; + y[20] += CONSTANT(0.040299255967500003)*t; + y[24] += CONSTANT(0.238413613505999990)*t; + + // [8,9]: 1,11,25,29, + tf = CONSTANT(0.226179013155000000)*f[1] + CONSTANT(-0.094031597259499999)*f[11] + CONSTANT(0.245532000541000000)*f[25] + CONSTANT(0.016943317729199998)*f[29]; + tg = CONSTANT(0.226179013155000000)*g[1] + CONSTANT(-0.094031597259499999)*g[11] + CONSTANT(0.245532000541000000)*g[25] + CONSTANT(0.016943317729199998)*g[29]; + y[8] += tf*g[9] + tg*f[9]; + y[9] += tf*g[8] + tg*f[8]; + t = f[8] * g[9] + f[9] * g[8]; + y[1] += CONSTANT(0.226179013155000000)*t; + y[11] += CONSTANT(-0.094031597259499999)*t; + y[25] += CONSTANT(0.245532000541000000)*t; + y[29] += CONSTANT(0.016943317729199998)*t; + + // [8,14]: 2,12,30,34, + tf = CONSTANT(0.184674390919999990)*f[2] + CONSTANT(-0.188063194517999990)*f[12] + CONSTANT(0.053579475144400000)*f[30] + CONSTANT(0.190188269816000010)*f[34]; + tg = CONSTANT(0.184674390919999990)*g[2] + CONSTANT(-0.188063194517999990)*g[12] + CONSTANT(0.053579475144400000)*g[30] + CONSTANT(0.190188269816000010)*g[34]; + y[8] += tf*g[14] + 
tg*f[14]; + y[14] += tf*g[8] + tg*f[8]; + t = f[8] * g[14] + f[14] * g[8]; + y[2] += CONSTANT(0.184674390919999990)*t; + y[12] += CONSTANT(-0.188063194517999990)*t; + y[30] += CONSTANT(0.053579475144400000)*t; + y[34] += CONSTANT(0.190188269816000010)*t; + + // [8,15]: 13,3,31,35, + tf = CONSTANT(-0.094031597259499999)*f[13] + CONSTANT(0.226179013155000000)*f[3] + CONSTANT(0.016943317729199998)*f[31] + CONSTANT(0.245532000541000000)*f[35]; + tg = CONSTANT(-0.094031597259499999)*g[13] + CONSTANT(0.226179013155000000)*g[3] + CONSTANT(0.016943317729199998)*g[31] + CONSTANT(0.245532000541000000)*g[35]; + y[8] += tf*g[15] + tg*f[15]; + y[15] += tf*g[8] + tg*f[8]; + t = f[8] * g[15] + f[15] * g[8]; + y[13] += CONSTANT(-0.094031597259499999)*t; + y[3] += CONSTANT(0.226179013155000000)*t; + y[31] += CONSTANT(0.016943317729199998)*t; + y[35] += CONSTANT(0.245532000541000000)*t; + + // [8,22]: 6,20,24, + tf = CONSTANT(0.156078347226000000)*f[6] + CONSTANT(-0.190364615029000010)*f[20] + CONSTANT(-0.075080816691500005)*f[24]; + tg = CONSTANT(0.156078347226000000)*g[6] + CONSTANT(-0.190364615029000010)*g[20] + CONSTANT(-0.075080816691500005)*g[24]; + y[8] += tf*g[22] + tg*f[22]; + y[22] += tf*g[8] + tg*f[8]; + t = f[8] * g[22] + f[22] * g[8]; + y[6] += CONSTANT(0.156078347226000000)*t; + y[20] += CONSTANT(-0.190364615029000010)*t; + y[24] += CONSTANT(-0.075080816691500005)*t; + + // [8,26]: 10,28, + tf = CONSTANT(0.190188269806999990)*f[10] + CONSTANT(-0.097043558542400002)*f[28]; + tg = CONSTANT(0.190188269806999990)*g[10] + CONSTANT(-0.097043558542400002)*g[28]; + y[8] += tf*g[26] + tg*f[26]; + y[26] += tf*g[8] + tg*f[8]; + t = f[8] * g[26] + f[26] * g[8]; + y[10] += CONSTANT(0.190188269806999990)*t; + y[28] += CONSTANT(-0.097043558542400002)*t; + + // [8,27]: 25,11,29, + tf = CONSTANT(-0.062641347680800000)*f[25] + CONSTANT(0.141757966609000000)*f[11] + CONSTANT(-0.121034582550000010)*f[29]; + tg = CONSTANT(-0.062641347680800000)*g[25] + CONSTANT(0.141757966609000000)*g[11] 
+ CONSTANT(-0.121034582550000010)*g[29]; + y[8] += tf*g[27] + tg*f[27]; + y[27] += tf*g[8] + tg*f[8]; + t = f[8] * g[27] + f[27] * g[8]; + y[25] += CONSTANT(-0.062641347680800000)*t; + y[11] += CONSTANT(0.141757966609000000)*t; + y[29] += CONSTANT(-0.121034582550000010)*t; + + // [8,32]: 30,12,34, + tf = CONSTANT(-0.191372478254000000)*f[30] + CONSTANT(0.141757966609000000)*f[12] + CONSTANT(-0.097043558538899996)*f[34]; + tg = CONSTANT(-0.191372478254000000)*g[30] + CONSTANT(0.141757966609000000)*g[12] + CONSTANT(-0.097043558538899996)*g[34]; + y[8] += tf*g[32] + tg*f[32]; + y[32] += tf*g[8] + tg*f[8]; + t = f[8] * g[32] + f[32] * g[8]; + y[30] += CONSTANT(-0.191372478254000000)*t; + y[12] += CONSTANT(0.141757966609000000)*t; + y[34] += CONSTANT(-0.097043558538899996)*t; + + // [8,33]: 13,31,35, + tf = CONSTANT(0.141757966609000000)*f[13] + CONSTANT(-0.121034582550000010)*f[31] + CONSTANT(-0.062641347680800000)*f[35]; + tg = CONSTANT(0.141757966609000000)*g[13] + CONSTANT(-0.121034582550000010)*g[31] + CONSTANT(-0.062641347680800000)*g[35]; + y[8] += tf*g[33] + tg*f[33]; + y[33] += tf*g[8] + tg*f[8]; + t = f[8] * g[33] + f[33] * g[8]; + y[13] += CONSTANT(0.141757966609000000)*t; + y[31] += CONSTANT(-0.121034582550000010)*t; + y[35] += CONSTANT(-0.062641347680800000)*t; + + // [9,9]: 6,0,20, + tf = CONSTANT(-0.210261043508000010)*f[6] + CONSTANT(0.282094791766999970)*f[0] + CONSTANT(0.076934943209800002)*f[20]; + tg = CONSTANT(-0.210261043508000010)*g[6] + CONSTANT(0.282094791766999970)*g[0] + CONSTANT(0.076934943209800002)*g[20]; + y[9] += tf*g[9] + tg*f[9]; + t = f[9] * g[9]; + y[6] += CONSTANT(-0.210261043508000010)*t; + y[0] += CONSTANT(0.282094791766999970)*t; + y[20] += CONSTANT(0.076934943209800002)*t; + + // [9,17]: 2,12,30, + tf = CONSTANT(0.162867503964999990)*f[2] + CONSTANT(-0.203550726872999990)*f[12] + CONSTANT(0.098140130728100003)*f[30]; + tg = CONSTANT(0.162867503964999990)*g[2] + CONSTANT(-0.203550726872999990)*g[12] + 
CONSTANT(0.098140130728100003)*g[30]; + y[9] += tf*g[17] + tg*f[17]; + y[17] += tf*g[9] + tg*f[9]; + t = f[9] * g[17] + f[17] * g[9]; + y[2] += CONSTANT(0.162867503964999990)*t; + y[12] += CONSTANT(-0.203550726872999990)*t; + y[30] += CONSTANT(0.098140130728100003)*t; + + // [9,18]: 3,13,31,35, + tf = CONSTANT(-0.043528171377799997)*f[3] + CONSTANT(0.133255230519000010)*f[13] + CONSTANT(-0.101584686310000010)*f[31] + CONSTANT(0.098140130731999994)*f[35]; + tg = CONSTANT(-0.043528171377799997)*g[3] + CONSTANT(0.133255230519000010)*g[13] + CONSTANT(-0.101584686310000010)*g[31] + CONSTANT(0.098140130731999994)*g[35]; + y[9] += tf*g[18] + tg*f[18]; + y[18] += tf*g[9] + tg*f[9]; + t = f[9] * g[18] + f[18] * g[9]; + y[3] += CONSTANT(-0.043528171377799997)*t; + y[13] += CONSTANT(0.133255230519000010)*t; + y[31] += CONSTANT(-0.101584686310000010)*t; + y[35] += CONSTANT(0.098140130731999994)*t; + + // [9,19]: 14,32,34, + tf = CONSTANT(-0.099322584600699995)*f[14] + CONSTANT(0.126698363970000010)*f[32] + CONSTANT(0.131668802180999990)*f[34]; + tg = CONSTANT(-0.099322584600699995)*g[14] + CONSTANT(0.126698363970000010)*g[32] + CONSTANT(0.131668802180999990)*g[34]; + y[9] += tf*g[19] + tg*f[19]; + y[19] += tf*g[9] + tg*f[9]; + t = f[9] * g[19] + f[19] * g[9]; + y[14] += CONSTANT(-0.099322584600699995)*t; + y[32] += CONSTANT(0.126698363970000010)*t; + y[34] += CONSTANT(0.131668802180999990)*t; + + // [9,22]: 1,11,25,29, + tf = CONSTANT(-0.043528171378199997)*f[1] + CONSTANT(0.133255230518000010)*f[11] + CONSTANT(-0.098140130732499997)*f[25] + CONSTANT(-0.101584686311000000)*f[29]; + tg = CONSTANT(-0.043528171378199997)*g[1] + CONSTANT(0.133255230518000010)*g[11] + CONSTANT(-0.098140130732499997)*g[25] + CONSTANT(-0.101584686311000000)*g[29]; + y[9] += tf*g[22] + tg*f[22]; + y[22] += tf*g[9] + tg*f[9]; + t = f[9] * g[22] + f[22] * g[9]; + y[1] += CONSTANT(-0.043528171378199997)*t; + y[11] += CONSTANT(0.133255230518000010)*t; + y[25] += CONSTANT(-0.098140130732499997)*t; + y[29] 
+= CONSTANT(-0.101584686311000000)*t; + + // [9,27]: 6,20, + tf = CONSTANT(0.126792179874999990)*f[6] + CONSTANT(-0.196280261464999990)*f[20]; + tg = CONSTANT(0.126792179874999990)*g[6] + CONSTANT(-0.196280261464999990)*g[20]; + y[9] += tf*g[27] + tg*f[27]; + y[27] += tf*g[9] + tg*f[9]; + t = f[9] * g[27] + f[27] * g[9]; + y[6] += CONSTANT(0.126792179874999990)*t; + y[20] += CONSTANT(-0.196280261464999990)*t; + + // [10,10]: 0,20,24, + tf = CONSTANT(0.282094791771999980)*f[0] + CONSTANT(-0.179514867494000000)*f[20] + CONSTANT(-0.151717754049000010)*f[24]; + tg = CONSTANT(0.282094791771999980)*g[0] + CONSTANT(-0.179514867494000000)*g[20] + CONSTANT(-0.151717754049000010)*g[24]; + y[10] += tf*g[10] + tg*f[10]; + t = f[10] * g[10]; + y[0] += CONSTANT(0.282094791771999980)*t; + y[20] += CONSTANT(-0.179514867494000000)*t; + y[24] += CONSTANT(-0.151717754049000010)*t; + + // [10,16]: 14,32, + tf = CONSTANT(0.151717754044999990)*f[14] + CONSTANT(-0.077413979111300005)*f[32]; + tg = CONSTANT(0.151717754044999990)*g[14] + CONSTANT(-0.077413979111300005)*g[32]; + y[10] += tf*g[16] + tg*f[16]; + y[16] += tf*g[10] + tg*f[10]; + t = f[10] * g[16] + f[16] * g[10]; + y[14] += CONSTANT(0.151717754044999990)*t; + y[32] += CONSTANT(-0.077413979111300005)*t; + + // [10,17]: 13,3,31,35, + tf = CONSTANT(0.067850242288900006)*f[13] + CONSTANT(0.199471140200000010)*f[3] + CONSTANT(-0.113793659091000000)*f[31] + CONSTANT(-0.149911525925999990)*f[35]; + tg = CONSTANT(0.067850242288900006)*g[13] + CONSTANT(0.199471140200000010)*g[3] + CONSTANT(-0.113793659091000000)*g[31] + CONSTANT(-0.149911525925999990)*g[35]; + y[10] += tf*g[17] + tg*f[17]; + y[17] += tf*g[10] + tg*f[10]; + t = f[10] * g[17] + f[17] * g[10]; + y[13] += CONSTANT(0.067850242288900006)*t; + y[3] += CONSTANT(0.199471140200000010)*t; + y[31] += CONSTANT(-0.113793659091000000)*t; + y[35] += CONSTANT(-0.149911525925999990)*t; + + // [10,18]: 12,2,30,34, + tf = CONSTANT(-0.044418410173299998)*f[12] + 
CONSTANT(0.213243618621000000)*f[2] + CONSTANT(-0.171327458205000000)*f[30] + CONSTANT(-0.101358691177000000)*f[34]; + tg = CONSTANT(-0.044418410173299998)*g[12] + CONSTANT(0.213243618621000000)*g[2] + CONSTANT(-0.171327458205000000)*g[30] + CONSTANT(-0.101358691177000000)*g[34]; + y[10] += tf*g[18] + tg*f[18]; + y[18] += tf*g[10] + tg*f[10]; + t = f[10] * g[18] + f[18] * g[10]; + y[12] += CONSTANT(-0.044418410173299998)*t; + y[2] += CONSTANT(0.213243618621000000)*t; + y[30] += CONSTANT(-0.171327458205000000)*t; + y[34] += CONSTANT(-0.101358691177000000)*t; + + // [10,19]: 3,15,13,31,33, + tf = CONSTANT(-0.075393004386799994)*f[3] + CONSTANT(0.099322584599600000)*f[15] + CONSTANT(0.102579924281000000)*f[13] + CONSTANT(0.097749909976500002)*f[31] + CONSTANT(-0.025339672794100002)*f[33]; + tg = CONSTANT(-0.075393004386799994)*g[3] + CONSTANT(0.099322584599600000)*g[15] + CONSTANT(0.102579924281000000)*g[13] + CONSTANT(0.097749909976500002)*g[31] + CONSTANT(-0.025339672794100002)*g[33]; + y[10] += tf*g[19] + tg*f[19]; + y[19] += tf*g[10] + tg*f[10]; + t = f[10] * g[19] + f[19] * g[10]; + y[3] += CONSTANT(-0.075393004386799994)*t; + y[15] += CONSTANT(0.099322584599600000)*t; + y[13] += CONSTANT(0.102579924281000000)*t; + y[31] += CONSTANT(0.097749909976500002)*t; + y[33] += CONSTANT(-0.025339672794100002)*t; + + // [10,21]: 11,1,9,27,29, + tf = CONSTANT(0.102579924281000000)*f[11] + CONSTANT(-0.075393004386799994)*f[1] + CONSTANT(-0.099322584599600000)*f[9] + CONSTANT(0.025339672794100002)*f[27] + CONSTANT(0.097749909976500002)*f[29]; + tg = CONSTANT(0.102579924281000000)*g[11] + CONSTANT(-0.075393004386799994)*g[1] + CONSTANT(-0.099322584599600000)*g[9] + CONSTANT(0.025339672794100002)*g[27] + CONSTANT(0.097749909976500002)*g[29]; + y[10] += tf*g[21] + tg*f[21]; + y[21] += tf*g[10] + tg*f[10]; + t = f[10] * g[21] + f[21] * g[10]; + y[11] += CONSTANT(0.102579924281000000)*t; + y[1] += CONSTANT(-0.075393004386799994)*t; + y[9] += CONSTANT(-0.099322584599600000)*t; + 
y[27] += CONSTANT(0.025339672794100002)*t; + y[29] += CONSTANT(0.097749909976500002)*t; + + // [10,23]: 11,1,25,29, + tf = CONSTANT(-0.067850242288900006)*f[11] + CONSTANT(-0.199471140200000010)*f[1] + CONSTANT(0.149911525925999990)*f[25] + CONSTANT(0.113793659091000000)*f[29]; + tg = CONSTANT(-0.067850242288900006)*g[11] + CONSTANT(-0.199471140200000010)*g[1] + CONSTANT(0.149911525925999990)*g[25] + CONSTANT(0.113793659091000000)*g[29]; + y[10] += tf*g[23] + tg*f[23]; + y[23] += tf*g[10] + tg*f[10]; + t = f[10] * g[23] + f[23] * g[10]; + y[11] += CONSTANT(-0.067850242288900006)*t; + y[1] += CONSTANT(-0.199471140200000010)*t; + y[25] += CONSTANT(0.149911525925999990)*t; + y[29] += CONSTANT(0.113793659091000000)*t; + + // [10,28]: 6,20,24, + tf = CONSTANT(0.190188269814000000)*f[6] + CONSTANT(-0.065426753820500005)*f[20] + CONSTANT(0.077413979109600004)*f[24]; + tg = CONSTANT(0.190188269814000000)*g[6] + CONSTANT(-0.065426753820500005)*g[20] + CONSTANT(0.077413979109600004)*g[24]; + y[10] += tf*g[28] + tg*f[28]; + y[28] += tf*g[10] + tg*f[10]; + t = f[10] * g[28] + f[28] * g[10]; + y[6] += CONSTANT(0.190188269814000000)*t; + y[20] += CONSTANT(-0.065426753820500005)*t; + y[24] += CONSTANT(0.077413979109600004)*t; + + // [11,11]: 0,6,8,20,22, + tf = CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.126156626101000010)*f[6] + CONSTANT(-0.145673124078999990)*f[8] + CONSTANT(0.025644981070299999)*f[20] + CONSTANT(-0.114687841910000000)*f[22]; + tg = CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.126156626101000010)*g[6] + CONSTANT(-0.145673124078999990)*g[8] + CONSTANT(0.025644981070299999)*g[20] + CONSTANT(-0.114687841910000000)*g[22]; + y[11] += tf*g[11] + tg*f[11]; + t = f[11] * g[11]; + y[0] += CONSTANT(0.282094791773999990)*t; + y[6] += CONSTANT(0.126156626101000010)*t; + y[8] += CONSTANT(-0.145673124078999990)*t; + y[20] += CONSTANT(0.025644981070299999)*t; + y[22] += CONSTANT(-0.114687841910000000)*t; + + // [11,16]: 15,33,35, + tf = 
CONSTANT(-0.117520066953000000)*f[15] + CONSTANT(0.119929220739999990)*f[33] + CONSTANT(0.134084945035999990)*f[35]; + tg = CONSTANT(-0.117520066953000000)*g[15] + CONSTANT(0.119929220739999990)*g[33] + CONSTANT(0.134084945035999990)*g[35]; + y[11] += tf*g[16] + tg*f[16]; + y[16] += tf*g[11] + tg*f[11]; + t = f[11] * g[16] + f[16] * g[11]; + y[15] += CONSTANT(-0.117520066953000000)*t; + y[33] += CONSTANT(0.119929220739999990)*t; + y[35] += CONSTANT(0.134084945035999990)*t; + + // [11,18]: 3,13,15,31,33, + tf = CONSTANT(0.168583882834000000)*f[3] + CONSTANT(0.114687841909000000)*f[13] + CONSTANT(-0.133255230519000010)*f[15] + CONSTANT(0.075189952564900006)*f[31] + CONSTANT(-0.101990215611000000)*f[33]; + tg = CONSTANT(0.168583882834000000)*g[3] + CONSTANT(0.114687841909000000)*g[13] + CONSTANT(-0.133255230519000010)*g[15] + CONSTANT(0.075189952564900006)*g[31] + CONSTANT(-0.101990215611000000)*g[33]; + y[11] += tf*g[18] + tg*f[18]; + y[18] += tf*g[11] + tg*f[11]; + t = f[11] * g[18] + f[18] * g[11]; + y[3] += CONSTANT(0.168583882834000000)*t; + y[13] += CONSTANT(0.114687841909000000)*t; + y[15] += CONSTANT(-0.133255230519000010)*t; + y[31] += CONSTANT(0.075189952564900006)*t; + y[33] += CONSTANT(-0.101990215611000000)*t; + + // [11,19]: 2,14,12,30,32, + tf = CONSTANT(0.238413613504000000)*f[2] + CONSTANT(-0.102579924282000000)*f[14] + CONSTANT(0.099322584599300004)*f[12] + CONSTANT(0.009577496073830001)*f[30] + CONSTANT(-0.104682806112000000)*f[32]; + tg = CONSTANT(0.238413613504000000)*g[2] + CONSTANT(-0.102579924282000000)*g[14] + CONSTANT(0.099322584599300004)*g[12] + CONSTANT(0.009577496073830001)*g[30] + CONSTANT(-0.104682806112000000)*g[32]; + y[11] += tf*g[19] + tg*f[19]; + y[19] += tf*g[11] + tg*f[11]; + t = f[11] * g[19] + f[19] * g[11]; + y[2] += CONSTANT(0.238413613504000000)*t; + y[14] += CONSTANT(-0.102579924282000000)*t; + y[12] += CONSTANT(0.099322584599300004)*t; + y[30] += CONSTANT(0.009577496073830001)*t; + y[32] += 
CONSTANT(-0.104682806112000000)*t; + + // [11,24]: 9,25,27, + tf = CONSTANT(0.117520066950999990)*f[9] + CONSTANT(-0.134084945037000000)*f[25] + CONSTANT(-0.119929220742000010)*f[27]; + tg = CONSTANT(0.117520066950999990)*g[9] + CONSTANT(-0.134084945037000000)*g[25] + CONSTANT(-0.119929220742000010)*g[27]; + y[11] += tf*g[24] + tg*f[24]; + y[24] += tf*g[11] + tg*f[11]; + t = f[11] * g[24] + f[24] * g[11]; + y[9] += CONSTANT(0.117520066950999990)*t; + y[25] += CONSTANT(-0.134084945037000000)*t; + y[27] += CONSTANT(-0.119929220742000010)*t; + + // [11,29]: 6,20,22,8, + tf = CONSTANT(0.227318461243000010)*f[6] + CONSTANT(0.086019920779800002)*f[20] + CONSTANT(-0.075189952565200002)*f[22] + CONSTANT(0.065621187395299999)*f[8]; + tg = CONSTANT(0.227318461243000010)*g[6] + CONSTANT(0.086019920779800002)*g[20] + CONSTANT(-0.075189952565200002)*g[22] + CONSTANT(0.065621187395299999)*g[8]; + y[11] += tf*g[29] + tg*f[29]; + y[29] += tf*g[11] + tg*f[11]; + t = f[11] * g[29] + f[29] * g[11]; + y[6] += CONSTANT(0.227318461243000010)*t; + y[20] += CONSTANT(0.086019920779800002)*t; + y[22] += CONSTANT(-0.075189952565200002)*t; + y[8] += CONSTANT(0.065621187395299999)*t; + + // [12,12]: 0,6,20, + tf = CONSTANT(0.282094799871999980)*f[0] + CONSTANT(0.168208852954000010)*f[6] + CONSTANT(0.153869910786000010)*f[20]; + tg = CONSTANT(0.282094799871999980)*g[0] + CONSTANT(0.168208852954000010)*g[6] + CONSTANT(0.153869910786000010)*g[20]; + y[12] += tf*g[12] + tg*f[12]; + t = f[12] * g[12]; + y[0] += CONSTANT(0.282094799871999980)*t; + y[6] += CONSTANT(0.168208852954000010)*t; + y[20] += CONSTANT(0.153869910786000010)*t; + + // [12,30]: 20,6, + tf = CONSTANT(0.148373961712999990)*f[20] + CONSTANT(0.239614719999000000)*f[6]; + tg = CONSTANT(0.148373961712999990)*g[20] + CONSTANT(0.239614719999000000)*g[6]; + y[12] += tf*g[30] + tg*f[30]; + y[30] += tf*g[12] + tg*f[12]; + t = f[12] * g[30] + f[30] * g[12]; + y[20] += CONSTANT(0.148373961712999990)*t; + y[6] += 
CONSTANT(0.239614719999000000)*t; + + // [13,13]: 0,8,6,20,22, + tf = CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.145673124078999990)*f[8] + CONSTANT(0.126156626101000010)*f[6] + CONSTANT(0.025644981070299999)*f[20] + CONSTANT(0.114687841910000000)*f[22]; + tg = CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.145673124078999990)*g[8] + CONSTANT(0.126156626101000010)*g[6] + CONSTANT(0.025644981070299999)*g[20] + CONSTANT(0.114687841910000000)*g[22]; + y[13] += tf*g[13] + tg*f[13]; + t = f[13] * g[13]; + y[0] += CONSTANT(0.282094791773999990)*t; + y[8] += CONSTANT(0.145673124078999990)*t; + y[6] += CONSTANT(0.126156626101000010)*t; + y[20] += CONSTANT(0.025644981070299999)*t; + y[22] += CONSTANT(0.114687841910000000)*t; + + // [13,16]: 9,25,27, + tf = CONSTANT(-0.117520066953000000)*f[9] + CONSTANT(-0.134084945035999990)*f[25] + CONSTANT(0.119929220739999990)*f[27]; + tg = CONSTANT(-0.117520066953000000)*g[9] + CONSTANT(-0.134084945035999990)*g[25] + CONSTANT(0.119929220739999990)*g[27]; + y[13] += tf*g[16] + tg*f[16]; + y[16] += tf*g[13] + tg*f[13]; + t = f[13] * g[16] + f[16] * g[13]; + y[9] += CONSTANT(-0.117520066953000000)*t; + y[25] += CONSTANT(-0.134084945035999990)*t; + y[27] += CONSTANT(0.119929220739999990)*t; + + // [13,21]: 2,12,14,30,32, + tf = CONSTANT(0.238413613504000000)*f[2] + CONSTANT(0.099322584599300004)*f[12] + CONSTANT(0.102579924282000000)*f[14] + CONSTANT(0.009577496073830001)*f[30] + CONSTANT(0.104682806112000000)*f[32]; + tg = CONSTANT(0.238413613504000000)*g[2] + CONSTANT(0.099322584599300004)*g[12] + CONSTANT(0.102579924282000000)*g[14] + CONSTANT(0.009577496073830001)*g[30] + CONSTANT(0.104682806112000000)*g[32]; + y[13] += tf*g[21] + tg*f[21]; + y[21] += tf*g[13] + tg*f[13]; + t = f[13] * g[21] + f[21] * g[13]; + y[2] += CONSTANT(0.238413613504000000)*t; + y[12] += CONSTANT(0.099322584599300004)*t; + y[14] += CONSTANT(0.102579924282000000)*t; + y[30] += CONSTANT(0.009577496073830001)*t; + y[32] += CONSTANT(0.104682806112000000)*t; 
+ + // [13,24]: 15,33,35, + tf = CONSTANT(-0.117520066950999990)*f[15] + CONSTANT(0.119929220742000010)*f[33] + CONSTANT(-0.134084945037000000)*f[35]; + tg = CONSTANT(-0.117520066950999990)*g[15] + CONSTANT(0.119929220742000010)*g[33] + CONSTANT(-0.134084945037000000)*g[35]; + y[13] += tf*g[24] + tg*f[24]; + y[24] += tf*g[13] + tg*f[13]; + t = f[13] * g[24] + f[24] * g[13]; + y[15] += CONSTANT(-0.117520066950999990)*t; + y[33] += CONSTANT(0.119929220742000010)*t; + y[35] += CONSTANT(-0.134084945037000000)*t; + + // [13,31]: 6,22,20,8, + tf = CONSTANT(0.227318461243000010)*f[6] + CONSTANT(0.075189952565200002)*f[22] + CONSTANT(0.086019920779800002)*f[20] + CONSTANT(-0.065621187395299999)*f[8]; + tg = CONSTANT(0.227318461243000010)*g[6] + CONSTANT(0.075189952565200002)*g[22] + CONSTANT(0.086019920779800002)*g[20] + CONSTANT(-0.065621187395299999)*g[8]; + y[13] += tf*g[31] + tg*f[31]; + y[31] += tf*g[13] + tg*f[13]; + t = f[13] * g[31] + f[31] * g[13]; + y[6] += CONSTANT(0.227318461243000010)*t; + y[22] += CONSTANT(0.075189952565200002)*t; + y[20] += CONSTANT(0.086019920779800002)*t; + y[8] += CONSTANT(-0.065621187395299999)*t; + + // [14,14]: 0,20,24, + tf = CONSTANT(0.282094791771999980)*f[0] + CONSTANT(-0.179514867494000000)*f[20] + CONSTANT(0.151717754049000010)*f[24]; + tg = CONSTANT(0.282094791771999980)*g[0] + CONSTANT(-0.179514867494000000)*g[20] + CONSTANT(0.151717754049000010)*g[24]; + y[14] += tf*g[14] + tg*f[14]; + t = f[14] * g[14]; + y[0] += CONSTANT(0.282094791771999980)*t; + y[20] += CONSTANT(-0.179514867494000000)*t; + y[24] += CONSTANT(0.151717754049000010)*t; + + // [14,17]: 11,1,25,29, + tf = CONSTANT(0.067850242288500007)*f[11] + CONSTANT(0.199471140196999990)*f[1] + CONSTANT(0.149911525925999990)*f[25] + CONSTANT(-0.113793659092000000)*f[29]; + tg = CONSTANT(0.067850242288500007)*g[11] + CONSTANT(0.199471140196999990)*g[1] + CONSTANT(0.149911525925999990)*g[25] + CONSTANT(-0.113793659092000000)*g[29]; + y[14] += tf*g[17] + tg*f[17]; + y[17] += 
tf*g[14] + tg*f[14]; + t = f[14] * g[17] + f[17] * g[14]; + y[11] += CONSTANT(0.067850242288500007)*t; + y[1] += CONSTANT(0.199471140196999990)*t; + y[25] += CONSTANT(0.149911525925999990)*t; + y[29] += CONSTANT(-0.113793659092000000)*t; + + // [14,22]: 12,2,30,34, + tf = CONSTANT(-0.044418410173299998)*f[12] + CONSTANT(0.213243618621000000)*f[2] + CONSTANT(-0.171327458205000000)*f[30] + CONSTANT(0.101358691177000000)*f[34]; + tg = CONSTANT(-0.044418410173299998)*g[12] + CONSTANT(0.213243618621000000)*g[2] + CONSTANT(-0.171327458205000000)*g[30] + CONSTANT(0.101358691177000000)*g[34]; + y[14] += tf*g[22] + tg*f[22]; + y[22] += tf*g[14] + tg*f[14]; + t = f[14] * g[22] + f[22] * g[14]; + y[12] += CONSTANT(-0.044418410173299998)*t; + y[2] += CONSTANT(0.213243618621000000)*t; + y[30] += CONSTANT(-0.171327458205000000)*t; + y[34] += CONSTANT(0.101358691177000000)*t; + + // [14,23]: 13,3,31,35, + tf = CONSTANT(0.067850242288500007)*f[13] + CONSTANT(0.199471140196999990)*f[3] + CONSTANT(-0.113793659092000000)*f[31] + CONSTANT(0.149911525925999990)*f[35]; + tg = CONSTANT(0.067850242288500007)*g[13] + CONSTANT(0.199471140196999990)*g[3] + CONSTANT(-0.113793659092000000)*g[31] + CONSTANT(0.149911525925999990)*g[35]; + y[14] += tf*g[23] + tg*f[23]; + y[23] += tf*g[14] + tg*f[14]; + t = f[14] * g[23] + f[23] * g[14]; + y[13] += CONSTANT(0.067850242288500007)*t; + y[3] += CONSTANT(0.199471140196999990)*t; + y[31] += CONSTANT(-0.113793659092000000)*t; + y[35] += CONSTANT(0.149911525925999990)*t; + + // [14,32]: 20,6,24, + tf = CONSTANT(-0.065426753820500005)*f[20] + CONSTANT(0.190188269814000000)*f[6] + CONSTANT(-0.077413979109600004)*f[24]; + tg = CONSTANT(-0.065426753820500005)*g[20] + CONSTANT(0.190188269814000000)*g[6] + CONSTANT(-0.077413979109600004)*g[24]; + y[14] += tf*g[32] + tg*f[32]; + y[32] += tf*g[14] + tg*f[14]; + t = f[14] * g[32] + f[32] * g[14]; + y[20] += CONSTANT(-0.065426753820500005)*t; + y[6] += CONSTANT(0.190188269814000000)*t; + y[24] += 
CONSTANT(-0.077413979109600004)*t; + + // [15,15]: 0,6,20, + tf = CONSTANT(0.282094791766999970)*f[0] + CONSTANT(-0.210261043508000010)*f[6] + CONSTANT(0.076934943209800002)*f[20]; + tg = CONSTANT(0.282094791766999970)*g[0] + CONSTANT(-0.210261043508000010)*g[6] + CONSTANT(0.076934943209800002)*g[20]; + y[15] += tf*g[15] + tg*f[15]; + t = f[15] * g[15]; + y[0] += CONSTANT(0.282094791766999970)*t; + y[6] += CONSTANT(-0.210261043508000010)*t; + y[20] += CONSTANT(0.076934943209800002)*t; + + // [15,21]: 14,32,34, + tf = CONSTANT(-0.099322584600699995)*f[14] + CONSTANT(0.126698363970000010)*f[32] + CONSTANT(-0.131668802180999990)*f[34]; + tg = CONSTANT(-0.099322584600699995)*g[14] + CONSTANT(0.126698363970000010)*g[32] + CONSTANT(-0.131668802180999990)*g[34]; + y[15] += tf*g[21] + tg*f[21]; + y[21] += tf*g[15] + tg*f[15]; + t = f[15] * g[21] + f[21] * g[15]; + y[14] += CONSTANT(-0.099322584600699995)*t; + y[32] += CONSTANT(0.126698363970000010)*t; + y[34] += CONSTANT(-0.131668802180999990)*t; + + // [15,22]: 13,3,31,35, + tf = CONSTANT(0.133255230518000010)*f[13] + CONSTANT(-0.043528171378199997)*f[3] + CONSTANT(-0.101584686311000000)*f[31] + CONSTANT(-0.098140130732499997)*f[35]; + tg = CONSTANT(0.133255230518000010)*g[13] + CONSTANT(-0.043528171378199997)*g[3] + CONSTANT(-0.101584686311000000)*g[31] + CONSTANT(-0.098140130732499997)*g[35]; + y[15] += tf*g[22] + tg*f[22]; + y[22] += tf*g[15] + tg*f[15]; + t = f[15] * g[22] + f[22] * g[15]; + y[13] += CONSTANT(0.133255230518000010)*t; + y[3] += CONSTANT(-0.043528171378199997)*t; + y[31] += CONSTANT(-0.101584686311000000)*t; + y[35] += CONSTANT(-0.098140130732499997)*t; + + // [15,23]: 12,2,30, + tf = CONSTANT(-0.203550726872999990)*f[12] + CONSTANT(0.162867503964999990)*f[2] + CONSTANT(0.098140130728100003)*f[30]; + tg = CONSTANT(-0.203550726872999990)*g[12] + CONSTANT(0.162867503964999990)*g[2] + CONSTANT(0.098140130728100003)*g[30]; + y[15] += tf*g[23] + tg*f[23]; + y[23] += tf*g[15] + tg*f[15]; + t = f[15] * g[23] + 
f[23] * g[15]; + y[12] += CONSTANT(-0.203550726872999990)*t; + y[2] += CONSTANT(0.162867503964999990)*t; + y[30] += CONSTANT(0.098140130728100003)*t; + + // [15,33]: 6,20, + tf = CONSTANT(0.126792179874999990)*f[6] + CONSTANT(-0.196280261464999990)*f[20]; + tg = CONSTANT(0.126792179874999990)*g[6] + CONSTANT(-0.196280261464999990)*g[20]; + y[15] += tf*g[33] + tg*f[33]; + y[33] += tf*g[15] + tg*f[15]; + t = f[15] * g[33] + f[33] * g[15]; + y[6] += CONSTANT(0.126792179874999990)*t; + y[20] += CONSTANT(-0.196280261464999990)*t; + + // [16,16]: 0,6,20, + tf = CONSTANT(0.282094791763999990)*f[0] + CONSTANT(-0.229375683829000000)*f[6] + CONSTANT(0.106525305981000000)*f[20]; + tg = CONSTANT(0.282094791763999990)*g[0] + CONSTANT(-0.229375683829000000)*g[6] + CONSTANT(0.106525305981000000)*g[20]; + y[16] += tf*g[16] + tg*f[16]; + t = f[16] * g[16]; + y[0] += CONSTANT(0.282094791763999990)*t; + y[6] += CONSTANT(-0.229375683829000000)*t; + y[20] += CONSTANT(0.106525305981000000)*t; + + // [16,18]: 8,22, + tf = CONSTANT(-0.075080816693699995)*f[8] + CONSTANT(0.135045473380000000)*f[22]; + tg = CONSTANT(-0.075080816693699995)*g[8] + CONSTANT(0.135045473380000000)*g[22]; + y[16] += tf*g[18] + tg*f[18]; + y[18] += tf*g[16] + tg*f[16]; + t = f[16] * g[18] + f[18] * g[16]; + y[8] += CONSTANT(-0.075080816693699995)*t; + y[22] += CONSTANT(0.135045473380000000)*t; + + // [16,23]: 19,5, + tf = CONSTANT(-0.119098912754999990)*f[19] + CONSTANT(0.140463346187999990)*f[5]; + tg = CONSTANT(-0.119098912754999990)*g[19] + CONSTANT(0.140463346187999990)*g[5]; + y[16] += tf*g[23] + tg*f[23]; + y[23] += tf*g[16] + tg*f[16]; + t = f[16] * g[23] + f[23] * g[16]; + y[19] += CONSTANT(-0.119098912754999990)*t; + y[5] += CONSTANT(0.140463346187999990)*t; + + // [16,26]: 12,2,30, + tf = CONSTANT(-0.207723503645000000)*f[12] + CONSTANT(0.147319200325000010)*f[2] + CONSTANT(0.130197596199999990)*f[30]; + tg = CONSTANT(-0.207723503645000000)*g[12] + CONSTANT(0.147319200325000010)*g[2] + 
CONSTANT(0.130197596199999990)*g[30]; + y[16] += tf*g[26] + tg*f[26]; + y[26] += tf*g[16] + tg*f[16]; + t = f[16] * g[26] + f[26] * g[16]; + y[12] += CONSTANT(-0.207723503645000000)*t; + y[2] += CONSTANT(0.147319200325000010)*t; + y[30] += CONSTANT(0.130197596199999990)*t; + + // [16,28]: 14,32, + tf = CONSTANT(-0.077413979111300005)*f[14] + CONSTANT(0.128376561115000010)*f[32]; + tg = CONSTANT(-0.077413979111300005)*g[14] + CONSTANT(0.128376561115000010)*g[32]; + y[16] += tf*g[28] + tg*f[28]; + y[28] += tf*g[16] + tg*f[16]; + t = f[16] * g[28] + f[28] * g[16]; + y[14] += CONSTANT(-0.077413979111300005)*t; + y[32] += CONSTANT(0.128376561115000010)*t; + + // [16,29]: 15,33,35, + tf = CONSTANT(0.035835708931099997)*f[15] + CONSTANT(-0.118853600623999990)*f[33] + CONSTANT(-0.053152946071899999)*f[35]; + tg = CONSTANT(0.035835708931099997)*g[15] + CONSTANT(-0.118853600623999990)*g[33] + CONSTANT(-0.053152946071899999)*g[35]; + y[16] += tf*g[29] + tg*f[29]; + y[29] += tf*g[16] + tg*f[16]; + t = f[16] * g[29] + f[29] * g[16]; + y[15] += CONSTANT(0.035835708931099997)*t; + y[33] += CONSTANT(-0.118853600623999990)*t; + y[35] += CONSTANT(-0.053152946071899999)*t; + + // [16,31]: 27,9,25, + tf = CONSTANT(-0.118853600623999990)*f[27] + CONSTANT(0.035835708931099997)*f[9] + CONSTANT(0.053152946071899999)*f[25]; + tg = CONSTANT(-0.118853600623999990)*g[27] + CONSTANT(0.035835708931099997)*g[9] + CONSTANT(0.053152946071899999)*g[25]; + y[16] += tf*g[31] + tg*f[31]; + y[31] += tf*g[16] + tg*f[16]; + t = f[16] * g[31] + f[31] * g[16]; + y[27] += CONSTANT(-0.118853600623999990)*t; + y[9] += CONSTANT(0.035835708931099997)*t; + y[25] += CONSTANT(0.053152946071899999)*t; + + // [17,17]: 0,6,20, + tf = CONSTANT(0.282094791768999990)*f[0] + CONSTANT(-0.057343920955899998)*f[6] + CONSTANT(-0.159787958979000000)*f[20]; + tg = CONSTANT(0.282094791768999990)*g[0] + CONSTANT(-0.057343920955899998)*g[6] + CONSTANT(-0.159787958979000000)*g[20]; + y[17] += tf*g[17] + tg*f[17]; + t = f[17] * 
g[17]; + y[0] += CONSTANT(0.282094791768999990)*t; + y[6] += CONSTANT(-0.057343920955899998)*t; + y[20] += CONSTANT(-0.159787958979000000)*t; + + // [17,19]: 8,22,24, + tf = CONSTANT(-0.112621225039000000)*f[8] + CONSTANT(0.045015157794100001)*f[22] + CONSTANT(0.119098912753000000)*f[24]; + tg = CONSTANT(-0.112621225039000000)*g[8] + CONSTANT(0.045015157794100001)*g[22] + CONSTANT(0.119098912753000000)*g[24]; + y[17] += tf*g[19] + tg*f[19]; + y[19] += tf*g[17] + tg*f[17]; + t = f[17] * g[19] + f[19] * g[17]; + y[8] += CONSTANT(-0.112621225039000000)*t; + y[22] += CONSTANT(0.045015157794100001)*t; + y[24] += CONSTANT(0.119098912753000000)*t; + + // [17,21]: 16,4,18, + tf = CONSTANT(-0.119098912754999990)*f[16] + CONSTANT(-0.112621225039000000)*f[4] + CONSTANT(0.045015157794399997)*f[18]; + tg = CONSTANT(-0.119098912754999990)*g[16] + CONSTANT(-0.112621225039000000)*g[4] + CONSTANT(0.045015157794399997)*g[18]; + y[17] += tf*g[21] + tg*f[21]; + y[21] += tf*g[17] + tg*f[17]; + t = f[17] * g[21] + f[21] * g[17]; + y[16] += CONSTANT(-0.119098912754999990)*t; + y[4] += CONSTANT(-0.112621225039000000)*t; + y[18] += CONSTANT(0.045015157794399997)*t; + + // [17,26]: 3,13,31, + tf = CONSTANT(0.208340811096000000)*f[3] + CONSTANT(0.029982305185199998)*f[13] + CONSTANT(-0.118853600623999990)*f[31]; + tg = CONSTANT(0.208340811096000000)*g[3] + CONSTANT(0.029982305185199998)*g[13] + CONSTANT(-0.118853600623999990)*g[31]; + y[17] += tf*g[26] + tg*f[26]; + y[26] += tf*g[17] + tg*f[17]; + t = f[17] * g[26] + f[26] * g[17]; + y[3] += CONSTANT(0.208340811096000000)*t; + y[13] += CONSTANT(0.029982305185199998)*t; + y[31] += CONSTANT(-0.118853600623999990)*t; + + // [17,27]: 12,2,30, + tf = CONSTANT(-0.103861751821000010)*f[12] + CONSTANT(0.196425600433000000)*f[2] + CONSTANT(-0.130197596204999990)*f[30]; + tg = CONSTANT(-0.103861751821000010)*g[12] + CONSTANT(0.196425600433000000)*g[2] + CONSTANT(-0.130197596204999990)*g[30]; + y[17] += tf*g[27] + tg*f[27]; + y[27] += tf*g[17] + 
tg*f[17]; + t = f[17] * g[27] + f[27] * g[17]; + y[12] += CONSTANT(-0.103861751821000010)*t; + y[2] += CONSTANT(0.196425600433000000)*t; + y[30] += CONSTANT(-0.130197596204999990)*t; + + // [17,28]: 13,3,31,35, + tf = CONSTANT(0.121172043789000000)*f[13] + CONSTANT(-0.060142811686500000)*f[3] + CONSTANT(0.034310079156700000)*f[31] + CONSTANT(0.099440056652200001)*f[35]; + tg = CONSTANT(0.121172043789000000)*g[13] + CONSTANT(-0.060142811686500000)*g[3] + CONSTANT(0.034310079156700000)*g[31] + CONSTANT(0.099440056652200001)*g[35]; + y[17] += tf*g[28] + tg*f[28]; + y[28] += tf*g[17] + tg*f[17]; + t = f[17] * g[28] + f[28] * g[17]; + y[13] += CONSTANT(0.121172043789000000)*t; + y[3] += CONSTANT(-0.060142811686500000)*t; + y[31] += CONSTANT(0.034310079156700000)*t; + y[35] += CONSTANT(0.099440056652200001)*t; + + // [17,32]: 11,1,25,29, + tf = CONSTANT(0.121172043788000010)*f[11] + CONSTANT(-0.060142811686900000)*f[1] + CONSTANT(-0.099440056652700004)*f[25] + CONSTANT(0.034310079156599997)*f[29]; + tg = CONSTANT(0.121172043788000010)*g[11] + CONSTANT(-0.060142811686900000)*g[1] + CONSTANT(-0.099440056652700004)*g[25] + CONSTANT(0.034310079156599997)*g[29]; + y[17] += tf*g[32] + tg*f[32]; + y[32] += tf*g[17] + tg*f[17]; + t = f[17] * g[32] + f[32] * g[17]; + y[11] += CONSTANT(0.121172043788000010)*t; + y[1] += CONSTANT(-0.060142811686900000)*t; + y[25] += CONSTANT(-0.099440056652700004)*t; + y[29] += CONSTANT(0.034310079156599997)*t; + + // [17,34]: 29,11,1, + tf = CONSTANT(0.118853600623000000)*f[29] + CONSTANT(-0.029982305185400002)*f[11] + CONSTANT(-0.208340811100000000)*f[1]; + tg = CONSTANT(0.118853600623000000)*g[29] + CONSTANT(-0.029982305185400002)*g[11] + CONSTANT(-0.208340811100000000)*g[1]; + y[17] += tf*g[34] + tg*f[34]; + y[34] += tf*g[17] + tg*f[17]; + t = f[17] * g[34] + f[34] * g[17]; + y[29] += CONSTANT(0.118853600623000000)*t; + y[11] += CONSTANT(-0.029982305185400002)*t; + y[1] += CONSTANT(-0.208340811100000000)*t; + + // [18,18]: 6,0,20,24, + tf = 
CONSTANT(0.065535909662600006)*f[6] + CONSTANT(0.282094791771999980)*f[0] + CONSTANT(-0.083698454702400005)*f[20] + CONSTANT(-0.135045473384000000)*f[24]; + tg = CONSTANT(0.065535909662600006)*g[6] + CONSTANT(0.282094791771999980)*g[0] + CONSTANT(-0.083698454702400005)*g[20] + CONSTANT(-0.135045473384000000)*g[24]; + y[18] += tf*g[18] + tg*f[18]; + t = f[18] * g[18]; + y[6] += CONSTANT(0.065535909662600006)*t; + y[0] += CONSTANT(0.282094791771999980)*t; + y[20] += CONSTANT(-0.083698454702400005)*t; + y[24] += CONSTANT(-0.135045473384000000)*t; + + // [18,19]: 7,21,23, + tf = CONSTANT(0.090297865407399994)*f[7] + CONSTANT(0.102084782359000000)*f[21] + CONSTANT(-0.045015157794399997)*f[23]; + tg = CONSTANT(0.090297865407399994)*g[7] + CONSTANT(0.102084782359000000)*g[21] + CONSTANT(-0.045015157794399997)*g[23]; + y[18] += tf*g[19] + tg*f[19]; + y[19] += tf*g[18] + tg*f[18]; + t = f[18] * g[19] + f[19] * g[18]; + y[7] += CONSTANT(0.090297865407399994)*t; + y[21] += CONSTANT(0.102084782359000000)*t; + y[23] += CONSTANT(-0.045015157794399997)*t; + + // [18,25]: 15,33, + tf = CONSTANT(-0.098140130731999994)*f[15] + CONSTANT(0.130197596202000000)*f[33]; + tg = CONSTANT(-0.098140130731999994)*g[15] + CONSTANT(0.130197596202000000)*g[33]; + y[18] += tf*g[25] + tg*f[25]; + y[25] += tf*g[18] + tg*f[18]; + t = f[18] * g[25] + f[25] * g[18]; + y[15] += CONSTANT(-0.098140130731999994)*t; + y[33] += CONSTANT(0.130197596202000000)*t; + + // [18,26]: 14,32, + tf = CONSTANT(0.101358691174000000)*f[14] + CONSTANT(0.084042186965900004)*f[32]; + tg = CONSTANT(0.101358691174000000)*g[14] + CONSTANT(0.084042186965900004)*g[32]; + y[18] += tf*g[26] + tg*f[26]; + y[26] += tf*g[18] + tg*f[18]; + t = f[18] * g[26] + f[26] * g[18]; + y[14] += CONSTANT(0.101358691174000000)*t; + y[32] += CONSTANT(0.084042186965900004)*t; + + // [18,27]: 13,3,35, + tf = CONSTANT(0.101990215611000000)*f[13] + CONSTANT(0.183739324705999990)*f[3] + CONSTANT(-0.130197596202000000)*f[35]; + tg = 
CONSTANT(0.101990215611000000)*g[13] + CONSTANT(0.183739324705999990)*g[3] + CONSTANT(-0.130197596202000000)*g[35]; + y[18] += tf*g[27] + tg*f[27]; + y[27] += tf*g[18] + tg*f[18]; + t = f[18] * g[27] + f[27] * g[18]; + y[13] += CONSTANT(0.101990215611000000)*t; + y[3] += CONSTANT(0.183739324705999990)*t; + y[35] += CONSTANT(-0.130197596202000000)*t; + + // [18,28]: 2,12,30,34, + tf = CONSTANT(0.225033795606000010)*f[2] + CONSTANT(0.022664492358099999)*f[12] + CONSTANT(-0.099440056651100006)*f[30] + CONSTANT(-0.084042186968800003)*f[34]; + tg = CONSTANT(0.225033795606000010)*g[2] + CONSTANT(0.022664492358099999)*g[12] + CONSTANT(-0.099440056651100006)*g[30] + CONSTANT(-0.084042186968800003)*g[34]; + y[18] += tf*g[28] + tg*f[28]; + y[28] += tf*g[18] + tg*f[18]; + t = f[18] * g[28] + f[28] * g[18]; + y[2] += CONSTANT(0.225033795606000010)*t; + y[12] += CONSTANT(0.022664492358099999)*t; + y[30] += CONSTANT(-0.099440056651100006)*t; + y[34] += CONSTANT(-0.084042186968800003)*t; + + // [18,29]: 3,13,15,31, + tf = CONSTANT(-0.085054779966799998)*f[3] + CONSTANT(0.075189952564900006)*f[13] + CONSTANT(0.101584686310000010)*f[15] + CONSTANT(0.097043558538999999)*f[31]; + tg = CONSTANT(-0.085054779966799998)*g[3] + CONSTANT(0.075189952564900006)*g[13] + CONSTANT(0.101584686310000010)*g[15] + CONSTANT(0.097043558538999999)*g[31]; + y[18] += tf*g[29] + tg*f[29]; + y[29] += tf*g[18] + tg*f[18]; + t = f[18] * g[29] + f[29] * g[18]; + y[3] += CONSTANT(-0.085054779966799998)*t; + y[13] += CONSTANT(0.075189952564900006)*t; + y[15] += CONSTANT(0.101584686310000010)*t; + y[31] += CONSTANT(0.097043558538999999)*t; + + // [19,19]: 6,8,0,20,22, + tf = CONSTANT(0.139263808033999990)*f[6] + CONSTANT(-0.141889406570999990)*f[8] + CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.068480553847200004)*f[20] + CONSTANT(-0.102084782360000000)*f[22]; + tg = CONSTANT(0.139263808033999990)*g[6] + CONSTANT(-0.141889406570999990)*g[8] + CONSTANT(0.282094791773999990)*g[0] + 
CONSTANT(0.068480553847200004)*g[20] + CONSTANT(-0.102084782360000000)*g[22]; + y[19] += tf*g[19] + tg*f[19]; + t = f[19] * g[19]; + y[6] += CONSTANT(0.139263808033999990)*t; + y[8] += CONSTANT(-0.141889406570999990)*t; + y[0] += CONSTANT(0.282094791773999990)*t; + y[20] += CONSTANT(0.068480553847200004)*t; + y[22] += CONSTANT(-0.102084782360000000)*t; + + // [19,25]: 34, + tf = CONSTANT(-0.130197596205999990)*f[34]; + tg = CONSTANT(-0.130197596205999990)*g[34]; + y[19] += tf*g[25] + tg*f[25]; + y[25] += tf*g[19] + tg*f[19]; + t = f[19] * g[25] + f[25] * g[19]; + y[34] += CONSTANT(-0.130197596205999990)*t; + + // [19,26]: 15,35, + tf = CONSTANT(-0.131668802182000000)*f[15] + CONSTANT(0.130197596204999990)*f[35]; + tg = CONSTANT(-0.131668802182000000)*g[15] + CONSTANT(0.130197596204999990)*g[35]; + y[19] += tf*g[26] + tg*f[26]; + y[26] += tf*g[19] + tg*f[19]; + t = f[19] * g[26] + f[26] * g[19]; + y[15] += CONSTANT(-0.131668802182000000)*t; + y[35] += CONSTANT(0.130197596204999990)*t; + + // [19,27]: 14,32, + tf = CONSTANT(0.025339672793899998)*f[14] + CONSTANT(0.084042186967699994)*f[32]; + tg = CONSTANT(0.025339672793899998)*g[14] + CONSTANT(0.084042186967699994)*g[32]; + y[19] += tf*g[27] + tg*f[27]; + y[27] += tf*g[19] + tg*f[19]; + t = f[19] * g[27] + f[27] * g[19]; + y[14] += CONSTANT(0.025339672793899998)*t; + y[32] += CONSTANT(0.084042186967699994)*t; + + // [19,28]: 13,3,15,31,33, + tf = CONSTANT(0.104682806111000000)*f[13] + CONSTANT(0.159122922869999990)*f[3] + CONSTANT(-0.126698363970000010)*f[15] + CONSTANT(0.090775936911399999)*f[31] + CONSTANT(-0.084042186968400004)*f[33]; + tg = CONSTANT(0.104682806111000000)*g[13] + CONSTANT(0.159122922869999990)*g[3] + CONSTANT(-0.126698363970000010)*g[15] + CONSTANT(0.090775936911399999)*g[31] + CONSTANT(-0.084042186968400004)*g[33]; + y[19] += tf*g[28] + tg*f[28]; + y[28] += tf*g[19] + tg*f[19]; + t = f[19] * g[28] + f[28] * g[19]; + y[13] += CONSTANT(0.104682806111000000)*t; + y[3] += 
CONSTANT(0.159122922869999990)*t; + y[15] += CONSTANT(-0.126698363970000010)*t; + y[31] += CONSTANT(0.090775936911399999)*t; + y[33] += CONSTANT(-0.084042186968400004)*t; + + // [19,29]: 12,14,2,30,32, + tf = CONSTANT(0.115089467124000010)*f[12] + CONSTANT(-0.097749909977199997)*f[14] + CONSTANT(0.240571246744999990)*f[2] + CONSTANT(0.053152946072499999)*f[30] + CONSTANT(-0.090775936912099994)*f[32]; + tg = CONSTANT(0.115089467124000010)*g[12] + CONSTANT(-0.097749909977199997)*g[14] + CONSTANT(0.240571246744999990)*g[2] + CONSTANT(0.053152946072499999)*g[30] + CONSTANT(-0.090775936912099994)*g[32]; + y[19] += tf*g[29] + tg*f[29]; + y[29] += tf*g[19] + tg*f[19]; + t = f[19] * g[29] + f[29] * g[19]; + y[12] += CONSTANT(0.115089467124000010)*t; + y[14] += CONSTANT(-0.097749909977199997)*t; + y[2] += CONSTANT(0.240571246744999990)*t; + y[30] += CONSTANT(0.053152946072499999)*t; + y[32] += CONSTANT(-0.090775936912099994)*t; + + // [20,20]: 6,0,20, + tf = CONSTANT(0.163839797503000010)*f[6] + CONSTANT(0.282094802232000010)*f[0]; + tg = CONSTANT(0.163839797503000010)*g[6] + CONSTANT(0.282094802232000010)*g[0]; + y[20] += tf*g[20] + tg*f[20]; + t = f[20] * g[20]; + y[6] += CONSTANT(0.163839797503000010)*t; + y[0] += CONSTANT(0.282094802232000010)*t; + y[20] += CONSTANT(0.136961139005999990)*t; + + // [21,21]: 6,20,0,8,22, + tf = CONSTANT(0.139263808033999990)*f[6] + CONSTANT(0.068480553847200004)*f[20] + CONSTANT(0.282094791773999990)*f[0] + CONSTANT(0.141889406570999990)*f[8] + CONSTANT(0.102084782360000000)*f[22]; + tg = CONSTANT(0.139263808033999990)*g[6] + CONSTANT(0.068480553847200004)*g[20] + CONSTANT(0.282094791773999990)*g[0] + CONSTANT(0.141889406570999990)*g[8] + CONSTANT(0.102084782360000000)*g[22]; + y[21] += tf*g[21] + tg*f[21]; + t = f[21] * g[21]; + y[6] += CONSTANT(0.139263808033999990)*t; + y[20] += CONSTANT(0.068480553847200004)*t; + y[0] += CONSTANT(0.282094791773999990)*t; + y[8] += CONSTANT(0.141889406570999990)*t; + y[22] += 
CONSTANT(0.102084782360000000)*t; + + // [21,23]: 8,22,24, + tf = CONSTANT(-0.112621225039000000)*f[8] + CONSTANT(0.045015157794100001)*f[22] + CONSTANT(-0.119098912753000000)*f[24]; + tg = CONSTANT(-0.112621225039000000)*g[8] + CONSTANT(0.045015157794100001)*g[22] + CONSTANT(-0.119098912753000000)*g[24]; + y[21] += tf*g[23] + tg*f[23]; + y[23] += tf*g[21] + tg*f[21]; + t = f[21] * g[23] + f[23] * g[21]; + y[8] += CONSTANT(-0.112621225039000000)*t; + y[22] += CONSTANT(0.045015157794100001)*t; + y[24] += CONSTANT(-0.119098912753000000)*t; + + // [21,26]: 9,25, + tf = CONSTANT(-0.131668802182000000)*f[9] + CONSTANT(-0.130197596204999990)*f[25]; + tg = CONSTANT(-0.131668802182000000)*g[9] + CONSTANT(-0.130197596204999990)*g[25]; + y[21] += tf*g[26] + tg*f[26]; + y[26] += tf*g[21] + tg*f[21]; + t = f[21] * g[26] + f[26] * g[21]; + y[9] += CONSTANT(-0.131668802182000000)*t; + y[25] += CONSTANT(-0.130197596204999990)*t; + + // [21,28]: 27,1,11,9,29, + tf = CONSTANT(0.084042186968400004)*f[27] + CONSTANT(0.159122922869999990)*f[1] + CONSTANT(0.104682806111000000)*f[11] + CONSTANT(0.126698363970000010)*f[9] + CONSTANT(0.090775936911399999)*f[29]; + tg = CONSTANT(0.084042186968400004)*g[27] + CONSTANT(0.159122922869999990)*g[1] + CONSTANT(0.104682806111000000)*g[11] + CONSTANT(0.126698363970000010)*g[9] + CONSTANT(0.090775936911399999)*g[29]; + y[21] += tf*g[28] + tg*f[28]; + y[28] += tf*g[21] + tg*f[21]; + t = f[21] * g[28] + f[28] * g[21]; + y[27] += CONSTANT(0.084042186968400004)*t; + y[1] += CONSTANT(0.159122922869999990)*t; + y[11] += CONSTANT(0.104682806111000000)*t; + y[9] += CONSTANT(0.126698363970000010)*t; + y[29] += CONSTANT(0.090775936911399999)*t; + + // [21,31]: 14,2,30,12,32, + tf = CONSTANT(0.097749909977199997)*f[14] + CONSTANT(0.240571246744999990)*f[2] + CONSTANT(0.053152946072499999)*f[30] + CONSTANT(0.115089467124000010)*f[12] + CONSTANT(0.090775936912099994)*f[32]; + tg = CONSTANT(0.097749909977199997)*g[14] + CONSTANT(0.240571246744999990)*g[2] + 
CONSTANT(0.053152946072499999)*g[30] + CONSTANT(0.115089467124000010)*g[12] + CONSTANT(0.090775936912099994)*g[32]; + y[21] += tf*g[31] + tg*f[31]; + y[31] += tf*g[21] + tg*f[21]; + t = f[21] * g[31] + f[31] * g[21]; + y[14] += CONSTANT(0.097749909977199997)*t; + y[2] += CONSTANT(0.240571246744999990)*t; + y[30] += CONSTANT(0.053152946072499999)*t; + y[12] += CONSTANT(0.115089467124000010)*t; + y[32] += CONSTANT(0.090775936912099994)*t; + + // [21,33]: 32,14, + tf = CONSTANT(0.084042186967699994)*f[32] + CONSTANT(0.025339672793899998)*f[14]; + tg = CONSTANT(0.084042186967699994)*g[32] + CONSTANT(0.025339672793899998)*g[14]; + y[21] += tf*g[33] + tg*f[33]; + y[33] += tf*g[21] + tg*f[21]; + t = f[21] * g[33] + f[33] * g[21]; + y[32] += CONSTANT(0.084042186967699994)*t; + y[14] += CONSTANT(0.025339672793899998)*t; + + // [21,34]: 35, + tf = CONSTANT(-0.130197596205999990)*f[35]; + tg = CONSTANT(-0.130197596205999990)*g[35]; + y[21] += tf*g[34] + tg*f[34]; + y[34] += tf*g[21] + tg*f[21]; + t = f[21] * g[34] + f[34] * g[21]; + y[35] += CONSTANT(-0.130197596205999990)*t; + + // [22,22]: 6,20,0,24, + tf = CONSTANT(0.065535909662600006)*f[6] + CONSTANT(-0.083698454702400005)*f[20] + CONSTANT(0.282094791771999980)*f[0] + CONSTANT(0.135045473384000000)*f[24]; + tg = CONSTANT(0.065535909662600006)*g[6] + CONSTANT(-0.083698454702400005)*g[20] + CONSTANT(0.282094791771999980)*g[0] + CONSTANT(0.135045473384000000)*g[24]; + y[22] += tf*g[22] + tg*f[22]; + t = f[22] * g[22]; + y[6] += CONSTANT(0.065535909662600006)*t; + y[20] += CONSTANT(-0.083698454702400005)*t; + y[0] += CONSTANT(0.282094791771999980)*t; + y[24] += CONSTANT(0.135045473384000000)*t; + + // [22,26]: 10,28, + tf = CONSTANT(0.101358691174000000)*f[10] + CONSTANT(0.084042186965900004)*f[28]; + tg = CONSTANT(0.101358691174000000)*g[10] + CONSTANT(0.084042186965900004)*g[28]; + y[22] += tf*g[26] + tg*f[26]; + y[26] += tf*g[22] + tg*f[22]; + t = f[22] * g[26] + f[26] * g[22]; + y[10] += CONSTANT(0.101358691174000000)*t; 
+ y[28] += CONSTANT(0.084042186965900004)*t; + + // [22,27]: 1,11,25, + tf = CONSTANT(0.183739324704000010)*f[1] + CONSTANT(0.101990215611000000)*f[11] + CONSTANT(0.130197596200999990)*f[25]; + tg = CONSTANT(0.183739324704000010)*g[1] + CONSTANT(0.101990215611000000)*g[11] + CONSTANT(0.130197596200999990)*g[25]; + y[22] += tf*g[27] + tg*f[27]; + y[27] += tf*g[22] + tg*f[22]; + t = f[22] * g[27] + f[27] * g[22]; + y[1] += CONSTANT(0.183739324704000010)*t; + y[11] += CONSTANT(0.101990215611000000)*t; + y[25] += CONSTANT(0.130197596200999990)*t; + + // [22,32]: 2,30,12,34, + tf = CONSTANT(0.225033795606000010)*f[2] + CONSTANT(-0.099440056651100006)*f[30] + CONSTANT(0.022664492358099999)*f[12] + CONSTANT(0.084042186968800003)*f[34]; + tg = CONSTANT(0.225033795606000010)*g[2] + CONSTANT(-0.099440056651100006)*g[30] + CONSTANT(0.022664492358099999)*g[12] + CONSTANT(0.084042186968800003)*g[34]; + y[22] += tf*g[32] + tg*f[32]; + y[32] += tf*g[22] + tg*f[22]; + t = f[22] * g[32] + f[32] * g[22]; + y[2] += CONSTANT(0.225033795606000010)*t; + y[30] += CONSTANT(-0.099440056651100006)*t; + y[12] += CONSTANT(0.022664492358099999)*t; + y[34] += CONSTANT(0.084042186968800003)*t; + + // [22,33]: 3,13,35, + tf = CONSTANT(0.183739324704000010)*f[3] + CONSTANT(0.101990215611000000)*f[13] + CONSTANT(0.130197596200999990)*f[35]; + tg = CONSTANT(0.183739324704000010)*g[3] + CONSTANT(0.101990215611000000)*g[13] + CONSTANT(0.130197596200999990)*g[35]; + y[22] += tf*g[33] + tg*f[33]; + y[33] += tf*g[22] + tg*f[22]; + t = f[22] * g[33] + f[33] * g[22]; + y[3] += CONSTANT(0.183739324704000010)*t; + y[13] += CONSTANT(0.101990215611000000)*t; + y[35] += CONSTANT(0.130197596200999990)*t; + + // [23,23]: 6,20,0, + tf = CONSTANT(-0.057343920955899998)*f[6] + CONSTANT(-0.159787958979000000)*f[20] + CONSTANT(0.282094791768999990)*f[0]; + tg = CONSTANT(-0.057343920955899998)*g[6] + CONSTANT(-0.159787958979000000)*g[20] + CONSTANT(0.282094791768999990)*g[0]; + y[23] += tf*g[23] + tg*f[23]; + t = f[23] 
* g[23]; + y[6] += CONSTANT(-0.057343920955899998)*t; + y[20] += CONSTANT(-0.159787958979000000)*t; + y[0] += CONSTANT(0.282094791768999990)*t; + + // [23,26]: 1,11,29, + tf = CONSTANT(0.208340811096000000)*f[1] + CONSTANT(0.029982305185199998)*f[11] + CONSTANT(-0.118853600623999990)*f[29]; + tg = CONSTANT(0.208340811096000000)*g[1] + CONSTANT(0.029982305185199998)*g[11] + CONSTANT(-0.118853600623999990)*g[29]; + y[23] += tf*g[26] + tg*f[26]; + y[26] += tf*g[23] + tg*f[23]; + t = f[23] * g[26] + f[26] * g[23]; + y[1] += CONSTANT(0.208340811096000000)*t; + y[11] += CONSTANT(0.029982305185199998)*t; + y[29] += CONSTANT(-0.118853600623999990)*t; + + // [23,28]: 25,11,1,29, + tf = CONSTANT(-0.099440056652200001)*f[25] + CONSTANT(-0.121172043789000000)*f[11] + CONSTANT(0.060142811686500000)*f[1] + CONSTANT(-0.034310079156700000)*f[29]; + tg = CONSTANT(-0.099440056652200001)*g[25] + CONSTANT(-0.121172043789000000)*g[11] + CONSTANT(0.060142811686500000)*g[1] + CONSTANT(-0.034310079156700000)*g[29]; + y[23] += tf*g[28] + tg*f[28]; + y[28] += tf*g[23] + tg*f[23]; + t = f[23] * g[28] + f[28] * g[23]; + y[25] += CONSTANT(-0.099440056652200001)*t; + y[11] += CONSTANT(-0.121172043789000000)*t; + y[1] += CONSTANT(0.060142811686500000)*t; + y[29] += CONSTANT(-0.034310079156700000)*t; + + // [23,32]: 31,13,3,35, + tf = CONSTANT(0.034310079156599997)*f[31] + CONSTANT(0.121172043788000010)*f[13] + CONSTANT(-0.060142811686900000)*f[3] + CONSTANT(-0.099440056652700004)*f[35]; + tg = CONSTANT(0.034310079156599997)*g[31] + CONSTANT(0.121172043788000010)*g[13] + CONSTANT(-0.060142811686900000)*g[3] + CONSTANT(-0.099440056652700004)*g[35]; + y[23] += tf*g[32] + tg*f[32]; + y[32] += tf*g[23] + tg*f[23]; + t = f[23] * g[32] + f[32] * g[23]; + y[31] += CONSTANT(0.034310079156599997)*t; + y[13] += CONSTANT(0.121172043788000010)*t; + y[3] += CONSTANT(-0.060142811686900000)*t; + y[35] += CONSTANT(-0.099440056652700004)*t; + + // [23,33]: 2,30,12, + tf = CONSTANT(0.196425600433000000)*f[2] + 
CONSTANT(-0.130197596204999990)*f[30] + CONSTANT(-0.103861751821000010)*f[12]; + tg = CONSTANT(0.196425600433000000)*g[2] + CONSTANT(-0.130197596204999990)*g[30] + CONSTANT(-0.103861751821000010)*g[12]; + y[23] += tf*g[33] + tg*f[33]; + y[33] += tf*g[23] + tg*f[23]; + t = f[23] * g[33] + f[33] * g[23]; + y[2] += CONSTANT(0.196425600433000000)*t; + y[30] += CONSTANT(-0.130197596204999990)*t; + y[12] += CONSTANT(-0.103861751821000010)*t; + + // [23,34]: 3,13,31, + tf = CONSTANT(0.208340811100000000)*f[3] + CONSTANT(0.029982305185400002)*f[13] + CONSTANT(-0.118853600623000000)*f[31]; + tg = CONSTANT(0.208340811100000000)*g[3] + CONSTANT(0.029982305185400002)*g[13] + CONSTANT(-0.118853600623000000)*g[31]; + y[23] += tf*g[34] + tg*f[34]; + y[34] += tf*g[23] + tg*f[23]; + t = f[23] * g[34] + f[34] * g[23]; + y[3] += CONSTANT(0.208340811100000000)*t; + y[13] += CONSTANT(0.029982305185400002)*t; + y[31] += CONSTANT(-0.118853600623000000)*t; + + // [24,24]: 6,0,20, + tf = CONSTANT(-0.229375683829000000)*f[6] + CONSTANT(0.282094791763999990)*f[0] + CONSTANT(0.106525305981000000)*f[20]; + tg = CONSTANT(-0.229375683829000000)*g[6] + CONSTANT(0.282094791763999990)*g[0] + CONSTANT(0.106525305981000000)*g[20]; + y[24] += tf*g[24] + tg*f[24]; + t = f[24] * g[24]; + y[6] += CONSTANT(-0.229375683829000000)*t; + y[0] += CONSTANT(0.282094791763999990)*t; + y[20] += CONSTANT(0.106525305981000000)*t; + + // [24,29]: 9,27,25, + tf = CONSTANT(-0.035835708931400000)*f[9] + CONSTANT(0.118853600623000000)*f[27] + CONSTANT(0.053152946071199997)*f[25]; + tg = CONSTANT(-0.035835708931400000)*g[9] + CONSTANT(0.118853600623000000)*g[27] + CONSTANT(0.053152946071199997)*g[25]; + y[24] += tf*g[29] + tg*f[29]; + y[29] += tf*g[24] + tg*f[24]; + t = f[24] * g[29] + f[29] * g[24]; + y[9] += CONSTANT(-0.035835708931400000)*t; + y[27] += CONSTANT(0.118853600623000000)*t; + y[25] += CONSTANT(0.053152946071199997)*t; + + // [24,31]: 15,33,35, + tf = CONSTANT(0.035835708931400000)*f[15] + 
CONSTANT(-0.118853600623000000)*f[33] + CONSTANT(0.053152946071199997)*f[35]; + tg = CONSTANT(0.035835708931400000)*g[15] + CONSTANT(-0.118853600623000000)*g[33] + CONSTANT(0.053152946071199997)*g[35]; + y[24] += tf*g[31] + tg*f[31]; + y[31] += tf*g[24] + tg*f[24]; + t = f[24] * g[31] + f[31] * g[24]; + y[15] += CONSTANT(0.035835708931400000)*t; + y[33] += CONSTANT(-0.118853600623000000)*t; + y[35] += CONSTANT(0.053152946071199997)*t; + + // [24,34]: 12,30,2, + tf = CONSTANT(-0.207723503645000000)*f[12] + CONSTANT(0.130197596199999990)*f[30] + CONSTANT(0.147319200325000010)*f[2]; + tg = CONSTANT(-0.207723503645000000)*g[12] + CONSTANT(0.130197596199999990)*g[30] + CONSTANT(0.147319200325000010)*g[2]; + y[24] += tf*g[34] + tg*f[34]; + y[34] += tf*g[24] + tg*f[24]; + t = f[24] * g[34] + f[34] * g[24]; + y[12] += CONSTANT(-0.207723503645000000)*t; + y[30] += CONSTANT(0.130197596199999990)*t; + y[2] += CONSTANT(0.147319200325000010)*t; + + // [25,25]: 0,6,20, + tf = CONSTANT(0.282094791761999970)*f[0] + CONSTANT(-0.242608896358999990)*f[6] + CONSTANT(0.130197596198000000)*f[20]; + tg = CONSTANT(0.282094791761999970)*g[0] + CONSTANT(-0.242608896358999990)*g[6] + CONSTANT(0.130197596198000000)*g[20]; + y[25] += tf*g[25] + tg*f[25]; + t = f[25] * g[25]; + y[0] += CONSTANT(0.282094791761999970)*t; + y[6] += CONSTANT(-0.242608896358999990)*t; + y[20] += CONSTANT(0.130197596198000000)*t; + + // [26,26]: 6,20,0, + tf = CONSTANT(-0.097043558542400002)*f[6] + CONSTANT(-0.130197596207000000)*f[20] + CONSTANT(0.282094791766000000)*f[0]; + tg = CONSTANT(-0.097043558542400002)*g[6] + CONSTANT(-0.130197596207000000)*g[20] + CONSTANT(0.282094791766000000)*g[0]; + y[26] += tf*g[26] + tg*f[26]; + t = f[26] * g[26]; + y[6] += CONSTANT(-0.097043558542400002)*t; + y[20] += CONSTANT(-0.130197596207000000)*t; + y[0] += CONSTANT(0.282094791766000000)*t; + + // [27,27]: 0,20,6, + tf = CONSTANT(0.282094791770000020)*f[0] + CONSTANT(-0.130197596204999990)*f[20] + 
CONSTANT(0.016173926423100001)*f[6]; + tg = CONSTANT(0.282094791770000020)*g[0] + CONSTANT(-0.130197596204999990)*g[20] + CONSTANT(0.016173926423100001)*g[6]; + y[27] += tf*g[27] + tg*f[27]; + t = f[27] * g[27]; + y[0] += CONSTANT(0.282094791770000020)*t; + y[20] += CONSTANT(-0.130197596204999990)*t; + y[6] += CONSTANT(0.016173926423100001)*t; + + // [28,28]: 6,0,20,24, + tf = CONSTANT(0.097043558538800007)*f[6] + CONSTANT(0.282094791771999980)*f[0] + CONSTANT(-0.021699599367299999)*f[20] + CONSTANT(-0.128376561118000000)*f[24]; + tg = CONSTANT(0.097043558538800007)*g[6] + CONSTANT(0.282094791771999980)*g[0] + CONSTANT(-0.021699599367299999)*g[20] + CONSTANT(-0.128376561118000000)*g[24]; + y[28] += tf*g[28] + tg*f[28]; + t = f[28] * g[28]; + y[6] += CONSTANT(0.097043558538800007)*t; + y[0] += CONSTANT(0.282094791771999980)*t; + y[20] += CONSTANT(-0.021699599367299999)*t; + y[24] += CONSTANT(-0.128376561118000000)*t; + + // [29,29]: 20,6,0,22,8, + tf = CONSTANT(0.086798397468799998)*f[20] + CONSTANT(0.145565337808999990)*f[6] + CONSTANT(0.282094791773999990)*f[0] + CONSTANT(-0.097043558539500002)*f[22] + CONSTANT(-0.140070311615000000)*f[8]; + tg = CONSTANT(0.086798397468799998)*g[20] + CONSTANT(0.145565337808999990)*g[6] + CONSTANT(0.282094791773999990)*g[0] + CONSTANT(-0.097043558539500002)*g[22] + CONSTANT(-0.140070311615000000)*g[8]; + y[29] += tf*g[29] + tg*f[29]; + t = f[29] * g[29]; + y[20] += CONSTANT(0.086798397468799998)*t; + y[6] += CONSTANT(0.145565337808999990)*t; + y[0] += CONSTANT(0.282094791773999990)*t; + y[22] += CONSTANT(-0.097043558539500002)*t; + y[8] += CONSTANT(-0.140070311615000000)*t; + + // [30,30]: 0,20,6, + tf = CONSTANT(0.282094804531000000)*f[0] + CONSTANT(0.130197634486000000)*f[20] + CONSTANT(0.161739292769000010)*f[6]; + tg = CONSTANT(0.282094804531000000)*g[0] + CONSTANT(0.130197634486000000)*g[20] + CONSTANT(0.161739292769000010)*g[6]; + y[30] += tf*g[30] + tg*f[30]; + t = f[30] * g[30]; + y[0] += CONSTANT(0.282094804531000000)*t; 
+ y[20] += CONSTANT(0.130197634486000000)*t; + y[6] += CONSTANT(0.161739292769000010)*t; + + // [31,31]: 6,8,20,22,0, + tf = CONSTANT(0.145565337808999990)*f[6] + CONSTANT(0.140070311615000000)*f[8] + CONSTANT(0.086798397468799998)*f[20] + CONSTANT(0.097043558539500002)*f[22] + CONSTANT(0.282094791773999990)*f[0]; + tg = CONSTANT(0.145565337808999990)*g[6] + CONSTANT(0.140070311615000000)*g[8] + CONSTANT(0.086798397468799998)*g[20] + CONSTANT(0.097043558539500002)*g[22] + CONSTANT(0.282094791773999990)*g[0]; + y[31] += tf*g[31] + tg*f[31]; + t = f[31] * g[31]; + y[6] += CONSTANT(0.145565337808999990)*t; + y[8] += CONSTANT(0.140070311615000000)*t; + y[20] += CONSTANT(0.086798397468799998)*t; + y[22] += CONSTANT(0.097043558539500002)*t; + y[0] += CONSTANT(0.282094791773999990)*t; + + // [32,32]: 0,24,20,6, + tf = CONSTANT(0.282094791771999980)*f[0] + CONSTANT(0.128376561118000000)*f[24] + CONSTANT(-0.021699599367299999)*f[20] + CONSTANT(0.097043558538800007)*f[6]; + tg = CONSTANT(0.282094791771999980)*g[0] + CONSTANT(0.128376561118000000)*g[24] + CONSTANT(-0.021699599367299999)*g[20] + CONSTANT(0.097043558538800007)*g[6]; + y[32] += tf*g[32] + tg*f[32]; + t = f[32] * g[32]; + y[0] += CONSTANT(0.282094791771999980)*t; + y[24] += CONSTANT(0.128376561118000000)*t; + y[20] += CONSTANT(-0.021699599367299999)*t; + y[6] += CONSTANT(0.097043558538800007)*t; + + // [33,33]: 6,20,0, + tf = CONSTANT(0.016173926423100001)*f[6] + CONSTANT(-0.130197596204999990)*f[20] + CONSTANT(0.282094791770000020)*f[0]; + tg = CONSTANT(0.016173926423100001)*g[6] + CONSTANT(-0.130197596204999990)*g[20] + CONSTANT(0.282094791770000020)*g[0]; + y[33] += tf*g[33] + tg*f[33]; + t = f[33] * g[33]; + y[6] += CONSTANT(0.016173926423100001)*t; + y[20] += CONSTANT(-0.130197596204999990)*t; + y[0] += CONSTANT(0.282094791770000020)*t; + + // [34,34]: 20,6,0, + tf = CONSTANT(-0.130197596207000000)*f[20] + CONSTANT(-0.097043558542400002)*f[6] + CONSTANT(0.282094791766000000)*f[0]; + tg = 
CONSTANT(-0.130197596207000000)*g[20] + CONSTANT(-0.097043558542400002)*g[6] + CONSTANT(0.282094791766000000)*g[0]; + y[34] += tf*g[34] + tg*f[34]; + t = f[34] * g[34]; + y[20] += CONSTANT(-0.130197596207000000)*t; + y[6] += CONSTANT(-0.097043558542400002)*t; + y[0] += CONSTANT(0.282094791766000000)*t; + + // [35,35]: 6,0,20, + tf = CONSTANT(-0.242608896358999990)*f[6] + CONSTANT(0.282094791761999970)*f[0] + CONSTANT(0.130197596198000000)*f[20]; + tg = CONSTANT(-0.242608896358999990)*g[6] + CONSTANT(0.282094791761999970)*g[0] + CONSTANT(0.130197596198000000)*g[20]; + y[35] += tf*g[35] + tg*f[35]; + t = f[35] * g[35]; + y[6] += CONSTANT(-0.242608896358999990)*t; + y[0] += CONSTANT(0.282094791761999970)*t; + y[20] += CONSTANT(0.130197596198000000)*t; + + // multiply count=2527 + + return y; +} + + +//------------------------------------------------------------------------------------- +// Evaluates a directional light and returns spectral SH data. The output +// vector is computed so that if the intensity of R/G/B is unit the resulting +// exit radiance of a point directly under the light on a diffuse object with +// an albedo of 1 would be 1.0. This will compute 3 spectral samples, resultR +// has to be specified, while resultG and resultB are optional. +// +// http://msdn.microsoft.com/en-us/library/windows/desktop/bb204988.aspx +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +bool XM_CALLCONV DirectX::XMSHEvalDirectionalLight( + size_t order, + FXMVECTOR dir, + FXMVECTOR color, + float *resultR, + float *resultG, + float *resultB) noexcept +{ + if (!resultR) + return false; + + if (order < XM_SH_MINORDER || order > XM_SH_MAXORDER) + return false; + + XMFLOAT3A clr; + XMStoreFloat3A(&clr, color); + + float fTmp[XM_SH_MAXORDER * XM_SH_MAXORDER]; + + XMSHEvalDirection(fTmp, order, dir); // evaluate the BF in this direction... 
+ + // now compute "normalization" and scale vector for each valid spectral band + const float fNorm = XM_PI / CosWtInt(order); + + const size_t numcoeff = order*order; + + const float fRScale = fNorm * clr.x; + + for (size_t i = 0; i < numcoeff; ++i) + { + resultR[i] = fTmp[i] * fRScale; + } + + if (resultG) + { + const float fGScale = fNorm * clr.y; + + for (size_t i = 0; i < numcoeff; ++i) + { + resultG[i] = fTmp[i] * fGScale; + } + } + + if (resultB) + { + const float fBScale = fNorm * clr.z; + + for (size_t i = 0; i < numcoeff; ++i) + { + resultB[i] = fTmp[i] * fBScale; + } + } + + return true; +} + + +//------------------------------------------------------------------------------------ +// Evaluates a spherical light and returns spectral SH data. There is no +// normalization of the intensity of the light like there is for directional +// lights, care has to be taken when specifying the intensities. This will +// compute 3 spectral samples, resultR has to be specified, while resultG and +// resultB are optional. +// +// http://msdn.microsoft.com/en-us/library/windows/desktop/bb205451.aspx +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +bool XM_CALLCONV DirectX::XMSHEvalSphericalLight( + size_t order, + FXMVECTOR pos, + float radius, + FXMVECTOR color, + float *resultR, + float *resultG, + float *resultB) noexcept +{ + if (!resultR) + return false; + + if (radius < 0.f) + return false; + + const float fDist = XMVectorGetX(XMVector3Length(pos)); + + // WARNING: fDist should not be < radius - otherwise light contains origin + + //const float fSinConeAngle = (fDist <= radius) ? 0.99999f : radius/fDist; + const float fConeAngle = (fDist <= radius) ? 
(XM_PIDIV2) : asinf(radius / fDist); + + XMVECTOR dir = XMVector3Normalize(pos); + + float fTmpDir[XM_SH_MAXORDER* XM_SH_MAXORDER]; // rotation "vector" + float fTmpL0[XM_SH_MAXORDER]; + + // + // Sphere at distance fDist, the cone angle is determined by looking at the + // right triangle with one side (the hypotenuse) being the vector from the + // origin to the center of the sphere, another side is from the origin to + // a point on the sphere whose normal is perpendicular to the given side (this + // is one of the points on the cone that is defined by the projection of the sphere + // through the origin - we want to find the angle of this cone) and the final + // side being from the center of the sphere to the point of tangency (the two + // sides connected to this are at a right angle by construction.) + // From trig we know that sin(theta) = ||opposite||/||hypotenuse||, where + // ||opposite|| = Radius, ||hypotenuse|| = fDist + // theta is the angle of the cone that subtends the sphere from the origin + // + + // no default normalization is done for this case, have to be careful how + // you represent the coefficients... 
+ + const float fNewNorm = 1.0f;///(fSinConeAngle*fSinConeAngle); + + ComputeCapInt(order, fConeAngle, fTmpL0); + + XMFLOAT3A vd; + XMStoreFloat3(&vd, dir); + + const float fX = vd.x; + const float fY = vd.y; + const float fZ = vd.z; + + switch (order) + { + case 2: + sh_eval_basis_1(fX, fY, fZ, fTmpDir); + break; + + case 3: + sh_eval_basis_2(fX, fY, fZ, fTmpDir); + break; + + case 4: + sh_eval_basis_3(fX, fY, fZ, fTmpDir); + break; + + case 5: + sh_eval_basis_4(fX, fY, fZ, fTmpDir); + break; + + case 6: + sh_eval_basis_5(fX, fY, fZ, fTmpDir); + break; + + default: + assert(order < XM_SH_MINORDER || order > XM_SH_MAXORDER); + return false; + } + + XMFLOAT3A clr; + XMStoreFloat3A(&clr, color); + + for (size_t i = 0; i < order; ++i) + { + const size_t cNumCoefs = 2 * i + 1; + const size_t cStart = i*i; + const float fValUse = fTmpL0[i] * clr.x*fNewNorm*fExtraNormFac[i]; + for (size_t j = 0; j < cNumCoefs; ++j) resultR[cStart + j] = fTmpDir[cStart + j] * fValUse; + } + + if (resultG) + { + for (size_t i = 0; i < order; ++i) + { + const size_t cNumCoefs = 2 * i + 1; + const size_t cStart = i*i; + const float fValUse = fTmpL0[i] * clr.y*fNewNorm*fExtraNormFac[i]; + for (size_t j = 0; j < cNumCoefs; ++j) resultG[cStart + j] = fTmpDir[cStart + j] * fValUse; + } + } + + if (resultB) + { + for (size_t i = 0; i < order; ++i) + { + const size_t cNumCoefs = 2 * i + 1; + const size_t cStart = i*i; + const float fValUse = fTmpL0[i] * clr.z*fNewNorm*fExtraNormFac[i]; + for (size_t j = 0; j < cNumCoefs; ++j) resultB[cStart + j] = fTmpDir[cStart + j] * fValUse; + } + } + + return true; +} + + +//------------------------------------------------------------------------------------- +// Evaluates a light that is a cone of constant intensity and returns spectral +// SH data. 
The output vector is computed so that if the intensity of R/G/B is +// unit the resulting exit radiance of a point directly under the light oriented +// in the cone direction on a diffuse object with an albedo of 1 would be 1.0. +// This will compute 3 spectral samples, resultR has to be specified, while resultG +// and resultB are optional. +// +// http://msdn.microsoft.com/en-us/library/windows/desktop/bb204986.aspx +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +bool XM_CALLCONV DirectX::XMSHEvalConeLight( + size_t order, + FXMVECTOR dir, + float radius, + FXMVECTOR color, + float *resultR, + float *resultG, + float *resultB) noexcept +{ + if (!resultR) + return false; + + if (radius < 0.f || radius >(XM_PI*1.00001f)) + return false; + + if (radius < 0.0001f) + { + // turn it into a pure directional light... + return XMSHEvalDirectionalLight(order, dir, color, resultR, resultG, resultB); + } + else + { + float fTmpL0[XM_SH_MAXORDER]; + float fTmpDir[XM_SH_MAXORDER * XM_SH_MAXORDER]; + + const float fConeAngle = radius; + const float fAngCheck = (fConeAngle > XM_PIDIV2) ? 
(XM_PIDIV2) : fConeAngle; + + const float fNewNorm = 1.0f / (sinf(fAngCheck)*sinf(fAngCheck)); + + ComputeCapInt(order, fConeAngle, fTmpL0); + + XMFLOAT3A vd; + XMStoreFloat3(&vd, dir); + + const float fX = vd.x; + const float fY = vd.y; + const float fZ = vd.z; + + switch (order) + { + case 2: + sh_eval_basis_1(fX, fY, fZ, fTmpDir); + break; + + case 3: + sh_eval_basis_2(fX, fY, fZ, fTmpDir); + break; + + case 4: + sh_eval_basis_3(fX, fY, fZ, fTmpDir); + break; + + case 5: + sh_eval_basis_4(fX, fY, fZ, fTmpDir); + break; + + case 6: + sh_eval_basis_5(fX, fY, fZ, fTmpDir); + break; + + default: + assert(order < XM_SH_MINORDER || order > XM_SH_MAXORDER); + return false; + } + + XMFLOAT3A clr; + XMStoreFloat3A(&clr, color); + + for (size_t i = 0; i < order; ++i) + { + const size_t cNumCoefs = 2 * i + 1; + const size_t cStart = i*i; + const float fValUse = fTmpL0[i] * clr.x*fNewNorm*fExtraNormFac[i]; + for (size_t j = 0; j < cNumCoefs; ++j) + resultR[cStart + j] = fTmpDir[cStart + j] * fValUse; + } + + if (resultG) + { + for (size_t i = 0; i < order; ++i) + { + const size_t cNumCoefs = 2 * i + 1; + const size_t cStart = i*i; + const float fValUse = fTmpL0[i] * clr.y*fNewNorm*fExtraNormFac[i]; + for (size_t j = 0; j < cNumCoefs; ++j) + resultG[cStart + j] = fTmpDir[cStart + j] * fValUse; + } + } + + if (resultB) + { + for (size_t i = 0; i < order; ++i) + { + const size_t cNumCoefs = 2 * i + 1; + const size_t cStart = i*i; + const float fValUse = fTmpL0[i] * clr.z*fNewNorm*fExtraNormFac[i]; + for (size_t j = 0; j < cNumCoefs; ++j) + resultB[cStart + j] = fTmpDir[cStart + j] * fValUse; + } + } + } + + return true; +} + + +//------------------------------------------------------------------------------------ +// Evaluates a light that is a linear interpolant between two colors over the +// sphere. 
The interpolant is linear along the axis of the two points, not +// over the surface of the sphere (i.e., if the axis was (0,0,1) it is linear in +// Z, not in the azimuthal angle.) The resulting spherical lighting function +// is normalized so that a point on a perfectly diffuse surface with no +// shadowing and a normal pointed in the direction dir would result in exit +// radiance with a value of 1 if the top color was white and the bottom color +// was black. This is a very simple model where topColor represents the intensity +// of the "sky" and bottomColor represents the intensity of the "ground". +// +// http://msdn.microsoft.com/en-us/library/windows/desktop/bb204989.aspx +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +bool XM_CALLCONV DirectX::XMSHEvalHemisphereLight( + size_t order, + FXMVECTOR dir, + FXMVECTOR topColor, + FXMVECTOR bottomColor, + float *resultR, + float *resultG, + float *resultB) noexcept +{ + if (!resultR) + return false; + + if (order < XM_SH_MINORDER || order > XM_SH_MAXORDER) + return false; + + // separate R/G/B colors... + + float fTmpDir[XM_SH_MAXORDER * XM_SH_MAXORDER]; // rotation "vector" + float fTmpL0[XM_SH_MAXORDER]; + + const float fNewNorm = 3.0f / 2.0f; // normalizes things for 1 sky color, 0 ground color... 
+ + XMFLOAT3A vd; + XMStoreFloat3(&vd, dir); + + const float fX = vd.x; + const float fY = vd.y; + const float fZ = vd.z; + + sh_eval_basis_1(fX, fY, fZ, fTmpDir); + + XMFLOAT3A clrTop; + XMStoreFloat3A(&clrTop, topColor); + + XMFLOAT3A clrBottom; + XMStoreFloat3A(&clrBottom, bottomColor); + + float fA = clrTop.x; + float fAvrg = (clrTop.x + clrBottom.x)*0.5f; + + fTmpL0[0] = fAvrg*2.0f*SHEvalHemisphereLight_fSqrtPi; + fTmpL0[1] = (fA - fAvrg)*2.0f*SHEvalHemisphereLight_fSqrtPi3; + + size_t i = 0; + for (; i < 2; ++i) + { + _Analysis_assume_(i < order); + const size_t cNumCoefs = 2 * i + 1; + const size_t cStart = i*i; + const float fValUse = fTmpL0[i] * fNewNorm*fExtraNormFac[i]; + for (size_t j = 0; j < cNumCoefs; ++j) resultR[cStart + j] = fTmpDir[cStart + j] * fValUse; + } + + for (; i < order; ++i) + { + const size_t cNumCoefs = 2 * i + 1; + const size_t cStart = i*i; + for (size_t j = 0; j < cNumCoefs; ++j) resultR[cStart + j] = 0.0f; + } + + if (resultG) + { + fA = clrTop.y; + fAvrg = (clrTop.y + clrBottom.y)*0.5f; + + fTmpL0[0] = fAvrg*2.0f*SHEvalHemisphereLight_fSqrtPi; + fTmpL0[1] = (fA - fAvrg)*2.0f*SHEvalHemisphereLight_fSqrtPi3; + + for (i = 0; i < 2; ++i) + { + _Analysis_assume_(i < order); + const size_t cNumCoefs = 2 * i + 1; + const size_t cStart = i*i; + const float fValUse = fTmpL0[i] * fNewNorm*fExtraNormFac[i]; + for (size_t j = 0; j < cNumCoefs; ++j) resultG[cStart + j] = fTmpDir[cStart + j] * fValUse; + } + + for (; i < order; ++i) + { + const size_t cNumCoefs = 2 * i + 1; + const size_t cStart = i*i; + for (size_t j = 0; j < cNumCoefs; ++j) resultG[cStart + j] = 0.0f; + } + } + + if (resultB) + { + fA = clrTop.z; + fAvrg = (clrTop.z + clrBottom.z)*0.5f; + + fTmpL0[0] = fAvrg*2.0f*SHEvalHemisphereLight_fSqrtPi; + fTmpL0[1] = (fA - fAvrg)*2.0f*SHEvalHemisphereLight_fSqrtPi3; + + for (i = 0; i < 2; ++i) + { + _Analysis_assume_(i < order); + const size_t cNumCoefs = 2 * i + 1; + const size_t cStart = i*i; + const float fValUse = fTmpL0[i] * 
fNewNorm*fExtraNormFac[i]; + for (size_t j = 0; j < cNumCoefs; ++j) resultB[cStart + j] = fTmpDir[cStart + j] * fValUse; + } + + for (; i < order; ++i) + { + const size_t cNumCoefs = 2 * i + 1; + const size_t cStart = i*i; + for (size_t j = 0; j < cNumCoefs; ++j) resultB[cStart + j] = 0.0f; + } + } + + return true; +} diff --git a/Sdk/External/DirectXMath/SHMath/DirectXSH.h b/Sdk/External/DirectXMath/SHMath/DirectXSH.h new file mode 100644 index 0000000..2cfef54 --- /dev/null +++ b/Sdk/External/DirectXMath/SHMath/DirectXSH.h @@ -0,0 +1,72 @@ +//------------------------------------------------------------------------------------- +// DirectXSH.h -- C++ Spherical Harmonics Math Library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/p/?LinkId=262885 +//------------------------------------------------------------------------------------- + +#pragma once + +#define DIRECTX_SHMATH_VERSION 106 + +#include + +namespace DirectX +{ + const size_t XM_SH_MINORDER = 2; + const size_t XM_SH_MAXORDER = 6; + + float* XM_CALLCONV XMSHEvalDirection(_Out_writes_(order*order) float *result, _In_ size_t order, _In_ FXMVECTOR dir) noexcept; + + float* XM_CALLCONV XMSHRotate(_Out_writes_(order*order) float *result, _In_ size_t order, _In_ FXMMATRIX rotMatrix, _In_reads_(order*order) const float *input) noexcept; + + float* XMSHRotateZ(_Out_writes_(order*order) float *result, _In_ size_t order, _In_ float angle, _In_reads_(order*order) const float *input) noexcept; + + float* XMSHAdd(_Out_writes_(order*order) float *result, _In_ size_t order, _In_reads_(order*order) const float *inputA, _In_reads_(order*order) const float *inputB) noexcept; + + float* XMSHScale(_Out_writes_(order*order) float *result, _In_ size_t order, _In_reads_(order*order) const float *input, _In_ float scale) noexcept; + + float XMSHDot(_In_ size_t order, _In_reads_(order*order) const float *inputA, _In_reads_(order*order) 
const float *inputB) noexcept; + + float* XMSHMultiply(_Out_writes_(order*order) float *result, _In_ size_t order, _In_reads_(order*order) const float *inputF, _In_reads_(order*order) const float *inputG) noexcept; + + float* XMSHMultiply2(_Out_writes_(4) float *result, _In_reads_(4) const float *inputF, _In_reads_(4) const float *inputG) noexcept; + + float* XMSHMultiply3(_Out_writes_(9) float *result, _In_reads_(9) const float *inputF, _In_reads_(9) const float *inputG) noexcept; + + float* XMSHMultiply4(_Out_writes_(16) float *result, _In_reads_(16) const float *inputF, _In_reads_(16) const float *inputG) noexcept; + + float* XMSHMultiply5(_Out_writes_(25) float *result, _In_reads_(25) const float *inputF, _In_reads_(25) const float *inputG) noexcept; + + float* XMSHMultiply6(_Out_writes_(36) float *result, _In_reads_(36) const float *inputF, _In_reads_(36) const float *inputG) noexcept; + + bool XM_CALLCONV XMSHEvalDirectionalLight( + _In_ size_t order, _In_ FXMVECTOR dir, _In_ FXMVECTOR color, + _Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept; + + bool XM_CALLCONV XMSHEvalSphericalLight( + _In_ size_t order, _In_ FXMVECTOR pos, _In_ float radius, _In_ FXMVECTOR color, + _Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept; + + bool XM_CALLCONV XMSHEvalConeLight( + _In_ size_t order, _In_ FXMVECTOR dir, _In_ float radius, _In_ FXMVECTOR color, + _Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept; + + bool XM_CALLCONV XMSHEvalHemisphereLight( + _In_ size_t order, _In_ FXMVECTOR dir, _In_ FXMVECTOR topColor, _In_ FXMVECTOR bottomColor, + _Out_writes_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept; + + #if 
defined(__d3d11_h__) || defined(__d3d11_x_h__) + HRESULT SHProjectCubeMap( + _In_ ID3D11DeviceContext *context, _In_ size_t order, _In_ ID3D11Texture2D *cubeMap, + _Out_writes_opt_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept; + #endif + + #if defined(__d3d12_h__) || defined(__d3d12_x_h__) || defined(__XBOX_D3D12_X__) + HRESULT SHProjectCubeMap( + _In_ size_t order, _In_ const D3D12_RESOURCE_DESC& desc, _In_ const D3D12_SUBRESOURCE_DATA cubeMap[6], + _Out_writes_opt_(order*order) float *resultR, _Out_writes_opt_(order*order) float *resultG, _Out_writes_opt_(order*order) float *resultB) noexcept; + #endif +} // namespace DirectX diff --git a/Sdk/External/DirectXMath/SHMath/DirectXSHD3D11.cpp b/Sdk/External/DirectXMath/SHMath/DirectXSHD3D11.cpp new file mode 100644 index 0000000..5f270a6 --- /dev/null +++ b/Sdk/External/DirectXMath/SHMath/DirectXSHD3D11.cpp @@ -0,0 +1,376 @@ +//------------------------------------------------------------------------------------- +// DirectXSHD3D11.cpp -- C++ Spherical Harmonics Math Library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/p/?LinkId=262885 +//------------------------------------------------------------------------------------- + +#pragma warning( disable : 4616 4619 4061 4265 4626 5039 ) +// C4616/C4619 #pragma warning warnings +// C4061 enumerator 'identifier' in switch of enum 'enumeration' is not explicitly handled by a case label +// C4265 class has virtual functions, but destructor is not virtual +// C4626 assignment operator was implicitly defined as deleted +// C5039 pointer or reference to potentially throwing function passed to extern C function under - EHc + +#pragma warning(push) +#pragma warning(disable: 4365) +#include +#pragma warning(pop) + +#include "DirectXSH.h" + +#include + +#include +#include +#include + +#include + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wcovered-switch-default" +#pragma clang diagnostic ignored "-Wswitch-enum" +#endif + +using namespace DirectX; + +using Microsoft::WRL::ComPtr; + +namespace +{ + struct aligned_deleter { void operator()(void* p) { _aligned_free(p); } }; + + using ScopedAlignedArrayXMVECTOR = std::unique_ptr; + + //------------------------------------------------------------------------------------- + // This code is lifted from DirectXTex http://go.microsoft.com/fwlink/?LinkId=248926 + // If you need additional DXGI format support, see DirectXTexConvert.cpp + //------------------------------------------------------------------------------------- +#define LOAD_SCANLINE( type, func )\ + if ( size >= sizeof(type) )\ + {\ + const type * __restrict sPtr = reinterpret_cast(pSource);\ + for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\ + {\ + if ( dPtr >= ePtr ) break;\ + *(dPtr++) = func( sPtr++ );\ + }\ + return true;\ + }\ + return false; + +#define LOAD_SCANLINE3( type, func, defvec )\ + if ( size >= sizeof(type) )\ + {\ + const type * __restrict sPtr = reinterpret_cast(pSource);\ + for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); 
icount += sizeof(type) )\ + {\ + XMVECTOR v = func( sPtr++ );\ + if ( dPtr >= ePtr ) break;\ + *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1110 );\ + }\ + return true;\ + }\ + return false; + +#define LOAD_SCANLINE2( type, func, defvec )\ + if ( size >= sizeof(type) )\ + {\ + const type * __restrict sPtr = reinterpret_cast(pSource);\ + for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\ + {\ + XMVECTOR v = func( sPtr++ );\ + if ( dPtr >= ePtr ) break;\ + *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1100 );\ + }\ + return true;\ + }\ + return false; + +#pragma warning(push) +#pragma warning(disable : 6101) + _Success_(return) + bool _LoadScanline( + _Out_writes_(count) DirectX::XMVECTOR* pDestination, + size_t count, + _In_reads_bytes_(size) LPCVOID pSource, + size_t size, + DXGI_FORMAT format) + { + assert(pDestination && count > 0 && ((reinterpret_cast(pDestination) & 0xF) == 0)); + assert(pSource && size > 0); + + using namespace DirectX::PackedVector; + + XMVECTOR* __restrict dPtr = pDestination; + if (!dPtr) + return false; + + const XMVECTOR* ePtr = pDestination + count; + + switch (format) + { + case DXGI_FORMAT_R32G32B32A32_FLOAT: + { + size_t msize = (size > (sizeof(XMVECTOR)*count)) ? 
(sizeof(XMVECTOR)*count) : size; + memcpy_s(dPtr, sizeof(XMVECTOR)*count, pSource, msize); + } + return true; + + case DXGI_FORMAT_R32G32B32_FLOAT: + LOAD_SCANLINE3(XMFLOAT3, XMLoadFloat3, g_XMIdentityR3) + + case DXGI_FORMAT_R16G16B16A16_FLOAT: + LOAD_SCANLINE(XMHALF4, XMLoadHalf4) + + case DXGI_FORMAT_R32G32_FLOAT: + LOAD_SCANLINE2(XMFLOAT2, XMLoadFloat2, g_XMIdentityR3) + + case DXGI_FORMAT_R11G11B10_FLOAT: + LOAD_SCANLINE3(XMFLOAT3PK, XMLoadFloat3PK, g_XMIdentityR3) + + case DXGI_FORMAT_R16G16_FLOAT: + LOAD_SCANLINE2(XMHALF2, XMLoadHalf2, g_XMIdentityR3) + + case DXGI_FORMAT_R32_FLOAT: + if (size >= sizeof(float)) + { + const float* __restrict sPtr = reinterpret_cast(pSource); + for (size_t icount = 0; icount < size; icount += sizeof(float)) + { + XMVECTOR v = XMLoadFloat(sPtr++); + if (dPtr >= ePtr) break; + *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1000); + } + return true; + } + return false; + + case DXGI_FORMAT_R16_FLOAT: + if (size >= sizeof(HALF)) + { + const HALF * __restrict sPtr = reinterpret_cast(pSource); + for (size_t icount = 0; icount < size; icount += sizeof(HALF)) + { + if (dPtr >= ePtr) break; + *(dPtr++) = XMVectorSet(XMConvertHalfToFloat(*sPtr++), 0.f, 0.f, 1.f); + } + return true; + } + return false; + + default: + return false; + } + } +#pragma warning(pop) + +} // namespace anonymous + +//------------------------------------------------------------------------------------- +// Projects a function represented in a cube map into spherical harmonics. 
+// +// http://msdn.microsoft.com/en-us/library/windows/desktop/ff476300.aspx +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT DirectX::SHProjectCubeMap( + ID3D11DeviceContext *context, + size_t order, + ID3D11Texture2D *cubeMap, + float *resultR, + float *resultG, + float* resultB) noexcept +{ + if (!context || !cubeMap) + return E_INVALIDARG; + + if (order < XM_SH_MINORDER || order > XM_SH_MAXORDER) + return E_INVALIDARG; + + D3D11_TEXTURE2D_DESC desc; + cubeMap->GetDesc(&desc); + + if ((desc.ArraySize != 6) + || (desc.Width != desc.Height) + || (desc.SampleDesc.Count > 1)) + return E_FAIL; + + switch (desc.Format) + { + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R32G32B32_FLOAT: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + case DXGI_FORMAT_R32G32_FLOAT: + case DXGI_FORMAT_R11G11B10_FLOAT: + case DXGI_FORMAT_R16G16_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_R16_FLOAT: + // See _LoadScanline to support more pixel formats + break; + + default: + return E_FAIL; + } + + //--- Create a staging resource copy (if needed) to be able to read data + ID3D11Texture2D* texture = nullptr; + + ComPtr staging; + if (!(desc.CPUAccessFlags & D3D11_CPU_ACCESS_READ)) + { + D3D11_TEXTURE2D_DESC sdesc = desc; + sdesc.BindFlags = 0; + sdesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + sdesc.Usage = D3D11_USAGE_STAGING; + + ComPtr device; + context->GetDevice(&device); + + HRESULT hr = device->CreateTexture2D(&sdesc, nullptr, &staging); + if (FAILED(hr)) + return hr; + + context->CopyResource(staging.Get(), cubeMap); + + texture = staging.Get(); + } + else + texture = cubeMap; + + assert(texture != nullptr); + + //--- Setup for SH projection + ScopedAlignedArrayXMVECTOR scanline(reinterpret_cast(_aligned_malloc(sizeof(XMVECTOR)*desc.Width, 16))); + if (!scanline) + return E_OUTOFMEMORY; + + assert(desc.Width > 0); + float fSize = static_cast(desc.Width); + float fPicSize = 1.0f / fSize; + + // 
index from [0,W-1], f(0) maps to -1 + 1/W, f(W-1) maps to 1 - 1/w + // linear function x*S +B, 1st constraint means B is (-1+1/W), plug into + // second and solve for S: S = 2*(1-1/W)/(W-1). The old code that did + // this was incorrect - but only for computing the differential solid + // angle, where the final value was 1.0 instead of 1-1/w... + + float fB = -1.0f + 1.0f / fSize; + float fS = (desc.Width > 1) ? (2.0f*(1.0f - 1.0f / fSize) / (fSize - 1.0f)) : 0.f; + + // clear out accumulation variables + float fWt = 0.0f; + + if (resultR) + memset(resultR, 0, sizeof(float)*order*order); + if (resultG) + memset(resultG, 0, sizeof(float)*order*order); + if (resultB) + memset(resultB, 0, sizeof(float)*order*order); + + float shBuff[XM_SH_MAXORDER*XM_SH_MAXORDER] = {}; + float shBuffB[XM_SH_MAXORDER*XM_SH_MAXORDER] = {}; + + //--- Process each face of the cubemap + for (UINT face = 0; face < 6; ++face) + { + UINT dindex = D3D11CalcSubresource(0, face, desc.MipLevels); + + D3D11_MAPPED_SUBRESOURCE mapped; + HRESULT hr = context->Map(texture, dindex, D3D11_MAP_READ, 0, &mapped); + if (FAILED(hr)) + return hr; + + const uint8_t *pSrc = reinterpret_cast(mapped.pData); + for (UINT y = 0; y < desc.Height; ++y) + { + XMVECTOR* ptr = scanline.get(); + if (!_LoadScanline(ptr, desc.Width, pSrc, mapped.RowPitch, desc.Format)) + { + context->Unmap(texture, dindex); + return E_FAIL; + } + + const float v = float(y) * fS + fB; + + XMVECTOR* pixel = ptr; + for (UINT x = 0; x < desc.Width; ++x, ++pixel) + { + const float u = float(x) * fS + fB; + + float ix, iy, iz; + switch (face) + { + case 0: // Positive X + iz = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize; + iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize; + ix = 1.0f; + break; + + case 1: // Negative X + iz = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize; + iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize; + ix = -1; + break; + + case 2: // Positive Y + iz = -1.0f + (2.0f * float(y) + 1.0f) * fPicSize; + iy = 1.0f; + ix = -1.0f + 
(2.0f * float(x) + 1.0f) * fPicSize; + break; + + case 3: // Negative Y + iz = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize; + iy = -1.0f; + ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize; + break; + + case 4: // Positive Z + iz = 1.0f; + iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize; + ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize; + break; + + case 5: // Negative Z + iz = -1.0f; + iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize; + ix = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize; + break; + + default: + ix = iy = iz = 0.f; + assert(false); + break; + } + + XMVECTOR dir = XMVectorSet(ix, iy, iz, 0); + dir = XMVector3Normalize(dir); + + const float fDiffSolid = 4.0f / ((1.0f + u * u + v * v)*sqrtf(1.0f + u * u + v * v)); + fWt += fDiffSolid; + + XMSHEvalDirection(shBuff, order, dir); + + XMFLOAT3A clr; + XMStoreFloat3A(&clr, *pixel); + + if (resultR) XMSHAdd(resultR, order, resultR, XMSHScale(shBuffB, order, shBuff, clr.x*fDiffSolid)); + if (resultG) XMSHAdd(resultG, order, resultG, XMSHScale(shBuffB, order, shBuff, clr.y*fDiffSolid)); + if (resultB) XMSHAdd(resultB, order, resultB, XMSHScale(shBuffB, order, shBuff, clr.z*fDiffSolid)); + } + + pSrc += mapped.RowPitch; + } + + context->Unmap(texture, dindex); + } + + const float fNormProj = (4.0f*XM_PI) / fWt; + + if (resultR) XMSHScale(resultR, order, resultR, fNormProj); + if (resultG) XMSHScale(resultG, order, resultG, fNormProj); + if (resultB) XMSHScale(resultB, order, resultB, fNormProj); + + return S_OK; +} diff --git a/Sdk/External/DirectXMath/SHMath/DirectXSHD3D12.cpp b/Sdk/External/DirectXMath/SHMath/DirectXSHD3D12.cpp new file mode 100644 index 0000000..ff62a82 --- /dev/null +++ b/Sdk/External/DirectXMath/SHMath/DirectXSHD3D12.cpp @@ -0,0 +1,334 @@ +//------------------------------------------------------------------------------------- +// DirectXSHD3D12.cpp -- C++ Spherical Harmonics Math Library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/p/?LinkId=262885 +//------------------------------------------------------------------------------------- + +#pragma warning( disable : 4616 4619 4061 4265 4626 5039 ) +// C4616/C4619 #pragma warning warnings +// C4061 enumerator 'identifier' in switch of enum 'enumeration' is not explicitly handled by a case label +// C4265 class has virtual functions, but destructor is not virtual +// C4626 assignment operator was implicitly defined as deleted +// C5039 pointer or reference to potentially throwing function passed to extern C function under - EHc + +#include + +#include "DirectXSH.h" + +#include + +#include +#include +#include + +#include + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wcovered-switch-default" +#pragma clang diagnostic ignored "-Wswitch-enum" +#endif + +using namespace DirectX; + +using Microsoft::WRL::ComPtr; + +namespace +{ + struct aligned_deleter { void operator()(void* p) { _aligned_free(p); } }; + + using ScopedAlignedArrayXMVECTOR = std::unique_ptr; + + //------------------------------------------------------------------------------------- + // This code is lifted from DirectXTex http://go.microsoft.com/fwlink/?LinkId=248926 + // If you need additional DXGI format support, see DirectXTexConvert.cpp + //------------------------------------------------------------------------------------- +#define LOAD_SCANLINE( type, func )\ + if ( size >= sizeof(type) )\ + {\ + const type * __restrict sPtr = reinterpret_cast(pSource);\ + for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\ + {\ + if ( dPtr >= ePtr ) break;\ + *(dPtr++) = func( sPtr++ );\ + }\ + return true;\ + }\ + return false; + +#define LOAD_SCANLINE3( type, func, defvec )\ + if ( size >= sizeof(type) )\ + {\ + const type * __restrict sPtr = reinterpret_cast(pSource);\ + for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\ + {\ + XMVECTOR v = func( sPtr++ );\ + if ( dPtr >= 
ePtr ) break;\ + *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1110 );\ + }\ + return true;\ + }\ + return false; + +#define LOAD_SCANLINE2( type, func, defvec )\ + if ( size >= sizeof(type) )\ + {\ + const type * __restrict sPtr = reinterpret_cast(pSource);\ + for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\ + {\ + XMVECTOR v = func( sPtr++ );\ + if ( dPtr >= ePtr ) break;\ + *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1100 );\ + }\ + return true;\ + }\ + return false; + +#pragma warning(push) +#pragma warning(disable : 6101) + _Success_(return) + bool _LoadScanline( // expands one source row of 'format' texels into XMVECTORs; false for an unhandled format or a row shorter than one texel + _Out_writes_(count) DirectX::XMVECTOR* pDestination, + size_t count, // XMVECTOR slots available at pDestination + _In_reads_bytes_(size) LPCVOID pSource, + size_t size, // bytes readable at pSource + DXGI_FORMAT format) + { + assert(pDestination && count > 0 && ((reinterpret_cast(pDestination) & 0xF) == 0)); + assert(pSource && size > 0); + + using namespace DirectX::PackedVector; + + XMVECTOR* __restrict dPtr = pDestination; + if (!dPtr) + return false; + + const XMVECTOR* ePtr = pDestination + count; + + switch (format) + { + case DXGI_FORMAT_R32G32B32A32_FLOAT: + { + size_t msize = (size > (sizeof(XMVECTOR)*count)) ? 
(sizeof(XMVECTOR)*count) : size; + memcpy_s(dPtr, sizeof(XMVECTOR)*count, pSource, msize); + } + return true; + + case DXGI_FORMAT_R32G32B32_FLOAT: + LOAD_SCANLINE3(XMFLOAT3, XMLoadFloat3, g_XMIdentityR3) + + case DXGI_FORMAT_R16G16B16A16_FLOAT: + LOAD_SCANLINE(XMHALF4, XMLoadHalf4) + + case DXGI_FORMAT_R32G32_FLOAT: + LOAD_SCANLINE2(XMFLOAT2, XMLoadFloat2, g_XMIdentityR3) + + case DXGI_FORMAT_R11G11B10_FLOAT: + LOAD_SCANLINE3(XMFLOAT3PK, XMLoadFloat3PK, g_XMIdentityR3) + + case DXGI_FORMAT_R16G16_FLOAT: + LOAD_SCANLINE2(XMHALF2, XMLoadHalf2, g_XMIdentityR3) + + case DXGI_FORMAT_R32_FLOAT: + if (size >= sizeof(float)) + { + const float* __restrict sPtr = reinterpret_cast(pSource); + for (size_t icount = 0; icount < (size - sizeof(float) + 1); icount += sizeof(float)) // same bound as LOAD_SCANLINE*: never begin a read that would run past size + { + XMVECTOR v = XMLoadFloat(sPtr++); + if (dPtr >= ePtr) break; + *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1000); + } + return true; + } + return false; + + case DXGI_FORMAT_R16_FLOAT: + if (size >= sizeof(HALF)) + { + const HALF * __restrict sPtr = reinterpret_cast(pSource); + for (size_t icount = 0; icount < (size - sizeof(HALF) + 1); icount += sizeof(HALF)) // same bound as LOAD_SCANLINE*: never begin a read that would run past size + { + if (dPtr >= ePtr) break; + *(dPtr++) = XMVectorSet(XMConvertHalfToFloat(*sPtr++), 0.f, 0.f, 1.f); + } + return true; + } + return false; + + default: + return false; + } + } +#pragma warning(pop) + +} // namespace anonymous + +//------------------------------------------------------------------------------------- +// Projects a function represented in a cube map into spherical harmonics. 
+// +// http://msdn.microsoft.com/en-us/library/windows/desktop/ff476300.aspx +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT DirectX::SHProjectCubeMap( + size_t order, + const D3D12_RESOURCE_DESC& desc, + const D3D12_SUBRESOURCE_DATA cubeMap[6], + float *resultR, + float *resultG, + float *resultB) noexcept +{ + if (order < XM_SH_MINORDER || order > XM_SH_MAXORDER) + return E_INVALIDARG; + + if (desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE2D + || (desc.DepthOrArraySize != 6) + || (desc.Width != desc.Height) + || (desc.SampleDesc.Count > 1)) + return E_FAIL; + + switch (desc.Format) + { + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R32G32B32_FLOAT: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + case DXGI_FORMAT_R32G32_FLOAT: + case DXGI_FORMAT_R11G11B10_FLOAT: + case DXGI_FORMAT_R16G16_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_R16_FLOAT: + // See _LoadScanline to support more pixel formats + break; + + default: + return E_FAIL; + } + + //--- Setup for SH projection + ScopedAlignedArrayXMVECTOR scanline(reinterpret_cast(_aligned_malloc(static_cast(sizeof(XMVECTOR)*desc.Width), 16))); + if (!scanline) + return E_OUTOFMEMORY; + + assert(desc.Width > 0); + float fSize = static_cast(desc.Width); + float fPicSize = 1.0f / fSize; + + // index from [0,W-1], f(0) maps to -1 + 1/W, f(W-1) maps to 1 - 1/w + // linear function x*S +B, 1st constraint means B is (-1+1/W), plug into + // second and solve for S: S = 2*(1-1/W)/(W-1). The old code that did + // this was incorrect - but only for computing the differential solid + // angle, where the final value was 1.0 instead of 1-1/w... + + float fB = -1.0f + 1.0f / fSize; + float fS = (desc.Width > 1) ? 
(2.0f*(1.0f - 1.0f / fSize) / (fSize - 1.0f)) : 0.f; + + // clear out accumulation variables + float fWt = 0.0f; + + if (resultR) + memset(resultR, 0, sizeof(float)*order*order); + if (resultG) + memset(resultG, 0, sizeof(float)*order*order); + if (resultB) + memset(resultB, 0, sizeof(float)*order*order); + + float shBuff[XM_SH_MAXORDER*XM_SH_MAXORDER] = {}; + float shBuffB[XM_SH_MAXORDER*XM_SH_MAXORDER] = {}; + + //--- Process each face of the cubemap + for (UINT face = 0; face < 6; ++face) + { + if (!cubeMap[face].pData) + return E_POINTER; + + const uint8_t *pSrc = reinterpret_cast(cubeMap[face].pData); + for (UINT y = 0; y < desc.Height; ++y) + { + XMVECTOR* ptr = scanline.get(); + if (!_LoadScanline(ptr, static_cast(desc.Width), pSrc, static_cast(cubeMap[face].RowPitch), desc.Format)) + { + return E_FAIL; + } + + const float v = float(y) * fS + fB; + + XMVECTOR* pixel = ptr; + for (UINT x = 0; x < desc.Width; ++x, ++pixel) + { + const float u = float(x) * fS + fB; + + float ix, iy, iz; + switch (face) + { + case 0: // Positive X + iz = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize; + iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize; + ix = 1.0f; + break; + + case 1: // Negative X + iz = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize; + iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize; + ix = -1; + break; + + case 2: // Positive Y + iz = -1.0f + (2.0f * float(y) + 1.0f) * fPicSize; + iy = 1.0f; + ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize; + break; + + case 3: // Negative Y + iz = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize; + iy = -1.0f; + ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize; + break; + + case 4: // Positive Z + iz = 1.0f; + iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize; + ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize; + break; + + case 5: // Negative Z + iz = -1.0f; + iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize; + ix = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize; + break; + + default: + ix = iy = iz = 0.f; + assert(false); + break; + } + + 
XMVECTOR dir = XMVectorSet(ix, iy, iz, 0); + dir = XMVector3Normalize(dir); + + const float fDiffSolid = 4.0f / ((1.0f + u * u + v * v)*sqrtf(1.0f + u * u + v * v)); + fWt += fDiffSolid; + + XMSHEvalDirection(shBuff, order, dir); + + XMFLOAT3A clr; + XMStoreFloat3A(&clr, *pixel); + + if (resultR) XMSHAdd(resultR, order, resultR, XMSHScale(shBuffB, order, shBuff, clr.x*fDiffSolid)); + if (resultG) XMSHAdd(resultG, order, resultG, XMSHScale(shBuffB, order, shBuff, clr.y*fDiffSolid)); + if (resultB) XMSHAdd(resultB, order, resultB, XMSHScale(shBuffB, order, shBuff, clr.z*fDiffSolid)); + } + + pSrc += cubeMap[face].RowPitch; + } + } + + const float fNormProj = (4.0f*XM_PI) / fWt; + + if (resultR) XMSHScale(resultR, order, resultR, fNormProj); + if (resultG) XMSHScale(resultG, order, resultG, fNormProj); + if (resultB) XMSHScale(resultB, order, resultB, fNormProj); + + return S_OK; +} diff --git a/Sdk/External/DirectXMath/Stereo3D/Stereo3DMatrixHelper.cpp b/Sdk/External/DirectXMath/Stereo3D/Stereo3DMatrixHelper.cpp new file mode 100644 index 0000000..dff03c9 --- /dev/null +++ b/Sdk/External/DirectXMath/Stereo3D/Stereo3DMatrixHelper.cpp @@ -0,0 +1,257 @@ +//------------------------------------------------------------------------------------- +// Stereo3DMatrixHelper.cpp -- SIMD C++ Math helper for Stereo 3D matrices +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +//------------------------------------------------------------------------------------- + +#include "Stereo3DMatrixHelper.h" + +using namespace DirectX; + +namespace +{ + inline bool StereoProjectionHelper + ( + const STEREO_PARAMETERS& stereoParameters, + _Out_ float* fVirtualProjection, + _Out_ float* zNearWidth, + _Out_ float* zNearHeight, + float FovAngleY, + float AspectRatio, + float NearZ + ) + { + // note that most people have difficulty fusing images into 3D + // if the separation equals even just the human average. 
by + // reducing the separation (interocular distance) by 1/2, we + // guarantee a larger subset of people will see full 3D + + // the conservative setting should always be used. the only problem + // with the conservative setting is that the 3D effect will be less + // impressive on smaller screens (which makes sense, since your eye + // cannot be tricked as easily based on the smaller fov). to simulate + // the effect of a larger screen, use the liberal settings (debug only) + + // Conservative Settings: * max acuity angle: 0.8f degrees * interoc distance: 1.25 inches + + // Liberal Settings: * max acuity angle: 1.6f degrees * interoc distance: 2.5f inches + + // maximum visual acuity angle allowed is 3.2 degrees for + // a physical scene, and 1.6 degrees for a virtual one. + // thus we cannot allow an object to appear any closer to + // the viewer than 1.6 degrees (divided by two for most + // half-angle calculations) + + static const float fMaxStereoDistance = 780; // inches (should be between 10 and 20m) + static const float fMaxVisualAcuityAngle = 1.6f * (XM_PI / 180.0f); // radians + static const float fInterocularDistance = 1.25f; // inches + + float fDisplayHeight = stereoParameters.fDisplaySizeInches / sqrtf(AspectRatio * AspectRatio + 1.0f); + float fDisplayWidth = fDisplayHeight * AspectRatio; + float fHalfInterocular = 0.5f * fInterocularDistance * stereoParameters.fStereoExaggerationFactor; + float fHalfPixelWidth = fDisplayWidth / stereoParameters.fPixelResolutionWidth * 0.5f; + float fHalfMaximumAcuityAngle = fMaxVisualAcuityAngle * 0.5f * stereoParameters.fStereoExaggerationFactor; + // float fHalfWidth = fDisplayWidth * 0.5f; + + float fMaxSeparationAcuityAngle = atanf(fHalfInterocular / fMaxStereoDistance); + float fMaxSeparationDistance = fHalfPixelWidth / tanf(fMaxSeparationAcuityAngle); + float fRefinedMaxStereoDistance = fMaxStereoDistance - fMaxSeparationDistance; + float fFovHalfAngle = FovAngleY / 2.0f; + + bool ComfortableResult = true; 
+ if (fRefinedMaxStereoDistance < 0.0f || fMaxSeparationDistance > 0.1f * fMaxStereoDistance) + { + // Pixel resolution is too low to offer a comfortable stereo experience + ComfortableResult = false; + } + + float fRefinedMaxSeparationAcuityAngle = atanf(fHalfInterocular / (fRefinedMaxStereoDistance)); + float fPhysicalZNearDistance = fHalfInterocular / tanf(fHalfMaximumAcuityAngle); + // float fScalingFactor = fHalfMaximumAcuityAngle / atanf(fHalfInterocular / stereoParameters.fViewerDistanceInches); + + float fNearZSeparation = tanf(fRefinedMaxSeparationAcuityAngle) * (fRefinedMaxStereoDistance - fPhysicalZNearDistance); + // float fNearZSeparation2 = fHalfInterocular * (fRefinedMaxStereoDistance - fPhysicalZNearDistance) / fRefinedMaxStereoDistance; + + (*zNearHeight) = cosf(fFovHalfAngle) / sinf(fFovHalfAngle); + (*zNearWidth) = (*zNearHeight) / AspectRatio; + (*fVirtualProjection) = (fNearZSeparation * NearZ * (*zNearWidth * 4.0f)) / (2.0f * NearZ); + + return ComfortableResult; + } +} + +//------------------------------------------------------------------------------ + +void DirectX::StereoCreateDefaultParameters +( + STEREO_PARAMETERS& stereoParameters +) +{ + // Default assumption is 1920x1200 resolution, a 22" LCD monitor, and a 2' viewing distance + stereoParameters.fViewerDistanceInches = 24.0f; + stereoParameters.fPixelResolutionWidth = 1920.0f; + stereoParameters.fPixelResolutionHeight = 1200.0f; + stereoParameters.fDisplaySizeInches = 22.0f; + + stereoParameters.fStereoSeparationFactor = 1.0f; + stereoParameters.fStereoExaggerationFactor = 1.0f; +} + +//------------------------------------------------------------------------------ + +XMMATRIX DirectX::StereoProjectionFovLH +( + _In_opt_ const STEREO_PARAMETERS* pStereoParameters, + STEREO_CHANNEL Channel, + float FovAngleY, + float AspectRatio, + float NearZ, + float FarZ, + STEREO_MODE StereoMode +) +{ + assert(Channel == STEREO_CHANNEL_LEFT || Channel == STEREO_CHANNEL_RIGHT); + assert(StereoMode 
== STEREO_MODE_NORMAL || StereoMode == STEREO_MODE_INVERTED); + assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f)); + assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f)); + assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); + + STEREO_PARAMETERS DefaultParameters = {}; + if (pStereoParameters == nullptr) + { + StereoCreateDefaultParameters(DefaultParameters); + pStereoParameters = &DefaultParameters; + } + + assert(pStereoParameters->fStereoSeparationFactor >= 0.0f && pStereoParameters->fStereoSeparationFactor <= 1.0f); + assert(pStereoParameters->fStereoExaggerationFactor >= 1.0f && pStereoParameters->fStereoExaggerationFactor <= 2.0f); + + float fVirtualProjection = 0.0f; + float zNearWidth = 0.0f; + float zNearHeight = 0.0f; + StereoProjectionHelper(*pStereoParameters, &fVirtualProjection, &zNearWidth, &zNearHeight, FovAngleY, AspectRatio, NearZ); + + fVirtualProjection *= pStereoParameters->fStereoSeparationFactor; // incorporate developer defined bias + + // + // By applying a translation, we are forcing our cameras to be parallel + // + + float fInvertedAngle = atanf(fVirtualProjection / (2.0f * NearZ)); + + XMMATRIX proj = XMMatrixPerspectiveFovLH(FovAngleY, AspectRatio, NearZ, FarZ); + + XMMATRIX patchedProjection; + if (Channel == STEREO_CHANNEL_LEFT) + { + if (StereoMode > STEREO_MODE_NORMAL) + { + XMMATRIX rots = XMMatrixRotationY(fInvertedAngle); + XMMATRIX trans = XMMatrixTranslation(-fVirtualProjection, 0, 0); + patchedProjection = XMMatrixMultiply(XMMatrixMultiply(rots, trans), proj); + } + else + { + XMMATRIX trans = XMMatrixTranslation(-fVirtualProjection, 0, 0); + patchedProjection = XMMatrixMultiply(trans, proj); + } + } + else + { + if (StereoMode > STEREO_MODE_NORMAL) + { + XMMATRIX rots = XMMatrixRotationY(-fInvertedAngle); + XMMATRIX trans = XMMatrixTranslation(fVirtualProjection, 0, 0); + patchedProjection = XMMatrixMultiply(XMMatrixMultiply(rots, trans), proj); + } + else + { + XMMATRIX trans = 
XMMatrixTranslation(fVirtualProjection, 0, 0); + patchedProjection = XMMatrixMultiply(trans, proj); + } + } + + return patchedProjection; +} + +//------------------------------------------------------------------------------ + +XMMATRIX DirectX::StereoProjectionFovRH +( + _In_opt_ const STEREO_PARAMETERS* pStereoParameters, + STEREO_CHANNEL Channel, + float FovAngleY, + float AspectRatio, + float NearZ, + float FarZ, + STEREO_MODE StereoMode +) +{ + assert(Channel == STEREO_CHANNEL_LEFT || Channel == STEREO_CHANNEL_RIGHT); + assert(StereoMode == STEREO_MODE_NORMAL || StereoMode == STEREO_MODE_INVERTED); + assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f)); + assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f)); + assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); + + STEREO_PARAMETERS DefaultParameters = {}; + if (pStereoParameters == nullptr) + { + StereoCreateDefaultParameters(DefaultParameters); + pStereoParameters = &DefaultParameters; + } + + assert(pStereoParameters->fStereoSeparationFactor >= 0.0f && pStereoParameters->fStereoSeparationFactor <= 1.0f); + assert(pStereoParameters->fStereoExaggerationFactor >= 1.0f && pStereoParameters->fStereoExaggerationFactor <= 2.0f); + + float fVirtualProjection = 0.0f; + float zNearWidth = 0.0f; + float zNearHeight = 0.0f; + StereoProjectionHelper(*pStereoParameters, &fVirtualProjection, &zNearWidth, &zNearHeight, FovAngleY, AspectRatio, NearZ); + + fVirtualProjection *= pStereoParameters->fStereoSeparationFactor; // incorporate developer defined bias + + // + // By applying a translation, we are forcing our cameras to be parallel + // + + float fInvertedAngle = atanf(fVirtualProjection / (2.0f * NearZ)); + + XMMATRIX proj = XMMatrixPerspectiveFovRH(FovAngleY, AspectRatio, NearZ, FarZ); + + // + // By applying a translation, we are forcing our cameras to be parallel + // + + XMMATRIX patchedProjection; + if (Channel == STEREO_CHANNEL_LEFT) + { + if (StereoMode > STEREO_MODE_NORMAL) + { + XMMATRIX rots = 
XMMatrixRotationY(fInvertedAngle); + XMMATRIX trans = XMMatrixTranslation(-fVirtualProjection, 0, 0); + patchedProjection = XMMatrixMultiply(XMMatrixMultiply(rots, trans), proj); + } + else + { + XMMATRIX trans = XMMatrixTranslation(-fVirtualProjection, 0, 0); + patchedProjection = XMMatrixMultiply(trans, proj); + } + } + else + { + if (StereoMode > STEREO_MODE_NORMAL) + { + XMMATRIX rots = XMMatrixRotationY(-fInvertedAngle); + XMMATRIX trans = XMMatrixTranslation(fVirtualProjection, 0, 0); + patchedProjection = XMMatrixMultiply(XMMatrixMultiply(rots, trans), proj); + } + else + { + XMMATRIX trans = XMMatrixTranslation(fVirtualProjection, 0, 0); + patchedProjection = XMMatrixMultiply(trans, proj); + } + } + + return patchedProjection; +} diff --git a/Sdk/External/DirectXMath/Stereo3D/Stereo3DMatrixHelper.h b/Sdk/External/DirectXMath/Stereo3D/Stereo3DMatrixHelper.h new file mode 100644 index 0000000..1ff2ddb --- /dev/null +++ b/Sdk/External/DirectXMath/Stereo3D/Stereo3DMatrixHelper.h @@ -0,0 +1,64 @@ +//------------------------------------------------------------------------------------- +// Stereo3DMatrixHelper.h -- SIMD C++ Math helper for Stereo 3D matrices +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +//------------------------------------------------------------------------------------- + +#pragma once + +#include "DirectXMath.h" + +namespace DirectX +{ + // Enumeration for stereo channels (left and right). + enum STEREO_CHANNEL + { + STEREO_CHANNEL_LEFT = 0, + STEREO_CHANNEL_RIGHT + }; + + // Enumeration for stereo mode (normal or inverted). 
+ enum STEREO_MODE + { + STEREO_MODE_NORMAL = 0, + STEREO_MODE_INVERTED, + }; + + //------------------------------------------------------------------------------ + // + // Stereo calibration settings + // + // * Viewer distance to the display + // * Physical display size + // * Render resolution + // + // The stereo separation factor indicates how much separation is between the left and right + // eyes. 0 is no separation, 1 is full separation. It defaults to 1.0. + // + // The debug stereo exaggeration factor indicates how much to increase the interocular spacing and + // maximum acuity angle from comfortable defaults. For retail builds, this value should always + // be 1.0, but during development, on small screens, this value can be raised to up to 2.0 in + // order to exaggerate the 3D effect. Values over 1.0 may cause discomfort on normal sized + // displays. It defaults to 1.0. + // + struct STEREO_PARAMETERS + { + float fViewerDistanceInches; + float fDisplaySizeInches; + float fPixelResolutionWidth; + float fPixelResolutionHeight; + float fStereoSeparationFactor; + float fStereoExaggerationFactor; + }; + + void StereoCreateDefaultParameters(STEREO_PARAMETERS& stereoParameters); + + XMMATRIX StereoProjectionFovLH(_In_opt_ const STEREO_PARAMETERS* pStereoParameters, + STEREO_CHANNEL Channel, float FovAngleY, float AspectRatio, float NearZ, float FarZ, + STEREO_MODE StereoMode = STEREO_MODE_NORMAL); + + XMMATRIX StereoProjectionFovRH(_In_opt_ const STEREO_PARAMETERS* pStereoParameters, + STEREO_CHANNEL Channel, float FovAngleY, float AspectRatio, float NearZ, float FarZ, + STEREO_MODE StereoMode = STEREO_MODE_NORMAL); +} \ No newline at end of file diff --git a/Sdk/External/DirectXMath/XDSP/XDSP.h b/Sdk/External/DirectXMath/XDSP/XDSP.h new file mode 100644 index 0000000..9a7e4ee --- /dev/null +++ b/Sdk/External/DirectXMath/XDSP/XDSP.h @@ -0,0 +1,813 @@ +//-------------------------------------------------------------------------------------- +// File: XDSP.h 
+// +// DirectXMath based Digital Signal Processing (DSP) functions for audio, +// primarily Fast Fourier Transform (FFT) +// +// All buffer parameters must be 16-byte aligned +// +// All FFT functions support only single-precision floating-point audio +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkID=615557 +//-------------------------------------------------------------------------------------- + +#pragma once + +#include +#include + +#include +#include + +#pragma warning(push) +#pragma warning(disable: 6001 6262) + +namespace XDSP +{ + using XMVECTOR = DirectX::XMVECTOR; + using FXMVECTOR = DirectX::FXMVECTOR; + using GXMVECTOR = DirectX::GXMVECTOR; + using CXMVECTOR = DirectX::CXMVECTOR; + + inline bool ISPOWEROF2(size_t n) { return ( ((n)&((n)-1)) == 0 && (n) != 0 ); } + + // Parallel multiplication of four complex numbers, assuming real and imaginary values are stored in separate vectors. 
+ inline void XM_CALLCONV vmulComplex( + _Out_ XMVECTOR& rResult, _Out_ XMVECTOR& iResult, + _In_ FXMVECTOR r1, _In_ FXMVECTOR i1, _In_ FXMVECTOR r2, _In_ GXMVECTOR i2) noexcept + { + using namespace DirectX; + // (r1, i1) * (r2, i2) = (r1r2 - i1i2, r1i2 + r2i1) + XMVECTOR vr1r2 = XMVectorMultiply(r1, r2); + XMVECTOR vr1i2 = XMVectorMultiply(r1, i2); + rResult = XMVectorNegativeMultiplySubtract(i1, i2, vr1r2); // real: (r1*r2 - i1*i2) + iResult = XMVectorMultiplyAdd(r2, i1, vr1i2); // imaginary: (r1*i2 + r2*i1) + } + + inline void XM_CALLCONV vmulComplex( + _Inout_ XMVECTOR& r1, _Inout_ XMVECTOR& i1, _In_ FXMVECTOR r2, _In_ FXMVECTOR i2) noexcept + { + using namespace DirectX; + // (r1, i1) * (r2, i2) = (r1r2 - i1i2, r1i2 + r2i1) + XMVECTOR vr1r2 = XMVectorMultiply(r1, r2); + XMVECTOR vr1i2 = XMVectorMultiply(r1, i2); + r1 = XMVectorNegativeMultiplySubtract(i1, i2, vr1r2); // real: (r1*r2 - i1*i2) + i1 = XMVectorMultiplyAdd(r2, i1, vr1i2); // imaginary: (r1*i2 + r2*i1) + } + + //---------------------------------------------------------------------------------- + // Radix-4 decimation-in-time FFT butterfly. + // This version assumes that all four elements of the butterfly are + // adjacent in a single vector. + // + // Compute the product of the complex input vector and the + // 4-element DFT matrix: + // | 1 1 1 1 | | (r1X,i1X) | + // | 1 -j -1 j | | (r1Y,i1Y) | + // | 1 -1 1 -1 | | (r1Z,i1Z) | + // | 1 j -1 -j | | (r1W,i1W) | + // + // This matrix can be decomposed into two simpler ones to reduce the + // number of additions needed. 
The decomposed matrices look like this: + // | 1 0 1 0 | | 1 0 1 0 | + // | 0 1 0 -j | | 1 0 -1 0 | + // | 1 0 -1 0 | | 0 1 0 1 | + // | 0 1 0 j | | 0 1 0 -1 | + // + // Combine as follows: + // | 1 0 1 0 | | (r1X,i1X) | | (r1X + r1Z, i1X + i1Z) | + // Temp = | 1 0 -1 0 | * | (r1Y,i1Y) | = | (r1X - r1Z, i1X - i1Z) | + // | 0 1 0 1 | | (r1Z,i1Z) | | (r1Y + r1W, i1Y + i1W) | + // | 0 1 0 -1 | | (r1W,i1W) | | (r1Y - r1W, i1Y - i1W) | + // + // | 1 0 1 0 | | (rTempX,iTempX) | | (rTempX + rTempZ, iTempX + iTempZ) | + // Result = | 0 1 0 -j | * | (rTempY,iTempY) | = | (rTempY + iTempW, iTempY - rTempW) | + // | 1 0 -1 0 | | (rTempZ,iTempZ) | | (rTempX - rTempZ, iTempX - iTempZ) | + // | 0 1 0 j | | (rTempW,iTempW) | | (rTempY - iTempW, iTempY + rTempW) | + //---------------------------------------------------------------------------------- + inline void ButterflyDIT4_1 (_Inout_ XMVECTOR& r1, _Inout_ XMVECTOR& i1) noexcept + { + using namespace DirectX; + + // sign constants for radix-4 butterflies + static const XMVECTORF32 vDFT4SignBits1 = { { { 1.0f, -1.0f, 1.0f, -1.0f } } }; + static const XMVECTORF32 vDFT4SignBits2 = { { { 1.0f, 1.0f, -1.0f, -1.0f } } }; + static const XMVECTORF32 vDFT4SignBits3 = { { { 1.0f, -1.0f, -1.0f, 1.0f } } }; + + // calculating Temp + // [r1X| r1X|r1Y| r1Y] + [r1Z|-r1Z|r1W|-r1W] + // [i1X| i1X|i1Y| i1Y] + [i1Z|-i1Z|i1W|-i1W] + XMVECTOR r1L = XMVectorSwizzle<0,0,1,1>( r1 ); + XMVECTOR r1H = XMVectorSwizzle<2,2,3,3>( r1 ); + + XMVECTOR i1L = XMVectorSwizzle<0,0,1,1>( i1 ); + XMVECTOR i1H = XMVectorSwizzle<2,2,3,3>( i1 ); + + XMVECTOR rTemp = XMVectorMultiplyAdd( r1H, vDFT4SignBits1, r1L ); + XMVECTOR iTemp = XMVectorMultiplyAdd( i1H, vDFT4SignBits1, i1L ); + + // calculating Result + XMVECTOR rZrWiZiW = XMVectorPermute<2,3,6,7>(rTemp,iTemp); // [rTempZ|rTempW|iTempZ|iTempW] + XMVECTOR rZiWrZiW = XMVectorSwizzle<0,3,0,3>(rZrWiZiW); // [rTempZ|iTempW|rTempZ|iTempW] + XMVECTOR iZrWiZrW = XMVectorSwizzle<2,1,2,1>(rZrWiZiW); // 
[rTempZ|iTempW|rTempZ|iTempW] + + // [rTempX| rTempY| rTempX| rTempY] + [rTempZ| iTempW|-rTempZ|-iTempW] + // [iTempX| iTempY| iTempX| iTempY] + // [iTempZ|-rTempW|-iTempZ| rTempW] + XMVECTOR rTempL = XMVectorSwizzle<0,1,0,1>(rTemp); + XMVECTOR iTempL = XMVectorSwizzle<0,1,0,1>(iTemp); + + r1 = XMVectorMultiplyAdd( rZiWrZiW, vDFT4SignBits2, rTempL ); + i1 = XMVectorMultiplyAdd( iZrWiZrW, vDFT4SignBits3, iTempL ); + } + + //---------------------------------------------------------------------------------- + // Radix-4 decimation-in-time FFT butterfly. + // This version assumes that elements of the butterfly are + // in different vectors, so that each vector in the input + // contains elements from four different butterflies. + // The four separate butterflies are processed in parallel. + // + // The calculations here are the same as the ones in the single-vector + // radix-4 DFT, but instead of being done on a single vector (X,Y,Z,W) + // they are done in parallel on sixteen independent complex values. 
+ // There is no interdependence between the vector elements: + // | 1 0 1 0 | | (rIn0,iIn0) | | (rIn0 + rIn2, iIn0 + iIn2) | + // | 1 0 -1 0 | * | (rIn1,iIn1) | = Temp = | (rIn0 - rIn2, iIn0 - iIn2) | + // | 0 1 0 1 | | (rIn2,iIn2) | | (rIn1 + rIn3, iIn1 + iIn3) | + // | 0 1 0 -1 | | (rIn3,iIn3) | | (rIn1 - rIn3, iIn1 - iIn3) | + // + // | 1 0 1 0 | | (rTemp0,iTemp0) | | (rTemp0 + rTemp2, iTemp0 + iTemp2) | + // Result = | 0 1 0 -j | * | (rTemp1,iTemp1) | = | (rTemp1 + iTemp3, iTemp1 - rTemp3) | + // | 1 0 -1 0 | | (rTemp2,iTemp2) | | (rTemp0 - rTemp2, iTemp0 - iTemp2) | + // | 0 1 0 j | | (rTemp3,iTemp3) | | (rTemp1 - iTemp3, iTemp1 + rTemp3) | + //---------------------------------------------------------------------------------- + inline void ButterflyDIT4_4( + _Inout_ XMVECTOR& r0, + _Inout_ XMVECTOR& r1, + _Inout_ XMVECTOR& r2, + _Inout_ XMVECTOR& r3, + _Inout_ XMVECTOR& i0, + _Inout_ XMVECTOR& i1, + _Inout_ XMVECTOR& i2, + _Inout_ XMVECTOR& i3, + _In_reads_(uStride * 4) const XMVECTOR* __restrict pUnityTableReal, + _In_reads_(uStride * 4) const XMVECTOR* __restrict pUnityTableImaginary, + _In_ size_t uStride, + _In_ const bool fLast) noexcept + { + using namespace DirectX; + + assert(pUnityTableReal); + assert(pUnityTableImaginary); + assert(reinterpret_cast(pUnityTableReal) % 16 == 0); + assert(reinterpret_cast(pUnityTableImaginary) % 16 == 0); + assert(ISPOWEROF2(uStride)); + + // calculating Temp + XMVECTOR rTemp0 = XMVectorAdd(r0, r2); + XMVECTOR iTemp0 = XMVectorAdd(i0, i2); + + XMVECTOR rTemp2 = XMVectorAdd(r1, r3); + XMVECTOR iTemp2 = XMVectorAdd(i1, i3); + + XMVECTOR rTemp1 = XMVectorSubtract(r0, r2); + XMVECTOR iTemp1 = XMVectorSubtract(i0, i2); + + XMVECTOR rTemp3 = XMVectorSubtract(r1, r3); + XMVECTOR iTemp3 = XMVectorSubtract(i1, i3); + + XMVECTOR rTemp4 = XMVectorAdd(rTemp0, rTemp2); + XMVECTOR iTemp4 = XMVectorAdd(iTemp0, iTemp2); + + XMVECTOR rTemp5 = XMVectorAdd(rTemp1, iTemp3); + XMVECTOR iTemp5 = XMVectorSubtract(iTemp1, rTemp3); + + 
XMVECTOR rTemp6 = XMVectorSubtract(rTemp0, rTemp2); + XMVECTOR iTemp6 = XMVectorSubtract(iTemp0, iTemp2); + + XMVECTOR rTemp7 = XMVectorSubtract(rTemp1, iTemp3); + XMVECTOR iTemp7 = XMVectorAdd(iTemp1, rTemp3); + + // calculating Result + // vmulComplex(rTemp0, iTemp0, rTemp0, iTemp0, pUnityTableReal[0], pUnityTableImaginary[0]); // first one is always trivial + vmulComplex(rTemp5, iTemp5, pUnityTableReal[uStride], pUnityTableImaginary[uStride]); + vmulComplex(rTemp6, iTemp6, pUnityTableReal[uStride * 2], pUnityTableImaginary[uStride * 2]); + vmulComplex(rTemp7, iTemp7, pUnityTableReal[uStride * 3], pUnityTableImaginary[uStride * 3]); + + if (fLast) + { + ButterflyDIT4_1(rTemp4, iTemp4); + ButterflyDIT4_1(rTemp5, iTemp5); + ButterflyDIT4_1(rTemp6, iTemp6); + ButterflyDIT4_1(rTemp7, iTemp7); + } + + r0 = rTemp4; i0 = iTemp4; + r1 = rTemp5; i1 = iTemp5; + r2 = rTemp6; i2 = iTemp6; + r3 = rTemp7; i3 = iTemp7; + } + + //================================================================================== + // F-U-N-C-T-I-O-N-S + //================================================================================== + + //---------------------------------------------------------------------------------- + // DESCRIPTION: + // 4-sample FFT. 
+ // + // PARAMETERS: + // pReal - [inout] real components, must have at least uCount elements + // pImaginary - [inout] imaginary components, must have at least uCount elements + // uCount - [in] number of FFT iterations + //---------------------------------------------------------------------------------- + inline void FFT4( + _Inout_updates_(uCount) XMVECTOR* __restrict pReal, + _Inout_updates_(uCount) XMVECTOR* __restrict pImaginary, + const size_t uCount = 1) noexcept + { + assert(pReal); + assert(pImaginary); + assert(reinterpret_cast(pReal) % 16 == 0); + assert(reinterpret_cast(pImaginary) % 16 == 0); + assert(ISPOWEROF2(uCount)); + + for (size_t uIndex = 0; uIndex < uCount; ++uIndex) + { + ButterflyDIT4_1(pReal[uIndex], pImaginary[uIndex]); + } + } + + //---------------------------------------------------------------------------------- + // DESCRIPTION: + // 8-sample FFT. + // + // PARAMETERS: + // pReal - [inout] real components, must have at least uCount*2 elements + // pImaginary - [inout] imaginary components, must have at least uCount*2 elements + // uCount - [in] number of FFT iterations + //---------------------------------------------------------------------------------- + inline void FFT8( + _Inout_updates_(uCount * 2) XMVECTOR* __restrict pReal, + _Inout_updates_(uCount * 2) XMVECTOR* __restrict pImaginary, + _In_ const size_t uCount = 1) noexcept + { + using namespace DirectX; + + assert(pReal); + assert(pImaginary); + assert(reinterpret_cast(pReal) % 16 == 0); + assert(reinterpret_cast(pImaginary) % 16 == 0); + assert(ISPOWEROF2(uCount)); + + static const XMVECTORF32 wr1 = { { { 1.0f, 0.70710677f, 0.0f, -0.70710677f } } }; + static const XMVECTORF32 wi1 = { { { 0.0f, -0.70710677f, -1.0f, -0.70710677f } } }; + static const XMVECTORF32 wr2 = { { { -1.0f, -0.70710677f, 0.0f, 0.70710677f } } }; + static const XMVECTORF32 wi2 = { { { 0.0f, 0.70710677f, 1.0f, 0.70710677f } } }; + + for (size_t uIndex = 0; uIndex < uCount; ++uIndex) + { + XMVECTOR* 
__restrict pR = pReal + uIndex * 2; + XMVECTOR* __restrict pI = pImaginary + uIndex * 2; + + XMVECTOR oddsR = XMVectorPermute<1, 3, 5, 7>(pR[0], pR[1]); + XMVECTOR evensR = XMVectorPermute<0, 2, 4, 6>(pR[0], pR[1]); + XMVECTOR oddsI = XMVectorPermute<1, 3, 5, 7>(pI[0], pI[1]); + XMVECTOR evensI = XMVectorPermute<0, 2, 4, 6>(pI[0], pI[1]); + ButterflyDIT4_1(oddsR, oddsI); + ButterflyDIT4_1(evensR, evensI); + + XMVECTOR r, i; + vmulComplex(r, i, oddsR, oddsI, wr1, wi1); + pR[0] = XMVectorAdd(evensR, r); + pI[0] = XMVectorAdd(evensI, i); + + vmulComplex(r, i, oddsR, oddsI, wr2, wi2); + pR[1] = XMVectorAdd(evensR, r); + pI[1] = XMVectorAdd(evensI, i); + } + } + + //---------------------------------------------------------------------------------- + // DESCRIPTION: + // 16-sample FFT. + // + // PARAMETERS: + // pReal - [inout] real components, must have at least uCount*4 elements + // pImaginary - [inout] imaginary components, must have at least uCount*4 elements + // uCount - [in] number of FFT iterations + //---------------------------------------------------------------------------------- + inline void FFT16( + _Inout_updates_(uCount * 4) XMVECTOR* __restrict pReal, + _Inout_updates_(uCount * 4) XMVECTOR* __restrict pImaginary, + _In_ const size_t uCount = 1) noexcept + { + using namespace DirectX; + + assert(pReal); + assert(pImaginary); + assert(reinterpret_cast(pReal) % 16 == 0); + assert(reinterpret_cast(pImaginary) % 16 == 0); + assert(ISPOWEROF2(uCount)); + + static const XMVECTORF32 aUnityTableReal[4] = { + { { { 1.0f, 1.0f, 1.0f, 1.0f } } }, + { { { 1.0f, 0.92387950f, 0.70710677f, 0.38268343f } } }, + { { { 1.0f, 0.70710677f, -4.3711388e-008f, -0.70710677f } } }, + { { { 1.0f, 0.38268343f, -0.70710677f, -0.92387950f } } } + }; + static const XMVECTORF32 aUnityTableImaginary[4] = + { + { { { -0.0f, -0.0f, -0.0f, -0.0f } } }, + { { { -0.0f, -0.38268343f, -0.70710677f, -0.92387950f } } }, + { { { -0.0f, -0.70710677f, -1.0f, -0.70710677f } } }, + { { { -0.0f, 
-0.92387950f, -0.70710677f, 0.38268343f } } } + }; + + for (size_t uIndex = 0; uIndex < uCount; ++uIndex) + { + ButterflyDIT4_4(pReal[uIndex * 4], + pReal[uIndex * 4 + 1], + pReal[uIndex * 4 + 2], + pReal[uIndex * 4 + 3], + pImaginary[uIndex * 4], + pImaginary[uIndex * 4 + 1], + pImaginary[uIndex * 4 + 2], + pImaginary[uIndex * 4 + 3], + reinterpret_cast(aUnityTableReal), + reinterpret_cast(aUnityTableImaginary), + 1, true); + } + } + + //---------------------------------------------------------------------------------- + // DESCRIPTION: + // 2^N-sample FFT. + // + // REMARKS: + // For FFTs length 16 and below, call FFT16(), FFT8(), or FFT4(). + // + // PARAMETERS: + // pReal - [inout] real components, must have at least (uLength*uCount)/4 elements + // pImaginary - [inout] imaginary components, must have at least (uLength*uCount)/4 elements + // pUnityTable - [in] unity table, must have at least uLength*uCount elements, see FFTInitializeUnityTable() + // uLength - [in] FFT length in samples, must be a power of 2 > 16 + // uCount - [in] number of FFT iterations + //---------------------------------------------------------------------------------- + inline void FFT ( + _Inout_updates_((uLength*uCount)/4) XMVECTOR* __restrict pReal, + _Inout_updates_((uLength*uCount)/4) XMVECTOR* __restrict pImaginary, + _In_reads_(uLength*uCount) const XMVECTOR* __restrict pUnityTable, + _In_ const size_t uLength, + _In_ const size_t uCount=1) noexcept + { + assert(pReal); + assert(pImaginary); + assert(pUnityTable); + assert(reinterpret_cast(pReal) % 16 == 0); + assert(reinterpret_cast(pImaginary) % 16 == 0); + assert(reinterpret_cast(pUnityTable) % 16 == 0); + assert(uLength > 16); + _Analysis_assume_(uLength > 16); + assert(ISPOWEROF2(uLength)); + assert(ISPOWEROF2(uCount)); + + const XMVECTOR* __restrict pUnityTableReal = pUnityTable; + const XMVECTOR* __restrict pUnityTableImaginary = pUnityTable + (uLength>>2); + const size_t uTotal = uCount * uLength; + const size_t 
uTotal_vectors = uTotal >> 2; + const size_t uStage_vectors = uLength >> 2; + const size_t uStage_vectors_mask = uStage_vectors - 1; + const size_t uStride = uLength >> 4; // stride between butterfly elements + const size_t uStrideMask = uStride - 1; + const size_t uStride2 = uStride * 2; + const size_t uStride3 = uStride * 3; + const size_t uStrideInvMask = ~uStrideMask; + + for (size_t uIndex=0; uIndex < (uTotal_vectors>>2); ++uIndex) + { + const size_t n = ((uIndex & uStrideInvMask) << 2) + (uIndex & uStrideMask); + ButterflyDIT4_4(pReal[n], + pReal[n + uStride], + pReal[n + uStride2], + pReal[n + uStride3], + pImaginary[n ], + pImaginary[n + uStride], + pImaginary[n + uStride2], + pImaginary[n + uStride3], + pUnityTableReal + (n & uStage_vectors_mask), + pUnityTableImaginary + (n & uStage_vectors_mask), + uStride, false); + } + + if (uLength > 16*4) + { + FFT(pReal, pImaginary, pUnityTable+(uLength>>1), uLength>>2, uCount*4); + } + else if (uLength == 16*4) + { + FFT16(pReal, pImaginary, uCount*4); + } + else if (uLength == 8*4) + { + FFT8(pReal, pImaginary, uCount*4); + } + else if (uLength == 4*4) + { + FFT4(pReal, pImaginary, uCount*4); + } + } + + //---------------------------------------------------------------------------------- + // DESCRIPTION: + // Initializes unity roots lookup table used by FFT functions. + // Once initialized, the table need not be initialized again unless a + // different FFT length is desired. + // + // REMARKS: + // The unity tables of FFT length 16 and below are hard coded into the + // respective FFT functions and so need not be initialized. 
+ // + // PARAMETERS: + // pUnityTable - [out] unity table, receives unity roots lookup table, must have at least uLength elements + // uLength - [in] FFT length in frames, must be a power of 2 > 16 + //---------------------------------------------------------------------------------- + inline void FFTInitializeUnityTable (_Out_writes_(uLength) XMVECTOR* __restrict pUnityTable, _In_ size_t uLength) noexcept + { + assert(pUnityTable); + assert(uLength > 16); + _Analysis_assume_(uLength > 16); + assert(ISPOWEROF2(uLength)); + + float* __restrict pfUnityTable = reinterpret_cast(pUnityTable); + + // initialize unity table for recursive FFT lengths: uLength, uLength/4, uLength/16... > 16 + do + { + float flStep = 6.283185307f / float(uLength); // 2PI / FFT length + uLength >>= 2; + + // pUnityTable[0 to uLength*4-1] contains real components for current FFT length + // pUnityTable[uLength*4 to uLength*8-1] contains imaginary components for current FFT length + for (size_t i=0; i<4; ++i) + { + for (size_t j=0; j 16); + } + + //---------------------------------------------------------------------------------- + // DESCRIPTION: + // The FFT functions generate output in bit reversed order. + // Use this function to re-arrange them into order of increasing frequency. 
+ // + // REMARKS: + // + // PARAMETERS: + // pOutput - [out] output buffer, receives samples in order of increasing frequency, cannot overlap pInput, must have at least (1<= 2 + //---------------------------------------------------------------------------------- + inline void FFTUnswizzle ( + _Out_writes_((1<= 2); + _Analysis_assume_(uLog2Length >= 2); + + float* __restrict pfOutput = reinterpret_cast(pOutput); + const float* __restrict pfInput = reinterpret_cast(pInput); + const size_t uLength = size_t(1) << uLog2Length; + + if ((uLog2Length & 0x1) == 0) + { + // even powers of two + for (size_t uIndex=0; uIndex < uLength; ++uIndex) + { + size_t n = uIndex; + n = ( (n & 0xcccccccc) >> 2 ) | ( (n & 0x33333333) << 2 ); + n = ( (n & 0xf0f0f0f0) >> 4 ) | ( (n & 0x0f0f0f0f) << 4 ); + n = ( (n & 0xff00ff00) >> 8 ) | ( (n & 0x00ff00ff) << 8 ); + n = ( (n & 0xffff0000) >> 16 ) | ( (n & 0x0000ffff) << 16 ); + n >>= (32 - uLog2Length); + pfOutput[n] = pfInput[uIndex]; + } + } + else + { + // odd powers of two + for (size_t uIndex=0; uIndex < uLength; ++uIndex) + { + size_t n = (uIndex>>3); + n = ( (n & 0xcccccccc) >> 2 ) | ( (n & 0x33333333) << 2 ); + n = ( (n & 0xf0f0f0f0) >> 4 ) | ( (n & 0x0f0f0f0f) << 4 ); + n = ( (n & 0xff00ff00) >> 8 ) | ( (n & 0x00ff00ff) << 8 ); + n = ( (n & 0xffff0000) >> 16 ) | ( (n & 0x0000ffff) << 16 ); + n >>= (32 - (uLog2Length-3)); + n |= ((uIndex & 0x7) << (uLog2Length - 3)); + pfOutput[n] = pfInput[uIndex]; + } + } + } + + //---------------------------------------------------------------------------------- + // DESCRIPTION: + // Convert complex components to polar form. 
+ // + // PARAMETERS: + // pOutput - [out] output buffer, receives samples in polar form, must have at least uLength/4 elements + // pInputReal - [in] input buffer (real components), must have at least uLength/4 elements + // pInputImaginary - [in] input buffer (imaginary components), must have at least uLength/4 elements + // uLength - [in] FFT length in samples, must be a power of 2 >= 4 + //---------------------------------------------------------------------------------- +#pragma warning(suppress: 6101) + inline void FFTPolar( + _Out_writes_(uLength/4) XMVECTOR* __restrict pOutput, + _In_reads_(uLength/4) const XMVECTOR* __restrict pInputReal, + _In_reads_(uLength/4) const XMVECTOR* __restrict pInputImaginary, + _In_ const size_t uLength) noexcept + { + using namespace DirectX; + + assert(pOutput); + assert(pInputReal); + assert(pInputImaginary); + assert(uLength >= 4); + _Analysis_assume_(uLength >= 4); + assert(ISPOWEROF2(uLength)); + + float flOneOverLength = 1.0f / float(uLength); + + // result = sqrtf((real/uLength)^2 + (imaginary/uLength)^2) * 2 + XMVECTOR vOneOverLength = XMVectorReplicate( flOneOverLength ); + + for (size_t uIndex=0; uIndex < (uLength>>2); ++uIndex) + { + XMVECTOR vReal = XMVectorMultiply(pInputReal[uIndex], vOneOverLength); + XMVECTOR vImaginary = XMVectorMultiply(pInputImaginary[uIndex], vOneOverLength); + XMVECTOR vRR = XMVectorMultiply(vReal, vReal); + XMVECTOR vII = XMVectorMultiply(vImaginary, vImaginary); + XMVECTOR vRRplusII = XMVectorAdd(vRR, vII); + XMVECTOR vTotal = XMVectorSqrt(vRRplusII); + pOutput[uIndex] = XMVectorAdd(vTotal, vTotal); + } + } + + //---------------------------------------------------------------------------------- + // DESCRIPTION: + // Deinterleaves audio samples + // + // REMARKS: + // For example, audio of the form [LRLRLR] becomes [LLLRRR]. 
+ // + // PARAMETERS: + // pOutput - [out] output buffer, receives samples in deinterleaved form, cannot overlap pInput, must have at least (uChannelCount*uFrameCount)/4 elements + // pInput - [in] input buffer, cannot overlap pOutput, must have at least (uChannelCount*uFrameCount)/4 elements + // uChannelCount - [in] number of channels, must be > 1 + // uFrameCount - [in] number of frames of valid data, must be > 0 + //---------------------------------------------------------------------------------- + inline void Deinterleave ( + _Out_writes_((uChannelCount*uFrameCount)/4) XMVECTOR* __restrict pOutput, + _In_reads_((uChannelCount*uFrameCount)/4) const XMVECTOR* __restrict pInput, + _In_ const size_t uChannelCount, + _In_ const size_t uFrameCount) noexcept + { + assert(pOutput); + assert(pInput); + assert(uChannelCount > 1); + assert(uFrameCount > 0); + + float* __restrict pfOutput = reinterpret_cast(pOutput); + const float* __restrict pfInput = reinterpret_cast(pInput); + + for (size_t uChannel=0; uChannel < uChannelCount; ++uChannel) + { + for (size_t uFrame=0; uFrame < uFrameCount; ++uFrame) + { + pfOutput[uChannel * uFrameCount + uFrame] = pfInput[uFrame * uChannelCount + uChannel]; + } + } + } + + //---------------------------------------------------------------------------------- + // DESCRIPTION: + // Interleaves audio samples + // + // REMARKS: + // For example, audio of the form [LLLRRR] becomes [LRLRLR]. 
+ // + // PARAMETERS: + // pOutput - [out] output buffer, receives samples in interleaved form, cannot overlap pInput, must have at least (uChannelCount*uFrameCount)/4 elements + // pInput - [in] input buffer, cannot overlap pOutput, must have at least (uChannelCount*uFrameCount)/4 elements + // uChannelCount - [in] number of channels, must be > 1 + // uFrameCount - [in] number of frames of valid data, must be > 0 + //---------------------------------------------------------------------------------- + inline void Interleave( + _Out_writes_((uChannelCount*uFrameCount)/4) XMVECTOR* __restrict pOutput, + _In_reads_((uChannelCount*uFrameCount)/4) const XMVECTOR* __restrict pInput, + _In_ const size_t uChannelCount, + _In_ const size_t uFrameCount) noexcept + { + assert(pOutput); + assert(pInput); + assert(uChannelCount > 1); + assert(uFrameCount > 0); + + float* __restrict pfOutput = reinterpret_cast(pOutput); + const float* __restrict pfInput = reinterpret_cast(pInput); + + for (size_t uChannel=0; uChannel < uChannelCount; ++uChannel) + { + for (size_t uFrame=0; uFrame < uFrameCount; ++uFrame) + { + pfOutput[uFrame * uChannelCount + uChannel] = pfInput[uChannel * uFrameCount + uFrame]; + } + } + } + + //---------------------------------------------------------------------------------- + // DESCRIPTION: + // This function applies a 2^N-sample FFT and unswizzles the result such + // that the samples are in order of increasing frequency. + // Audio is first deinterleaved if multichannel. 
+ // + // PARAMETERS: + // pReal - [inout] real components, must have at least (1<(pReal) % 16 == 0); + assert(reinterpret_cast(pImaginary) % 16 == 0); + assert(reinterpret_cast(pUnityTable) % 16 == 0); + assert(uChannelCount > 0 && uChannelCount <= 6); + assert(uLog2Length >= 2 && uLog2Length <= 9); + + XMVECTOR vRealTemp[768]; + XMVECTOR vImaginaryTemp[768]; + const size_t uLength = size_t(1) << uLog2Length; + + if (uChannelCount > 1) + { + Deinterleave(vRealTemp, pReal, uChannelCount, uLength); + } + else + { + memcpy_s(vRealTemp, sizeof(vRealTemp), pReal, (uLength>>2)*sizeof(XMVECTOR)); + } + + memset( vImaginaryTemp, 0, (uChannelCount*(uLength>>2)) * sizeof(XMVECTOR) ); + + if (uLength > 16) + { + for (size_t uChannel=0; uChannel < uChannelCount; ++uChannel) + { + FFT(&vRealTemp[uChannel*(uLength>>2)], &vImaginaryTemp[uChannel*(uLength>>2)], pUnityTable, uLength); + } + } + else if (uLength == 16) + { + for (size_t uChannel=0; uChannel < uChannelCount; ++uChannel) + { + FFT16(&vRealTemp[uChannel*(uLength>>2)], &vImaginaryTemp[uChannel*(uLength>>2)]); + } + } + else if (uLength == 8) + { + for (size_t uChannel=0; uChannel < uChannelCount; ++uChannel) + { + FFT8(&vRealTemp[uChannel*(uLength>>2)], &vImaginaryTemp[uChannel*(uLength>>2)]); + } + } + else if (uLength == 4) + { + for (size_t uChannel=0; uChannel < uChannelCount; ++uChannel) + { + FFT4(&vRealTemp[uChannel*(uLength>>2)], &vImaginaryTemp[uChannel*(uLength>>2)]); + } + } + + for (size_t uChannel=0; uChannel < uChannelCount; ++uChannel) + { + FFTUnswizzle(&pReal[uChannel*(uLength>>2)], &vRealTemp[uChannel*(uLength>>2)], uLog2Length); + FFTUnswizzle(&pImaginary[uChannel*(uLength>>2)], &vImaginaryTemp[uChannel*(uLength>>2)], uLog2Length); + } + } + + //---------------------------------------------------------------------------------- + // DESCRIPTION: + // This function applies a 2^N-sample inverse FFT. + // Audio is interleaved if multichannel. 
+ // + // PARAMETERS: + // pReal - [inout] real components, must have at least (1< 0 + // uLog2Length - [in] LOG (base 2) of FFT length in frames, must within [2, 9] + //---------------------------------------------------------------------------------- + inline void IFFTDeinterleaved( + _Inout_updates_(((1<(pReal) % 16 == 0); + assert(reinterpret_cast(pImaginary) % 16 == 0); + assert(reinterpret_cast(pUnityTable) % 16 == 0); + assert(uChannelCount > 0 && uChannelCount <= 6); + _Analysis_assume_(uChannelCount > 0 && uChannelCount <= 6); + assert(uLog2Length >= 2 && uLog2Length <= 9); + _Analysis_assume_(uLog2Length >= 2 && uLog2Length <= 9); + + XMVECTOR vRealTemp[768] = {}; + XMVECTOR vImaginaryTemp[768] = {}; + + const size_t uLength = size_t(1) << uLog2Length; + + const XMVECTOR vRnp = XMVectorReplicate(1.0f / float(uLength)); + const XMVECTOR vRnm = XMVectorReplicate(-1.0f / float(uLength)); + for (size_t u=0; u < uChannelCount*(uLength>>2); u++) + { + vRealTemp[u] = XMVectorMultiply(pReal[u], vRnp); + vImaginaryTemp[u] = XMVectorMultiply(pImaginary[u], vRnm); + } + + if (uLength > 16) + { + for (size_t uChannel=0; uChannel < uChannelCount; ++uChannel) + { + FFT(&vRealTemp[uChannel*(uLength>>2)], &vImaginaryTemp[uChannel*(uLength>>2)], pUnityTable, uLength); + } + } + else if (uLength == 16) + { + for (size_t uChannel=0; uChannel < uChannelCount; ++uChannel) + { + FFT16(&vRealTemp[uChannel*(uLength>>2)], &vImaginaryTemp[uChannel*(uLength>>2)]); + } + } + else if (uLength == 8) + { + for (size_t uChannel=0; uChannel < uChannelCount; ++uChannel) + { + FFT8(&vRealTemp[uChannel*(uLength>>2)], &vImaginaryTemp[uChannel*(uLength>>2)]); + } + } + else if (uLength == 4) + { + for (size_t uChannel=0; uChannel < uChannelCount; ++uChannel) + { + FFT4(&vRealTemp[uChannel*(uLength>>2)], &vImaginaryTemp[uChannel*(uLength>>2)]); + } + } + + for (size_t uChannel=0; uChannel < uChannelCount; ++uChannel) + { + FFTUnswizzle(&vImaginaryTemp[uChannel*(uLength>>2)], 
&vRealTemp[uChannel*(uLength>>2)], uLog2Length); + } + + if (uChannelCount > 1) + { + Interleave(pReal, vImaginaryTemp, uChannelCount, uLength); + } + else + { + memcpy_s(pReal, uLength*uChannelCount*sizeof(float), vImaginaryTemp, (uLength>>2)*sizeof(XMVECTOR)); + } + } + +} // namespace XDSP + +#pragma warning(pop) diff --git a/Sdk/External/DirectXTK/.editorconfig b/Sdk/External/DirectXTK/.editorconfig new file mode 100644 index 0000000..f15501a --- /dev/null +++ b/Sdk/External/DirectXTK/.editorconfig @@ -0,0 +1,9 @@ +root = true + +[*.{cpp,h,inl,fx,fxh,hlsl,hlsli}] +indent_size = 4 +indent_style = space +trim_trailing_whitespace = true +insert_final_newline = true +end_of_line = crlf +charset = latin1 diff --git a/Sdk/External/DirectXTK/.nuget/directxtk_desktop_2017.nuspec b/Sdk/External/DirectXTK/.nuget/directxtk_desktop_2017.nuspec new file mode 100644 index 0000000..b5e5a8e --- /dev/null +++ b/Sdk/External/DirectXTK/.nuget/directxtk_desktop_2017.nuspec @@ -0,0 +1,78 @@ + + + + directxtk_desktop_2017 + 0.0.0-SpecifyVersionOnCommandline + DirectX Tool Kit (VS 2017/2019 Win32) + Microsoft + microsoft,directxtk + The DirectX Tool Kit (aka DirectXTK) is a collection of helper classes for writing Direct3D 11.x code in C++. + This version is for Windows desktop applications using Visual Studio 2017 or Visual Studio 2019. 
+ +Features: +Audio - low-level audio API using XAudio2 +BufferHelpers - C++ helpers for creating D3D resources from CPU data +CommonStates - factory providing commonly used D3D state objects +DirectXHelpers - misc C++ helpers for D3D programming +DDSTextureLoader - light-weight DDS file texture loader +Effects - set of built-in shaders for common rendering tasks +GamePad - gamepad controller helper using XInput +GeometricPrimitive - draws basic shapes such as cubes and spheres +GraphicsMemory - helper for managing dynamic graphics memory allocation +Keyboard - keyboard state tracking helper +Model - draws meshes loaded from .CMO, .SDKMESH, or .VBO files +Mouse - mouse helper +PostProcess - set of built-in shaders for common post-processing operations +PrimitiveBatch - simple and efficient way to draw user primitives +ScreenGrab - light-weight screen shot saver +SimpleMath - simplified C++ wrapper for DirectXMath +SpriteBatch - simple & efficient 2D sprite rendering +SpriteFont - bitmap based text rendering +VertexTypes - structures for commonly used vertex data formats +WICTextureLoader - WIC-based image file texture loader + Matches the September 30, 2020 release on GitHub. + +DirectX Tool Kit for Audio in this package uses XAudio2Redist NuGet package to support Windows 7 or later. + http://go.microsoft.com/fwlink/?LinkId=248929 + images\icon.jpg + MIT + false + © Microsoft Corporation. All rights reserved. 
+ DirectX DirectXTK native nativepackage + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Sdk/External/DirectXTK/.nuget/directxtk_desktop_2017.targets b/Sdk/External/DirectXTK/.nuget/directxtk_desktop_2017.targets new file mode 100644 index 0000000..760eeca --- /dev/null +++ b/Sdk/External/DirectXTK/.nuget/directxtk_desktop_2017.targets @@ -0,0 +1,29 @@ + + + + + Debug + + + Release + + + + $(MSBuildThisFileDirectory)..\..\native\lib\$(Platform)\$(NuGetConfiguration) + + + + + $(directxtk-LibPath);%(AdditionalLibraryDirectories) + DirectXTK.lib;DirectXTKAudioWin7.lib;%(AdditionalDependencies) + + + + + + HAS_DIRECTXTK;%(PreprocessorDefinitions) + $(MSBuildThisFileDirectory)..\..\include;%(AdditionalIncludeDirectories) + + + + diff --git a/Sdk/External/DirectXTK/.nuget/directxtk_desktop_win10.nuspec b/Sdk/External/DirectXTK/.nuget/directxtk_desktop_win10.nuspec new file mode 100644 index 0000000..1a42e75 --- /dev/null +++ b/Sdk/External/DirectXTK/.nuget/directxtk_desktop_win10.nuspec @@ -0,0 +1,73 @@ + + + + directxtk_desktop_win10 + 0.0.0-SpecifyVersionOnCommandline + DirectX Tool Kit (VS 2017/2019 Win32 for Windows 10) + Microsoft + microsoft,directxtk + The DirectX Tool Kit (aka DirectXTK) is a collection of helper classes for writing Direct3D 11.x code in C++. + This version is for Windows desktop applications using Visual Studio 2017 or Visual Studio 2019 on Windows 10. 
+ +Features: +Audio - low-level audio API using XAudio2 +BufferHelpers - C++ helpers for creating D3D resources from CPU data +CommonStates - factory providing commonly used D3D state objects +DirectXHelpers - misc C++ helpers for D3D programming +DDSTextureLoader - light-weight DDS file texture loader +Effects - set of built-in shaders for common rendering tasks +GamePad - gamepad controller helper using XInput +GeometricPrimitive - draws basic shapes such as cubes and spheres +GraphicsMemory - helper for managing dynamic graphics memory allocation +Keyboard - keyboard state tracking helper +Model - draws meshes loaded from .CMO, .SDKMESH, or .VBO files +Mouse - mouse helper +PostProcess - set of built-in shaders for common post-processing operations +PrimitiveBatch - simple and efficient way to draw user primitives +ScreenGrab - light-weight screen shot saver +SimpleMath - simplified C++ wrapper for DirectXMath +SpriteBatch - simple & efficient 2D sprite rendering +SpriteFont - bitmap based text rendering +VertexTypes - structures for commonly used vertex data formats +WICTextureLoader - WIC-based image file texture loader + Matches the September 30, 2020 release on GitHub. + +DirectX Tool Kit for Audio in this package uses XAudio 2.9 which requires Windows 10. + http://go.microsoft.com/fwlink/?LinkId=248929 + images\icon.jpg + MIT + false + © Microsoft Corporation. All rights reserved. 
+ DirectX DirectXTK native nativepackage + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/.nuget/directxtk_desktop_win10.targets b/Sdk/External/DirectXTK/.nuget/directxtk_desktop_win10.targets new file mode 100644 index 0000000..e9c46cb --- /dev/null +++ b/Sdk/External/DirectXTK/.nuget/directxtk_desktop_win10.targets @@ -0,0 +1,29 @@ + + + + + Debug + + + Release + + + + $(MSBuildThisFileDirectory)..\..\native\lib\$(Platform)\$(NuGetConfiguration) + + + + + $(directxtk-LibPath);%(AdditionalLibraryDirectories) + DirectXTK.lib;%(AdditionalDependencies) + + + + + + HAS_DIRECTXTK;%(PreprocessorDefinitions) + $(MSBuildThisFileDirectory)..\..\include;%(AdditionalIncludeDirectories) + + + + diff --git a/Sdk/External/DirectXTK/.nuget/directxtk_uwp.nuspec b/Sdk/External/DirectXTK/.nuget/directxtk_uwp.nuspec new file mode 100644 index 0000000..85152c1 --- /dev/null +++ b/Sdk/External/DirectXTK/.nuget/directxtk_uwp.nuspec @@ -0,0 +1,77 @@ + + + + directxtk_uwp + 0.0.0-SpecifyVersionOnCommandline + DirectX Tool Kit (UWP) + Microsoft + microsoft,directxtk + The DirectX Tool Kit (aka DirectXTK) is a collection of helper classes for writing Direct3D 11.x code in C++. + This version is for Universal Windows Platform apps on Windows 10 using Visual Studio 2017 or Visual Studio 2019. 
+ +Features: +Audio - low-level audio API using XAudio2 +BufferHelpers - C++ helpers for creating D3D resources from CPU data +CommonStates - factory providing commonly used D3D state objects +DirectXHelpers - misc C++ helpers for D3D programming +DDSTextureLoader - light-weight DDS file texture loader +Effects - set of built-in shaders for common rendering tasks +GamePad - gamepad controller helper using XInput +GeometricPrimitive - draws basic shapes such as cubes and spheres +GraphicsMemory - helper for managing dynamic graphics memory allocation +Keyboard - keyboard state tracking helper +Model - draws meshes loaded from .CMO, .SDKMESH, or .VBO files +Mouse - mouse helper +PostProcess - set of built-in shaders for common post-processing operations +PrimitiveBatch - simple and efficient way to draw user primitives +ScreenGrab - light-weight screen shot saver +SimpleMath - simplified C++ wrapper for DirectXMath +SpriteBatch - simple & efficient 2D sprite rendering +SpriteFont - bitmap based text rendering +VertexTypes - structures for commonly used vertex data formats +WICTextureLoader - WIC-based image file texture loader + Matches the September 30, 2020 release on GitHub. + http://go.microsoft.com/fwlink/?LinkId=248929 + images\icon.jpg + MIT + false + © Microsoft Corporation. All rights reserved. 
+ DirectX DirectXTK native nativepackage + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/.nuget/directxtk_uwp.targets b/Sdk/External/DirectXTK/.nuget/directxtk_uwp.targets new file mode 100644 index 0000000..e9c46cb --- /dev/null +++ b/Sdk/External/DirectXTK/.nuget/directxtk_uwp.targets @@ -0,0 +1,29 @@ + + + + + Debug + + + Release + + + + $(MSBuildThisFileDirectory)..\..\native\lib\$(Platform)\$(NuGetConfiguration) + + + + + $(directxtk-LibPath);%(AdditionalLibraryDirectories) + DirectXTK.lib;%(AdditionalDependencies) + + + + + + HAS_DIRECTXTK;%(PreprocessorDefinitions) + $(MSBuildThisFileDirectory)..\..\include;%(AdditionalIncludeDirectories) + + + + diff --git a/Sdk/External/DirectXTK/.nuget/icon.jpg b/Sdk/External/DirectXTK/.nuget/icon.jpg new file mode 100644 index 0000000000000000000000000000000000000000..08fe1faeb7f6e45d796cf1e67bf1cbb1347c514a GIT binary patch literal 3479 zcmbW$XHe5?mjLj82q7Rr2u1}(Y6PT8@1RJ((ximmf=CfjB1P&|i4>8JSGXbwVrWvN zBb`vCw;<9%dPy)y2mxMvcjxZR&VJb4^UV45oO$N_<~-+Q;$#kBGtkl30YD%S;BN{b_aR={-OXhp_WW z-DEgt>I~(7gphuk_?}Tfv#jO(ogpGp#>Mv;)0qnwIXEv}6%-P_CL${*ub`-;d`nAP zM^{hZz|7pj5@ls=gLb{^cF*0z6Z6>5KOpc)(DMlFi^!{3zLA% z9pZTx-=Q-Xkh1evNxy0Tmi_Nw&;DPse_;RRngn2A(CP5Na6kjtrwV8Pc}BQcs3RN^ zqB7U+Ui?6PBs5OkNy(>k_)is2(G@0jHLtCOS0Y}8aNO!gr(rF$N^Zmt{cf&BrNx?( z9Mf?vlnyKwWC`Vsw*mYRsHZdoN?XxtnDTKSk(O9tSs;+c364gpHQ^ATofK3Y<&VYf zDPvP_;AF|$jI{K^lUTY(K zn7ol&+vJ?@x!p3k5Qg%{L_nLq_y)VA%^lJCoSmpImpHmZ3b6aSpC&W#*ReWo%~tZ9 zYy>Um(b`C+!7j-vP3_udwd#}+zpdFfuGw~R-gRZvcQgh$?x=1|-w{OGYN4?N`84mG zKLJ{`=M;-@$$r}@+T}~T=+5o$y9eQOa=rF>b=A|#uf^ZoWue)HNAh4FHl3C8Q0Jyu zT?!pZDXVw(;zM^3T3($1;OckHl@4^((ho!BM8T+ZVi<0iTbcxOxIoN3{v--G#~z>$ z`-52f8_~oEZ|nLtW<0}XJY_g@eWhn^-{-)RC3!8NrDpa`o!()FUieD^&x>3+EZOb2 zh#cgBQcl6bz_1qY&8^}bY=pEyKDUk(8=%v4M?q;HWABISiK{FBpC* z@~Fi%UCbw5^|Y~>#cGG3m=#sN#n9k+^{P6fQe@p=?)^=u3P>YN^45C8QM??%&UL!y zh}LoJ$55F4o$^wvrs_*H$iMf_B7@)+EB3JGB)Junj!IFrc;z^$eCy5NPuQd4d}qjB 
z?3!B9Dl*1g35IL1D#-|Aq%^LyH6k;csr8vP={kqkg!q-}QlW>%O5Bv=z0mUxbpd1C zp997qA+Hu^Ig86l7L{zI%G}si6^!%+?a45bY4^(8fc8*~9WJJki-PVD?kn)l z@_gSnp2D_4RzjrLmC9i{@7L>lV4Qy1@jI0Y?oP)hoB+*DF*3)*&Ib36+lkS9Z#cT> zrR6Omeb818{1ncGcv$~rB(}uX1gpf%xx0-BLM!?-|6H@P$c&Mw_|h%DC+e{&ky<9& z!rhih^;JHfFY}f@mG~?Oc-gj$}>%ZBKOsxb_9h5?*OCWMaL?K1B0w6=h>g$sw?hFN*f~$kIm~x z{Db};GeV8{;yuo&-}O=T!;8N?)*z7cD(&xx{((`$4_Y_Z*lF|tn+#8UTI-ijfCZN2 zkI#*UWSeGt+V0Od9Q4byVmXdjd;$aJd|s*=O3zSYmexuYga*`Imx2W!HbwIo^mCl0 z3JA{yuf~bC2OVYXM0v|y;mmj-UX`h4=C)#%>ZH6Bjt#}m9~o9=o?V+vZY4I1)ftfL zQibt!I;`*eFf#~>?YuD~ebQHB+n4Ppz+YdA^aVq8@6v$c{su;EO%dUp2y>V0XECDf zA=adM?~>f%5SfpL0U^a`Nq#N{E;fX(p{Z`!^o*^QZDsp3{-I{h)g`Aa6gI?!k~nAR zHch5fMD2Z;#I9H0x8a51=xJ(c$|0)yu}Y731@B({_V zIS*1Qa*aQrGY9AzQwBAyqS)Qsm-L$972((IoF0(-l~HiWRMJ)zHYDngP&_7$bw8u7 z>CfPwPcy~44!b56RiBCKqW0rd z^lB|CwnlfuK{TyeikDK^XuT3xl&gM5FK+I1n|yerss8&VSu{x!PaC(mw!MalR;`*h zP6atox*856#LrOt$B)HJMYJztmfqBV4GX>a3OCJI%wtJ3Ekmo-m$2n!NqM zI48cN_?-y5e~{D&zSTx;piHEohpWt05jAUO4hc`0^ZuV5XU>p#c8 zh-3}%lUoYA`g+G^{L|xLbGN$s3|IFr2T4v^kw>GVWU;Z;8L82nsyIC)J|Tk;06iSc zzl=I&_<>rhD?g^A_&c%%_V2f&HX6jDh4BwZy5EIEx_|W$j2L1J2dKwQb5|5aEbIF* zrbEjVGm@0p$CR(Gt2zC5o0EjN-iCi}J^^Os*2H;ZOpI<^<&uswf1aRQ5}qfQPdaSP z{LT%Jx2=v>u(7>MRJvZxa=^v&(JPW@S>YECYjyi->=A`@9(4*SG9I}Sl;E+L-yov< z-fGgvAaT{lu6calw{^XMb%(BG8b}_Qd>2(Nq1yZzO|PKCy+ZTcTJEea{T}It{R}M0 zW8zH{yQqXT%1HZ5cKO6ym4@CXWN(8xw>->Gu=ZX=%6g-H+E+0#9BO)^*YIk{BNG4j zTf^_;RLm4!55Syn+`u3s!Gm03+?Br|mGzdd%WQ8v<-^juKhMtN0PmK_P-KYn^;dG# zaryX*){UmM#U1xYqp#6wx)RK4+CP3FRqt)9P^eYdFc&o2S^F)1a%5&s>K`MbhNdwzh(4!tY~Jy zq~gW;q;UhH>x%K5rx0VGjlohsJiQ{3go_TboLscKc|Q(alg7bb;~mo^7RVjjAGoep zQqag1Xcef@ObvU}1sQTwUZwU-RsU!`LRiu}f@uO?3tr-$Jt}{Pm){mV6iRCpbnZI* zCIE#NE0!4G7==Z7BM|2kS*D8=#xXW_yh%)p?n}3ZQesaRzN=@KCQaJ@^kD`XY+5JN zIqTSPd2v;D*=vJ3U++jPY&(`CeDbq5Ew-3{+2cOv^vvX}&SESnc?4k~V$kawlT@xA zSW9})w9rk^BJM?eOZ{BPC@up-BToRvPBSW@j3z;mf?_|sv?kCBKRJaAX{oz!#g`l! 
z?+p6ymFN!E%VbmBMmFo@|AxH1xAVm7r^aFL6CeC|CHz@jYpl8d8R6`_tn3(J;D1Q) H$+v$2rQD|6 literal 0 HcmV?d00001 diff --git a/Sdk/External/DirectXTK/.nuget/signconfig_desktop.xml b/Sdk/External/DirectXTK/.nuget/signconfig_desktop.xml new file mode 100644 index 0000000..010666a --- /dev/null +++ b/Sdk/External/DirectXTK/.nuget/signconfig_desktop.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/.nuget/signconfig_uwp.xml b/Sdk/External/DirectXTK/.nuget/signconfig_uwp.xml new file mode 100644 index 0000000..0492e73 --- /dev/null +++ b/Sdk/External/DirectXTK/.nuget/signconfig_uwp.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/.nuget/versioninfo.ps1 b/Sdk/External/DirectXTK/.nuget/versioninfo.ps1 new file mode 100644 index 0000000..7f42079 --- /dev/null +++ b/Sdk/External/DirectXTK/.nuget/versioninfo.ps1 @@ -0,0 +1,6 @@ +param( +[string]$version +) +$versionComma = $version.Replace(".", ",") +$files = 'XWBTool\xwbtool.rc', 'MakeSpriteFont\Properties\AssemblyInfo.cs' +foreach ($file in $files) { (Get-Content $file).replace('1,0,0,0', $versionComma).replace('1.0.0.0', $version) | Set-Content $file } diff --git a/Sdk/External/DirectXTK/Audio/AudioEngine.cpp b/Sdk/External/DirectXTK/Audio/AudioEngine.cpp new file mode 100644 index 0000000..eb19342 --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/AudioEngine.cpp @@ -0,0 +1,1653 @@ +//-------------------------------------------------------------------------------------- +// File: AudioEngine.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "Audio.h" +#include "SoundCommon.h" + +#include + +using namespace DirectX; +using Microsoft::WRL::ComPtr; + +//#define VERBOSE_TRACE + +#ifdef VERBOSE_TRACE +#pragma message("NOTE: Verbose tracing enabled") +#endif + +namespace +{ + struct EngineCallback : public IXAudio2EngineCallback + { + EngineCallback() noexcept(false) + { + mCriticalError.reset(CreateEventEx(nullptr, nullptr, 0, EVENT_MODIFY_STATE | SYNCHRONIZE)); + if (!mCriticalError) + { + throw std::exception("CreateEvent"); + } + } + + EngineCallback(EngineCallback&&) = default; + EngineCallback& operator= (EngineCallback&&) = default; + + EngineCallback(EngineCallback const&) = delete; + EngineCallback& operator= (EngineCallback const&) = delete; + + virtual ~EngineCallback() = default; + + STDMETHOD_(void, OnProcessingPassStart) () override {} + STDMETHOD_(void, OnProcessingPassEnd)() override {} + + STDMETHOD_(void, OnCriticalError) (THIS_ HRESULT error) + { + #ifndef _DEBUG + UNREFERENCED_PARAMETER(error); + #endif + DebugTrace("ERROR: AudioEngine encountered critical error (%08X)\n", static_cast(error)); + SetEvent(mCriticalError.get()); + } + + ScopedHandle mCriticalError; + }; + + struct VoiceCallback : public IXAudio2VoiceCallback + { + VoiceCallback() noexcept(false) + { + mBufferEnd.reset(CreateEventEx(nullptr, nullptr, 0, EVENT_MODIFY_STATE | SYNCHRONIZE)); + if (!mBufferEnd) + { + throw std::exception("CreateEvent"); + } + } + + VoiceCallback(VoiceCallback&&) = default; + VoiceCallback& operator=(VoiceCallback&&) = default; + + VoiceCallback(const VoiceCallback&) = delete; + VoiceCallback& operator=(const VoiceCallback&) = delete; + + virtual ~VoiceCallback() + { + } + + STDMETHOD_(void, OnVoiceProcessingPassStart) (UINT32) override {} + STDMETHOD_(void, 
OnVoiceProcessingPassEnd)() override {} + STDMETHOD_(void, OnStreamEnd)() override {} + STDMETHOD_(void, OnBufferStart)(void*) override {} + + STDMETHOD_(void, OnBufferEnd)(void* context) override + { + if (context) + { + auto inotify = static_cast(context); + inotify->OnBufferEnd(); + SetEvent(mBufferEnd.get()); + } + } + + STDMETHOD_(void, OnLoopEnd)(void*) override {} + STDMETHOD_(void, OnVoiceError)(void*, HRESULT) override {} + + ScopedHandle mBufferEnd; + }; + + static const XAUDIO2FX_REVERB_I3DL2_PARAMETERS gReverbPresets[] = + { + XAUDIO2FX_I3DL2_PRESET_DEFAULT, // Reverb_Off + XAUDIO2FX_I3DL2_PRESET_DEFAULT, // Reverb_Default + XAUDIO2FX_I3DL2_PRESET_GENERIC, // Reverb_Generic + XAUDIO2FX_I3DL2_PRESET_FOREST, // Reverb_Forest + XAUDIO2FX_I3DL2_PRESET_PADDEDCELL, // Reverb_PaddedCell + XAUDIO2FX_I3DL2_PRESET_ROOM, // Reverb_Room + XAUDIO2FX_I3DL2_PRESET_BATHROOM, // Reverb_Bathroom + XAUDIO2FX_I3DL2_PRESET_LIVINGROOM, // Reverb_LivingRoom + XAUDIO2FX_I3DL2_PRESET_STONEROOM, // Reverb_StoneRoom + XAUDIO2FX_I3DL2_PRESET_AUDITORIUM, // Reverb_Auditorium + XAUDIO2FX_I3DL2_PRESET_CONCERTHALL, // Reverb_ConcertHall + XAUDIO2FX_I3DL2_PRESET_CAVE, // Reverb_Cave + XAUDIO2FX_I3DL2_PRESET_ARENA, // Reverb_Arena + XAUDIO2FX_I3DL2_PRESET_HANGAR, // Reverb_Hangar + XAUDIO2FX_I3DL2_PRESET_CARPETEDHALLWAY, // Reverb_CarpetedHallway + XAUDIO2FX_I3DL2_PRESET_HALLWAY, // Reverb_Hallway + XAUDIO2FX_I3DL2_PRESET_STONECORRIDOR, // Reverb_StoneCorridor + XAUDIO2FX_I3DL2_PRESET_ALLEY, // Reverb_Alley + XAUDIO2FX_I3DL2_PRESET_CITY, // Reverb_City + XAUDIO2FX_I3DL2_PRESET_MOUNTAINS, // Reverb_Mountains + XAUDIO2FX_I3DL2_PRESET_QUARRY, // Reverb_Quarry + XAUDIO2FX_I3DL2_PRESET_PLAIN, // Reverb_Plain + XAUDIO2FX_I3DL2_PRESET_PARKINGLOT, // Reverb_ParkingLot + XAUDIO2FX_I3DL2_PRESET_SEWERPIPE, // Reverb_SewerPipe + XAUDIO2FX_I3DL2_PRESET_UNDERWATER, // Reverb_Underwater + XAUDIO2FX_I3DL2_PRESET_SMALLROOM, // Reverb_SmallRoom + XAUDIO2FX_I3DL2_PRESET_MEDIUMROOM, // Reverb_MediumRoom + 
XAUDIO2FX_I3DL2_PRESET_LARGEROOM, // Reverb_LargeRoom + XAUDIO2FX_I3DL2_PRESET_MEDIUMHALL, // Reverb_MediumHall + XAUDIO2FX_I3DL2_PRESET_LARGEHALL, // Reverb_LargeHall + XAUDIO2FX_I3DL2_PRESET_PLATE, // Reverb_Plate + }; + + inline unsigned int makeVoiceKey(_In_ const WAVEFORMATEX* wfx) noexcept + { + assert(IsValid(wfx)); + + if (wfx->nChannels > 0x7F) + return 0; + + // This hash does not use nSamplesPerSec because voice reuse can change the source sample rate. + + // nAvgBytesPerSec and nBlockAlign are derived from other values in XAudio2 supported formats. + + union KeyGen + { + struct + { + unsigned int tag : 9; + unsigned int channels : 7; + unsigned int bitsPerSample : 8; + } pcm; + + struct + { + unsigned int tag : 9; + unsigned int channels : 7; + unsigned int samplesPerBlock : 16; + } adpcm; + + #ifdef DIRECTX_ENABLE_XMA2 + struct + { + unsigned int tag : 9; + unsigned int channels : 7; + unsigned int encoderVersion : 8; + } xma; + #endif + + unsigned int key; + } result; + + static_assert(sizeof(KeyGen) == sizeof(unsigned int), "KeyGen is invalid"); + + result.key = 0; + + if (wfx->wFormatTag == WAVE_FORMAT_EXTENSIBLE) + { + // We reuse EXTENSIBLE only if it is equivalent to the standard form + auto wfex = reinterpret_cast(wfx); + if (wfex->Samples.wValidBitsPerSample != 0 && wfex->Samples.wValidBitsPerSample != wfx->wBitsPerSample) + return 0; + + if (wfex->dwChannelMask != 0 && wfex->dwChannelMask != GetDefaultChannelMask(wfx->nChannels)) + return 0; + } + + uint32_t tag = GetFormatTag(wfx); + switch (tag) + { + case WAVE_FORMAT_PCM: + static_assert(WAVE_FORMAT_PCM < 0x1ff, "KeyGen tag is too small"); + result.pcm.tag = WAVE_FORMAT_PCM; + result.pcm.channels = wfx->nChannels; + result.pcm.bitsPerSample = wfx->wBitsPerSample; + break; + + case WAVE_FORMAT_IEEE_FLOAT: + static_assert(WAVE_FORMAT_IEEE_FLOAT < 0x1ff, "KeyGen tag is too small"); + + if (wfx->wBitsPerSample != 32) + return 0; + + result.pcm.tag = WAVE_FORMAT_IEEE_FLOAT; + result.pcm.channels 
= wfx->nChannels; + result.pcm.bitsPerSample = 32; + break; + + case WAVE_FORMAT_ADPCM: + static_assert(WAVE_FORMAT_ADPCM < 0x1ff, "KeyGen tag is too small"); + result.adpcm.tag = WAVE_FORMAT_ADPCM; + result.adpcm.channels = wfx->nChannels; + + { + auto wfadpcm = reinterpret_cast(wfx); + result.adpcm.samplesPerBlock = wfadpcm->wSamplesPerBlock; + } + break; + + #ifdef DIRECTX_ENABLE_XMA2 + case WAVE_FORMAT_XMA2: + static_assert(WAVE_FORMAT_XMA2 < 0x1ff, "KeyGen tag is too small"); + result.xma.tag = WAVE_FORMAT_XMA2; + result.xma.channels = wfx->nChannels; + + { + auto xmaFmt = reinterpret_cast(wfx); + + if ((xmaFmt->LoopBegin > 0) + || (xmaFmt->PlayBegin > 0)) + return 0; + + result.xma.encoderVersion = xmaFmt->EncoderVersion; + } + break; + #endif + + default: + return 0; + } + + return result.key; + } +} + +static_assert(_countof(gReverbPresets) == Reverb_MAX, "AUDIO_ENGINE_REVERB enum mismatch"); + + +//====================================================================================== +// AudioEngine +//====================================================================================== + +#define SAFE_DESTROY_VOICE(voice) if ( voice ) { voice->DestroyVoice(); voice = nullptr; } + +// Internal object implementation class. 
+class AudioEngine::Impl +{ +public: + Impl() noexcept : + mMasterVoice(nullptr), + mReverbVoice(nullptr), + masterChannelMask(0), + masterChannels(0), + masterRate(0), + defaultRate(44100), + maxVoiceOneshots(SIZE_MAX), + maxVoiceInstances(SIZE_MAX), + mMasterVolume(1.f), + mX3DAudio{}, + mCriticalError(false), + mReverbEnabled(false), + mEngineFlags(AudioEngine_Default), + mCategory(AudioCategory_GameEffects), + mVoiceInstances(0) + { + } + + ~Impl() = default; + + Impl(Impl&&) = default; + Impl& operator= (Impl&&) = default; + + Impl(Impl const&) = delete; + Impl& operator= (Impl const&) = delete; + + HRESULT Initialize(AUDIO_ENGINE_FLAGS flags, + _In_opt_ const WAVEFORMATEX* wfx, + _In_opt_z_ const wchar_t* deviceId, + AUDIO_STREAM_CATEGORY category); + + HRESULT Reset(_In_opt_ const WAVEFORMATEX* wfx, _In_opt_z_ const wchar_t* deviceId); + + void SetSilentMode(); + + void Shutdown() noexcept; + + bool Update(); + + void SetReverb(_In_opt_ const XAUDIO2FX_REVERB_PARAMETERS* native) noexcept; + + void SetMasteringLimit(int release, int loudness); + + AudioStatistics GetStatistics() const; + + void TrimVoicePool(); + + void AllocateVoice(_In_ const WAVEFORMATEX* wfx, + SOUND_EFFECT_INSTANCE_FLAGS flags, bool oneshot, + _Outptr_result_maybenull_ IXAudio2SourceVoice** voice); + void DestroyVoice(_In_ IXAudio2SourceVoice* voice) noexcept; + + void RegisterNotify(_In_ IVoiceNotify* notify, bool usesUpdate); + void UnregisterNotify(_In_ IVoiceNotify* notify, bool oneshots, bool usesUpdate); + + ComPtr xaudio2; + IXAudio2MasteringVoice* mMasterVoice; + IXAudio2SubmixVoice* mReverbVoice; + + uint32_t masterChannelMask; + uint32_t masterChannels; + uint32_t masterRate; + + int defaultRate; + size_t maxVoiceOneshots; + size_t maxVoiceInstances; + float mMasterVolume; + + X3DAUDIO_HANDLE mX3DAudio; + + bool mCriticalError; + bool mReverbEnabled; + + AUDIO_ENGINE_FLAGS mEngineFlags; + +private: + using notifylist_t = std::set; + using oneshotlist_t = std::list>; + using 
voicepool_t = std::unordered_multimap; + + AUDIO_STREAM_CATEGORY mCategory; + ComPtr mReverbEffect; + ComPtr mVolumeLimiter; + oneshotlist_t mOneShots; + voicepool_t mVoicePool; + notifylist_t mNotifyObjects; + notifylist_t mNotifyUpdates; + size_t mVoiceInstances; + VoiceCallback mVoiceCallback; + EngineCallback mEngineCallback; +}; + + +_Use_decl_annotations_ +HRESULT AudioEngine::Impl::Initialize( + AUDIO_ENGINE_FLAGS flags, + const WAVEFORMATEX* wfx, + const wchar_t* deviceId, + AUDIO_STREAM_CATEGORY category) +{ + mEngineFlags = flags; + mCategory = category; + + return Reset(wfx, deviceId); +} + + +_Use_decl_annotations_ +HRESULT AudioEngine::Impl::Reset(const WAVEFORMATEX* wfx, const wchar_t* deviceId) +{ + if (wfx) + { + if (wfx->wFormatTag != WAVE_FORMAT_PCM) + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + + if (!wfx->nChannels || wfx->nChannels > XAUDIO2_MAX_AUDIO_CHANNELS) + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + + if (wfx->nSamplesPerSec < XAUDIO2_MIN_SAMPLE_RATE || wfx->nSamplesPerSec > XAUDIO2_MAX_SAMPLE_RATE) + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + + // We don't use other data members of WAVEFORMATEX here to describe the device format, so no need to fully validate + } + + assert(!xaudio2); + assert(mMasterVoice == nullptr); + assert(mReverbVoice == nullptr); + + masterChannelMask = masterChannels = masterRate = 0; + + memset(&mX3DAudio, 0, X3DAUDIO_HANDLE_BYTESIZE); + + mCriticalError = false; + mReverbEnabled = false; + + // + // Create XAudio2 engine + // + HRESULT hr = XAudio2Create(xaudio2.ReleaseAndGetAddressOf(), 0u); + if (FAILED(hr)) + return hr; + + if (mEngineFlags & AudioEngine_Debug) + { + XAUDIO2_DEBUG_CONFIGURATION debug = {}; + debug.TraceMask = XAUDIO2_LOG_ERRORS | XAUDIO2_LOG_WARNINGS; + debug.BreakMask = XAUDIO2_LOG_ERRORS; + xaudio2->SetDebugConfiguration(&debug, nullptr); + #ifdef USING_XAUDIO2_9 + DebugTrace("INFO: XAudio 2.9 debugging enabled\n"); + #else // USING_XAUDIO2_8 + // To see the trace 
output, you need to view ETW logs for this application: + // Go to Control Panel, Administrative Tools, Event Viewer. + // View->Show Analytic and Debug Logs. + // Applications and Services Logs / Microsoft / Windows / XAudio2. + // Right click on Microsoft Windows XAudio2 debug logging, Properties, then Enable Logging, and hit OK + DebugTrace("INFO: XAudio 2.8 debugging enabled\n"); + #endif + } + + if (mEngineFlags & AudioEngine_DisableVoiceReuse) + { + DebugTrace("INFO: Voice reuse is disabled\n"); + } + + hr = xaudio2->RegisterForCallbacks(&mEngineCallback); + if (FAILED(hr)) + { + xaudio2.Reset(); + return hr; + } + + // + // Create mastering voice for device + // + hr = xaudio2->CreateMasteringVoice(&mMasterVoice, + (wfx) ? wfx->nChannels : 0u /*XAUDIO2_DEFAULT_CHANNELS */, + (wfx) ? wfx->nSamplesPerSec : 0u /* XAUDIO2_DEFAULT_SAMPLERATE */, + 0u, deviceId, nullptr, mCategory); + if (FAILED(hr)) + { + xaudio2.Reset(); + return hr; + } + + DWORD dwChannelMask; + hr = mMasterVoice->GetChannelMask(&dwChannelMask); + if (FAILED(hr)) + { + SAFE_DESTROY_VOICE(mMasterVoice) + xaudio2.Reset(); + return hr; + } + + XAUDIO2_VOICE_DETAILS details; + mMasterVoice->GetVoiceDetails(&details); + + masterChannelMask = dwChannelMask; + masterChannels = details.InputChannels; + masterRate = details.InputSampleRate; + + DebugTrace("INFO: mastering voice has %u channels, %u sample rate, %08X channel mask\n", + masterChannels, masterRate, masterChannelMask); + + if (mMasterVolume != 1.f) + { + hr = mMasterVoice->SetVolume(mMasterVolume); + if (FAILED(hr)) + { + SAFE_DESTROY_VOICE(mMasterVoice) + xaudio2.Reset(); + return hr; + } + } + + // + // Setup mastering volume limiter (optional) + // + if (mEngineFlags & AudioEngine_UseMasteringLimiter) + { + FXMASTERINGLIMITER_PARAMETERS params = {}; + params.Release = FXMASTERINGLIMITER_DEFAULT_RELEASE; + params.Loudness = FXMASTERINGLIMITER_DEFAULT_LOUDNESS; + + hr = CreateFX(__uuidof(FXMasteringLimiter), 
mVolumeLimiter.ReleaseAndGetAddressOf(), ¶ms, sizeof(params)); + if (FAILED(hr)) + { + SAFE_DESTROY_VOICE(mMasterVoice) + xaudio2.Reset(); + return hr; + } + + XAUDIO2_EFFECT_DESCRIPTOR desc = {}; + desc.InitialState = TRUE; + desc.OutputChannels = masterChannels; + desc.pEffect = mVolumeLimiter.Get(); + + XAUDIO2_EFFECT_CHAIN chain = { 1, &desc }; + hr = mMasterVoice->SetEffectChain(&chain); + if (FAILED(hr)) + { + SAFE_DESTROY_VOICE(mMasterVoice) + mVolumeLimiter.Reset(); + xaudio2.Reset(); + return hr; + } + + DebugTrace("INFO: Mastering volume limiter enabled\n"); + } + + // + // Setup environmental reverb for 3D audio (optional) + // + if (mEngineFlags & AudioEngine_EnvironmentalReverb) + { + hr = XAudio2CreateReverb(mReverbEffect.ReleaseAndGetAddressOf(), 0u); + if (FAILED(hr)) + { + SAFE_DESTROY_VOICE(mMasterVoice) + mVolumeLimiter.Reset(); + xaudio2.Reset(); + return hr; + } + + XAUDIO2_EFFECT_DESCRIPTOR effects[] = { { mReverbEffect.Get(), TRUE, 1 } }; + XAUDIO2_EFFECT_CHAIN effectChain = { 1, effects }; + + mReverbEnabled = true; + + hr = xaudio2->CreateSubmixVoice(&mReverbVoice, 1, masterRate, + (mEngineFlags & AudioEngine_ReverbUseFilters) ? 
XAUDIO2_VOICE_USEFILTER : 0u, 0u, + nullptr, &effectChain); + if (FAILED(hr)) + { + SAFE_DESTROY_VOICE(mMasterVoice) + mReverbEffect.Reset(); + mVolumeLimiter.Reset(); + xaudio2.Reset(); + return hr; + } + + XAUDIO2FX_REVERB_PARAMETERS native; + ReverbConvertI3DL2ToNative(&gReverbPresets[Reverb_Default], &native); + hr = mReverbVoice->SetEffectParameters(0, &native, sizeof(XAUDIO2FX_REVERB_PARAMETERS)); + if (FAILED(hr)) + { + SAFE_DESTROY_VOICE(mReverbVoice) + SAFE_DESTROY_VOICE(mMasterVoice) + mReverbEffect.Reset(); + mVolumeLimiter.Reset(); + xaudio2.Reset(); + return hr; + } + + DebugTrace("INFO: I3DL2 reverb effect enabled for 3D positional audio\n"); + } + + // + // Setup 3D audio + // + constexpr float SPEEDOFSOUND = X3DAUDIO_SPEED_OF_SOUND; + + hr = X3DAudioInitialize(masterChannelMask, SPEEDOFSOUND, mX3DAudio); + if (FAILED(hr)) + { + SAFE_DESTROY_VOICE(mReverbVoice) + SAFE_DESTROY_VOICE(mMasterVoice) + mReverbEffect.Reset(); + mVolumeLimiter.Reset(); + xaudio2.Reset(); + return hr; + } + + // + // Inform any notify objects we are ready to go again + // + for (auto it = mNotifyObjects.begin(); it != mNotifyObjects.end(); ++it) + { + assert(*it != nullptr); + (*it)->OnReset(); + } + + return S_OK; +} + + +void AudioEngine::Impl::SetSilentMode() +{ + for (auto it = mNotifyObjects.begin(); it != mNotifyObjects.end(); ++it) + { + assert(*it != nullptr); + (*it)->OnCriticalError(); + } + + for (auto it = mOneShots.begin(); it != mOneShots.end(); ++it) + { + assert(it->second != nullptr); + it->second->DestroyVoice(); + } + mOneShots.clear(); + + for (auto it = mVoicePool.begin(); it != mVoicePool.end(); ++it) + { + assert(it->second != nullptr); + it->second->DestroyVoice(); + } + mVoicePool.clear(); + + mVoiceInstances = 0; + + SAFE_DESTROY_VOICE(mReverbVoice) + SAFE_DESTROY_VOICE(mMasterVoice) + + mReverbEffect.Reset(); + mVolumeLimiter.Reset(); + xaudio2.Reset(); +} + + +void AudioEngine::Impl::Shutdown() noexcept +{ + for (auto it = mNotifyObjects.begin(); 
it != mNotifyObjects.end(); ++it) + { + assert(*it != nullptr); + (*it)->OnDestroyEngine(); + } + + if (xaudio2) + { + xaudio2->UnregisterForCallbacks(&mEngineCallback); + + xaudio2->StopEngine(); + + for (auto it = mOneShots.begin(); it != mOneShots.end(); ++it) + { + assert(it->second != nullptr); + it->second->DestroyVoice(); + } + mOneShots.clear(); + + for (auto it = mVoicePool.begin(); it != mVoicePool.end(); ++it) + { + assert(it->second != nullptr); + it->second->DestroyVoice(); + } + mVoicePool.clear(); + + mVoiceInstances = 0; + + SAFE_DESTROY_VOICE(mReverbVoice) + SAFE_DESTROY_VOICE(mMasterVoice) + + mReverbEffect.Reset(); + mVolumeLimiter.Reset(); + xaudio2.Reset(); + + masterChannelMask = masterChannels = masterRate = 0; + + mCriticalError = false; + mReverbEnabled = false; + + memset(&mX3DAudio, 0, X3DAUDIO_HANDLE_BYTESIZE); + } +} + + +bool AudioEngine::Impl::Update() +{ + if (!xaudio2) + return false; + + HANDLE events[2] = { mEngineCallback.mCriticalError.get(), mVoiceCallback.mBufferEnd.get() }; + switch (WaitForMultipleObjectsEx(_countof(events), events, FALSE, 0, FALSE)) + { + default: + case WAIT_TIMEOUT: + break; + + case WAIT_OBJECT_0: // OnCritialError + mCriticalError = true; + + SetSilentMode(); + return false; + + case WAIT_OBJECT_0 + 1: // OnBufferEnd + // Scan for completed one-shot voices + for (auto it = mOneShots.begin(); it != mOneShots.end(); ) + { + assert(it->second != nullptr); + + XAUDIO2_VOICE_STATE xstate; + it->second->GetState(&xstate, XAUDIO2_VOICE_NOSAMPLESPLAYED); + + if (!xstate.BuffersQueued) + { + (void)it->second->Stop(0); + if (it->first) + { + // Put voice back into voice pool for reuse since it has a non-zero voiceKey + #ifdef VERBOSE_TRACE + DebugTrace("INFO: One-shot voice being saved for reuse (%08X)\n", it->first); + #endif + voicepool_t::value_type v(it->first, it->second); + mVoicePool.emplace(v); + } + else + { + // Voice is to be destroyed rather than reused + #ifdef VERBOSE_TRACE + DebugTrace("INFO: 
Destroying one-shot voice\n"); + #endif + it->second->DestroyVoice(); + } + it = mOneShots.erase(it); + } + else + ++it; + } + break; + + case WAIT_FAILED: + throw std::exception("WaitForMultipleObjects"); + } + + // + // Inform any notify objects of updates + // + for (auto it = mNotifyUpdates.begin(); it != mNotifyUpdates.end(); ++it) + { + assert(*it != nullptr); + (*it)->OnUpdate(); + } + + return true; +} + + +_Use_decl_annotations_ +void AudioEngine::Impl::SetReverb(const XAUDIO2FX_REVERB_PARAMETERS* native) noexcept +{ + if (!mReverbVoice) + return; + + if (native) + { + if (!mReverbEnabled) + { + mReverbEnabled = true; + (void)mReverbVoice->EnableEffect(0); + } + + (void)mReverbVoice->SetEffectParameters(0, native, sizeof(XAUDIO2FX_REVERB_PARAMETERS)); + } + else if (mReverbEnabled) + { + mReverbEnabled = false; + (void)mReverbVoice->DisableEffect(0); + } +} + + +void AudioEngine::Impl::SetMasteringLimit(int release, int loudness) +{ + if (!mVolumeLimiter || !mMasterVoice) + return; + + if ((release < FXMASTERINGLIMITER_MIN_RELEASE) || (release > FXMASTERINGLIMITER_MAX_RELEASE)) + throw std::out_of_range("AudioEngine::SetMasteringLimit"); + + if ((loudness < FXMASTERINGLIMITER_MIN_LOUDNESS) || (loudness > FXMASTERINGLIMITER_MAX_LOUDNESS)) + throw std::out_of_range("AudioEngine::SetMasteringLimit"); + + FXMASTERINGLIMITER_PARAMETERS params = {}; + params.Release = static_cast(release); + params.Loudness = static_cast(loudness); + + HRESULT hr = mMasterVoice->SetEffectParameters(0, ¶ms, sizeof(params)); + ThrowIfFailed(hr); +} + + +AudioStatistics AudioEngine::Impl::GetStatistics() const +{ + AudioStatistics stats = {}; + + stats.allocatedVoices = stats.allocatedVoicesOneShot = mOneShots.size() + mVoicePool.size(); + stats.allocatedVoicesIdle = mVoicePool.size(); + + for (auto it = mNotifyObjects.begin(); it != mNotifyObjects.end(); ++it) + { + assert(*it != nullptr); + (*it)->GatherStatistics(stats); + } + + assert(stats.allocatedVoices == (mOneShots.size() 
+ mVoicePool.size() + mVoiceInstances)); + + return stats; +} + + +void AudioEngine::Impl::TrimVoicePool() +{ + for (auto it = mNotifyObjects.begin(); it != mNotifyObjects.end(); ++it) + { + assert(*it != nullptr); + (*it)->OnTrim(); + } + + for (auto it = mVoicePool.begin(); it != mVoicePool.end(); ++it) + { + assert(it->second != nullptr); + it->second->DestroyVoice(); + } + mVoicePool.clear(); +} + + +_Use_decl_annotations_ +void AudioEngine::Impl::AllocateVoice( + const WAVEFORMATEX* wfx, + SOUND_EFFECT_INSTANCE_FLAGS flags, + bool oneshot, + IXAudio2SourceVoice** voice) +{ + if (!wfx) + throw std::exception("Wave format is required\n"); + + // No need to call IsValid on wfx because CreateSourceVoice will do that + + if (!voice) + throw std::exception("Voice pointer must be non-null"); + + *voice = nullptr; + + if (!xaudio2 || mCriticalError) + return; + +#ifndef NDEBUG + const float maxFrequencyRatio = XAudio2SemitonesToFrequencyRatio(12); + assert(maxFrequencyRatio <= XAUDIO2_DEFAULT_FREQ_RATIO); +#endif + + unsigned int voiceKey = 0; + if (oneshot) + { + if (flags & (SoundEffectInstance_Use3D | SoundEffectInstance_ReverbUseFilters | SoundEffectInstance_NoSetPitch)) + { + DebugTrace((flags & SoundEffectInstance_NoSetPitch) + ? 
"ERROR: One-shot voices must support pitch-shifting for voice reuse\n" + : "ERROR: One-use voices cannot use 3D positional audio\n"); + throw std::exception("Invalid flags for one-shot voice"); + } + + #ifdef VERBOSE_TRACE + if (wfx->wFormatTag == WAVE_FORMAT_EXTENSIBLE) + { + DebugTrace("INFO: Requesting one-shot: Format Tag EXTENSIBLE %u, %u channels, %u-bit, %u blkalign, %u Hz\n", + GetFormatTag(wfx), wfx->nChannels, wfx->wBitsPerSample, wfx->nBlockAlign, wfx->nSamplesPerSec); + } + else + { + DebugTrace("INFO: Requesting one-shot: Format Tag %u, %u channels, %u-bit, %u blkalign, %u Hz\n", + wfx->wFormatTag, wfx->nChannels, wfx->wBitsPerSample, wfx->nBlockAlign, wfx->nSamplesPerSec); + } + #endif + + if (!(mEngineFlags & AudioEngine_DisableVoiceReuse)) + { + voiceKey = makeVoiceKey(wfx); + if (voiceKey != 0) + { + auto it = mVoicePool.find(voiceKey); + if (it != mVoicePool.end()) + { + // Found a matching (stopped) voice to reuse + assert(it->second != nullptr); + *voice = it->second; + mVoicePool.erase(it); + + // Reset any volume/pitch-shifting + HRESULT hr = (*voice)->SetVolume(1.f); + ThrowIfFailed(hr); + + hr = (*voice)->SetFrequencyRatio(1.f); + ThrowIfFailed(hr); + + if (wfx->nChannels == 1 || wfx->nChannels == 2) + { + // Reset any panning + float matrix[16] = {}; + ComputePan(0.f, wfx->nChannels, matrix); + + hr = (*voice)->SetOutputMatrix(nullptr, wfx->nChannels, masterChannels, matrix); + ThrowIfFailed(hr); + } + } + else if ((mVoicePool.size() + mOneShots.size() + 1) >= maxVoiceOneshots) + { + DebugTrace("WARNING: Too many one-shot voices in use (%zu + %zu >= %zu); one-shot not played\n", + mVoicePool.size(), mOneShots.size() + 1, maxVoiceOneshots); + return; + } + else + { + // makeVoiceKey already constrained the supported wfx formats to those supported for reuse + + char buff[64] = {}; + auto wfmt = reinterpret_cast(buff); + + uint32_t tag = GetFormatTag(wfx); + switch (tag) + { + case WAVE_FORMAT_PCM: + CreateIntegerPCM(wfmt, defaultRate, 
wfx->nChannels, wfx->wBitsPerSample); + break; + + case WAVE_FORMAT_IEEE_FLOAT: + CreateFloatPCM(wfmt, defaultRate, wfx->nChannels); + break; + + case WAVE_FORMAT_ADPCM: + { + auto wfadpcm = reinterpret_cast(wfx); + CreateADPCM(wfmt, sizeof(buff), defaultRate, wfx->nChannels, wfadpcm->wSamplesPerBlock); + } + break; + + #ifdef DIRECTX_ENABLE_XMA2 + case WAVE_FORMAT_XMA2: + CreateXMA2(wfmt, sizeof(buff), defaultRate, wfx->nChannels, 65536, 2, 0); + break; + #endif + } + + #ifdef VERBOSE_TRACE + DebugTrace("INFO: Allocate reuse voice: Format Tag %u, %u channels, %u-bit, %u blkalign, %u Hz\n", + wfmt->wFormatTag, wfmt->nChannels, wfmt->wBitsPerSample, wfmt->nBlockAlign, wfmt->nSamplesPerSec); + #endif + + assert(voiceKey == makeVoiceKey(wfmt)); + + HRESULT hr = xaudio2->CreateSourceVoice(voice, wfmt, 0, XAUDIO2_DEFAULT_FREQ_RATIO, &mVoiceCallback, nullptr, nullptr); + if (FAILED(hr)) + { + DebugTrace("ERROR: CreateSourceVoice (reuse) failed with error %08X\n", static_cast(hr)); + throw std::exception("CreateSourceVoice"); + } + } + + assert(*voice != nullptr); + HRESULT hr = (*voice)->SetSourceSampleRate(wfx->nSamplesPerSec); + if (FAILED(hr)) + { + DebugTrace("ERROR: SetSourceSampleRate failed with error %08X\n", static_cast(hr)); + throw std::exception("SetSourceSampleRate"); + } + } + } + } + + if (!*voice) + { + if (oneshot) + { + if ((mVoicePool.size() + mOneShots.size() + 1) >= maxVoiceOneshots) + { + DebugTrace("WARNING: Too many one-shot voices in use (%zu + %zu >= %zu); one-shot not played; see TrimVoicePool\n", + mVoicePool.size(), mOneShots.size() + 1, maxVoiceOneshots); + return; + } + } + else if ((mVoiceInstances + 1) >= maxVoiceInstances) + { + DebugTrace("ERROR: Too many instance voices (%zu >= %zu); see TrimVoicePool\n", + mVoiceInstances + 1, maxVoiceInstances); + throw std::exception("Too many instance voices"); + } + + UINT32 vflags = (flags & SoundEffectInstance_NoSetPitch) ? 
XAUDIO2_VOICE_NOPITCH : 0u; + + HRESULT hr; + if (flags & SoundEffectInstance_Use3D) + { + XAUDIO2_SEND_DESCRIPTOR sendDescriptors[2] = {}; + sendDescriptors[0].Flags = sendDescriptors[1].Flags = (flags & SoundEffectInstance_ReverbUseFilters) + ? XAUDIO2_SEND_USEFILTER : 0u; + sendDescriptors[0].pOutputVoice = mMasterVoice; + sendDescriptors[1].pOutputVoice = mReverbVoice; + const XAUDIO2_VOICE_SENDS sendList = { mReverbVoice ? 2U : 1U, sendDescriptors }; + + #ifdef VERBOSE_TRACE + DebugTrace("INFO: Allocate voice 3D: Format Tag %u, %u channels, %u-bit, %u blkalign, %u Hz\n", + wfx->wFormatTag, wfx->nChannels, wfx->wBitsPerSample, wfx->nBlockAlign, wfx->nSamplesPerSec); + #endif + + hr = xaudio2->CreateSourceVoice(voice, wfx, vflags, XAUDIO2_DEFAULT_FREQ_RATIO, &mVoiceCallback, &sendList, nullptr); + } + else + { + #ifdef VERBOSE_TRACE + DebugTrace("INFO: Allocate voice: Format Tag %u, %u channels, %u-bit, %u blkalign, %u Hz\n", + wfx->wFormatTag, wfx->nChannels, wfx->wBitsPerSample, wfx->nBlockAlign, wfx->nSamplesPerSec); + #endif + + hr = xaudio2->CreateSourceVoice(voice, wfx, vflags, XAUDIO2_DEFAULT_FREQ_RATIO, &mVoiceCallback, nullptr, nullptr); + } + + if (FAILED(hr)) + { + DebugTrace("ERROR: CreateSourceVoice failed with error %08X\n", static_cast(hr)); + throw std::exception("CreateSourceVoice"); + } + else if (!oneshot) + { + ++mVoiceInstances; + } + } + + if (oneshot) + { + assert(*voice != nullptr); + mOneShots.emplace_back(std::make_pair(voiceKey, *voice)); + } +} + + +void AudioEngine::Impl::DestroyVoice(_In_ IXAudio2SourceVoice* voice) noexcept +{ + if (!voice) + return; + +#ifndef NDEBUG + for (auto it = mOneShots.cbegin(); it != mOneShots.cend(); ++it) + { + if (it->second == voice) + { + DebugTrace("ERROR: DestroyVoice should not be called for a one-shot voice\n"); + return; + } + } + + for (auto it = mVoicePool.cbegin(); it != mVoicePool.cend(); ++it) + { + if (it->second == voice) + { + DebugTrace("ERROR: DestroyVoice should not be called for a 
one-shot voice; see TrimVoicePool\n"); + return; + } + } +#endif + + assert(mVoiceInstances > 0); + --mVoiceInstances; + voice->DestroyVoice(); +} + + +void AudioEngine::Impl::RegisterNotify(_In_ IVoiceNotify* notify, bool usesUpdate) +{ + assert(notify != nullptr); + mNotifyObjects.insert(notify); + + if (usesUpdate) + { + mNotifyUpdates.insert(notify); + } +} + + +void AudioEngine::Impl::UnregisterNotify(_In_ IVoiceNotify* notify, bool usesOneShots, bool usesUpdate) +{ + assert(notify != nullptr); + mNotifyObjects.erase(notify); + + // Check for any pending one-shots for this notification object + if (usesOneShots) + { + bool setevent = false; + + for (auto it = mOneShots.begin(); it != mOneShots.end(); ++it) + { + assert(it->second != nullptr); + + XAUDIO2_VOICE_STATE state; + it->second->GetState(&state, XAUDIO2_VOICE_NOSAMPLESPLAYED); + + if (state.pCurrentBufferContext == notify) + { + (void)it->second->Stop(0); + (void)it->second->FlushSourceBuffers(); + setevent = true; + } + } + + if (setevent) + { + // Trigger scan on next call to Update... + SetEvent(mVoiceCallback.mBufferEnd.get()); + } + } + + if (usesUpdate) + { + mNotifyUpdates.erase(notify); + } +} + + +//-------------------------------------------------------------------------------------- +// AudioEngine +//-------------------------------------------------------------------------------------- + +// Public constructor. 
+_Use_decl_annotations_ +AudioEngine::AudioEngine( + AUDIO_ENGINE_FLAGS flags, + const WAVEFORMATEX* wfx, + const wchar_t* deviceId, + AUDIO_STREAM_CATEGORY category) noexcept(false) + : pImpl(std::make_unique()) +{ + HRESULT hr = pImpl->Initialize(flags, wfx, deviceId, category); + if (FAILED(hr)) + { + if (hr == HRESULT_FROM_WIN32(ERROR_NOT_FOUND)) + { + if (flags & AudioEngine_ThrowOnNoAudioHW) + { + DebugTrace("ERROR: AudioEngine found no default audio device\n"); + throw std::exception("AudioEngineNoAudioHW"); + } + else + { + DebugTrace("WARNING: AudioEngine found no default audio device; running in 'silent mode'\n"); + } + } + else + { + DebugTrace("ERROR: AudioEngine failed (%08X) to initialize using device [%ls]\n", + static_cast(hr), + (deviceId) ? deviceId : L"default"); + throw std::exception("AudioEngine"); + } + } +} + + +// Move constructor. +AudioEngine::AudioEngine(AudioEngine&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +AudioEngine& AudioEngine::operator= (AudioEngine&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +AudioEngine::~AudioEngine() +{ + if (pImpl) + { + pImpl->Shutdown(); + } +} + + +// Public methods. 
+bool AudioEngine::Update() +{ + return pImpl->Update(); +} + + +_Use_decl_annotations_ +bool AudioEngine::Reset(const WAVEFORMATEX* wfx, const wchar_t* deviceId) +{ + if (pImpl->xaudio2) + { + DebugTrace("WARNING: Called Reset for active audio graph; going silent in preparation for migration\n"); + pImpl->SetSilentMode(); + } + + HRESULT hr = pImpl->Reset(wfx, deviceId); + if (FAILED(hr)) + { + if (hr == HRESULT_FROM_WIN32(ERROR_NOT_FOUND)) + { + if (pImpl->mEngineFlags & AudioEngine_ThrowOnNoAudioHW) + { + DebugTrace("ERROR: AudioEngine found no default audio device on Reset\n"); + throw std::exception("AudioEngineNoAudioHW"); + } + else + { + DebugTrace("WARNING: AudioEngine found no default audio device on Reset; running in 'silent mode'\n"); + return false; + } + } + else + { + DebugTrace("ERROR: AudioEngine failed (%08X) to Reset using device [%ls]\n", + static_cast(hr), (deviceId) ? deviceId : L"default"); + throw std::exception("AudioEngine::Reset"); + } + } + + DebugTrace("INFO: AudioEngine Reset using device [%ls]\n", (deviceId) ? 
deviceId : L"default"); + + return true; +} + + +void AudioEngine::Suspend() noexcept +{ + if (!pImpl->xaudio2) + return; + + pImpl->xaudio2->StopEngine(); +} + + +void AudioEngine::Resume() +{ + if (!pImpl->xaudio2) + return; + + HRESULT hr = pImpl->xaudio2->StartEngine(); + ThrowIfFailed(hr); +} + + +float AudioEngine::GetMasterVolume() const noexcept +{ + return pImpl->mMasterVolume; +} + + +void AudioEngine::SetMasterVolume(float volume) +{ + assert(volume >= -XAUDIO2_MAX_VOLUME_LEVEL && volume <= XAUDIO2_MAX_VOLUME_LEVEL); + + pImpl->mMasterVolume = volume; + + if (pImpl->mMasterVoice) + { + HRESULT hr = pImpl->mMasterVoice->SetVolume(volume); + ThrowIfFailed(hr); + } +} + + +void AudioEngine::SetReverb(AUDIO_ENGINE_REVERB reverb) +{ + if (reverb >= Reverb_MAX) + throw std::out_of_range("AudioEngine::SetReverb"); + + if (reverb == Reverb_Off) + { + pImpl->SetReverb(nullptr); + } + else + { + XAUDIO2FX_REVERB_PARAMETERS native; + ReverbConvertI3DL2ToNative(&gReverbPresets[reverb], &native); + pImpl->SetReverb(&native); + } +} + + +_Use_decl_annotations_ +void AudioEngine::SetReverb(const XAUDIO2FX_REVERB_PARAMETERS* native) +{ + pImpl->SetReverb(native); +} + + +void AudioEngine::SetMasteringLimit(int release, int loudness) +{ + pImpl->SetMasteringLimit(release, loudness); +} + + +// Public accessors. 
+AudioStatistics AudioEngine::GetStatistics() const +{ + return pImpl->GetStatistics(); +} + + +WAVEFORMATEXTENSIBLE AudioEngine::GetOutputFormat() const noexcept +{ + WAVEFORMATEXTENSIBLE wfx = {}; + + if (!pImpl->xaudio2) + return wfx; + + wfx.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE; + wfx.Format.wBitsPerSample = wfx.Samples.wValidBitsPerSample = 16; // This is a guess + wfx.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX); + + wfx.Format.nChannels = static_cast(pImpl->masterChannels); + wfx.Format.nSamplesPerSec = pImpl->masterRate; + wfx.dwChannelMask = pImpl->masterChannelMask; + + wfx.Format.nBlockAlign = static_cast(wfx.Format.nChannels * wfx.Format.wBitsPerSample / 8); + wfx.Format.nAvgBytesPerSec = wfx.Format.nSamplesPerSec * wfx.Format.nBlockAlign; + + static const GUID s_pcm = { WAVE_FORMAT_PCM, 0x0000, 0x0010, { 0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71 } }; + memcpy(&wfx.SubFormat, &s_pcm, sizeof(GUID)); + + return wfx; +} + + +uint32_t AudioEngine::GetChannelMask() const noexcept +{ + return pImpl->masterChannelMask; +} + + +unsigned int AudioEngine::GetOutputChannels() const noexcept +{ + return pImpl->masterChannels; +} + + +bool AudioEngine::IsAudioDevicePresent() const noexcept +{ + return pImpl->xaudio2 && !pImpl->mCriticalError; +} + + +bool AudioEngine::IsCriticalError() const noexcept +{ + return pImpl->mCriticalError; +} + + +// Voice management. 
+void AudioEngine::SetDefaultSampleRate(int sampleRate) +{ + if ((sampleRate < XAUDIO2_MIN_SAMPLE_RATE) || (sampleRate > XAUDIO2_MAX_SAMPLE_RATE)) + throw std::exception("Default sample rate is out of range"); + + pImpl->defaultRate = sampleRate; +} + + +void AudioEngine::SetMaxVoicePool(size_t maxOneShots, size_t maxInstances) +{ + if (maxOneShots > 0) + pImpl->maxVoiceOneshots = maxOneShots; + + if (maxInstances > 0) + pImpl->maxVoiceInstances = maxInstances; +} + + +void AudioEngine::TrimVoicePool() +{ + pImpl->TrimVoicePool(); +} + + +_Use_decl_annotations_ +void AudioEngine::AllocateVoice( + const WAVEFORMATEX* wfx, + SOUND_EFFECT_INSTANCE_FLAGS flags, + bool oneshot, + IXAudio2SourceVoice** voice) +{ + pImpl->AllocateVoice(wfx, flags, oneshot, voice); +} + + +void AudioEngine::DestroyVoice(_In_ IXAudio2SourceVoice* voice) noexcept +{ + pImpl->DestroyVoice(voice); +} + + +void AudioEngine::RegisterNotify(_In_ IVoiceNotify* notify, bool usesUpdate) +{ + pImpl->RegisterNotify(notify, usesUpdate); +} + + +void AudioEngine::UnregisterNotify(_In_ IVoiceNotify* notify, bool oneshots, bool usesUpdate) +{ + pImpl->UnregisterNotify(notify, oneshots, usesUpdate); +} + + +IXAudio2* AudioEngine::GetInterface() const noexcept +{ + return pImpl->xaudio2.Get(); +} + + +IXAudio2MasteringVoice* AudioEngine::GetMasterVoice() const noexcept +{ + return pImpl->mMasterVoice; +} + + +IXAudio2SubmixVoice* AudioEngine::GetReverbVoice() const noexcept +{ + return pImpl->mReverbVoice; +} + + +X3DAUDIO_HANDLE& AudioEngine::Get3DHandle() const noexcept +{ + return pImpl->mX3DAudio; +} + + +// Static methods. 
+#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_GAMES) +#include +#elif defined(_XBOX_ONE) +#include +#include +#elif defined(USING_XAUDIO2_REDIST) || defined(_GAMING_DESKTOP) +#include +#include +#elif (_WIN32_WINNT >= _WIN32_WINNT_WIN8) +#pragma comment(lib,"runtimeobject.lib") +#pragma warning(push) +#pragma warning(disable: 4471 5204) +#include +#pragma warning(pop) +#include +#endif + +std::vector AudioEngine::GetRendererDetails() +{ + std::vector list; + +#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_GAMES) + + ComPtr devEnum; + HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(devEnum.GetAddressOf())); + ThrowIfFailed(hr); + + ComPtr devices; + hr = devEnum->EnumAudioEndpoints(eRender, DEVICE_STATE_ACTIVE, &devices); + ThrowIfFailed(hr); + + ComPtr endpoint; + ThrowIfFailed(devices->Item(0, endpoint.GetAddressOf())); + + LPWSTR id = nullptr; + ThrowIfFailed(endpoint->GetId(&id)); + + RendererDetail device; + device.deviceId = id; + device.description = L"Default"; + + CoTaskMemFree(id); + + list.emplace_back(device); + +#elif defined(_XBOX_ONE) + + using namespace Microsoft::WRL; + using namespace Microsoft::WRL::Wrappers; + using namespace ABI::Windows::Foundation; + using namespace ABI::Windows::Media::Devices; + + ComPtr mdStatics; + HRESULT hr = GetActivationFactory(HStringReference(RuntimeClass_Windows_Media_Devices_MediaDevice).Get(), &mdStatics); + ThrowIfFailed(hr); + + HString id; + hr = mdStatics->GetDefaultAudioRenderId(AudioDeviceRole_Default, id.GetAddressOf()); + ThrowIfFailed(hr); + + RendererDetail device; + device.deviceId = id.GetRawBuffer(nullptr); + device.description = L"Default"; + list.emplace_back(device); + +#elif defined(USING_XAUDIO2_REDIST) || defined(_GAMING_DESKTOP) + + ComPtr devEnum; + HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(devEnum.GetAddressOf())); + ThrowIfFailed(hr); + + 
ComPtr devices; + hr = devEnum->EnumAudioEndpoints(eRender, DEVICE_STATE_ACTIVE, &devices); + ThrowIfFailed(hr); + + UINT count = 0; + ThrowIfFailed(devices->GetCount(&count)); + + if (!count) + return list; + + for (UINT j = 0; j < count; ++j) + { + ComPtr endpoint; + hr = devices->Item(j, endpoint.GetAddressOf()); + ThrowIfFailed(hr); + + LPWSTR id = nullptr; + ThrowIfFailed(endpoint->GetId(&id)); + + RendererDetail device; + device.deviceId = id; + CoTaskMemFree(id); + + ComPtr props; + if (SUCCEEDED(endpoint->OpenPropertyStore(STGM_READ, props.GetAddressOf()))) + { + PROPVARIANT var; + PropVariantInit(&var); + + if (SUCCEEDED(props->GetValue(PKEY_Device_FriendlyName, &var))) + { + if (var.vt == VT_LPWSTR) + { + device.description = var.pwszVal; + } + PropVariantClear(&var); + } + } + + list.emplace_back(device); + } + +#elif (_WIN32_WINNT >= _WIN32_WINNT_WIN8) + +#if defined(__cplusplus_winrt) + + // Enumerating with WinRT using C++/CX (Windows Store apps) + using Windows::Devices::Enumeration::DeviceClass; + using Windows::Devices::Enumeration::DeviceInformation; + using Windows::Devices::Enumeration::DeviceInformationCollection; + + auto operation = DeviceInformation::FindAllAsync(DeviceClass::AudioRender); + while (operation->Status == Windows::Foundation::AsyncStatus::Started) { Sleep(100); } + if (operation->Status != Windows::Foundation::AsyncStatus::Completed) + { + throw std::exception("FindAllAsync"); + } + + DeviceInformationCollection^ devices = operation->GetResults(); + + for (unsigned i = 0; i < devices->Size; ++i) + { + using Windows::Devices::Enumeration::DeviceInformation; + + DeviceInformation^ d = devices->GetAt(i); + + RendererDetail device; + device.deviceId = d->Id->Data(); + device.description = d->Name->Data(); + list.emplace_back(device); + } +#else + + // Enumerating with WinRT using WRL (Win32 desktop app for Windows 8.x) + using namespace Microsoft::WRL; + using namespace Microsoft::WRL::Wrappers; + using namespace 
ABI::Windows::Foundation; + using namespace ABI::Windows::Foundation::Collections; + using namespace ABI::Windows::Devices::Enumeration; + +#if !defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP) + RoInitializeWrapper initialize(RO_INIT_MULTITHREADED); + ThrowIfFailed(initialize); +#endif + + ComPtr diFactory; + HRESULT hr = GetActivationFactory(HStringReference(RuntimeClass_Windows_Devices_Enumeration_DeviceInformation).Get(), &diFactory); + ThrowIfFailed(hr); + + ComPtr> operation; + hr = diFactory->FindAllAsyncDeviceClass(DeviceClass_AudioRender, operation.GetAddressOf()); + ThrowIfFailed(hr); + + ComPtr asyncinfo; + hr = operation.As(&asyncinfo); + ThrowIfFailed(hr); + + AsyncStatus status; + hr = asyncinfo->get_Status(&status); + ThrowIfFailed(hr); + + while (status == ABI::Windows::Foundation::AsyncStatus::Started) + { + Sleep(100); + hr = asyncinfo->get_Status(&status); + ThrowIfFailed(hr); + } + + if (status != ABI::Windows::Foundation::AsyncStatus::Completed) + { + throw std::exception("FindAllAsyncDeviceClass"); + } + + ComPtr> devices; + hr = operation->GetResults(devices.GetAddressOf()); + ThrowIfFailed(hr); + + unsigned int count = 0; + hr = devices->get_Size(&count); + ThrowIfFailed(hr); + + if (!count) + return list; + + for (unsigned int j = 0; j < count; ++j) + { + ComPtr deviceInfo; + hr = devices->GetAt(j, deviceInfo.GetAddressOf()); + if (SUCCEEDED(hr)) + { + RendererDetail device; + + HString id; + if (SUCCEEDED(deviceInfo->get_Id(id.GetAddressOf()))) + { + device.deviceId = id.GetRawBuffer(nullptr); + } + + HString name; + if (SUCCEEDED(deviceInfo->get_Name(name.GetAddressOf()))) + { + device.description = name.GetRawBuffer(nullptr); + } + + list.emplace_back(device); + } + } +#endif +#else +#error DirectX Tool Kit for Audio not supported on this platform +#endif + + return list; +} diff --git a/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win7.vcxproj 
b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win7.vcxproj new file mode 100644 index 0000000..0f3abc8 --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win7.vcxproj @@ -0,0 +1,207 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + Create + Create + Create + Create + + + + + + + + + + + + + + + {4F150A30-CECB-49D1-8283-6A3F57438CF5} + Win32Proj + DirectXTKAudioWin7 + 10.0.17763.0 + + + + StaticLibrary + true + v141 + Unicode + + + StaticLibrary + true + v141 + Unicode + + + StaticLibrary + false + v141 + Unicode + + + StaticLibrary + false + v141 + Unicode + + + + + + + + + + + + + + + + + + + Bin\Desktop_2017_Win7\$(Platform)\$(Configuration)\ + Bin\Desktop_2017_Win7\$(Platform)\$(Configuration)\ + DirectXTKAudioWin7 + + + Bin\Desktop_2017_Win7\$(Platform)\$(Configuration)\ + Bin\Desktop_2017_Win7\$(Platform)\$(Configuration)\ + DirectXTKAudioWin7 + + + Bin\Desktop_2017_Win7\$(Platform)\$(Configuration)\ + Bin\Desktop_2017_Win7\$(Platform)\$(Configuration)\ + DirectXTKAudioWin7 + + + Bin\Desktop_2017_Win7\$(Platform)\$(Configuration)\ + Bin\Desktop_2017_Win7\$(Platform)\$(Configuration)\ + DirectXTKAudioWin7 + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0601;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + StreamingSIMDExtensions2 + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + false + + + Windows + true + + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0601;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + false + + + Windows + true + + + + + EnableAllWarnings + Use + MaxSpeed + 
_WIN32_WINNT=0x0601;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + StreamingSIMDExtensions2 + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + + + Windows + true + true + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0601;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + + + Windows + true + true + true + + + + + + + + + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win7.vcxproj.filters b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win7.vcxproj.filters new file mode 100644 index 0000000..788464d --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win7.vcxproj.filters @@ -0,0 +1,59 @@ + + + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + + + Inc + + + Inc + + + Inc + + + Inc + + + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win8.vcxproj b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win8.vcxproj new file mode 100644 index 0000000..23f685f --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win8.vcxproj @@ -0,0 +1,197 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + 
x64 + + + + + + + + + + + Create + Create + Create + Create + + + + + + + + + + + + {4F150A30-CECB-49D1-8283-6A3F57438CF5} + Win32Proj + DirectXTKAudio + 10.0.17763.0 + + + + StaticLibrary + true + v141 + Unicode + + + StaticLibrary + true + v141 + Unicode + + + StaticLibrary + false + v141 + Unicode + + + StaticLibrary + false + v141 + Unicode + + + + + + + + + + + + + + + + + + + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + DirectXTKAudioWin8 + + + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + DirectXTKAudioWin8 + + + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + DirectXTKAudioWin8 + + + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + DirectXTKAudioWin8 + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0602;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + StreamingSIMDExtensions2 + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + false + + + Windows + true + + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0602;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + false + + + Windows + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0602;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + StreamingSIMDExtensions2 + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + + + Windows + true + true + true + + + + + EnableAllWarnings + Use + MaxSpeed + 
_WIN32_WINNT=0x0602;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + + + Windows + true + true + true + + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win8.vcxproj.filters b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win8.vcxproj.filters new file mode 100644 index 0000000..e42a519 --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2017_Win8.vcxproj.filters @@ -0,0 +1,56 @@ + + + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + + + Inc + + + Inc + + + Inc + + + Inc + + + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win7.vcxproj b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win7.vcxproj new file mode 100644 index 0000000..bb000d9 --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win7.vcxproj @@ -0,0 +1,211 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + Create + Create + Create + Create + + + + + + + + + + + + + + + {4F150A30-CECB-49D1-8283-6A3F57438CF5} + Win32Proj + DirectXTKAudioWin7 + 10.0 + + + + StaticLibrary + true + v142 + Unicode + + + StaticLibrary + true + v142 + Unicode + + + StaticLibrary + false + v142 + Unicode + + + StaticLibrary + false + v142 + Unicode + + + + + + + + + + + + + + + + + + + Bin\Desktop_2019_Win7\$(Platform)\$(Configuration)\ + Bin\Desktop_2019_Win7\$(Platform)\$(Configuration)\ + DirectXTKAudioWin7 + + + Bin\Desktop_2019_Win7\$(Platform)\$(Configuration)\ + 
Bin\Desktop_2019_Win7\$(Platform)\$(Configuration)\ + DirectXTKAudioWin7 + + + Bin\Desktop_2019_Win7\$(Platform)\$(Configuration)\ + Bin\Desktop_2019_Win7\$(Platform)\$(Configuration)\ + DirectXTKAudioWin7 + + + Bin\Desktop_2019_Win7\$(Platform)\$(Configuration)\ + Bin\Desktop_2019_Win7\$(Platform)\$(Configuration)\ + DirectXTKAudioWin7 + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0601;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + StreamingSIMDExtensions2 + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + false + 4711;5045;26812 + + + Windows + true + + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0601;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + false + 4711;5045;26812 + + + Windows + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0601;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + StreamingSIMDExtensions2 + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + 4711;5045;26812 + + + Windows + true + true + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0601;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + 4711;5045;26812 + + + Windows + true + true + true + + + + + + + + + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. 
For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win7.vcxproj.filters b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win7.vcxproj.filters new file mode 100644 index 0000000..788464d --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win7.vcxproj.filters @@ -0,0 +1,59 @@ + + + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + + + Inc + + + Inc + + + Inc + + + Inc + + + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win8.vcxproj b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win8.vcxproj new file mode 100644 index 0000000..a3ff04c --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win8.vcxproj @@ -0,0 +1,201 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + Create + Create + Create + Create + + + + + + + + + + + + {4F150A30-CECB-49D1-8283-6A3F57438CF5} + Win32Proj + DirectXTKAudio + 10.0 + + + + StaticLibrary + true + v142 + Unicode + + + StaticLibrary + true + v142 + Unicode + + + StaticLibrary + false + v142 + Unicode + + + StaticLibrary + false + v142 + Unicode + + + + + + + + + + + + + + + + + + + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + DirectXTKAudioWin8 + + + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + DirectXTKAudioWin8 + + + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + DirectXTKAudioWin8 + + + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + 
Bin\Desktop_2019\$(Platform)\$(Configuration)\ + DirectXTKAudioWin8 + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0602;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + StreamingSIMDExtensions2 + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus %(AdditionalOptions) + false + 4711;5045;26812 + + + Windows + true + + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0602;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus %(AdditionalOptions) + false + 4711;5045;26812 + + + Windows + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0602;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + StreamingSIMDExtensions2 + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus %(AdditionalOptions) + 4711;5045;26812 + + + Windows + true + true + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0602;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + ..\Inc;..\Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + pch.h + true + true + /Zc:__cplusplus %(AdditionalOptions) + 4711;5045;26812 + + + Windows + true + true + true + + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win8.vcxproj.filters b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win8.vcxproj.filters new file mode 100644 index 0000000..e42a519 --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/DirectXTKAudio_Desktop_2019_Win8.vcxproj.filters @@ -0,0 +1,56 @@ + + + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + 
cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + + + Inc + + + Inc + + + Inc + + + Inc + + + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/Audio/DynamicSoundEffectInstance.cpp b/Sdk/External/DirectXTK/Audio/DynamicSoundEffectInstance.cpp new file mode 100644 index 0000000..4dc9ee7 --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/DynamicSoundEffectInstance.cpp @@ -0,0 +1,387 @@ +//-------------------------------------------------------------------------------------- +// File: DynamicSoundEffectInstance.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "SoundCommon.h" + +using namespace DirectX; + + +//====================================================================================== +// DynamicSoundEffectInstance +//====================================================================================== + +// Internal object implementation class. 
+class DynamicSoundEffectInstance::Impl : public IVoiceNotify +{ +public: + Impl(_In_ AudioEngine* engine, + _In_ DynamicSoundEffectInstance* object, + std::function& bufferNeeded, + int sampleRate, int channels, int sampleBits, + SOUND_EFFECT_INSTANCE_FLAGS flags) : + mBase(), + mBufferNeeded(nullptr), + mObject(object) + { + if ((sampleRate < XAUDIO2_MIN_SAMPLE_RATE) + || (sampleRate > XAUDIO2_MAX_SAMPLE_RATE)) + { + DebugTrace("DynamicSoundEffectInstance sampleRate must be in range %u...%u\n", XAUDIO2_MIN_SAMPLE_RATE, XAUDIO2_MAX_SAMPLE_RATE); + throw std::invalid_argument("DynamicSoundEffectInstance"); + } + + if (!channels || (channels > 8)) + { + DebugTrace("DynamicSoundEffectInstance channels must be in range 1...8\n"); + throw std::invalid_argument("DynamicSoundEffectInstance"); + } + + switch (sampleBits) + { + case 8: + case 16: + break; + + default: + DebugTrace("DynamicSoundEffectInstance sampleBits must be 8-bit or 16-bit\n"); + throw std::invalid_argument("DynamicSoundEffectInstance"); + } + + mBufferEvent.reset(CreateEventEx(nullptr, nullptr, 0, EVENT_MODIFY_STATE | SYNCHRONIZE)); + if (!mBufferEvent) + { + throw std::exception("CreateEvent"); + } + + CreateIntegerPCM(&mWaveFormat, sampleRate, channels, sampleBits); + + assert(engine != nullptr); + engine->RegisterNotify(this, true); + + mBase.Initialize(engine, &mWaveFormat, flags); + + mBufferNeeded = bufferNeeded; + } + + Impl(Impl&&) = default; + Impl& operator= (Impl&&) = default; + + Impl(Impl const&) = delete; + Impl& operator= (Impl const&) = delete; + + ~Impl() override + { + mBase.DestroyVoice(); + + if (mBase.engine) + { + mBase.engine->UnregisterNotify(this, false, true); + mBase.engine = nullptr; + } + } + + void Play(); + + void Resume(); + + void SubmitBuffer(_In_reads_bytes_(audioBytes) const uint8_t* pAudioData, uint32_t offset, size_t audioBytes); + + const WAVEFORMATEX* GetFormat() const noexcept { return &mWaveFormat; } + + // IVoiceNotify + void __cdecl OnBufferEnd() override + { 
+ SetEvent(mBufferEvent.get()); + } + + void __cdecl OnCriticalError() override + { + mBase.OnCriticalError(); + } + + void __cdecl OnReset() override + { + mBase.OnReset(); + } + + void __cdecl OnUpdate() override; + + void __cdecl OnDestroyEngine() noexcept override + { + mBase.OnDestroy(); + } + + void __cdecl OnTrim() override + { + mBase.OnTrim(); + } + + void __cdecl GatherStatistics(AudioStatistics& stats) const noexcept override + { + mBase.GatherStatistics(stats); + } + + void __cdecl OnDestroyParent() noexcept override + { + } + + SoundEffectInstanceBase mBase; + +private: + ScopedHandle mBufferEvent; + std::function mBufferNeeded; + DynamicSoundEffectInstance* mObject; + WAVEFORMATEX mWaveFormat; +}; + + +void DynamicSoundEffectInstance::Impl::Play() +{ + if (!mBase.voice) + { + mBase.AllocateVoice(&mWaveFormat); + } + + (void)mBase.Play(); + + if (mBase.voice && (mBase.state == PLAYING) && (mBase.GetPendingBufferCount() <= 2)) + { + SetEvent(mBufferEvent.get()); + } +} + + +void DynamicSoundEffectInstance::Impl::Resume() +{ + if (mBase.voice && (mBase.state == PAUSED)) + { + mBase.Resume(); + + if ((mBase.state == PLAYING) && (mBase.GetPendingBufferCount() <= 2)) + { + SetEvent(mBufferEvent.get()); + } + } +} + + +_Use_decl_annotations_ +void DynamicSoundEffectInstance::Impl::SubmitBuffer(const uint8_t* pAudioData, uint32_t offset, size_t audioBytes) +{ + if (!pAudioData || !audioBytes) + throw std::exception("Invalid audio data buffer"); + + if (audioBytes > UINT32_MAX) + throw std::out_of_range("SubmitBuffer"); + + XAUDIO2_BUFFER buffer = {}; + buffer.AudioBytes = static_cast(audioBytes); + buffer.pAudioData = pAudioData; + + if (offset) + { + assert(mWaveFormat.wFormatTag == WAVE_FORMAT_PCM); + buffer.PlayBegin = offset / mWaveFormat.nBlockAlign; + buffer.PlayLength = static_cast((audioBytes - offset) / mWaveFormat.nBlockAlign); + } + + buffer.pContext = this; + + HRESULT hr = mBase.voice->SubmitSourceBuffer(&buffer, nullptr); + if (FAILED(hr)) + { + 
#ifdef _DEBUG + DebugTrace("ERROR: DynamicSoundEffectInstance failed (%08X) when submitting buffer:\n", static_cast(hr)); + + DebugTrace("\tFormat Tag %u, %u channels, %u-bit, %u Hz, %zu bytes [%u offset)\n", + mWaveFormat.wFormatTag, mWaveFormat.nChannels, mWaveFormat.wBitsPerSample, mWaveFormat.nSamplesPerSec, audioBytes, offset); + #endif + throw std::exception("SubmitSourceBuffer"); + } +} + + +void DynamicSoundEffectInstance::Impl::OnUpdate() +{ + DWORD result = WaitForSingleObjectEx(mBufferEvent.get(), 0, FALSE); + switch (result) + { + case WAIT_TIMEOUT: + break; + + case WAIT_OBJECT_0: + if (mBufferNeeded) + { + // This callback happens on the same thread that called AudioEngine::Update() + mBufferNeeded(mObject); + } + break; + + case WAIT_FAILED: + throw std::exception("WaitForSingleObjectEx"); + } +} + + + +//-------------------------------------------------------------------------------------- +// DynamicSoundEffectInstance +//-------------------------------------------------------------------------------------- + +#pragma warning( disable : 4355 ) + +// Public constructors +_Use_decl_annotations_ +DynamicSoundEffectInstance::DynamicSoundEffectInstance( + AudioEngine* engine, + std::function bufferNeeded, + int sampleRate, + int channels, + int sampleBits, + SOUND_EFFECT_INSTANCE_FLAGS flags) : + pImpl(std::make_unique(engine, this, bufferNeeded, sampleRate, channels, sampleBits, flags)) +{ +} + + +// Move constructor. +DynamicSoundEffectInstance::DynamicSoundEffectInstance(DynamicSoundEffectInstance&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +DynamicSoundEffectInstance& DynamicSoundEffectInstance::operator= (DynamicSoundEffectInstance&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +DynamicSoundEffectInstance::~DynamicSoundEffectInstance() +{ +} + + +// Public methods. 
+void DynamicSoundEffectInstance::Play() +{ + pImpl->Play(); +} + + +void DynamicSoundEffectInstance::Stop(bool immediate) noexcept +{ + bool looped = false; + pImpl->mBase.Stop(immediate, looped); +} + + +void DynamicSoundEffectInstance::Pause() noexcept +{ + pImpl->mBase.Pause(); +} + + +void DynamicSoundEffectInstance::Resume() +{ + pImpl->Resume(); +} + + +void DynamicSoundEffectInstance::SetVolume(float volume) +{ + pImpl->mBase.SetVolume(volume); +} + + +void DynamicSoundEffectInstance::SetPitch(float pitch) +{ + pImpl->mBase.SetPitch(pitch); +} + + +void DynamicSoundEffectInstance::SetPan(float pan) +{ + pImpl->mBase.SetPan(pan); +} + + +void DynamicSoundEffectInstance::Apply3D(const AudioListener& listener, const AudioEmitter& emitter, bool rhcoords) +{ + pImpl->mBase.Apply3D(listener, emitter, rhcoords); +} + + +_Use_decl_annotations_ +void DynamicSoundEffectInstance::SubmitBuffer(const uint8_t* pAudioData, size_t audioBytes) +{ + pImpl->SubmitBuffer(pAudioData, 0, audioBytes); +} + + +_Use_decl_annotations_ +void DynamicSoundEffectInstance::SubmitBuffer(const uint8_t* pAudioData, uint32_t offset, size_t audioBytes) +{ + pImpl->SubmitBuffer(pAudioData, offset, audioBytes); +} + + +// Public accessors. 
+SoundState DynamicSoundEffectInstance::GetState() noexcept +{ + return pImpl->mBase.GetState(false); +} + + +size_t DynamicSoundEffectInstance::GetSampleDuration(size_t bytes) const noexcept +{ + auto wfx = pImpl->GetFormat(); + if (!wfx || !wfx->wBitsPerSample || !wfx->nChannels) + return 0; + + return static_cast((uint64_t(bytes) * 8) + / (uint64_t(wfx->wBitsPerSample) * uint64_t(wfx->nChannels))); +} + + +size_t DynamicSoundEffectInstance::GetSampleDurationMS(size_t bytes) const noexcept +{ + auto wfx = pImpl->GetFormat(); + if (!wfx || !wfx->nAvgBytesPerSec) + return 0; + + return static_cast((uint64_t(bytes) * 1000) / wfx->nAvgBytesPerSec); +} + + +size_t DynamicSoundEffectInstance::GetSampleSizeInBytes(uint64_t duration) const noexcept +{ + auto wfx = pImpl->GetFormat(); + if (!wfx || !wfx->nSamplesPerSec) + return 0; + + return static_cast(((duration * wfx->nSamplesPerSec) / 1000) * wfx->nBlockAlign); +} + + +int DynamicSoundEffectInstance::GetPendingBufferCount() const noexcept +{ + return pImpl->mBase.GetPendingBufferCount(); +} + + +const WAVEFORMATEX* DynamicSoundEffectInstance::GetFormat() const noexcept +{ + return pImpl->GetFormat(); +} diff --git a/Sdk/External/DirectXTK/Audio/SoundCommon.cpp b/Sdk/External/DirectXTK/Audio/SoundCommon.cpp new file mode 100644 index 0000000..ef204d5 --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/SoundCommon.cpp @@ -0,0 +1,798 @@ +//-------------------------------------------------------------------------------------- +// File: SoundCommon.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "SoundCommon.h" + +using namespace DirectX; + + +namespace +{ + template WORD ChannelsSpecifiedInMask(T x) noexcept + { + WORD bitCount = 0; + while (x) { ++bitCount; x &= (x - 1); } + return bitCount; + } +} + + +//====================================================================================== +// Wave format utilities +//====================================================================================== + +bool DirectX::IsValid(_In_ const WAVEFORMATEX* wfx) noexcept +{ + if (!wfx) + return false; + + if (!wfx->nChannels) + { + DebugTrace("ERROR: Wave format must have at least 1 channel\n"); + return false; + } + + if (wfx->nChannels > XAUDIO2_MAX_AUDIO_CHANNELS) + { + DebugTrace("ERROR: Wave format must have less than %u channels (%u)\n", XAUDIO2_MAX_AUDIO_CHANNELS, wfx->nChannels); + return false; + } + + if (!wfx->nSamplesPerSec) + { + DebugTrace("ERROR: Wave format cannot have a sample rate of 0\n"); + return false; + } + + if ((wfx->nSamplesPerSec < XAUDIO2_MIN_SAMPLE_RATE) + || (wfx->nSamplesPerSec > XAUDIO2_MAX_SAMPLE_RATE)) + { + DebugTrace("ERROR: Wave format channel count must be in range %u..%u (%u)\n", + XAUDIO2_MIN_SAMPLE_RATE, XAUDIO2_MAX_SAMPLE_RATE, wfx->nSamplesPerSec); + return false; + } + + switch (wfx->wFormatTag) + { + case WAVE_FORMAT_PCM: + + switch (wfx->wBitsPerSample) + { + case 8: + case 16: + case 24: + case 32: + break; + + default: + DebugTrace("ERROR: Wave format integer PCM must have 8, 16, 24, or 32 bits per sample (%u)\n", wfx->wBitsPerSample); + return false; + } + + if (wfx->nBlockAlign != (wfx->nChannels * wfx->wBitsPerSample / 8)) + { + DebugTrace("ERROR: Wave format integer PCM - nBlockAlign (%u) != nChannels (%u) * wBitsPerSample (%u) / 8\n", + wfx->nBlockAlign, wfx->nChannels, 
wfx->wBitsPerSample); + return false; + } + + if (wfx->nAvgBytesPerSec != (wfx->nSamplesPerSec * wfx->nBlockAlign)) + { + DebugTrace("ERROR: Wave format integer PCM - nAvgBytesPerSec (%lu) != nSamplesPerSec (%lu) * nBlockAlign (%u)\n", + wfx->nAvgBytesPerSec, wfx->nSamplesPerSec, wfx->nBlockAlign); + return false; + } + + return true; + + case WAVE_FORMAT_IEEE_FLOAT: + + if (wfx->wBitsPerSample != 32) + { + DebugTrace("ERROR: Wave format float PCM must have 32-bits per sample (%u)\n", wfx->wBitsPerSample); + return false; + } + + if (wfx->nBlockAlign != (wfx->nChannels * wfx->wBitsPerSample / 8)) + { + DebugTrace("ERROR: Wave format float PCM - nBlockAlign (%u) != nChannels (%u) * wBitsPerSample (%u) / 8\n", + wfx->nBlockAlign, wfx->nChannels, wfx->wBitsPerSample); + return false; + } + + if (wfx->nAvgBytesPerSec != (wfx->nSamplesPerSec * wfx->nBlockAlign)) + { + DebugTrace("ERROR: Wave format float PCM - nAvgBytesPerSec (%lu) != nSamplesPerSec (%lu) * nBlockAlign (%u)\n", + wfx->nAvgBytesPerSec, wfx->nSamplesPerSec, wfx->nBlockAlign); + return false; + } + + return true; + + case WAVE_FORMAT_ADPCM: + + if ((wfx->nChannels != 1) && (wfx->nChannels != 2)) + { + DebugTrace("ERROR: Wave format ADPCM must have 1 or 2 channels (%u)\n", wfx->nChannels); + return false; + } + + if (wfx->wBitsPerSample != 4 /*MSADPCM_BITS_PER_SAMPLE*/) + { + DebugTrace("ERROR: Wave format ADPCM must have 4 bits per sample (%u)\n", wfx->wBitsPerSample); + return false; + } + + if (wfx->cbSize != 32 /*MSADPCM_FORMAT_EXTRA_BYTES*/) + { + DebugTrace("ERROR: Wave format ADPCM must have cbSize = 32 (%u)\n", wfx->cbSize); + return false; + } + else + { + auto wfadpcm = reinterpret_cast(wfx); + + if (wfadpcm->wNumCoef != 7 /*MSADPCM_NUM_COEFFICIENTS*/) + { + DebugTrace("ERROR: Wave format ADPCM must have 7 coefficients (%u)\n", wfadpcm->wNumCoef); + return false; + } + + bool valid = true; + for (int j = 0; j < 7 /*MSADPCM_NUM_COEFFICIENTS*/; ++j) + { + // Microsoft ADPCM standard encoding 
coefficients + static const short g_pAdpcmCoefficients1[] = { 256, 512, 0, 192, 240, 460, 392 }; + static const short g_pAdpcmCoefficients2[] = { 0, -256, 0, 64, 0, -208, -232 }; + + if (wfadpcm->aCoef[j].iCoef1 != g_pAdpcmCoefficients1[j] + || wfadpcm->aCoef[j].iCoef2 != g_pAdpcmCoefficients2[j]) + { + valid = false; + } + } + + if (!valid) + { + DebugTrace("ERROR: Wave formt ADPCM found non-standard coefficients\n"); + return false; + } + + if ((wfadpcm->wSamplesPerBlock < 4 /*MSADPCM_MIN_SAMPLES_PER_BLOCK*/) + || (wfadpcm->wSamplesPerBlock > 64000 /*MSADPCM_MAX_SAMPLES_PER_BLOCK*/)) + { + DebugTrace("ERROR: Wave format ADPCM wSamplesPerBlock must be 4..64000 (%u)\n", wfadpcm->wSamplesPerBlock); + return false; + } + + if (wfadpcm->wfx.nChannels == 1 && (wfadpcm->wSamplesPerBlock % 2)) + { + DebugTrace("ERROR: Wave format ADPCM mono files must have even wSamplesPerBlock\n"); + return false; + } + + int nHeaderBytes = 7 /*MSADPCM_HEADER_LENGTH*/ * wfx->nChannels; + int nBitsPerFrame = 4 /*MSADPCM_BITS_PER_SAMPLE*/ * wfx->nChannels; + int nPcmFramesPerBlock = (wfx->nBlockAlign - nHeaderBytes) * 8 / nBitsPerFrame + 2; + + if (wfadpcm->wSamplesPerBlock != nPcmFramesPerBlock) + { + DebugTrace("ERROR: Wave format ADPCM %u-channel with nBlockAlign = %u must have wSamplesPerBlock = %d (%u)\n", + wfx->nChannels, wfx->nBlockAlign, nPcmFramesPerBlock, wfadpcm->wSamplesPerBlock); + return false; + } + } + return true; + + case WAVE_FORMAT_WMAUDIO2: + case WAVE_FORMAT_WMAUDIO3: + + #ifdef DIRECTX_ENABLE_XWMA + + if (wfx->wBitsPerSample != 16) + { + DebugTrace("ERROR: Wave format xWMA only supports 16-bit data\n"); + return false; + } + + if (!wfx->nBlockAlign) + { + DebugTrace("ERROR: Wave format xWMA must have a non-zero nBlockAlign\n"); + return false; + } + + if (!wfx->nAvgBytesPerSec) + { + DebugTrace("ERROR: Wave format xWMA must have a non-zero nAvgBytesPerSec\n"); + return false; + } + + return true; + + #else + DebugTrace("ERROR: Wave format xWMA not supported by this 
version of DirectXTK for Audio\n"); + return false; + #endif + + case 0x166 /* WAVE_FORMAT_XMA2 */: + + #ifdef DIRECTX_ENABLE_XMA2 + + static_assert(WAVE_FORMAT_XMA2 == 0x166, "Unrecognized XMA2 tag"); + + if (wfx->nBlockAlign != wfx->nChannels * XMA_OUTPUT_SAMPLE_BYTES) + { + DebugTrace("ERROR: Wave format XMA2 - nBlockAlign (%u) != nChannels(%u) * %u\n", wfx->nBlockAlign, wfx->nChannels, XMA_OUTPUT_SAMPLE_BYTES); + return false; + } + + if (wfx->wBitsPerSample != XMA_OUTPUT_SAMPLE_BITS) + { + DebugTrace("ERROR: Wave format XMA2 wBitsPerSample (%u) should be %u\n", wfx->wBitsPerSample, XMA_OUTPUT_SAMPLE_BITS); + return false; + } + + if (wfx->cbSize != (sizeof(XMA2WAVEFORMATEX) - sizeof(WAVEFORMATEX))) + { + DebugTrace("ERROR: Wave format XMA2 - cbSize must be %zu (%u)\n", (sizeof(XMA2WAVEFORMATEX) - sizeof(WAVEFORMATEX)), wfx->cbSize); + return false; + } + else + { + auto xmaFmt = reinterpret_cast(wfx); + + if (xmaFmt->EncoderVersion < 3) + { + DebugTrace("ERROR: Wave format XMA2 encoder version (%u) - 3 or higher is required\n", xmaFmt->EncoderVersion); + return false; + } + + if (!xmaFmt->BlockCount) + { + DebugTrace("ERROR: Wave format XMA2 BlockCount must be non-zero\n"); + return false; + } + + if (!xmaFmt->BytesPerBlock || (xmaFmt->BytesPerBlock > XMA_READBUFFER_MAX_BYTES)) + { + DebugTrace("ERROR: Wave format XMA2 BytesPerBlock (%u) is invalid\n", xmaFmt->BytesPerBlock); + return false; + } + + if (xmaFmt->ChannelMask) + { + auto channelBits = ChannelsSpecifiedInMask(xmaFmt->ChannelMask); + if (channelBits != wfx->nChannels) + { + DebugTrace("ERROR: Wave format XMA2 - nChannels=%u but ChannelMask (%08X) has %u bits set\n", + xmaFmt->ChannelMask, wfx->nChannels, channelBits); + return false; + } + } + + if (xmaFmt->NumStreams != ((wfx->nChannels + 1) / 2)) + { + DebugTrace("ERROR: Wave format XMA2 - NumStreams (%u) != ( nChannels(%u) + 1 ) / 2\n", + xmaFmt->NumStreams, wfx->nChannels); + return false; + } + + if ((xmaFmt->PlayBegin + xmaFmt->PlayLength) > 
xmaFmt->SamplesEncoded) + { + DebugTrace("ERROR: Wave format XMA2 play region too large (%u + %u > %u)\n", + xmaFmt->PlayBegin, xmaFmt->PlayLength, xmaFmt->SamplesEncoded); + return false; + } + + if ((xmaFmt->LoopBegin + xmaFmt->LoopLength) > xmaFmt->SamplesEncoded) + { + DebugTrace("ERROR: Wave format XMA2 loop region too large (%u + %u > %u)\n", + xmaFmt->LoopBegin, xmaFmt->LoopLength, xmaFmt->SamplesEncoded); + return false; + } + } + return true; + + #else + DebugTrace("ERROR: Wave format XMA2 not supported by this version of DirectXTK for Audio\n"); + return false; + #endif + + case WAVE_FORMAT_EXTENSIBLE: + if (wfx->cbSize < (sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX))) + { + DebugTrace("ERROR: Wave format WAVE_FORMAT_EXTENSIBLE - cbSize must be %zu (%u)\n", + (sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX)), wfx->cbSize); + return false; + } + else + { + static const GUID s_wfexBase = { 0x00000000, 0x0000, 0x0010, { 0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71 } }; + + auto wfex = reinterpret_cast(wfx); + + if (memcmp(reinterpret_cast(&wfex->SubFormat) + sizeof(DWORD), + reinterpret_cast(&s_wfexBase) + sizeof(DWORD), sizeof(GUID) - sizeof(DWORD)) != 0) + { + DebugTrace("ERROR: Wave format WAVEFORMATEXTENSIBLE encountered with unknown GUID ({%8.8lX-%4.4X-%4.4X-%2.2X%2.2X-%2.2X%2.2X%2.2X%2.2X%2.2X%2.2X})\n", + wfex->SubFormat.Data1, wfex->SubFormat.Data2, wfex->SubFormat.Data3, + wfex->SubFormat.Data4[0], wfex->SubFormat.Data4[1], wfex->SubFormat.Data4[2], wfex->SubFormat.Data4[3], + wfex->SubFormat.Data4[4], wfex->SubFormat.Data4[5], wfex->SubFormat.Data4[6], wfex->SubFormat.Data4[7]); + return false; + } + + switch (wfex->SubFormat.Data1) + { + case WAVE_FORMAT_PCM: + + switch (wfx->wBitsPerSample) + { + case 8: + case 16: + case 24: + case 32: + break; + + default: + DebugTrace("ERROR: Wave format integer PCM must have 8, 16, 24, or 32 bits per sample (%u)\n", + wfx->wBitsPerSample); + return false; + } + + switch 
(wfex->Samples.wValidBitsPerSample) + { + case 0: + case 8: + case 16: + case 20: + case 24: + case 32: + break; + + default: + DebugTrace("ERROR: Wave format integer PCM must have 8, 16, 20, 24, or 32 valid bits per sample (%u)\n", + wfex->Samples.wValidBitsPerSample); + return false; + } + + if (wfex->Samples.wValidBitsPerSample + && (wfex->Samples.wValidBitsPerSample > wfx->wBitsPerSample)) + { + DebugTrace("ERROR: Wave format ingter PCM wValidBitsPerSample (%u) is greater than wBitsPerSample (%u)\n", + wfex->Samples.wValidBitsPerSample, wfx->wBitsPerSample); + return false; + } + + if (wfx->nBlockAlign != (wfx->nChannels * wfx->wBitsPerSample / 8)) + { + DebugTrace("ERROR: Wave format integer PCM - nBlockAlign (%u) != nChannels (%u) * wBitsPerSample (%u) / 8\n", + wfx->nBlockAlign, wfx->nChannels, wfx->wBitsPerSample); + return false; + } + + if (wfx->nAvgBytesPerSec != (wfx->nSamplesPerSec * wfx->nBlockAlign)) + { + DebugTrace("ERROR: Wave format integer PCM - nAvgBytesPerSec (%lu) != nSamplesPerSec (%lu) * nBlockAlign (%u)\n", + wfx->nAvgBytesPerSec, wfx->nSamplesPerSec, wfx->nBlockAlign); + return false; + } + + break; + + case WAVE_FORMAT_IEEE_FLOAT: + + if (wfx->wBitsPerSample != 32) + { + DebugTrace("ERROR: Wave format float PCM must have 32-bits per sample (%u)\n", wfx->wBitsPerSample); + return false; + } + + switch (wfex->Samples.wValidBitsPerSample) + { + case 0: + case 32: + break; + + default: + DebugTrace("ERROR: Wave format float PCM must have 32 valid bits per sample (%u)\n", + wfex->Samples.wValidBitsPerSample); + return false; + } + + if (wfx->nBlockAlign != (wfx->nChannels * wfx->wBitsPerSample / 8)) + { + DebugTrace("ERROR: Wave format float PCM - nBlockAlign (%u) != nChannels (%u) * wBitsPerSample (%u) / 8\n", + wfx->nBlockAlign, wfx->nChannels, wfx->wBitsPerSample); + return false; + } + + if (wfx->nAvgBytesPerSec != (wfx->nSamplesPerSec * wfx->nBlockAlign)) + { + DebugTrace("ERROR: Wave format float PCM - nAvgBytesPerSec (%lu) != 
nSamplesPerSec (%lu) * nBlockAlign (%u)\n", + wfx->nAvgBytesPerSec, wfx->nSamplesPerSec, wfx->nBlockAlign); + return false; + } + + break; + + case WAVE_FORMAT_ADPCM: + DebugTrace("ERROR: Wave format ADPCM is not supported as a WAVEFORMATEXTENSIBLE\n"); + return false; + + case WAVE_FORMAT_WMAUDIO2: + case WAVE_FORMAT_WMAUDIO3: + + #ifdef DIRECTX_ENABLE_XWMA + + if (wfx->wBitsPerSample != 16) + { + DebugTrace("ERROR: Wave format xWMA only supports 16-bit data\n"); + return false; + } + + if (!wfx->nBlockAlign) + { + DebugTrace("ERROR: Wave format xWMA must have a non-zero nBlockAlign\n"); + return false; + } + + if (!wfx->nAvgBytesPerSec) + { + DebugTrace("ERROR: Wave format xWMA must have a non-zero nAvgBytesPerSec\n"); + return false; + } + + break; + + #else + DebugTrace("ERROR: Wave format xWMA not supported by this version of DirectXTK for Audio\n"); + return false; + #endif + + case 0x166 /* WAVE_FORMAT_XMA2 */: + DebugTrace("ERROR: Wave format XMA2 is not supported as a WAVEFORMATEXTENSIBLE\n"); + return false; + + default: + DebugTrace("ERROR: Unknown WAVEFORMATEXTENSIBLE format tag (%u)\n", wfex->SubFormat.Data1); + return false; + } + + if (wfex->dwChannelMask) + { + auto channelBits = ChannelsSpecifiedInMask(wfex->dwChannelMask); + if (channelBits != wfx->nChannels) + { + DebugTrace("ERROR: WAVEFORMATEXTENSIBLE: nChannels=%u but ChannelMask has %u bits set\n", + wfx->nChannels, channelBits); + return false; + } + } + + return true; + } + + default: + DebugTrace("ERROR: Unknown WAVEFORMATEX format tag (%u)\n", wfx->wFormatTag); + return false; + } +} + + +uint32_t DirectX::GetDefaultChannelMask(int channels) noexcept +{ + switch (channels) + { + case 1: return SPEAKER_MONO; + case 2: return SPEAKER_STEREO; + case 3: return SPEAKER_2POINT1; + case 4: return SPEAKER_QUAD; + case 5: return SPEAKER_4POINT1; + case 6: return SPEAKER_5POINT1; + case 7: return SPEAKER_5POINT1 | SPEAKER_BACK_CENTER; + case 8: return SPEAKER_7POINT1; + default: return 0; + } +} + 
+ +_Use_decl_annotations_ +void DirectX::CreateIntegerPCM(WAVEFORMATEX* wfx, int sampleRate, int channels, int sampleBits) noexcept +{ + int blockAlign = channels * sampleBits / 8; + + wfx->wFormatTag = WAVE_FORMAT_PCM; + wfx->nChannels = static_cast(channels); + wfx->nSamplesPerSec = static_cast(sampleRate); + wfx->nAvgBytesPerSec = static_cast(blockAlign * sampleRate); + wfx->nBlockAlign = static_cast(blockAlign); + wfx->wBitsPerSample = static_cast(sampleBits); + wfx->cbSize = 0; + + assert(IsValid(wfx)); +} + + +_Use_decl_annotations_ +void DirectX::CreateFloatPCM(WAVEFORMATEX* wfx, int sampleRate, int channels) noexcept +{ + int blockAlign = channels * 4; + + wfx->wFormatTag = WAVE_FORMAT_IEEE_FLOAT; + wfx->nChannels = static_cast(channels); + wfx->nSamplesPerSec = static_cast(sampleRate); + wfx->nAvgBytesPerSec = static_cast(blockAlign * sampleRate); + wfx->nBlockAlign = static_cast(blockAlign); + wfx->wBitsPerSample = 32; + wfx->cbSize = 0; + + assert(IsValid(wfx)); +} + + +_Use_decl_annotations_ +void DirectX::CreateADPCM(WAVEFORMATEX* wfx, size_t wfxSize, int sampleRate, int channels, int samplesPerBlock) noexcept(false) +{ + if (wfxSize < (sizeof(WAVEFORMATEX) + 32 /*MSADPCM_FORMAT_EXTRA_BYTES*/)) + { + DebugTrace("CreateADPCM needs at least %zu bytes for the result\n", + (sizeof(WAVEFORMATEX) + 32 /*MSADPCM_FORMAT_EXTRA_BYTES*/)); + throw std::invalid_argument("ADPCMWAVEFORMAT"); + } + + if (!samplesPerBlock) + { + DebugTrace("CreateADPCM needs a non-zero samples per block count\n"); + throw std::invalid_argument("ADPCMWAVEFORMAT"); + } + + int blockAlign = (7 /*MSADPCM_HEADER_LENGTH*/) * channels + + (samplesPerBlock - 2) * (4 /* MSADPCM_BITS_PER_SAMPLE */) * channels / 8; + + wfx->wFormatTag = WAVE_FORMAT_ADPCM; + wfx->nChannels = static_cast(channels); + wfx->nSamplesPerSec = static_cast(sampleRate); + wfx->nAvgBytesPerSec = static_cast(blockAlign * sampleRate / samplesPerBlock); + wfx->nBlockAlign = static_cast(blockAlign); + wfx->wBitsPerSample = 4 
/* MSADPCM_BITS_PER_SAMPLE */; + wfx->cbSize = 32 /*MSADPCM_FORMAT_EXTRA_BYTES*/; + + auto adpcm = reinterpret_cast(wfx); + adpcm->wSamplesPerBlock = static_cast(samplesPerBlock); + adpcm->wNumCoef = 7 /* MSADPCM_NUM_COEFFICIENTS */; + + static ADPCMCOEFSET aCoef[7] = { { 256, 0}, {512, -256}, {0,0}, {192,64}, {240,0}, {460, -208}, {392,-232} }; + memcpy(&adpcm->aCoef, aCoef, sizeof(aCoef)); + + assert(IsValid(wfx)); +} + + +#ifdef DIRECTX_ENABLE_XWMA +_Use_decl_annotations_ +void DirectX::CreateXWMA(WAVEFORMATEX* wfx, int sampleRate, int channels, int blockAlign, int avgBytes, bool wma3) noexcept +{ + wfx->wFormatTag = static_cast((wma3) ? WAVE_FORMAT_WMAUDIO3 : WAVE_FORMAT_WMAUDIO2); + wfx->nChannels = static_cast(channels); + wfx->nSamplesPerSec = static_cast(sampleRate); + wfx->nAvgBytesPerSec = static_cast(avgBytes); + wfx->nBlockAlign = static_cast(blockAlign); + wfx->wBitsPerSample = 16; + wfx->cbSize = 0; + + assert(IsValid(wfx)); +} +#endif + + +#ifdef DIRECTX_ENABLE_XMA2 +_Use_decl_annotations_ +void DirectX::CreateXMA2(WAVEFORMATEX* wfx, size_t wfxSize, int sampleRate, int channels, int bytesPerBlock, int blockCount, int samplesEncoded) noexcept(false) +{ + if (wfxSize < sizeof(XMA2WAVEFORMATEX)) + { + DebugTrace("XMA2 needs at least %zu bytes for the result\n", sizeof(XMA2WAVEFORMATEX)); + throw std::invalid_argument("XMA2WAVEFORMATEX"); + } + + if ((bytesPerBlock < 1) || (bytesPerBlock > int(XMA_READBUFFER_MAX_BYTES))) + { + DebugTrace("XMA2 needs a valid bytes per block\n"); + throw std::invalid_argument("XMA2WAVEFORMATEX"); + } + + int blockAlign = (channels * (16 /*XMA_OUTPUT_SAMPLE_BITS*/) / 8); + + wfx->wFormatTag = WAVE_FORMAT_XMA2; + wfx->nChannels = static_cast(channels); + wfx->nSamplesPerSec = static_cast(sampleRate); + wfx->nAvgBytesPerSec = static_cast(blockAlign * sampleRate); + wfx->nBlockAlign = static_cast(blockAlign); + wfx->wBitsPerSample = 16 /* XMA_OUTPUT_SAMPLE_BITS */; + wfx->cbSize = sizeof(XMA2WAVEFORMATEX) - 
sizeof(WAVEFORMATEX); + + auto xmaFmt = reinterpret_cast(wfx); + + xmaFmt->NumStreams = static_cast((channels + 1) / 2); + + xmaFmt->ChannelMask = GetDefaultChannelMask(channels); + + xmaFmt->SamplesEncoded = static_cast(samplesEncoded); + xmaFmt->BytesPerBlock = static_cast(bytesPerBlock); + xmaFmt->PlayBegin = xmaFmt->PlayLength = + xmaFmt->LoopBegin = xmaFmt->LoopLength = xmaFmt->LoopCount = 0; + xmaFmt->EncoderVersion = 4 /* XMAENCODER_VERSION_XMA2 */; + xmaFmt->BlockCount = static_cast(blockCount); + + assert(IsValid(wfx)); +} +#endif // XMA2 + + +_Use_decl_annotations_ +bool DirectX::ComputePan(float pan, unsigned int channels, float* matrix) noexcept +{ + memset(matrix, 0, sizeof(float) * 16); + + if (channels == 1) + { + // Mono panning + float left = (pan >= 0) ? (1.f - pan) : 1.f; + left = std::min(1.f, left); + left = std::max(-1.f, left); + + float right = (pan <= 0) ? (-pan - 1.f) : 1.f; + right = std::min(1.f, right); + right = std::max(-1.f, right); + + matrix[0] = left; + matrix[1] = right; + } + else if (channels == 2) + { + // Stereo panning + if (-1.f <= pan && pan <= 0.f) + { + matrix[0] = .5f * pan + 1.f; // .5 when pan is -1, 1 when pan is 0 + matrix[1] = .5f * -pan; // .5 when pan is -1, 0 when pan is 0 + matrix[2] = 0.f; // 0 when pan is -1, 0 when pan is 0 + matrix[3] = pan + 1.f; // 0 when pan is -1, 1 when pan is 0 + } + else + { + matrix[0] = -pan + 1.f; // 1 when pan is 0, 0 when pan is 1 + matrix[1] = 0.f; // 0 when pan is 0, 0 when pan is 1 + matrix[2] = .5f * pan; // 0 when pan is 0, .5f when pan is 1 + matrix[3] = .5f * -pan + 1.f; // 1 when pan is 0. 
.5f when pan is 1 + } + } + else + { + if (pan != 0.f) + { + DebugTrace("WARNING: Only supports panning on mono or stereo source data, ignored\n"); + } + return false; + } + + return true; +} + + +//====================================================================================== +// SoundEffectInstanceBase +//====================================================================================== + +void SoundEffectInstanceBase::SetPan(float pan) +{ + assert(pan >= -1.f && pan <= 1.f); + + mPan = pan; + + if (!voice) + return; + + float matrix[16]; + if (ComputePan(pan, mDSPSettings.SrcChannelCount, matrix)) + { + HRESULT hr = voice->SetOutputMatrix(nullptr, mDSPSettings.SrcChannelCount, mDSPSettings.DstChannelCount, matrix); + ThrowIfFailed(hr); + } +} + + +void SoundEffectInstanceBase::Apply3D(const AudioListener& listener, const AudioEmitter& emitter, bool rhcoords) +{ + if (!voice) + return; + + if (!(mFlags & SoundEffectInstance_Use3D)) + { + DebugTrace("ERROR: Apply3D called for an instance created without SoundEffectInstance_Use3D set\n"); + throw std::exception("Apply3D"); + } + + DWORD dwCalcFlags = X3DAUDIO_CALCULATE_MATRIX | X3DAUDIO_CALCULATE_DOPPLER | X3DAUDIO_CALCULATE_LPF_DIRECT; + + if (mFlags & SoundEffectInstance_UseRedirectLFE) + { + // On devices with an LFE channel, allow the mono source data to be routed to the LFE destination channel. 
+ dwCalcFlags |= X3DAUDIO_CALCULATE_REDIRECT_TO_LFE; + } + + auto reverb = mReverbVoice; + if (reverb) + { + dwCalcFlags |= X3DAUDIO_CALCULATE_LPF_REVERB | X3DAUDIO_CALCULATE_REVERB; + } + + float matrix[XAUDIO2_MAX_AUDIO_CHANNELS * 8] = {}; + assert(mDSPSettings.SrcChannelCount <= XAUDIO2_MAX_AUDIO_CHANNELS); + assert(mDSPSettings.DstChannelCount <= 8); + mDSPSettings.pMatrixCoefficients = matrix; + + assert(engine != nullptr); + if (rhcoords) + { + X3DAUDIO_EMITTER lhEmitter; + memcpy(&lhEmitter, &emitter, sizeof(X3DAUDIO_EMITTER)); + lhEmitter.OrientFront.z = -emitter.OrientFront.z; + lhEmitter.OrientTop.z = -emitter.OrientTop.z; + lhEmitter.Position.z = -emitter.Position.z; + lhEmitter.Velocity.z = -emitter.Velocity.z; + + X3DAUDIO_LISTENER lhListener; + memcpy(&lhListener, &listener, sizeof(X3DAUDIO_LISTENER)); + lhListener.OrientFront.z = -listener.OrientFront.z; + lhListener.OrientTop.z = -listener.OrientTop.z; + lhListener.Position.z = -listener.Position.z; + lhListener.Velocity.z = -listener.Velocity.z; + + X3DAudioCalculate(engine->Get3DHandle(), &lhListener, &lhEmitter, dwCalcFlags, &mDSPSettings); + } + else + { + X3DAudioCalculate(engine->Get3DHandle(), &listener, &emitter, dwCalcFlags, &mDSPSettings); + } + + mDSPSettings.pMatrixCoefficients = nullptr; + + (void)voice->SetFrequencyRatio(mFreqRatio * mDSPSettings.DopplerFactor); + + auto direct = mDirectVoice; + assert(direct != nullptr); + (void)voice->SetOutputMatrix(direct, mDSPSettings.SrcChannelCount, mDSPSettings.DstChannelCount, matrix); + + if (reverb) + { + for (size_t j = 0; (j < mDSPSettings.SrcChannelCount) && (j < XAUDIO2_MAX_AUDIO_CHANNELS); ++j) + { + matrix[j] = mDSPSettings.ReverbLevel; + } + (void)voice->SetOutputMatrix(reverb, mDSPSettings.SrcChannelCount, 1, matrix); + } + + if (mFlags & SoundEffectInstance_ReverbUseFilters) + { + XAUDIO2_FILTER_PARAMETERS filterDirect = { LowPassFilter, 2.0f * sinf(X3DAUDIO_PI / 6.0f * mDSPSettings.LPFDirectCoefficient), 1.0f }; + // see 
XAudio2CutoffFrequencyToRadians() in XAudio2.h for more information on the formula used here + (void)voice->SetOutputFilterParameters(direct, &filterDirect); + + if (reverb) + { + XAUDIO2_FILTER_PARAMETERS filterReverb = { LowPassFilter, 2.0f * sinf(X3DAUDIO_PI / 6.0f * mDSPSettings.LPFReverbCoefficient), 1.0f }; + // see XAudio2CutoffFrequencyToRadians() in XAudio2.h for more information on the formula used here + (void)voice->SetOutputFilterParameters(reverb, &filterReverb); + } + } +} + + diff --git a/Sdk/External/DirectXTK/Audio/SoundCommon.h b/Sdk/External/DirectXTK/Audio/SoundCommon.h new file mode 100644 index 0000000..a9696b5 --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/SoundCommon.h @@ -0,0 +1,383 @@ +//-------------------------------------------------------------------------------------- +// File: SoundCommon.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#pragma once + +#include "Audio.h" +#include "PlatformHelpers.h" + +#ifdef USING_XAUDIO2_9 +#define DIRECTX_ENABLE_XWMA +#endif + +#if (defined(_XBOX_ONE) && defined(_TITLE)) || defined(_GAMING_XBOX) +#define DIRECTX_ENABLE_XMA2 +#endif + +#if defined(DIRECTX_ENABLE_XWMA) || defined(DIRECTX_ENABLE_XMA2) +#define DIRECTX_ENABLE_SEEK_TABLES +#endif + +namespace DirectX +{ + // Helper for getting a format tag from a WAVEFORMATEX + inline uint32_t GetFormatTag(const WAVEFORMATEX* wfx) noexcept + { + if (wfx->wFormatTag == WAVE_FORMAT_EXTENSIBLE) + { + if (wfx->cbSize < (sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX))) + return 0; + + static const GUID s_wfexBase = { 0x00000000, 0x0000, 0x0010, { 0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71 } }; + + auto wfex = reinterpret_cast(wfx); + + if (memcmp(reinterpret_cast(&wfex->SubFormat) + sizeof(DWORD), 
+ reinterpret_cast(&s_wfexBase) + sizeof(DWORD), sizeof(GUID) - sizeof(DWORD)) != 0) + { + return 0; + } + + return wfex->SubFormat.Data1; + } + else + { + return wfx->wFormatTag; + } + } + + + // Helper for validating wave format structure + bool IsValid(_In_ const WAVEFORMATEX* wfx) noexcept; + + + // Helper for getting a default channel mask from channels + uint32_t GetDefaultChannelMask(int channels) noexcept; + + + // Helpers for creating various wave format structures + void CreateIntegerPCM(_Out_ WAVEFORMATEX* wfx, int sampleRate, int channels, int sampleBits) noexcept; + void CreateFloatPCM(_Out_ WAVEFORMATEX* wfx, int sampleRate, int channels) noexcept; + void CreateADPCM(_Out_writes_bytes_(wfxSize) WAVEFORMATEX* wfx, size_t wfxSize, int sampleRate, int channels, int samplesPerBlock) noexcept(false); +#ifdef DIRECTX_ENABLE_XWMA + void CreateXWMA(_Out_ WAVEFORMATEX* wfx, int sampleRate, int channels, int blockAlign, int avgBytes, bool wma3) noexcept; +#endif +#ifdef DIRECTX_ENABLE_XMA2 + void CreateXMA2(_Out_writes_bytes_(wfxSize) WAVEFORMATEX* wfx, size_t wfxSize, int sampleRate, int channels, int bytesPerBlock, int blockCount, int samplesEncoded) noexcept(false); +#endif + + // Helper for computing pan volume matrix + bool ComputePan(float pan, unsigned int channels, _Out_writes_(16) float* matrix) noexcept; + + // Helper class for implementing SoundEffectInstance + class SoundEffectInstanceBase + { + public: + SoundEffectInstanceBase() noexcept : + voice(nullptr), + state(STOPPED), + engine(nullptr), + mVolume(1.f), + mPitch(0.f), + mFreqRatio(1.f), + mPan(0.f), + mFlags(SoundEffectInstance_Default), + mDirectVoice(nullptr), + mReverbVoice(nullptr), + mDSPSettings{} + { + } + + SoundEffectInstanceBase(SoundEffectInstanceBase&&) = default; + SoundEffectInstanceBase& operator= (SoundEffectInstanceBase&&) = default; + + SoundEffectInstanceBase(SoundEffectInstanceBase const&) = delete; + SoundEffectInstanceBase& operator= (SoundEffectInstanceBase const&) = 
delete; + + ~SoundEffectInstanceBase() + { + assert(voice == nullptr); + } + + void Initialize(_In_ AudioEngine* eng, _In_ const WAVEFORMATEX* wfx, SOUND_EFFECT_INSTANCE_FLAGS flags) noexcept + { + assert(eng != nullptr); + engine = eng; + mDirectVoice = eng->GetMasterVoice(); + mReverbVoice = eng->GetReverbVoice(); + + if (eng->GetChannelMask() & SPEAKER_LOW_FREQUENCY) + mFlags = flags | SoundEffectInstance_UseRedirectLFE; + else + mFlags = flags & ~SoundEffectInstance_UseRedirectLFE; + + memset(&mDSPSettings, 0, sizeof(X3DAUDIO_DSP_SETTINGS)); + assert(wfx != nullptr); + mDSPSettings.SrcChannelCount = wfx->nChannels; + mDSPSettings.DstChannelCount = eng->GetOutputChannels(); + } + + void AllocateVoice(_In_ const WAVEFORMATEX* wfx) + { + if (voice) + return; + + assert(engine != nullptr); + engine->AllocateVoice(wfx, mFlags, false, &voice); + } + + void DestroyVoice() noexcept + { + if (voice) + { + assert(engine != nullptr); + engine->DestroyVoice(voice); + voice = nullptr; + } + } + + bool Play() // Returns true if STOPPED -> PLAYING + { + if (voice) + { + if (state == PAUSED) + { + HRESULT hr = voice->Start(0); + ThrowIfFailed(hr); + state = PLAYING; + } + else if (state != PLAYING) + { + if (mVolume != 1.f) + { + HRESULT hr = voice->SetVolume(mVolume); + ThrowIfFailed(hr); + } + + if (mPitch != 0.f) + { + mFreqRatio = XAudio2SemitonesToFrequencyRatio(mPitch * 12.f); + + HRESULT hr = voice->SetFrequencyRatio(mFreqRatio); + ThrowIfFailed(hr); + } + + if (mPan != 0.f) + { + SetPan(mPan); + } + + HRESULT hr = voice->Start(0); + ThrowIfFailed(hr); + state = PLAYING; + return true; + } + } + return false; + } + + void Stop(bool immediate, bool& looped) noexcept + { + if (!voice) + { + state = STOPPED; + return; + } + + if (immediate) + { + state = STOPPED; + (void)voice->Stop(0); + (void)voice->FlushSourceBuffers(); + } + else if (looped) + { + looped = false; + (void)voice->ExitLoop(); + } + else + { + (void)voice->Stop(XAUDIO2_PLAY_TAILS); + } + } + + void Pause() 
noexcept + { + if (voice && state == PLAYING) + { + state = PAUSED; + + (void)voice->Stop(0); + } + } + + void Resume() + { + if (voice && state == PAUSED) + { + HRESULT hr = voice->Start(0); + ThrowIfFailed(hr); + state = PLAYING; + } + } + + void SetVolume(float volume) + { + assert(volume >= -XAUDIO2_MAX_VOLUME_LEVEL && volume <= XAUDIO2_MAX_VOLUME_LEVEL); + + mVolume = volume; + + if (voice) + { + HRESULT hr = voice->SetVolume(volume); + ThrowIfFailed(hr); + } + } + + void SetPitch(float pitch) + { + assert(pitch >= -1.f && pitch <= 1.f); + + if ((mFlags & SoundEffectInstance_NoSetPitch) && pitch != 0.f) + { + DebugTrace("ERROR: Sound effect instance was created with the NoSetPitch flag\n"); + throw std::exception("SetPitch"); + } + + mPitch = pitch; + + if (voice) + { + mFreqRatio = XAudio2SemitonesToFrequencyRatio(mPitch * 12.f); + + HRESULT hr = voice->SetFrequencyRatio(mFreqRatio); + ThrowIfFailed(hr); + } + } + + void SetPan(float pan); + + void Apply3D(const AudioListener& listener, const AudioEmitter& emitter, bool rhcoords); + + SoundState GetState(bool autostop) noexcept + { + if (autostop && voice && (state == PLAYING)) + { + XAUDIO2_VOICE_STATE xstate; + voice->GetState(&xstate, XAUDIO2_VOICE_NOSAMPLESPLAYED); + + if (!xstate.BuffersQueued) + { + // Automatic stop if the buffer has finished playing + (void)voice->Stop(); + state = STOPPED; + } + } + + return state; + } + + int GetPendingBufferCount() const noexcept + { + if (!voice) + return 0; + + XAUDIO2_VOICE_STATE xstate; + voice->GetState(&xstate, XAUDIO2_VOICE_NOSAMPLESPLAYED); + return static_cast(xstate.BuffersQueued); + } + + void OnCriticalError() noexcept + { + if (voice) + { + voice->DestroyVoice(); + voice = nullptr; + } + state = STOPPED; + mDirectVoice = nullptr; + mReverbVoice = nullptr; + } + + void OnReset() noexcept + { + assert(engine != nullptr); + mDirectVoice = engine->GetMasterVoice(); + mReverbVoice = engine->GetReverbVoice(); + + if (engine->GetChannelMask() & 
SPEAKER_LOW_FREQUENCY) + mFlags = mFlags | SoundEffectInstance_UseRedirectLFE; + else + mFlags = mFlags & ~SoundEffectInstance_UseRedirectLFE; + + mDSPSettings.DstChannelCount = engine->GetOutputChannels(); + } + + void OnDestroy() noexcept + { + if (voice) + { + (void)voice->Stop(0); + (void)voice->FlushSourceBuffers(); + voice->DestroyVoice(); + voice = nullptr; + } + state = STOPPED; + engine = nullptr; + mDirectVoice = nullptr; + mReverbVoice = nullptr; + } + + void OnTrim() + { + if (voice && (state == STOPPED)) + { + engine->DestroyVoice(voice); + voice = nullptr; + } + } + + void GatherStatistics(AudioStatistics& stats) const noexcept + { + ++stats.allocatedInstances; + if (voice) + { + ++stats.allocatedVoices; + + if (mFlags & SoundEffectInstance_Use3D) + ++stats.allocatedVoices3d; + + if (state == PLAYING) + ++stats.playingInstances; + } + } + + IXAudio2SourceVoice* voice; + SoundState state; + AudioEngine* engine; + + private: + float mVolume; + float mPitch; + float mFreqRatio; + float mPan; + SOUND_EFFECT_INSTANCE_FLAGS mFlags; + IXAudio2Voice* mDirectVoice; + IXAudio2Voice* mReverbVoice; + X3DAUDIO_DSP_SETTINGS mDSPSettings; + }; + + struct WaveBankSeekData + { + uint32_t seekCount; + const uint32_t* seekTable; + uint32_t tag; + }; +} diff --git a/Sdk/External/DirectXTK/Audio/SoundEffect.cpp b/Sdk/External/DirectXTK/Audio/SoundEffect.cpp new file mode 100644 index 0000000..246b087 --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/SoundEffect.cpp @@ -0,0 +1,622 @@ +//-------------------------------------------------------------------------------------- +// File: SoundEffect.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "WAVFileReader.h" +#include "SoundCommon.h" + +#include + +#if (defined(_XBOX_ONE) && defined(_TITLE)) || defined(_GAMING_XBOX) +#include +#include +#endif + +using namespace DirectX; + + +//====================================================================================== +// SoundEffect +//====================================================================================== + +// Internal object implementation class. +class SoundEffect::Impl : public IVoiceNotify +{ +public: + explicit Impl(_In_ AudioEngine* engine) : + mWaveFormat(nullptr), + mStartAudio(nullptr), + mAudioBytes(0), + mLoopStart(0), + mLoopLength(0), + mEngine(engine), + mOneShots(0) + #ifdef DIRECTX_ENABLE_SEEK_TABLES + , mSeekCount(0) + , mSeekTable(nullptr) + #endif + #ifdef DIRECTX_ENABLE_XMA2 + , mXMAMemory(nullptr) + #endif + { + assert(mEngine != nullptr); + mEngine->RegisterNotify(this, false); + } + + Impl(Impl&&) = default; + Impl& operator= (Impl&&) = default; + + Impl(Impl const&) = delete; + Impl& operator= (Impl const&) = delete; + + ~Impl() override + { + if (!mInstances.empty()) + { + DebugTrace("WARNING: Destroying SoundEffect with %zu outstanding SoundEffectInstances\n", mInstances.size()); + + for (auto it = mInstances.begin(); it != mInstances.end(); ++it) + { + assert(*it != nullptr); + (*it)->OnDestroyParent(); + } + + mInstances.clear(); + } + + if (mOneShots > 0) + { + DebugTrace("WARNING: Destroying SoundEffect with %u outstanding one shot effects\n", mOneShots); + } + + if (mEngine) + { + mEngine->UnregisterNotify(this, true, false); + mEngine = nullptr; + } + + #ifdef DIRECTX_ENABLE_XMA2 + if (mXMAMemory) + { + ApuFree(mXMAMemory); + mXMAMemory = nullptr; + } + #endif + } + + HRESULT Initialize(_In_ AudioEngine* engine, _Inout_ std::unique_ptr& 
wavData, + _In_ const WAVEFORMATEX* wfx, _In_reads_bytes_(audioBytes) const uint8_t* startAudio, size_t audioBytes, + #ifdef DIRECTX_ENABLE_SEEK_TABLES + _In_reads_opt_(seekCount) const uint32_t* seekTable, size_t seekCount, + #endif + uint32_t loopStart, uint32_t loopLength) noexcept; + + void Play(float volume, float pitch, float pan); + + // IVoiceNotify + void __cdecl OnBufferEnd() override + { + InterlockedDecrement(&mOneShots); + } + + void __cdecl OnCriticalError() override + { + mOneShots = 0; + } + + void __cdecl OnReset() override + { + // No action required + } + + void __cdecl OnUpdate() override + { + // We do not register for update notification + assert(false); + } + + void __cdecl OnDestroyEngine() noexcept override + { + mEngine = nullptr; + mOneShots = 0; + } + + void __cdecl OnTrim() override + { + // No action required + } + + void __cdecl GatherStatistics(AudioStatistics& stats) const noexcept override + { + stats.playingOneShots += mOneShots; + stats.audioBytes += mAudioBytes; + + #ifdef DIRECTX_ENABLE_XMA2 + if (mXMAMemory) + stats.xmaAudioBytes += mAudioBytes; + #endif + } + + void __cdecl OnDestroyParent() noexcept override + { + } + + const WAVEFORMATEX* mWaveFormat; + const uint8_t* mStartAudio; + uint32_t mAudioBytes; + uint32_t mLoopStart; + uint32_t mLoopLength; + AudioEngine* mEngine; + std::list mInstances; + uint32_t mOneShots; + +#ifdef DIRECTX_ENABLE_SEEK_TABLES + uint32_t mSeekCount; + const uint32_t* mSeekTable; +#endif + +private: + std::unique_ptr mWavData; + +#ifdef DIRECTX_ENABLE_XMA2 + void* mXMAMemory; +#endif +}; + + +_Use_decl_annotations_ +HRESULT SoundEffect::Impl::Initialize(AudioEngine* engine, std::unique_ptr& wavData, + const WAVEFORMATEX* wfx, const uint8_t* startAudio, size_t audioBytes, + #ifdef DIRECTX_ENABLE_SEEK_TABLES + const uint32_t* seekTable, size_t seekCount, + #endif + uint32_t loopStart, uint32_t loopLength) noexcept +{ + if (!engine || !IsValid(wfx) || !startAudio || !audioBytes || !wavData) + return 
E_INVALIDARG; + + if (audioBytes > UINT32_MAX) + return E_INVALIDARG; + + switch (GetFormatTag(wfx)) + { + case WAVE_FORMAT_PCM: + case WAVE_FORMAT_IEEE_FLOAT: + case WAVE_FORMAT_ADPCM: + // Take ownership of the buffer + mWavData.reset(wavData.release()); + + // WARNING: We assume the wfx and startAudio parameters are pointers into the wavData memory buffer + mWaveFormat = wfx; + mStartAudio = startAudio; + break; + + #ifdef DIRECTX_ENABLE_XWMA + + case WAVE_FORMAT_WMAUDIO2: + case WAVE_FORMAT_WMAUDIO3: + if (!seekCount || !seekTable) + { + DebugTrace("ERROR: SoundEffect format xWMA requires seek table\n"); + return E_FAIL; + } + + if (seekCount > UINT32_MAX) + return E_INVALIDARG; + + // Take ownership of the buffer + mWavData.reset(wavData.release()); + + // WARNING: We assume the wfx, startAudio, and mSeekTable parameters are pointers into the wavData memory buffer + mWaveFormat = wfx; + mStartAudio = startAudio; + mSeekCount = static_cast(seekCount); + mSeekTable = seekTable; + break; + + #endif // xWMA + + #ifdef DIRECTX_ENABLE_XMA2 + + case WAVE_FORMAT_XMA2: + if (!seekCount || !seekTable) + { + DebugTrace("ERROR: SoundEffect format XMA2 requires seek table\n"); + return E_FAIL; + } + + if (seekCount > UINT32_MAX) + return E_INVALIDARG; + + { + HRESULT hr = ApuAlloc(&mXMAMemory, nullptr, + static_cast(audioBytes), SHAPE_XMA_INPUT_BUFFER_ALIGNMENT); + if (FAILED(hr)) + { + DebugTrace("ERROR: ApuAlloc failed. 
Did you allocate a large enough heap with ApuCreateHeap for all your XMA wave data?\n"); + return hr; + } + } + + memcpy(mXMAMemory, startAudio, audioBytes); + mStartAudio = reinterpret_cast(mXMAMemory); + + mWavData.reset(new (std::nothrow) uint8_t[sizeof(XMA2WAVEFORMATEX) + (seekCount * sizeof(uint32_t))]); + if (!mWavData) + return E_OUTOFMEMORY; + + memcpy(mWavData.get(), wfx, sizeof(XMA2WAVEFORMATEX)); + mWaveFormat = reinterpret_cast(mWavData.get()); + + // XMA seek table is Big-Endian + { + auto dest = reinterpret_cast(mWavData.get() + sizeof(XMA2WAVEFORMATEX)); + for (size_t k = 0; k < seekCount; ++k) + { + dest[k] = _byteswap_ulong(seekTable[k]); + } + } + + mSeekCount = static_cast(seekCount); + mSeekTable = reinterpret_cast(mWavData.get() + sizeof(XMA2WAVEFORMATEX)); + + wavData.reset(); + break; + + #endif // XMA2 + + default: + { + DebugTrace("ERROR: SoundEffect encountered an unsupported format tag (%u)\n", wfx->wFormatTag); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + } + + mAudioBytes = static_cast(audioBytes); + mLoopStart = loopStart; + mLoopLength = loopLength; + + return S_OK; +} + + +void SoundEffect::Impl::Play(float volume, float pitch, float pan) +{ + assert(volume >= -XAUDIO2_MAX_VOLUME_LEVEL && volume <= XAUDIO2_MAX_VOLUME_LEVEL); + assert(pitch >= -1.f && pitch <= 1.f); + assert(pan >= -1.f && pan <= 1.f); + + IXAudio2SourceVoice* voice = nullptr; + mEngine->AllocateVoice(mWaveFormat, SoundEffectInstance_Default, true, &voice); + + if (!voice) + return; + + if (volume != 1.f) + { + HRESULT hr = voice->SetVolume(volume); + ThrowIfFailed(hr); + } + + if (pitch != 0.f) + { + float fr = XAudio2SemitonesToFrequencyRatio(pitch * 12.f); + + HRESULT hr = voice->SetFrequencyRatio(fr); + ThrowIfFailed(hr); + } + + if (pan != 0.f) + { + float matrix[16]; + if (ComputePan(pan, mWaveFormat->nChannels, matrix)) + { + HRESULT hr = voice->SetOutputMatrix(nullptr, mWaveFormat->nChannels, mEngine->GetOutputChannels(), matrix); + 
ThrowIfFailed(hr); + } + } + + HRESULT hr = voice->Start(0); + ThrowIfFailed(hr); + + XAUDIO2_BUFFER buffer = {}; + buffer.AudioBytes = mAudioBytes; + buffer.pAudioData = mStartAudio; + buffer.Flags = XAUDIO2_END_OF_STREAM; + buffer.pContext = this; + + #ifdef DIRECTX_ENABLE_XWMA + uint32_t tag = GetFormatTag(mWaveFormat); + if (tag == WAVE_FORMAT_WMAUDIO2 || tag == WAVE_FORMAT_WMAUDIO3) + { + XAUDIO2_BUFFER_WMA wmaBuffer = {}; + wmaBuffer.PacketCount = mSeekCount; + wmaBuffer.pDecodedPacketCumulativeBytes = mSeekTable; + + hr = voice->SubmitSourceBuffer(&buffer, &wmaBuffer); + } + else + #endif // xWMA + { + hr = voice->SubmitSourceBuffer(&buffer, nullptr); + } + if (FAILED(hr)) + { + DebugTrace("ERROR: SoundEffect failed (%08X) when submitting buffer:\n", static_cast(hr)); + DebugTrace("\tFormat Tag %u, %u channels, %u-bit, %u Hz, %u bytes\n", + mWaveFormat->wFormatTag, mWaveFormat->nChannels, mWaveFormat->wBitsPerSample, mWaveFormat->nSamplesPerSec, mAudioBytes); + throw std::exception("SubmitSourceBuffer"); + } + + InterlockedIncrement(&mOneShots); +} + + +//-------------------------------------------------------------------------------------- +// SoundEffect +//-------------------------------------------------------------------------------------- + +// Public constructors. 
+_Use_decl_annotations_ +SoundEffect::SoundEffect(AudioEngine* engine, const wchar_t* waveFileName) + : pImpl(std::make_unique(engine)) +{ + WAVData wavInfo; + std::unique_ptr wavData; + HRESULT hr = LoadWAVAudioFromFileEx(waveFileName, wavData, wavInfo); + if (FAILED(hr)) + { + DebugTrace("ERROR: SoundEffect failed (%08X) to load from .wav file \"%ls\"\n", + static_cast(hr), waveFileName); + throw std::exception("SoundEffect"); + } + +#ifdef DIRECTX_ENABLE_SEEK_TABLES + hr = pImpl->Initialize(engine, wavData, wavInfo.wfx, wavInfo.startAudio, wavInfo.audioBytes, + wavInfo.seek, wavInfo.seekCount, + wavInfo.loopStart, wavInfo.loopLength); +#else + hr = pImpl->Initialize(engine, wavData, wavInfo.wfx, wavInfo.startAudio, wavInfo.audioBytes, + wavInfo.loopStart, wavInfo.loopLength); +#endif + + if (FAILED(hr)) + { + DebugTrace("ERROR: SoundEffect failed (%08X) to intialize from .wav file \"%ls\"\n", + static_cast(hr), waveFileName); + throw std::exception("SoundEffect"); + } +} + + +_Use_decl_annotations_ +SoundEffect::SoundEffect(AudioEngine* engine, std::unique_ptr& wavData, + const WAVEFORMATEX* wfx, const uint8_t* startAudio, size_t audioBytes) + : pImpl(std::make_unique(engine)) +{ +#ifdef DIRECTX_ENABLE_SEEK_TABLES + HRESULT hr = pImpl->Initialize(engine, wavData, wfx, startAudio, audioBytes, nullptr, 0, 0, 0); +#else + HRESULT hr = pImpl->Initialize(engine, wavData, wfx, startAudio, audioBytes, 0, 0); +#endif + if (FAILED(hr)) + { + DebugTrace("ERROR: SoundEffect failed (%08X) to intialize\n", static_cast(hr)); + throw std::exception("SoundEffect"); + } +} + + +_Use_decl_annotations_ +SoundEffect::SoundEffect(AudioEngine* engine, std::unique_ptr& wavData, + const WAVEFORMATEX* wfx, const uint8_t* startAudio, size_t audioBytes, + uint32_t loopStart, uint32_t loopLength) + : pImpl(std::make_unique(engine)) +{ +#ifdef DIRECTX_ENABLE_SEEK_TABLES + HRESULT hr = pImpl->Initialize(engine, wavData, wfx, startAudio, audioBytes, nullptr, 0, loopStart, loopLength); +#else 
+ HRESULT hr = pImpl->Initialize(engine, wavData, wfx, startAudio, audioBytes, loopStart, loopLength); +#endif + if (FAILED(hr)) + { + DebugTrace("ERROR: SoundEffect failed (%08X) to intialize\n", static_cast(hr)); + throw std::exception("SoundEffect"); + } +} + + +#ifdef DIRECTX_ENABLE_SEEK_TABLES + +_Use_decl_annotations_ +SoundEffect::SoundEffect(AudioEngine* engine, std::unique_ptr& wavData, + const WAVEFORMATEX* wfx, const uint8_t* startAudio, size_t audioBytes, + const uint32_t* seekTable, size_t seekCount) +{ + HRESULT hr = pImpl->Initialize(engine, wavData, wfx, startAudio, audioBytes, seekTable, seekCount, 0, 0); + if (FAILED(hr)) + { + DebugTrace("ERROR: SoundEffect failed (%08X) to intialize\n", static_cast(hr)); + throw std::exception("SoundEffect"); + } +} + +#endif + + +// Move constructor. +SoundEffect::SoundEffect(SoundEffect&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +SoundEffect& SoundEffect::operator= (SoundEffect&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +SoundEffect::~SoundEffect() +{ +} + + +// Public methods. +void SoundEffect::Play() +{ + pImpl->Play(1.f, 0.f, 0.f); +} + + +void SoundEffect::Play(float volume, float pitch, float pan) +{ + pImpl->Play(volume, pitch, pan); +} + + +std::unique_ptr SoundEffect::CreateInstance(SOUND_EFFECT_INSTANCE_FLAGS flags) +{ + auto effect = new SoundEffectInstance(pImpl->mEngine, this, flags); + assert(effect != nullptr); + pImpl->mInstances.emplace_back(effect->GetVoiceNotify()); + return std::unique_ptr(effect); +} + + +void SoundEffect::UnregisterInstance(_In_ IVoiceNotify* instance) +{ + auto it = std::find(pImpl->mInstances.begin(), pImpl->mInstances.end(), instance); + if (it == pImpl->mInstances.end()) + return; + + pImpl->mInstances.erase(it); +} + + +// Public accessors. 
+bool SoundEffect::IsInUse() const noexcept +{ + return (pImpl->mOneShots > 0) || !pImpl->mInstances.empty(); +} + + +size_t SoundEffect::GetSampleSizeInBytes() const noexcept +{ + return pImpl->mAudioBytes; +} + + +size_t SoundEffect::GetSampleDuration() const noexcept +{ + if (!pImpl->mWaveFormat || !pImpl->mWaveFormat->nChannels) + return 0; + + switch (GetFormatTag(pImpl->mWaveFormat)) + { + case WAVE_FORMAT_ADPCM: + { + auto adpcmFmt = reinterpret_cast(pImpl->mWaveFormat); + + uint64_t duration = uint64_t(pImpl->mAudioBytes / adpcmFmt->wfx.nBlockAlign) * adpcmFmt->wSamplesPerBlock; + unsigned int partial = pImpl->mAudioBytes % adpcmFmt->wfx.nBlockAlign; + if (partial) + { + if (partial >= (7u * adpcmFmt->wfx.nChannels)) + duration += (uint64_t(partial) * 2 / uint64_t(adpcmFmt->wfx.nChannels - 12)); + } + return static_cast(duration); + } + + #ifdef DIRECTX_ENABLE_XWMA + + case WAVE_FORMAT_WMAUDIO2: + case WAVE_FORMAT_WMAUDIO3: + if (pImpl->mSeekTable && pImpl->mSeekCount > 0) + { + return pImpl->mSeekTable[pImpl->mSeekCount - 1] / uint32_t(2 * pImpl->mWaveFormat->nChannels); + } + break; + + #endif + + #ifdef DIRECTX_ENABLE_XMA2 + + case WAVE_FORMAT_XMA2: + return reinterpret_cast(pImpl->mWaveFormat)->SamplesEncoded; + + #endif + + default: + if (pImpl->mWaveFormat->wBitsPerSample > 0) + { + return static_cast((uint64_t(pImpl->mAudioBytes) * 8) + / (uint64_t(pImpl->mWaveFormat->wBitsPerSample) * uint64_t(pImpl->mWaveFormat->nChannels))); + } + } + + return 0; +} + + +size_t SoundEffect::GetSampleDurationMS() const noexcept +{ + if (!pImpl->mWaveFormat || !pImpl->mWaveFormat->nSamplesPerSec) + return 0; + + uint64_t samples = GetSampleDuration(); + return static_cast((samples * 1000) / pImpl->mWaveFormat->nSamplesPerSec); +} + + +const WAVEFORMATEX* SoundEffect::GetFormat() const noexcept +{ + return pImpl->mWaveFormat; +} + + +#ifdef DIRECTX_ENABLE_XWMA + +bool SoundEffect::FillSubmitBuffer(_Out_ XAUDIO2_BUFFER& buffer, _Out_ XAUDIO2_BUFFER_WMA& wmaBuffer) 
const +{ + memset(&buffer, 0, sizeof(buffer)); + memset(&wmaBuffer, 0, sizeof(wmaBuffer)); + + buffer.AudioBytes = pImpl->mAudioBytes; + buffer.pAudioData = pImpl->mStartAudio; + buffer.LoopBegin = pImpl->mLoopStart; + buffer.LoopLength = pImpl->mLoopLength; + + uint32_t tag = GetFormatTag(pImpl->mWaveFormat); + if (tag == WAVE_FORMAT_WMAUDIO2 || tag == WAVE_FORMAT_WMAUDIO3) + { + wmaBuffer.PacketCount = pImpl->mSeekCount; + wmaBuffer.pDecodedPacketCumulativeBytes = pImpl->mSeekTable; + return true; + } + + return false; +} + +#else // !xWMA + +void SoundEffect::FillSubmitBuffer(_Out_ XAUDIO2_BUFFER& buffer) const +{ + memset(&buffer, 0, sizeof(buffer)); + buffer.AudioBytes = pImpl->mAudioBytes; + buffer.pAudioData = pImpl->mStartAudio; + buffer.LoopBegin = pImpl->mLoopStart; + buffer.LoopLength = pImpl->mLoopLength; +} + +#endif diff --git a/Sdk/External/DirectXTK/Audio/SoundEffectInstance.cpp b/Sdk/External/DirectXTK/Audio/SoundEffectInstance.cpp new file mode 100644 index 0000000..36920f5 --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/SoundEffectInstance.cpp @@ -0,0 +1,341 @@ +//-------------------------------------------------------------------------------------- +// File: SoundEffectInstance.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "SoundCommon.h" + +using namespace DirectX; + + +//====================================================================================== +// SoundEffectInstance +//====================================================================================== + +// Internal object implementation class. 
+class SoundEffectInstance::Impl : public IVoiceNotify +{ +public: + Impl(_In_ AudioEngine* engine, _In_ SoundEffect* effect, SOUND_EFFECT_INSTANCE_FLAGS flags) : + mBase(), + mEffect(effect), + mWaveBank(nullptr), + mIndex(0), + mLooped(false) + { + assert(engine != nullptr); + engine->RegisterNotify(this, false); + + assert(mEffect != nullptr); + mBase.Initialize(engine, effect->GetFormat(), flags); + } + + Impl(_In_ AudioEngine* engine, _In_ WaveBank* waveBank, uint32_t index, SOUND_EFFECT_INSTANCE_FLAGS flags) : + mBase(), + mEffect(nullptr), + mWaveBank(waveBank), + mIndex(index), + mLooped(false) + { + assert(engine != nullptr); + engine->RegisterNotify(this, false); + + char buff[64] = {}; + auto wfx = reinterpret_cast(buff); + assert(mWaveBank != nullptr); + mBase.Initialize(engine, mWaveBank->GetFormat(index, wfx, sizeof(buff)), flags); + } + + Impl(Impl&&) = default; + Impl& operator= (Impl&&) = default; + + Impl(Impl const&) = delete; + Impl& operator= (Impl const&) = delete; + + ~Impl() override + { + mBase.DestroyVoice(); + + if (mBase.engine) + { + mBase.engine->UnregisterNotify(this, false, false); + mBase.engine = nullptr; + } + } + + void Play(bool loop); + + // IVoiceNotify + void __cdecl OnBufferEnd() override + { + // We don't register for this notification for SoundEffectInstances, so this should not be invoked + assert(false); + } + + void __cdecl OnCriticalError() override + { + mBase.OnCriticalError(); + } + + void __cdecl OnReset() override + { + mBase.OnReset(); + } + + void __cdecl OnUpdate() override + { + // We do not register for update notification + assert(false); + } + + void __cdecl OnDestroyEngine() noexcept override + { + mBase.OnDestroy(); + } + + void __cdecl OnTrim() override + { + mBase.OnTrim(); + } + + void __cdecl GatherStatistics(AudioStatistics& stats) const noexcept override + { + mBase.GatherStatistics(stats); + } + + void __cdecl OnDestroyParent() noexcept override + { + mBase.OnDestroy(); + mWaveBank = nullptr; + 
mEffect = nullptr; + } + + SoundEffectInstanceBase mBase; + SoundEffect* mEffect; + WaveBank* mWaveBank; + uint32_t mIndex; + bool mLooped; +}; + + +void SoundEffectInstance::Impl::Play(bool loop) +{ + if (!mBase.voice) + { + if (mWaveBank) + { + char buff[64] = {}; + auto wfx = reinterpret_cast(buff); + mBase.AllocateVoice(mWaveBank->GetFormat(mIndex, wfx, sizeof(buff))); + } + else + { + assert(mEffect != nullptr); + mBase.AllocateVoice(mEffect->GetFormat()); + } + } + + if (!mBase.Play()) + return; + + // Submit audio data for STOPPED -> PLAYING state transition + XAUDIO2_BUFFER buffer = {}; + +#ifdef DIRECTX_ENABLE_XWMA + + bool iswma = false; + XAUDIO2_BUFFER_WMA wmaBuffer = {}; + if (mWaveBank) + { + iswma = mWaveBank->FillSubmitBuffer(mIndex, buffer, wmaBuffer); + } + else + { + assert(mEffect != nullptr); + iswma = mEffect->FillSubmitBuffer(buffer, wmaBuffer); + } + +#else // !xWMA + + if (mWaveBank) + { + mWaveBank->FillSubmitBuffer(mIndex, buffer); + } + else + { + assert(mEffect != nullptr); + mEffect->FillSubmitBuffer(buffer); + } + +#endif + + buffer.Flags = XAUDIO2_END_OF_STREAM; + if (loop) + { + mLooped = true; + buffer.LoopCount = XAUDIO2_LOOP_INFINITE; + } + else + { + mLooped = false; + buffer.LoopCount = buffer.LoopBegin = buffer.LoopLength = 0; + } + buffer.pContext = nullptr; + + HRESULT hr; + #ifdef DIRECTX_ENABLE_XWMA + if (iswma) + { + hr = mBase.voice->SubmitSourceBuffer(&buffer, &wmaBuffer); + } + else + #endif + { + hr = mBase.voice->SubmitSourceBuffer(&buffer, nullptr); + } + + if (FAILED(hr)) + { + #ifdef _DEBUG + DebugTrace("ERROR: SoundEffectInstance failed (%08X) when submitting buffer:\n", static_cast(hr)); + + char buff[64] = {}; + auto wfx = (mWaveBank) ? mWaveBank->GetFormat(mIndex, reinterpret_cast(buff), sizeof(buff)) + : mEffect->GetFormat(); + + size_t length = (mWaveBank) ? 
mWaveBank->GetSampleSizeInBytes(mIndex) : mEffect->GetSampleSizeInBytes(); + + DebugTrace("\tFormat Tag %u, %u channels, %u-bit, %u Hz, %zu bytes\n", + wfx->wFormatTag, wfx->nChannels, wfx->wBitsPerSample, wfx->nSamplesPerSec, length); + #endif + mBase.Stop(true, mLooped); + throw std::exception("SubmitSourceBuffer"); + } +} + + +//-------------------------------------------------------------------------------------- +// SoundEffectInstance +//-------------------------------------------------------------------------------------- + +// Private constructors +_Use_decl_annotations_ +SoundEffectInstance::SoundEffectInstance(AudioEngine* engine, SoundEffect* effect, SOUND_EFFECT_INSTANCE_FLAGS flags) : + pImpl(std::make_unique(engine, effect, flags)) +{ +} + +_Use_decl_annotations_ +SoundEffectInstance::SoundEffectInstance(AudioEngine* engine, WaveBank* waveBank, unsigned int index, SOUND_EFFECT_INSTANCE_FLAGS flags) : + pImpl(std::make_unique(engine, waveBank, index, flags)) +{ +} + + +// Move constructor. +SoundEffectInstance::SoundEffectInstance(SoundEffectInstance&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +SoundEffectInstance& SoundEffectInstance::operator= (SoundEffectInstance&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +SoundEffectInstance::~SoundEffectInstance() +{ + if (pImpl) + { + if (pImpl->mWaveBank) + { + pImpl->mWaveBank->UnregisterInstance(pImpl.get()); + pImpl->mWaveBank = nullptr; + } + + if (pImpl->mEffect) + { + pImpl->mEffect->UnregisterInstance(pImpl.get()); + pImpl->mEffect = nullptr; + } + } +} + + +// Public methods. 
+void SoundEffectInstance::Play(bool loop) +{ + pImpl->Play(loop); +} + + +void SoundEffectInstance::Stop(bool immediate) noexcept +{ + pImpl->mBase.Stop(immediate, pImpl->mLooped); +} + + +void SoundEffectInstance::Pause() noexcept +{ + pImpl->mBase.Pause(); +} + + +void SoundEffectInstance::Resume() +{ + pImpl->mBase.Resume(); +} + + +void SoundEffectInstance::SetVolume(float volume) +{ + pImpl->mBase.SetVolume(volume); +} + + +void SoundEffectInstance::SetPitch(float pitch) +{ + pImpl->mBase.SetPitch(pitch); +} + + +void SoundEffectInstance::SetPan(float pan) +{ + pImpl->mBase.SetPan(pan); +} + + +void SoundEffectInstance::Apply3D(const AudioListener& listener, const AudioEmitter& emitter, bool rhcoords) +{ + pImpl->mBase.Apply3D(listener, emitter, rhcoords); +} + + +// Public accessors. +bool SoundEffectInstance::IsLooped() const noexcept +{ + return pImpl->mLooped; +} + + +SoundState SoundEffectInstance::GetState() noexcept +{ + return pImpl->mBase.GetState(true); +} + + +IVoiceNotify* SoundEffectInstance::GetVoiceNotify() const noexcept +{ + return pImpl.get(); +} diff --git a/Sdk/External/DirectXTK/Audio/SoundStreamInstance.cpp b/Sdk/External/DirectXTK/Audio/SoundStreamInstance.cpp new file mode 100644 index 0000000..8d05bcb --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/SoundStreamInstance.cpp @@ -0,0 +1,849 @@ +//-------------------------------------------------------------------------------------- +// File: SoundStreamInstance.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "DirectXHelpers.h" +#include "WaveBankReader.h" +#include "PlatformHelpers.h" +#include "SoundCommon.h" + +#if (defined(_XBOX_ONE) && defined(_TITLE)) || defined(_GAMING_XBOX) +#include +#include +#endif + +using namespace DirectX; + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wcovered-switch-default" +#endif + +#pragma warning(disable : 4061 4062) + +//#define VERBOSE_TRACE + +#ifdef VERBOSE_TRACE +#pragma message("NOTE: Verbose tracing enabled") +#endif + +namespace +{ + const size_t DVD_SECTOR_SIZE = 2048; + const size_t MEMORY_ALLOC_SIZE = 4096; + const size_t MAX_BUFFER_COUNT = 3; + + #ifdef DIRECTX_ENABLE_SEEK_TABLES + const size_t MAX_STREAMING_SEEK_PACKETS = 2048; + #endif + + #ifdef DIRECTX_ENABLE_XMA2 + const size_t XMA2_64KBLOCKINBYTES = 65536; + + struct apu_deleter { void operator()(void* p) noexcept { if (p) ApuFree(p); } }; + #endif + + size_t ComputeAsyncPacketSize(_In_ const WAVEFORMATEX* wfx, uint32_t tag) + { + if (!wfx) + return 0; + + size_t buffer = size_t(wfx->nAvgBytesPerSec) * 2u; + + #ifdef DIRECTX_ENABLE_XMA2 + if (tag == WAVE_FORMAT_XMA2) + { + buffer = AlignUp(buffer, XMA2_64KBLOCKINBYTES); + buffer = std::max(XMA2_64KBLOCKINBYTES, buffer); + return buffer; + } + #else + UNREFERENCED_PARAMETER(tag); + #endif + + buffer = AlignUp(buffer, MEMORY_ALLOC_SIZE); + buffer = std::max(65536u, buffer); + return buffer; + } + + static_assert(MEMORY_ALLOC_SIZE >= DVD_SECTOR_SIZE, "Memory size should be larger than sector size"); + static_assert(MEMORY_ALLOC_SIZE >= DVD_SECTOR_SIZE || (MEMORY_ALLOC_SIZE% DVD_SECTOR_SIZE) == 0, "Memory size should be multiples of sector size"); +} + + +//====================================================================================== +// SoundStreamInstance 
+//====================================================================================== + +// Internal object implementation class. +class SoundStreamInstance::Impl : public IVoiceNotify +{ +public: + Impl(_In_ AudioEngine* engine, + WaveBank* waveBank, + uint32_t index, + SOUND_EFFECT_INSTANCE_FLAGS flags) noexcept(false) : + mBase(), + mWaveBank(waveBank), + mIndex(index), + mPlaying(false), + mLooped(false), + mEndStream(false), + mPrefetch(false), + mSitching(false), + mPackets{}, + mCurrentDiskReadBuffer(0), + mCurrentPlayBuffer(0), + mBlockAlign(0), + mCurrentPosition(0), + mOffsetBytes(0), + mLengthInBytes(0), + mPacketSize(0), + mTotalSize(0) + #ifdef DIRECTX_ENABLE_SEEK_TABLES + , mSeekCount(0), + mSeekTable(nullptr), + mSeekTableCopy{} + #endif + { + assert(engine != nullptr); + engine->RegisterNotify(this, true); + + char buff[64] = {}; + auto wfx = reinterpret_cast(buff); + assert(mWaveBank != nullptr); + mBase.Initialize(engine, mWaveBank->GetFormat(index, wfx, sizeof(buff)), flags); + + WaveBankReader::Metadata metadata = {}; + (void)mWaveBank->GetPrivateData(index, &metadata, sizeof(metadata)); + + mOffsetBytes = metadata.offsetBytes; + mLengthInBytes = metadata.lengthBytes; + + #ifdef DIRECTX_ENABLE_SEEK_TABLES + WaveBankSeekData seekData = {}; + (void)mWaveBank->GetPrivateData(index, &seekData, sizeof(seekData)); + if (seekData.tag == WAVE_FORMAT_WMAUDIO2 || seekData.tag == WAVE_FORMAT_WMAUDIO3) + { + mSeekCount = seekData.seekCount; + mSeekTable = seekData.seekTable; + } + #endif + + mBufferEnd.reset(CreateEventEx(nullptr, nullptr, 0, EVENT_MODIFY_STATE | SYNCHRONIZE)); + mBufferRead.reset(CreateEventEx(nullptr, nullptr, 0, EVENT_MODIFY_STATE | SYNCHRONIZE)); + if (!mBufferEnd || !mBufferRead) + { + throw std::exception("CreateEvent"); + } + + ThrowIfFailed(AllocateStreamingBuffers(wfx)); + +#ifdef VERBOSE_TRACE + DebugTrace("INFO (Streaming): packet size %zu, play length %zu\n", mPacketSize, mLengthInBytes); +#endif + + mPrefetch = true; + 
ThrowIfFailed(ReadBuffers()); + } + + virtual ~Impl() override + { + mBase.DestroyVoice(); + + if (mWaveBank && mWaveBank->GetAsyncHandle()) + { + for (size_t j = 0; j < MAX_BUFFER_COUNT; ++j) + { + (void)CancelIoEx(mWaveBank->GetAsyncHandle(), &mPackets[j].request); + } + } + + if (mBase.engine) + { + mBase.engine->UnregisterNotify(this, false, true); + mBase.engine = nullptr; + } + + for (size_t j = 0; j < MAX_BUFFER_COUNT; ++j) + { + mPackets[j] = {}; + } + mPacketSize = 0; + } + + Impl(Impl&&) = default; + Impl& operator= (Impl&&) = default; + + Impl(Impl const&) = delete; + Impl& operator= (Impl const&) = delete; + + void Play(bool loop) + { + if (!mBase.voice) + { + if (!mWaveBank) + return; + + char buff[64] = {}; + auto wfx = reinterpret_cast(buff); + mBase.AllocateVoice(mWaveBank->GetFormat(mIndex, wfx, sizeof(buff))); + } + + if (!mBase.Play()) + return; + + mLooped = loop; + mEndStream = false; + + if (!mPrefetch) + { + mCurrentPosition = 0; + } + + ThrowIfFailed(PlayBuffers()); + } + + // IVoiceNotify + virtual void __cdecl OnBufferEnd() override + { + // Not used + } + + virtual void __cdecl OnCriticalError() override + { + mBase.OnCriticalError(); + } + + virtual void __cdecl OnReset() override + { + mBase.OnReset(); + } + + virtual void __cdecl OnUpdate() override + { + if (!mPlaying) + return; + + HANDLE events[] = { mBufferRead.get(), mBufferEnd.get() }; + switch (WaitForMultipleObjectsEx(_countof(events), events, FALSE, 0, FALSE)) + { + case WAIT_TIMEOUT: + break; + + case WAIT_OBJECT_0: // Read completed +#ifdef VERBOSE_TRACE + DebugTrace("INFO (Streaming): Playing... (readpos %zu) [", mCurrentPosition); + for (uint32_t k = 0; k < MAX_BUFFER_COUNT; ++k) + { + DebugTrace("%ls ", s_debugState[static_cast(mPackets[k].state)]); + } + DebugTrace("]\n"); +#endif + mPrefetch = false; + ThrowIfFailed(PlayBuffers()); + break; + + case (WAIT_OBJECT_0 + 1): // Play completed +#ifdef VERBOSE_TRACE + DebugTrace("INFO (Streaming): Reading... 
(readpos %zu) [", mCurrentPosition); + for (uint32_t k = 0; k < MAX_BUFFER_COUNT; ++k) + { + DebugTrace("%ls ", s_debugState[static_cast(mPackets[k].state)]); + } + DebugTrace("]\n"); +#endif + ThrowIfFailed(ReadBuffers()); + break; + + case WAIT_FAILED: + throw std::exception("WaitForMultipleObjects"); + } + } + + virtual void __cdecl OnDestroyEngine() noexcept override + { + mBase.OnDestroy(); + } + + virtual void __cdecl OnTrim() override + { + mBase.OnTrim(); + } + + virtual void __cdecl GatherStatistics(AudioStatistics& stats) const noexcept override + { + mBase.GatherStatistics(stats); + + stats.streamingBytes += mPacketSize * MAX_BUFFER_COUNT; + } + + virtual void __cdecl OnDestroyParent() noexcept override + { + mBase.OnDestroy(); + mWaveBank = nullptr; + } + + SoundEffectInstanceBase mBase; + WaveBank* mWaveBank; + uint32_t mIndex; + bool mPlaying; + bool mLooped; + bool mEndStream; + bool mPrefetch; + bool mSitching; + + ScopedHandle mBufferEnd; + ScopedHandle mBufferRead; + + enum class State : uint32_t + { + FREE = 0, + PENDING, + READY, + PLAYING, + }; + +#ifdef VERBOSE_TRACE + static const wchar_t* s_debugState[4]; +#endif + + struct BufferNotify : public IVoiceNotify + { + BufferNotify() : mParent(nullptr), mIndex(0) {} + + void Set(SoundStreamInstance::Impl* parent, size_t index) noexcept(true) { mParent = parent; mIndex = index; } + + void __cdecl OnBufferEnd() override + { + assert(mParent != nullptr); + mParent->mPackets[mIndex].state = State::FREE; + SetEvent(mParent->mBufferEnd.get()); + } + + void __cdecl OnCriticalError() override { assert(mParent != nullptr); mParent->OnCriticalError(); } + void __cdecl OnReset() override { assert(mParent != nullptr); mParent->OnReset(); } + void __cdecl OnUpdate() override { assert(mParent != nullptr); mParent->OnUpdate(); } + void __cdecl OnDestroyEngine() noexcept override { assert(mParent != nullptr); mParent->OnDestroyEngine(); } + void __cdecl OnTrim() override { assert(mParent != nullptr); 
mParent->OnTrim(); } + void __cdecl GatherStatistics(AudioStatistics& stats) const override { assert(mParent != nullptr); mParent->GatherStatistics(stats); } + void __cdecl OnDestroyParent() noexcept override { assert(mParent != nullptr); mParent->OnDestroyParent(); } + + private: + SoundStreamInstance::Impl* mParent; + size_t mIndex; + }; + + struct Packets + { + State state; + uint8_t* buffer; + uint8_t* stitchBuffer; + uint32_t valid; + uint32_t audioBytes; + uint32_t startPosition; + OVERLAPPED request; + BufferNotify notify; + + Packets() : + state(State::FREE), + buffer(nullptr), + stitchBuffer(nullptr), + valid(0), + audioBytes(0), + startPosition(0), + request{}, + notify{} {} + }; + + Packets mPackets[MAX_BUFFER_COUNT]; + +private: + uint32_t mCurrentDiskReadBuffer; + uint32_t mCurrentPlayBuffer; + uint32_t mBlockAlign; + size_t mCurrentPosition; + size_t mOffsetBytes; + size_t mLengthInBytes; + + size_t mPacketSize; + size_t mTotalSize; + std::unique_ptr mStreamBuffer; + +#ifdef DIRECTX_ENABLE_SEEK_TABLES + uint32_t mSeekCount; + const uint32_t* mSeekTable; + uint32_t mSeekTableCopy[MAX_STREAMING_SEEK_PACKETS]; +#endif + +#ifdef DIRECTX_ENABLE_XMA2 + std::unique_ptr mXMAMemory; +#endif + + HRESULT AllocateStreamingBuffers(const WAVEFORMATEX* wfx) noexcept; + HRESULT ReadBuffers() noexcept; + HRESULT PlayBuffers() noexcept; +}; + + +HRESULT SoundStreamInstance::Impl::AllocateStreamingBuffers(const WAVEFORMATEX* wfx) noexcept +{ + if (!wfx) + return E_INVALIDARG; + + uint32_t tag = GetFormatTag(wfx); + + size_t packetSize = ComputeAsyncPacketSize(wfx, tag); + if (!packetSize) + return E_UNEXPECTED; + + uint64_t totalSize = uint64_t(packetSize) * uint64_t(MAX_BUFFER_COUNT); + if (totalSize > UINT32_MAX) + return HRESULT_FROM_WIN32(ERROR_ARITHMETIC_OVERFLOW); + + mPacketSize = packetSize; + mBlockAlign = wfx->nBlockAlign; + mSitching = false; + + size_t stitchSize = 0; + if ((packetSize % wfx->nBlockAlign) != 0) + { + mSitching = true; + + stitchSize = 
AlignUp(wfx->nBlockAlign, DVD_SECTOR_SIZE); + totalSize += uint64_t(stitchSize) * uint64_t(MAX_BUFFER_COUNT); + if (totalSize > UINT32_MAX) + return HRESULT_FROM_WIN32(ERROR_ARITHMETIC_OVERFLOW); + } + + #ifdef DIRECTX_ENABLE_XMA2 + if ((mTotalSize < totalSize) || (tag == WAVE_FORMAT_XMA2 && !mXMAMemory) || (tag != WAVE_FORMAT_XMA2 && !mStreamBuffer)) + #else + if (mTotalSize < totalSize) + #endif + { + mStreamBuffer.reset(); + #ifdef DIRECTX_ENABLE_XMA2 + mXMAMemory.reset(); + if (tag == WAVE_FORMAT_XMA2) + { + void* xmaMemory = nullptr; + HRESULT hr = ApuAlloc(&xmaMemory, nullptr, static_cast(totalSize), SHAPE_XMA_INPUT_BUFFER_ALIGNMENT); + if (FAILED(hr)) + { + DebugTrace("ERROR: ApuAlloc failed (%llu bytes). Did you allocate a large enough heap with ApuCreateHeap for all your XMA wave data?\n", totalSize); + return hr; + } + mXMAMemory.reset(static_cast(xmaMemory)); + } + else + #endif + { + mStreamBuffer.reset(reinterpret_cast( + VirtualAlloc(nullptr, static_cast(totalSize), MEM_COMMIT, PAGE_READWRITE) + )); + + if (!mStreamBuffer) + { + DebugTrace("ERROR: Failed allocating %llu bytes for SoundStreamInstance\n", totalSize); + mPacketSize = 0; + totalSize = 0; + return E_OUTOFMEMORY; + } + } + + mTotalSize = static_cast(totalSize); + + #ifdef DIRECTX_ENABLE_XMA2 + uint8_t* ptr = (tag == WAVE_FORMAT_XMA2) ? 
mXMAMemory.get() : mStreamBuffer.get(); + #else + uint8_t* ptr = mStreamBuffer.get(); + #endif + for (size_t j = 0; j < MAX_BUFFER_COUNT; ++j) + { + mPackets[j].buffer = ptr; + mPackets[j].stitchBuffer = nullptr; + mPackets[j].request.hEvent = mBufferRead.get(); + mPackets[j].notify.Set(this, j); + ptr += packetSize; + } + + if (stitchSize > 0) + { + for (size_t j = 0; j < MAX_BUFFER_COUNT; ++j) + { + mPackets[j].stitchBuffer = ptr; + ptr += stitchSize; + } + } + } + + return S_OK; +} + + +HRESULT SoundStreamInstance::Impl::ReadBuffers() noexcept +{ + if (mCurrentPosition >= mLengthInBytes) + { + if (!mLooped) + { + mEndStream = true; + return S_FALSE; + } + +#ifdef VERBOSE_TRACE + DebugTrace("INFO (Streaming): Loop restart\n"); +#endif + + mCurrentPosition = 0; + } + + HANDLE async = mWaveBank->GetAsyncHandle(); + + uint32_t readBuffer = mCurrentDiskReadBuffer; + for (uint32_t j = 0; j < MAX_BUFFER_COUNT; ++j) + { + uint32_t entry = (j + readBuffer) % uint32_t(MAX_BUFFER_COUNT); + if (mPackets[entry].state == State::FREE) + { + if (mCurrentPosition < mLengthInBytes) + { + auto cbValid = static_cast(std::min(mPacketSize, mLengthInBytes - mCurrentPosition)); + + mPackets[entry].valid = cbValid; + mPackets[entry].audioBytes = 0; + mPackets[entry].startPosition = static_cast(mCurrentPosition); + mPackets[entry].request.Offset = static_cast(mOffsetBytes + mCurrentPosition); + + if (!ReadFile(async, mPackets[entry].buffer, uint32_t(mPacketSize), nullptr, &mPackets[entry].request)) + { + DWORD error = GetLastError(); + if (error != ERROR_IO_PENDING) + { + return HRESULT_FROM_WIN32(error); + } + } + + mCurrentPosition += cbValid; + + mCurrentDiskReadBuffer = (entry + 1) % uint32_t(MAX_BUFFER_COUNT); + + mPackets[entry].state = State::PENDING; + + if ((cbValid < mPacketSize) && mLooped) + { +#ifdef VERBOSE_TRACE + DebugTrace("INFO (Streaming): Loop restart\n"); +#endif + mCurrentPosition = 0; + } + } + } + } + + return S_OK; +} + + +HRESULT 
SoundStreamInstance::Impl::PlayBuffers() noexcept +{ + HANDLE async = mWaveBank->GetAsyncHandle(); + + for (uint32_t j = 0; j < MAX_BUFFER_COUNT; ++j) + { + if (mPackets[j].state == State::PENDING) + { + DWORD cb = 0; +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) + BOOL result = GetOverlappedResultEx(async, &mPackets[j].request, &cb, 0, FALSE); +#else + BOOL result = GetOverlappedResult(async, &mPackets[j].request, &cb, FALSE); +#endif + if (result) + { + mPackets[j].state = State::READY; + } + else + { + DWORD error = GetLastError(); + if (error != ERROR_IO_INCOMPLETE) + { + ThrowIfFailed(HRESULT_FROM_WIN32(error)); + } + } + } + } + + if (!mBase.voice || !mPlaying) + return S_FALSE; + + for (uint32_t j = 0; j < MAX_BUFFER_COUNT; ++j) + { + if (mPackets[mCurrentPlayBuffer].state != State::READY) + break; + + const uint8_t* ptr = mPackets[mCurrentPlayBuffer].buffer; + uint32_t valid = mPackets[mCurrentPlayBuffer].valid; + + bool endstream = false; + if (valid < mPacketSize) + { + endstream = true; +#ifdef VERBOSE_TRACE + DebugTrace("INFO (Streaming): End of stream (%u of %zu bytes)\n", mPackets[mCurrentPlayBuffer].valid, mPacketSize); +#endif + } + + uint32_t thisFrameStitch = 0; + if (mSitching) + { + // Compute how many left-over bytes at the end of the previous packet (if any, they form the head of a partial block). + uint32_t prevFrameStitch = (mPackets[mCurrentPlayBuffer].startPosition % mBlockAlign); + + if (prevFrameStitch > 0) + { + auto buffer = mPackets[mCurrentPlayBuffer].stitchBuffer; + + // Compute how many bytes at the start of our current packet are the tail of the partial block. + thisFrameStitch = mBlockAlign - prevFrameStitch; + + uint32_t k = (mCurrentPlayBuffer + MAX_BUFFER_COUNT - 1) % MAX_BUFFER_COUNT; + if (mPackets[k].state == State::READY || mPackets[k].state == State::PLAYING) + { + // Compute how many bytes at the start of the previous packet were the tail of the previous stitch block. 
+ uint32_t prevFrameStitchOffset = (mPackets[k].startPosition % mBlockAlign); + prevFrameStitchOffset = (prevFrameStitchOffset > 0) ? (mBlockAlign - prevFrameStitchOffset) : 0u; + + // Point to the start of the partial block's head in the previous packet. + auto prevBuffer = mPackets[k].buffer + prevFrameStitchOffset + mPackets[k].audioBytes; + + // Merge the the head partial block in the previous packet with the tail partial block at the start of our packet. + memcpy(buffer, prevBuffer, prevFrameStitch); + memcpy(buffer + prevFrameStitch, ptr, thisFrameStitch); + + // Submit stitch packet (only need to get notified if we aren't submitting another packet for this buffer). + XAUDIO2_BUFFER buf = {}; + buf.AudioBytes = mBlockAlign; + buf.pAudioData = buffer; + + if (endstream && (valid <= thisFrameStitch)) + { + buf.Flags = XAUDIO2_END_OF_STREAM; + buf.pContext = &mPackets[mCurrentPlayBuffer].notify; + } +#ifdef VERBOSE_TRACE + DebugTrace("INFO (Streaming): Stitch packet (%u + %u = %u)\n", prevFrameStitch, thisFrameStitch, mBlockAlign); +#endif + #ifdef DIRECTX_ENABLE_XWMA + if (mSeekCount > 0) + { + XAUDIO2_BUFFER_WMA wmaBuf = {}; + wmaBuf.pDecodedPacketCumulativeBytes = mSeekTableCopy; + wmaBuf.PacketCount = 1; + + uint32_t seekOffset = (mPackets[k].startPosition + prevFrameStitchOffset + mPackets[k].audioBytes) / mBlockAlign; + assert(seekOffset > 0); + mSeekTableCopy[0] = mSeekTable[seekOffset] - mSeekTable[seekOffset - 1]; + + ThrowIfFailed(mBase.voice->SubmitSourceBuffer(&buf, &wmaBuf)); + } + else + #endif // XWMA + { + ThrowIfFailed(mBase.voice->SubmitSourceBuffer(&buf)); + } + } + + ptr += thisFrameStitch; + } + + // Compute valid audio bytes in our current packet. + valid = ((valid - thisFrameStitch) / mBlockAlign) * mBlockAlign; + } + + if (valid > 0) + { + // Record the audioBytes we actually submitted... + mPackets[mCurrentPlayBuffer].audioBytes = valid; + + XAUDIO2_BUFFER buf = {}; + buf.Flags = (endstream) ? 
XAUDIO2_END_OF_STREAM : 0; + buf.AudioBytes = valid; + buf.pAudioData = ptr; + buf.pContext = &mPackets[mCurrentPlayBuffer].notify; + + #ifdef DIRECTX_ENABLE_XWMA + if (mSeekCount > 0) + { + XAUDIO2_BUFFER_WMA wmaBuf = {}; + + wmaBuf.PacketCount = valid / mBlockAlign; + + uint32_t seekOffset = mPackets[mCurrentPlayBuffer].startPosition / mBlockAlign; + if (seekOffset > MAX_STREAMING_SEEK_PACKETS) + { + DebugTrace("ERROR: xWMA packet seek count exceeds %zu\n", MAX_STREAMING_SEEK_PACKETS); + return E_FAIL; + } + else if (seekOffset > 0) + { + for (uint32_t i = 0; i < wmaBuf.PacketCount; ++i) + { + mSeekTableCopy[i] = mSeekTable[i + seekOffset] - mSeekTable[seekOffset - 1]; + } + + wmaBuf.pDecodedPacketCumulativeBytes = mSeekTableCopy; + } + else + { + wmaBuf.pDecodedPacketCumulativeBytes = mSeekTable; + } + + ThrowIfFailed(mBase.voice->SubmitSourceBuffer(&buf, &wmaBuf)); + } + else + #endif // xWMA + { + ThrowIfFailed(mBase.voice->SubmitSourceBuffer(&buf)); + } + } + + mPackets[mCurrentPlayBuffer].state = State::PLAYING; + mCurrentPlayBuffer = (mCurrentPlayBuffer + 1) % uint32_t(MAX_BUFFER_COUNT); + } + + return S_OK; +} + +#ifdef VERBOSE_TRACE +const wchar_t* SoundStreamInstance::Impl::s_debugState[4] = +{ + L"FREE", + L"PENDING", + L"READY", + L"PLAYING" +}; +#endif + + +//-------------------------------------------------------------------------------------- +// SoundStreamInstance +//-------------------------------------------------------------------------------------- + +// Private constructors +_Use_decl_annotations_ +SoundStreamInstance::SoundStreamInstance(AudioEngine* engine, WaveBank* waveBank, unsigned int index, SOUND_EFFECT_INSTANCE_FLAGS flags) : + pImpl(std::make_unique(engine, waveBank, index, flags)) +{ +} + + +// Move constructor. +SoundStreamInstance::SoundStreamInstance(SoundStreamInstance&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. 
+SoundStreamInstance& SoundStreamInstance::operator= (SoundStreamInstance&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +SoundStreamInstance::~SoundStreamInstance() +{ + if (pImpl) + { + if (pImpl->mWaveBank) + { + pImpl->mWaveBank->UnregisterInstance(pImpl.get()); + pImpl->mWaveBank = nullptr; + } + } +} + + +// Public methods. +void SoundStreamInstance::Play(bool loop) +{ + pImpl->Play(loop); + pImpl->mPlaying = true; +} + + +void SoundStreamInstance::Stop(bool immediate) noexcept +{ + pImpl->mBase.Stop(immediate, pImpl->mLooped); + pImpl->mPlaying = !immediate; +} + + +void SoundStreamInstance::Pause() noexcept +{ + pImpl->mBase.Pause(); +} + + +void SoundStreamInstance::Resume() +{ + pImpl->mBase.Resume(); +} + + +void SoundStreamInstance::SetVolume(float volume) +{ + pImpl->mBase.SetVolume(volume); +} + + +void SoundStreamInstance::SetPitch(float pitch) +{ + pImpl->mBase.SetPitch(pitch); +} + + +void SoundStreamInstance::SetPan(float pan) +{ + pImpl->mBase.SetPan(pan); +} + + +void SoundStreamInstance::Apply3D(const AudioListener& listener, const AudioEmitter& emitter, bool rhcoords) +{ + pImpl->mBase.Apply3D(listener, emitter, rhcoords); +} + + +// Public accessors. 
+bool SoundStreamInstance::IsLooped() const noexcept +{ + return pImpl->mLooped; +} + + +SoundState SoundStreamInstance::GetState() noexcept +{ + SoundState state = pImpl->mBase.GetState(pImpl->mEndStream); + if (state == STOPPED) + { + pImpl->mPlaying = false; + } + return state; +} + + +IVoiceNotify* SoundStreamInstance::GetVoiceNotify() const noexcept +{ + return pImpl.get(); +} diff --git a/Sdk/External/DirectXTK/Audio/WAVFileReader.cpp b/Sdk/External/DirectXTK/Audio/WAVFileReader.cpp new file mode 100644 index 0000000..0c37063 --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/WAVFileReader.cpp @@ -0,0 +1,700 @@ +//-------------------------------------------------------------------------------------- +// File: WAVFileReader.cpp +// +// Functions for loading WAV audio files +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//------------------------------------------------------------------------------------- + +#include "pch.h" +#include "PlatformHelpers.h" +#include "WAVFileReader.h" + +using namespace DirectX; + + +namespace +{ + //--------------------------------------------------------------------------------- + // .WAV files + //--------------------------------------------------------------------------------- + constexpr uint32_t FOURCC_RIFF_TAG = MAKEFOURCC('R', 'I', 'F', 'F'); + constexpr uint32_t FOURCC_FORMAT_TAG = MAKEFOURCC('f', 'm', 't', ' '); + constexpr uint32_t FOURCC_DATA_TAG = MAKEFOURCC('d', 'a', 't', 'a'); + constexpr uint32_t FOURCC_WAVE_FILE_TAG = MAKEFOURCC('W', 'A', 'V', 'E'); + constexpr uint32_t FOURCC_XWMA_FILE_TAG = MAKEFOURCC('X', 'W', 'M', 'A'); + constexpr uint32_t FOURCC_DLS_SAMPLE = MAKEFOURCC('w', 's', 'm', 'p'); + constexpr uint32_t FOURCC_MIDI_SAMPLE = MAKEFOURCC('s', 'm', 'p', 'l'); + constexpr uint32_t FOURCC_XWMA_DPDS = MAKEFOURCC('d', 'p', 'd', 's'); + constexpr 
uint32_t FOURCC_XMA_SEEK = MAKEFOURCC('s', 'e', 'e', 'k'); + +#pragma pack(push,1) + struct RIFFChunk + { + uint32_t tag; + uint32_t size; + }; + + struct RIFFChunkHeader + { + uint32_t tag; + uint32_t size; + uint32_t riff; + }; + + struct DLSLoop + { + static const uint32_t LOOP_TYPE_FORWARD = 0x00000000; + static const uint32_t LOOP_TYPE_RELEASE = 0x00000001; + + uint32_t size; + uint32_t loopType; + uint32_t loopStart; + uint32_t loopLength; + }; + + struct RIFFDLSSample + { + static const uint32_t OPTIONS_NOTRUNCATION = 0x00000001; + static const uint32_t OPTIONS_NOCOMPRESSION = 0x00000002; + + uint32_t size; + uint16_t unityNote; + int16_t fineTune; + int32_t gain; + uint32_t options; + uint32_t loopCount; + }; + + struct MIDILoop + { + static const uint32_t LOOP_TYPE_FORWARD = 0x00000000; + static const uint32_t LOOP_TYPE_ALTERNATING = 0x00000001; + static const uint32_t LOOP_TYPE_BACKWARD = 0x00000002; + + uint32_t cuePointId; + uint32_t type; + uint32_t start; + uint32_t end; + uint32_t fraction; + uint32_t playCount; + }; + + struct RIFFMIDISample + { + uint32_t manufacturerId; + uint32_t productId; + uint32_t samplePeriod; + uint32_t unityNode; + uint32_t pitchFraction; + uint32_t SMPTEFormat; + uint32_t SMPTEOffset; + uint32_t loopCount; + uint32_t samplerData; + }; +#pragma pack(pop) + + static_assert(sizeof(RIFFChunk) == 8, "structure size mismatch"); + static_assert(sizeof(RIFFChunkHeader) == 12, "structure size mismatch"); + static_assert(sizeof(DLSLoop) == 16, "structure size mismatch"); + static_assert(sizeof(RIFFDLSSample) == 20, "structure size mismatch"); + static_assert(sizeof(MIDILoop) == 24, "structure size mismatch"); + static_assert(sizeof(RIFFMIDISample) == 36, "structure size mismatch"); + + //--------------------------------------------------------------------------------- + const RIFFChunk* FindChunk( + _In_reads_bytes_(sizeBytes) const uint8_t* data, + _In_ size_t sizeBytes, + _In_ uint32_t tag) noexcept + { + if (!data) + return 
nullptr; + + const uint8_t* ptr = data; + const uint8_t* end = data + sizeBytes; + + while (end > (ptr + sizeof(RIFFChunk))) + { + auto header = reinterpret_cast(ptr); + if (header->tag == tag) + return header; + + auto offset = header->size + sizeof(RIFFChunk); + ptr += offset; + } + + return nullptr; + } + + + //--------------------------------------------------------------------------------- + HRESULT WaveFindFormatAndData( + _In_reads_bytes_(wavDataSize) const uint8_t* wavData, + _In_ size_t wavDataSize, + _Outptr_ const WAVEFORMATEX** pwfx, + _Outptr_ const uint8_t** pdata, + _Out_ uint32_t* dataSize, + _Out_ bool& dpds, + _Out_ bool& seek) noexcept + { + if (!wavData || !pwfx) + return E_POINTER; + + dpds = seek = false; + + if (wavDataSize < (sizeof(RIFFChunk) * 2 + sizeof(uint32_t) + sizeof(WAVEFORMAT))) + { + return E_FAIL; + } + + const uint8_t* wavEnd = wavData + wavDataSize; + + // Locate RIFF 'WAVE' + auto riffChunk = FindChunk(wavData, wavDataSize, FOURCC_RIFF_TAG); + if (!riffChunk || riffChunk->size < 4) + { + return E_FAIL; + } + + auto riffHeader = reinterpret_cast(riffChunk); + if (riffHeader->riff != FOURCC_WAVE_FILE_TAG && riffHeader->riff != FOURCC_XWMA_FILE_TAG) + { + return E_FAIL; + } + + // Locate 'fmt ' + auto ptr = reinterpret_cast(riffHeader) + sizeof(RIFFChunkHeader); + if ((ptr + sizeof(RIFFChunk)) > wavEnd) + { + return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + auto fmtChunk = FindChunk(ptr, riffHeader->size, FOURCC_FORMAT_TAG); + if (!fmtChunk || fmtChunk->size < sizeof(PCMWAVEFORMAT)) + { + return E_FAIL; + } + + ptr = reinterpret_cast(fmtChunk) + sizeof(RIFFChunk); + if (ptr + fmtChunk->size > wavEnd) + { + return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + auto wf = reinterpret_cast(ptr); + + // Validate WAVEFORMAT (focused on chunk size and format tag, not other data that XAUDIO2 will validate) + switch (wf->wFormatTag) + { + case WAVE_FORMAT_PCM: + case WAVE_FORMAT_IEEE_FLOAT: + // Can be a PCMWAVEFORMAT (16 bytes) or 
WAVEFORMATEX (18 bytes) + // We validiated chunk as at least sizeof(PCMWAVEFORMAT) above + break; + + default: + { + if (fmtChunk->size < sizeof(WAVEFORMATEX)) + { + return E_FAIL; + } + + auto wfx = reinterpret_cast(ptr); + + if (fmtChunk->size < (sizeof(WAVEFORMATEX) + wfx->cbSize)) + { + return E_FAIL; + } + + switch (wfx->wFormatTag) + { + case WAVE_FORMAT_WMAUDIO2: + case WAVE_FORMAT_WMAUDIO3: + dpds = true; + break; + + case 0x166 /*WAVE_FORMAT_XMA2*/: // XMA2 is supported by Xbox One + if ((fmtChunk->size < 52 /*sizeof(XMA2WAVEFORMATEX)*/) || (wfx->cbSize < 34 /*( sizeof(XMA2WAVEFORMATEX) - sizeof(WAVEFORMATEX) )*/)) + { + return E_FAIL; + } + seek = true; + break; + + case WAVE_FORMAT_ADPCM: + if ((fmtChunk->size < (sizeof(WAVEFORMATEX) + 32)) || (wfx->cbSize < 32 /*MSADPCM_FORMAT_EXTRA_BYTES*/)) + { + return E_FAIL; + } + break; + + case WAVE_FORMAT_EXTENSIBLE: + if ((fmtChunk->size < sizeof(WAVEFORMATEXTENSIBLE)) || (wfx->cbSize < (sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX)))) + { + return E_FAIL; + } + else + { + static const GUID s_wfexBase = { 0x00000000, 0x0000, 0x0010, { 0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71 } }; + + auto wfex = reinterpret_cast(ptr); + + if (memcmp(reinterpret_cast(&wfex->SubFormat) + sizeof(DWORD), + reinterpret_cast(&s_wfexBase) + sizeof(DWORD), sizeof(GUID) - sizeof(DWORD)) != 0) + { + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + switch (wfex->SubFormat.Data1) + { + case WAVE_FORMAT_PCM: + case WAVE_FORMAT_IEEE_FLOAT: + break; + + // MS-ADPCM and XMA2 are not supported as WAVEFORMATEXTENSIBLE + + case WAVE_FORMAT_WMAUDIO2: + case WAVE_FORMAT_WMAUDIO3: + dpds = true; + break; + + default: + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + } + break; + + default: + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + } + } + + // Locate 'data' + ptr = reinterpret_cast(riffHeader) + sizeof(RIFFChunkHeader); + if ((ptr + sizeof(RIFFChunk)) > wavEnd) + { + return 
HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + auto dataChunk = FindChunk(ptr, riffChunk->size, FOURCC_DATA_TAG); + if (!dataChunk || !dataChunk->size) + { + return HRESULT_FROM_WIN32(ERROR_INVALID_DATA); + } + + ptr = reinterpret_cast(dataChunk) + sizeof(RIFFChunk); + if (ptr + dataChunk->size > wavEnd) + { + return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + *pwfx = reinterpret_cast(wf); + *pdata = ptr; + *dataSize = dataChunk->size; + return S_OK; + } + + + //--------------------------------------------------------------------------------- + HRESULT WaveFindLoopInfo( + _In_reads_bytes_(wavDataSize) const uint8_t* wavData, + _In_ size_t wavDataSize, + _Out_ uint32_t* pLoopStart, + _Out_ uint32_t* pLoopLength) noexcept + { + if (!wavData || !pLoopStart || !pLoopLength) + return E_POINTER; + + if (wavDataSize < (sizeof(RIFFChunk) + sizeof(uint32_t))) + { + return E_FAIL; + } + + *pLoopStart = 0; + *pLoopLength = 0; + + const uint8_t* wavEnd = wavData + wavDataSize; + + // Locate RIFF 'WAVE' + auto riffChunk = FindChunk(wavData, wavDataSize, FOURCC_RIFF_TAG); + if (!riffChunk || riffChunk->size < 4) + { + return E_FAIL; + } + + auto riffHeader = reinterpret_cast(riffChunk); + if (riffHeader->riff == FOURCC_XWMA_FILE_TAG) + { + // xWMA files do not contain loop information + return S_OK; + } + + if (riffHeader->riff != FOURCC_WAVE_FILE_TAG) + { + return E_FAIL; + } + + // Locate 'wsmp' (DLS Chunk) + auto ptr = reinterpret_cast(riffHeader) + sizeof(RIFFChunkHeader); + if ((ptr + sizeof(RIFFChunk)) > wavEnd) + { + return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + auto dlsChunk = FindChunk(ptr, riffChunk->size, FOURCC_DLS_SAMPLE); + if (dlsChunk) + { + ptr = reinterpret_cast(dlsChunk) + sizeof(RIFFChunk); + if (ptr + dlsChunk->size > wavEnd) + { + return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + if (dlsChunk->size >= sizeof(RIFFDLSSample)) + { + auto dlsSample = reinterpret_cast(ptr); + + if (dlsChunk->size >= (dlsSample->size + dlsSample->loopCount * 
sizeof(DLSLoop))) + { + auto loops = reinterpret_cast(ptr + dlsSample->size); + for (uint32_t j = 0; j < dlsSample->loopCount; ++j) + { + if ((loops[j].loopType == DLSLoop::LOOP_TYPE_FORWARD || loops[j].loopType == DLSLoop::LOOP_TYPE_RELEASE)) + { + // Return 'forward' loop + *pLoopStart = loops[j].loopStart; + *pLoopLength = loops[j].loopLength; + return S_OK; + } + } + } + } + } + + // Locate 'smpl' (Sample Chunk) + auto midiChunk = FindChunk(ptr, riffChunk->size, FOURCC_MIDI_SAMPLE); + if (midiChunk) + { + ptr = reinterpret_cast(midiChunk) + sizeof(RIFFChunk); + if (ptr + midiChunk->size > wavEnd) + { + return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + if (midiChunk->size >= sizeof(RIFFMIDISample)) + { + auto midiSample = reinterpret_cast(ptr); + + if (midiChunk->size >= (sizeof(RIFFMIDISample) + midiSample->loopCount * sizeof(MIDILoop))) + { + auto loops = reinterpret_cast(ptr + sizeof(RIFFMIDISample)); + for (uint32_t j = 0; j < midiSample->loopCount; ++j) + { + if (loops[j].type == MIDILoop::LOOP_TYPE_FORWARD) + { + // Return 'forward' loop + *pLoopStart = loops[j].start; + *pLoopLength = loops[j].end - loops[j].start + 1; + return S_OK; + } + } + } + } + } + + return S_OK; + } + + + //--------------------------------------------------------------------------------- + HRESULT WaveFindTable( + _In_reads_bytes_(wavDataSize) const uint8_t* wavData, + _In_ size_t wavDataSize, + _In_ uint32_t tag, + _Outptr_result_maybenull_ const uint32_t** pData, + _Out_ uint32_t* dataCount) noexcept + { + if (!wavData || !pData || !dataCount) + return E_POINTER; + + if (wavDataSize < (sizeof(RIFFChunk) + sizeof(uint32_t))) + { + return E_FAIL; + } + + *pData = nullptr; + *dataCount = 0; + + const uint8_t* wavEnd = wavData + wavDataSize; + + // Locate RIFF 'WAVE' + auto riffChunk = FindChunk(wavData, wavDataSize, FOURCC_RIFF_TAG); + if (!riffChunk || riffChunk->size < 4) + { + return E_FAIL; + } + + auto riffHeader = reinterpret_cast(riffChunk); + if (riffHeader->riff != 
FOURCC_WAVE_FILE_TAG && riffHeader->riff != FOURCC_XWMA_FILE_TAG) + { + return E_FAIL; + } + + // Locate tag + auto ptr = reinterpret_cast(riffHeader) + sizeof(RIFFChunkHeader); + if ((ptr + sizeof(RIFFChunk)) > wavEnd) + { + return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + auto tableChunk = FindChunk(ptr, riffChunk->size, tag); + if (tableChunk) + { + ptr = reinterpret_cast(tableChunk) + sizeof(RIFFChunk); + if (ptr + tableChunk->size > wavEnd) + { + return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + if ((tableChunk->size % sizeof(uint32_t)) != 0) + { + return E_FAIL; + } + + *pData = reinterpret_cast(ptr); + *dataCount = tableChunk->size / 4; + } + + return S_OK; + } + + + //--------------------------------------------------------------------------------- + HRESULT LoadAudioFromFile( + _In_z_ const wchar_t* szFileName, + _Inout_ std::unique_ptr& wavData, + _Out_ DWORD* bytesRead) noexcept + { + if (!szFileName) + return E_INVALIDARG; + + // open the file + #if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) + ScopedHandle hFile(safe_handle(CreateFile2(szFileName, + GENERIC_READ, + FILE_SHARE_READ, + OPEN_EXISTING, + nullptr))); + #else + ScopedHandle hFile(safe_handle(CreateFileW(szFileName, + GENERIC_READ, + FILE_SHARE_READ, + nullptr, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + nullptr))); + #endif + + if (!hFile) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + // Get the file size + FILE_STANDARD_INFO fileInfo; + if (!GetFileInformationByHandleEx(hFile.get(), FileStandardInfo, &fileInfo, sizeof(fileInfo))) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + // File is too big for 32-bit allocation, so reject read + if (fileInfo.EndOfFile.HighPart > 0) + { + return E_FAIL; + } + + // Need at least enough data to have a valid minimal WAV file + if (fileInfo.EndOfFile.LowPart < (sizeof(RIFFChunk) * 2 + sizeof(DWORD) + sizeof(WAVEFORMAT))) + { + return E_FAIL; + } + + // create enough space for the file data + wavData.reset(new (std::nothrow) 
uint8_t[fileInfo.EndOfFile.LowPart]); + if (!wavData) + { + return E_OUTOFMEMORY; + } + + // read the data in + if (!ReadFile(hFile.get(), + wavData.get(), + fileInfo.EndOfFile.LowPart, + bytesRead, + nullptr + )) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + return (*bytesRead < fileInfo.EndOfFile.LowPart) ? E_FAIL : S_OK; + } +} + +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT DirectX::LoadWAVAudioInMemory( + const uint8_t* wavData, + size_t wavDataSize, + const WAVEFORMATEX** wfx, + const uint8_t** startAudio, + uint32_t* audioBytes) noexcept +{ + if (!wavData || !wfx || !startAudio || !audioBytes) + return E_INVALIDARG; + + *wfx = nullptr; + *startAudio = nullptr; + *audioBytes = 0; + + // Need at least enough data to have a valid minimal WAV file + if (wavDataSize < (sizeof(RIFFChunk) * 2 + sizeof(DWORD) + sizeof(WAVEFORMAT))) + { + return E_FAIL; + } + + bool dpds, seek; + HRESULT hr = WaveFindFormatAndData(wavData, wavDataSize, wfx, startAudio, audioBytes, dpds, seek); + if (FAILED(hr)) + return hr; + + return (dpds || seek) ? E_FAIL : S_OK; +} + + +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT DirectX::LoadWAVAudioFromFile( + const wchar_t* szFileName, + std::unique_ptr& wavData, + const WAVEFORMATEX** wfx, + const uint8_t** startAudio, + uint32_t* audioBytes) noexcept +{ + if (!szFileName || !wfx || !startAudio || !audioBytes) + return E_INVALIDARG; + + *wfx = nullptr; + *startAudio = nullptr; + *audioBytes = 0; + + DWORD bytesRead = 0; + HRESULT hr = LoadAudioFromFile(szFileName, wavData, &bytesRead); + if (FAILED(hr)) + { + return hr; + } + + bool dpds, seek; + hr = WaveFindFormatAndData(wavData.get(), bytesRead, wfx, startAudio, audioBytes, dpds, seek); + if (FAILED(hr)) + return hr; + + return (dpds || seek) ? 
E_FAIL : S_OK; +} + + +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT DirectX::LoadWAVAudioInMemoryEx( + const uint8_t* wavData, + size_t wavDataSize, + DirectX::WAVData& result) noexcept +{ + if (!wavData) + return E_INVALIDARG; + + memset(&result, 0, sizeof(result)); + + // Need at least enough data to have a valid minimal WAV file + if (wavDataSize < (sizeof(RIFFChunk) * 2 + sizeof(DWORD) + sizeof(WAVEFORMAT))) + { + return E_FAIL; + } + + bool dpds, seek; + HRESULT hr = WaveFindFormatAndData(wavData, wavDataSize, &result.wfx, &result.startAudio, &result.audioBytes, dpds, seek); + if (FAILED(hr)) + return hr; + + hr = WaveFindLoopInfo(wavData, wavDataSize, &result.loopStart, &result.loopLength); + if (FAILED(hr)) + return hr; + + if (dpds) + { + hr = WaveFindTable(wavData, wavDataSize, FOURCC_XWMA_DPDS, &result.seek, &result.seekCount); + if (FAILED(hr)) + return hr; + } + else if (seek) + { + hr = WaveFindTable(wavData, wavDataSize, FOURCC_XMA_SEEK, &result.seek, &result.seekCount); + if (FAILED(hr)) + return hr; + } + + return S_OK; +} + + +//------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT DirectX::LoadWAVAudioFromFileEx( + const wchar_t* szFileName, + std::unique_ptr& wavData, + DirectX::WAVData& result) noexcept +{ + if (!szFileName) + return E_INVALIDARG; + + memset(&result, 0, sizeof(result)); + + DWORD bytesRead = 0; + HRESULT hr = LoadAudioFromFile(szFileName, wavData, &bytesRead); + if (FAILED(hr)) + { + return hr; + } + + bool dpds, seek; + hr = WaveFindFormatAndData(wavData.get(), bytesRead, &result.wfx, &result.startAudio, &result.audioBytes, dpds, seek); + if (FAILED(hr)) + return hr; + + hr = WaveFindLoopInfo(wavData.get(), bytesRead, &result.loopStart, &result.loopLength); + if (FAILED(hr)) + return hr; + + if (dpds) + { + hr = WaveFindTable(wavData.get(), bytesRead, FOURCC_XWMA_DPDS, &result.seek, 
&result.seekCount); + if (FAILED(hr)) + return hr; + } + else if (seek) + { + hr = WaveFindTable(wavData.get(), bytesRead, FOURCC_XMA_SEEK, &result.seek, &result.seekCount); + if (FAILED(hr)) + return hr; + } + + return S_OK; +} + diff --git a/Sdk/External/DirectXTK/Audio/WAVFileReader.h b/Sdk/External/DirectXTK/Audio/WAVFileReader.h new file mode 100644 index 0000000..8a6f4dc --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/WAVFileReader.h @@ -0,0 +1,58 @@ +//-------------------------------------------------------------------------------------- +// File: WAVFileReader.h +// +// Functions for loading WAV audio files +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//------------------------------------------------------------------------------------- + +#pragma once + +#include + +#include +#include +#include + + +namespace DirectX +{ + HRESULT LoadWAVAudioInMemory( + _In_reads_bytes_(wavDataSize) const uint8_t* wavData, + _In_ size_t wavDataSize, + _Outptr_ const WAVEFORMATEX** wfx, + _Outptr_ const uint8_t** startAudio, + _Out_ uint32_t* audioBytes) noexcept; + + HRESULT LoadWAVAudioFromFile( + _In_z_ const wchar_t* szFileName, + _Inout_ std::unique_ptr& wavData, + _Outptr_ const WAVEFORMATEX** wfx, + _Outptr_ const uint8_t** startAudio, + _Out_ uint32_t* audioBytes) noexcept; + + struct WAVData + { + const WAVEFORMATEX* wfx; + const uint8_t* startAudio; + uint32_t audioBytes; + uint32_t loopStart; + uint32_t loopLength; + const uint32_t* seek; // Note: XMA Seek data is Big-Endian + uint32_t seekCount; + }; + + HRESULT LoadWAVAudioInMemoryEx( + _In_reads_bytes_(wavDataSize) const uint8_t* wavData, + _In_ size_t wavDataSize, + _Out_ WAVData& result) noexcept; + + HRESULT LoadWAVAudioFromFileEx( + _In_z_ const wchar_t* szFileName, + _Inout_ std::unique_ptr& wavData, + _Out_ WAVData& result) noexcept; +} diff 
--git a/Sdk/External/DirectXTK/Audio/WaveBank.cpp b/Sdk/External/DirectXTK/Audio/WaveBank.cpp new file mode 100644 index 0000000..aa379c4 --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/WaveBank.cpp @@ -0,0 +1,615 @@ +//-------------------------------------------------------------------------------------- +// File: WaveBank.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "Audio.h" +#include "WaveBankReader.h" +#include "SoundCommon.h" +#include "PlatformHelpers.h" + +#include + +using namespace DirectX; + + +//====================================================================================== +// WaveBank +//====================================================================================== + +// Internal object implementation class. 
+class WaveBank::Impl : public IVoiceNotify +{ +public: + explicit Impl(_In_ AudioEngine* engine) : + mEngine(engine), + mOneShots(0), + mPrepared(false), + mStreaming(false) + { + assert(mEngine != nullptr); + mEngine->RegisterNotify(this, false); + } + + Impl(Impl&&) = default; + Impl& operator= (Impl&&) = default; + + Impl(Impl const&) = delete; + Impl& operator= (Impl const&) = delete; + + ~Impl() override + { + if (!mInstances.empty()) + { + DebugTrace("WARNING: Destroying WaveBank \"%hs\" with %zu outstanding instances\n", + mReader.BankName(), mInstances.size()); + + for (auto it = mInstances.begin(); it != mInstances.end(); ++it) + { + assert(*it != nullptr); + (*it)->OnDestroyParent(); + } + + mInstances.clear(); + } + + if (mOneShots > 0) + { + DebugTrace("WARNING: Destroying WaveBank \"%hs\" with %u outstanding one shot effects\n", + mReader.BankName(), mOneShots); + } + + if (mEngine) + { + mEngine->UnregisterNotify(this, true, false); + mEngine = nullptr; + } + } + + HRESULT Initialize(_In_ AudioEngine* engine, _In_z_ const wchar_t* wbFileName) noexcept; + + void Play(unsigned int index, float volume, float pitch, float pan); + + // IVoiceNotify + void __cdecl OnBufferEnd() override + { + InterlockedDecrement(&mOneShots); + } + + void __cdecl OnCriticalError() override + { + mOneShots = 0; + } + + void __cdecl OnReset() override + { + // No action required + } + + void __cdecl OnUpdate() override + { + // We do not register for update notification + assert(false); + } + + void __cdecl OnDestroyEngine() noexcept override + { + mEngine = nullptr; + mOneShots = 0; + } + + void __cdecl OnTrim() override + { + // No action required + } + + void __cdecl GatherStatistics(AudioStatistics& stats) const noexcept override + { + stats.playingOneShots += mOneShots; + + if (!mStreaming) + { + stats.audioBytes += mReader.BankAudioSize(); + + #ifdef DIRECTX_ENABLE_XMA2 + if (mReader.HasXMA()) + stats.xmaAudioBytes += mReader.BankAudioSize(); + #endif + } + } + + void 
__cdecl OnDestroyParent() noexcept override + { + } + + AudioEngine* mEngine; + std::list mInstances; + WaveBankReader mReader; + uint32_t mOneShots; + bool mPrepared; + bool mStreaming; +}; + + +_Use_decl_annotations_ +HRESULT WaveBank::Impl::Initialize(AudioEngine* engine, const wchar_t* wbFileName) noexcept +{ + if (!engine || !wbFileName) + return E_INVALIDARG; + + HRESULT hr = mReader.Open(wbFileName); + if (FAILED(hr)) + return hr; + + mStreaming = mReader.IsStreamingBank(); + + return S_OK; +} + + +void WaveBank::Impl::Play(unsigned int index, float volume, float pitch, float pan) +{ + assert(volume >= -XAUDIO2_MAX_VOLUME_LEVEL && volume <= XAUDIO2_MAX_VOLUME_LEVEL); + assert(pitch >= -1.f && pitch <= 1.f); + assert(pan >= -1.f && pan <= 1.f); + + if (mStreaming) + { + DebugTrace("ERROR: One-shots can only be created from an in-memory wave bank\n"); + throw std::exception("WaveBank::Play"); + } + + if (index >= mReader.Count()) + { + DebugTrace("WARNING: Index %u not found in wave bank with only %u entries, one-shot not triggered\n", + index, mReader.Count()); + return; + } + + if (!mPrepared) + { + mReader.WaitOnPrepare(); + mPrepared = true; + } + + char wfxbuff[64] = {}; + auto wfx = reinterpret_cast(wfxbuff); + HRESULT hr = mReader.GetFormat(index, wfx, sizeof(wfxbuff)); + ThrowIfFailed(hr); + + IXAudio2SourceVoice* voice = nullptr; + mEngine->AllocateVoice(wfx, SoundEffectInstance_Default, true, &voice); + + if (!voice) + return; + + if (volume != 1.f) + { + hr = voice->SetVolume(volume); + ThrowIfFailed(hr); + } + + if (pitch != 0.f) + { + float fr = XAudio2SemitonesToFrequencyRatio(pitch * 12.f); + + hr = voice->SetFrequencyRatio(fr); + ThrowIfFailed(hr); + } + + if (pan != 0.f) + { + float matrix[16]; + if (ComputePan(pan, wfx->nChannels, matrix)) + { + hr = voice->SetOutputMatrix(nullptr, wfx->nChannels, mEngine->GetOutputChannels(), matrix); + ThrowIfFailed(hr); + } + } + + hr = voice->Start(0); + ThrowIfFailed(hr); + + XAUDIO2_BUFFER buffer = {}; 
+ hr = mReader.GetWaveData(index, &buffer.pAudioData, buffer.AudioBytes); + ThrowIfFailed(hr); + + WaveBankReader::Metadata metadata; + hr = mReader.GetMetadata(index, metadata); + ThrowIfFailed(hr); + + buffer.Flags = XAUDIO2_END_OF_STREAM; + buffer.pContext = this; + + #ifdef DIRECTX_ENABLE_XWMA + + XAUDIO2_BUFFER_WMA wmaBuffer = {}; + + uint32_t tag; + hr = mReader.GetSeekTable(index, &wmaBuffer.pDecodedPacketCumulativeBytes, wmaBuffer.PacketCount, tag); + ThrowIfFailed(hr); + + if (tag == WAVE_FORMAT_WMAUDIO2 || tag == WAVE_FORMAT_WMAUDIO3) + { + hr = voice->SubmitSourceBuffer(&buffer, &wmaBuffer); + } + else + #endif // xWMA + { + hr = voice->SubmitSourceBuffer(&buffer, nullptr); + } + if (FAILED(hr)) + { + DebugTrace("ERROR: WaveBank failed (%08X) when submitting buffer:\n", static_cast(hr)); + DebugTrace("\tFormat Tag %u, %u channels, %u-bit, %u Hz, %u bytes\n", + wfx->wFormatTag, wfx->nChannels, wfx->wBitsPerSample, wfx->nSamplesPerSec, metadata.lengthBytes); + throw std::exception("SubmitSourceBuffer"); + } + + InterlockedIncrement(&mOneShots); +} + + +//-------------------------------------------------------------------------------------- +// WaveBank +//-------------------------------------------------------------------------------------- + +// Public constructors. +_Use_decl_annotations_ +WaveBank::WaveBank(AudioEngine* engine, const wchar_t* wbFileName) + : pImpl(std::make_unique(engine)) +{ + HRESULT hr = pImpl->Initialize(engine, wbFileName); + if (FAILED(hr)) + { + DebugTrace("ERROR: WaveBank failed (%08X) to intialize from .xwb file \"%ls\"\n", + static_cast(hr), wbFileName); + throw std::exception("WaveBank"); + } + + DebugTrace("INFO: WaveBank \"%hs\" with %u entries loaded from .xwb file \"%ls\"\n", + pImpl->mReader.BankName(), pImpl->mReader.Count(), wbFileName); +} + + +// Move constructor. +WaveBank::WaveBank(WaveBank&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. 
+WaveBank& WaveBank::operator= (WaveBank&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +WaveBank::~WaveBank() +{ +} + + +// Public methods (one-shots) +void WaveBank::Play(unsigned int index) +{ + pImpl->Play(index, 1.f, 0.f, 0.f); +} + + +void WaveBank::Play(unsigned int index, float volume, float pitch, float pan) +{ + pImpl->Play(index, volume, pitch, pan); +} + + +void WaveBank::Play(_In_z_ const char* name) +{ + unsigned int index = pImpl->mReader.Find(name); + if (index == unsigned(-1)) + { + DebugTrace("WARNING: Name '%hs' not found in wave bank, one-shot not triggered\n", name); + return; + } + + pImpl->Play(index, 1.f, 0.f, 0.f); +} + + +void WaveBank::Play(_In_z_ const char* name, float volume, float pitch, float pan) +{ + unsigned int index = pImpl->mReader.Find(name); + if (index == unsigned(-1)) + { + DebugTrace("WARNING: Name '%hs' not found in wave bank, one-shot not triggered\n", name); + return; + } + + pImpl->Play(index, volume, pitch, pan); +} + + +// Public methods (sound effect instance) +std::unique_ptr WaveBank::CreateInstance(unsigned int index, SOUND_EFFECT_INSTANCE_FLAGS flags) +{ + auto& wb = pImpl->mReader; + + if (pImpl->mStreaming) + { + DebugTrace("ERROR: SoundEffectInstances can only be created from an in-memory wave bank\n"); + throw std::exception("WaveBank::CreateInstance"); + } + + if (index >= wb.Count()) + { + // We don't throw an exception here as titles often simply ignore missing assets rather than fail + return std::unique_ptr(); + } + + if (!pImpl->mPrepared) + { + wb.WaitOnPrepare(); + pImpl->mPrepared = true; + } + + auto effect = new SoundEffectInstance(pImpl->mEngine, this, index, flags); + assert(effect != nullptr); + pImpl->mInstances.emplace_back(effect->GetVoiceNotify()); + return std::unique_ptr(effect); +} + + +std::unique_ptr WaveBank::CreateInstance(_In_z_ const char* name, SOUND_EFFECT_INSTANCE_FLAGS flags) +{ + unsigned int index = 
pImpl->mReader.Find(name); + if (index == unsigned(-1)) + { + // We don't throw an exception here as titles often simply ignore missing assets rather than fail + return std::unique_ptr(); + } + + return CreateInstance(index, flags); +} + + +// Public methods (sound stream instance) +std::unique_ptr WaveBank::CreateStreamInstance(unsigned int index, SOUND_EFFECT_INSTANCE_FLAGS flags) +{ + auto& wb = pImpl->mReader; + + if (!pImpl->mStreaming) + { + DebugTrace("ERROR: SoundStreamInstances can only be created from a streaming wave bank\n"); + throw std::exception("WaveBank::CreateStreamInstance"); + } + + if (index >= wb.Count()) + { + // We don't throw an exception here as titles often simply ignore missing assets rather than fail + return std::unique_ptr(); + } + + if (!pImpl->mPrepared) + { + wb.WaitOnPrepare(); + pImpl->mPrepared = true; + } + + auto effect = new SoundStreamInstance(pImpl->mEngine, this, index, flags); + assert(effect != nullptr); + pImpl->mInstances.emplace_back(effect->GetVoiceNotify()); + return std::unique_ptr(effect); +} + + +std::unique_ptr WaveBank::CreateStreamInstance(_In_z_ const char* name, SOUND_EFFECT_INSTANCE_FLAGS flags) +{ + unsigned int index = pImpl->mReader.Find(name); + if (index == unsigned(-1)) + { + // We don't throw an exception here as titles often simply ignore missing assets rather than fail + return std::unique_ptr(); + } + + return CreateStreamInstance(index, flags); +} + + +void WaveBank::UnregisterInstance(_In_ IVoiceNotify* instance) +{ + auto it = std::find(pImpl->mInstances.begin(), pImpl->mInstances.end(), instance); + if (it == pImpl->mInstances.end()) + return; + + pImpl->mInstances.erase(it); +} + + +// Public accessors. 
+bool WaveBank::IsPrepared() const noexcept +{ + if (pImpl->mPrepared) + return true; + + if (!pImpl->mReader.IsPrepared()) + return false; + + pImpl->mPrepared = true; + return true; +} + + +bool WaveBank::IsInUse() const noexcept +{ + return (pImpl->mOneShots > 0) || !pImpl->mInstances.empty(); +} + + +bool WaveBank::IsStreamingBank() const noexcept +{ + return pImpl->mReader.IsStreamingBank(); +} + + +size_t WaveBank::GetSampleSizeInBytes(unsigned int index) const noexcept +{ + if (index >= pImpl->mReader.Count()) + return 0; + + WaveBankReader::Metadata metadata; + HRESULT hr = pImpl->mReader.GetMetadata(index, metadata); + if (FAILED(hr)) + return 0; + + return metadata.lengthBytes; +} + + +size_t WaveBank::GetSampleDuration(unsigned int index) const noexcept +{ + if (index >= pImpl->mReader.Count()) + return 0; + + WaveBankReader::Metadata metadata; + HRESULT hr = pImpl->mReader.GetMetadata(index, metadata); + if (FAILED(hr)) + return 0; + + return metadata.duration; +} + + +size_t WaveBank::GetSampleDurationMS(unsigned int index) const noexcept +{ + if (index >= pImpl->mReader.Count()) + return 0; + + char buff[64] = {}; + auto wfx = reinterpret_cast(buff); + HRESULT hr = pImpl->mReader.GetFormat(index, wfx, sizeof(buff)); + if (FAILED(hr)) + return 0; + + WaveBankReader::Metadata metadata; + hr = pImpl->mReader.GetMetadata(index, metadata); + if (FAILED(hr)) + return 0; + + return static_cast((uint64_t(metadata.duration) * 1000) / wfx->nSamplesPerSec); +} + + +_Use_decl_annotations_ +const WAVEFORMATEX* WaveBank::GetFormat(unsigned int index, WAVEFORMATEX* wfx, size_t maxsize) const noexcept +{ + if (index >= pImpl->mReader.Count()) + return nullptr; + + HRESULT hr = pImpl->mReader.GetFormat(index, wfx, maxsize); + if (FAILED(hr)) + return nullptr; + + return wfx; +} + + +_Use_decl_annotations_ +int WaveBank::Find(const char* name) const +{ + return static_cast(pImpl->mReader.Find(name)); +} + + +#ifdef DIRECTX_ENABLE_XWMA + +_Use_decl_annotations_ +bool 
WaveBank::FillSubmitBuffer(unsigned int index, XAUDIO2_BUFFER& buffer, XAUDIO2_BUFFER_WMA& wmaBuffer) const +{ + memset(&buffer, 0, sizeof(buffer)); + memset(&wmaBuffer, 0, sizeof(wmaBuffer)); + + HRESULT hr = pImpl->mReader.GetWaveData(index, &buffer.pAudioData, buffer.AudioBytes); + ThrowIfFailed(hr); + + WaveBankReader::Metadata metadata; + hr = pImpl->mReader.GetMetadata(index, metadata); + ThrowIfFailed(hr); + + buffer.LoopBegin = metadata.loopStart; + buffer.LoopLength = metadata.loopLength; + + uint32_t tag; + hr = pImpl->mReader.GetSeekTable(index, &wmaBuffer.pDecodedPacketCumulativeBytes, wmaBuffer.PacketCount, tag); + ThrowIfFailed(hr); + + return (tag == WAVE_FORMAT_WMAUDIO2 || tag == WAVE_FORMAT_WMAUDIO3); +} + +#else // !xWMA + +_Use_decl_annotations_ +void WaveBank::FillSubmitBuffer(unsigned int index, XAUDIO2_BUFFER& buffer) const +{ + memset(&buffer, 0, sizeof(buffer)); + + HRESULT hr = pImpl->mReader.GetWaveData(index, &buffer.pAudioData, buffer.AudioBytes); + ThrowIfFailed(hr); + + WaveBankReader::Metadata metadata; + hr = pImpl->mReader.GetMetadata(index, metadata); + ThrowIfFailed(hr); + + buffer.LoopBegin = metadata.loopStart; + buffer.LoopLength = metadata.loopLength; +} + +#endif + + +HANDLE WaveBank::GetAsyncHandle() const noexcept +{ + if (pImpl) + { + return pImpl->mReader.GetAsyncHandle(); + } + + return nullptr; +} + + +_Use_decl_annotations_ +bool WaveBank::GetPrivateData(unsigned int index, void* data, size_t datasize) +{ + if (index >= pImpl->mReader.Count()) + return false; + + if (!data) + return false; + + switch (datasize) + { + case sizeof(WaveBankReader::Metadata): + { + auto ptr = reinterpret_cast(data); + return SUCCEEDED(pImpl->mReader.GetMetadata(index, *ptr)); + } + + case sizeof(WaveBankSeekData): + { + auto ptr = reinterpret_cast(data); + return SUCCEEDED(pImpl->mReader.GetSeekTable(index, &ptr->seekTable, ptr->seekCount, ptr->tag)); + } + + default: + return false; + } +} diff --git 
a/Sdk/External/DirectXTK/Audio/WaveBankReader.cpp b/Sdk/External/DirectXTK/Audio/WaveBankReader.cpp new file mode 100644 index 0000000..21e3fc0 --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/WaveBankReader.cpp @@ -0,0 +1,1386 @@ +//-------------------------------------------------------------------------------------- +// File: WaveBankReader.cpp +// +// Functions for loading audio data from Wave Banks +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//------------------------------------------------------------------------------------- + +#include "pch.h" +#include "WaveBankReader.h" +#include "Audio.h" +#include "PlatformHelpers.h" +#include "SoundCommon.h" + +#if (defined(_XBOX_ONE) && defined(_TITLE)) || defined(_GAMING_XBOX) +#include +#include +#endif + + +namespace +{ +#pragma pack(push, 1) + + constexpr size_t DVD_SECTOR_SIZE = 2048; + constexpr size_t DVD_BLOCK_SIZE = DVD_SECTOR_SIZE * 16; + + constexpr size_t ALIGNMENT_MIN = 4; + constexpr size_t ALIGNMENT_DVD = DVD_SECTOR_SIZE; + + constexpr size_t MAX_DATA_SEGMENT_SIZE = 0xFFFFFFFF; + constexpr size_t MAX_COMPACT_DATA_SEGMENT_SIZE = 0x001FFFFF; + + struct REGION + { + uint32_t dwOffset; // Region offset, in bytes. + uint32_t dwLength; // Region length, in bytes. + + void BigEndian() noexcept + { + dwOffset = _byteswap_ulong(dwOffset); + dwLength = _byteswap_ulong(dwLength); + } + }; + + struct SAMPLEREGION + { + uint32_t dwStartSample; // Start sample for the region. + uint32_t dwTotalSamples; // Region length in samples. 
+ + void BigEndian() noexcept + { + dwStartSample = _byteswap_ulong(dwStartSample); + dwTotalSamples = _byteswap_ulong(dwTotalSamples); + } + }; + + struct HEADER + { + static const uint32_t SIGNATURE = MAKEFOURCC('W', 'B', 'N', 'D'); + static const uint32_t BE_SIGNATURE = MAKEFOURCC('D', 'N', 'B', 'W'); + static const uint32_t VERSION = 44; + + enum SEGIDX + { + SEGIDX_BANKDATA = 0, // Bank data + SEGIDX_ENTRYMETADATA, // Entry meta-data + SEGIDX_SEEKTABLES, // Storage for seek tables for the encoded waves. + SEGIDX_ENTRYNAMES, // Entry friendly names + SEGIDX_ENTRYWAVEDATA, // Entry wave data + SEGIDX_COUNT + }; + + uint32_t dwSignature; // File signature + uint32_t dwVersion; // Version of the tool that created the file + uint32_t dwHeaderVersion; // Version of the file format + REGION Segments[SEGIDX_COUNT]; // Segment lookup table + + void BigEndian() noexcept + { + // Leave dwSignature alone as indicator of BE vs. LE + + dwVersion = _byteswap_ulong(dwVersion); + dwHeaderVersion = _byteswap_ulong(dwHeaderVersion); + for (size_t j = 0; j < SEGIDX_COUNT; ++j) + { + Segments[j].BigEndian(); + } + } + }; + +#pragma warning( disable : 4201 4203 ) + + union MINIWAVEFORMAT + { + static const uint32_t TAG_PCM = 0x0; + static const uint32_t TAG_XMA = 0x1; + static const uint32_t TAG_ADPCM = 0x2; + static const uint32_t TAG_WMA = 0x3; + + static const uint32_t BITDEPTH_8 = 0x0; // PCM only + static const uint32_t BITDEPTH_16 = 0x1; // PCM only + + static const size_t ADPCM_BLOCKALIGN_CONVERSION_OFFSET = 22; + + struct + { + uint32_t wFormatTag : 2; // Format tag + uint32_t nChannels : 3; // Channel count (1 - 6) + uint32_t nSamplesPerSec : 18; // Sampling rate + uint32_t wBlockAlign : 8; // Block alignment. For WMA, lower 6 bits block alignment index, upper 2 bits bytes-per-second index. + uint32_t wBitsPerSample : 1; // Bits per sample (8 vs. 
16, PCM only); WMAudio2/WMAudio3 (for WMA) + }; + + uint32_t dwValue; + + void BigEndian() noexcept + { + dwValue = _byteswap_ulong(dwValue); + } + + WORD BitsPerSample() const noexcept + { + if (wFormatTag == TAG_XMA) + return 16; // XMA_OUTPUT_SAMPLE_BITS == 16 + if (wFormatTag == TAG_WMA) + return 16; + if (wFormatTag == TAG_ADPCM) + return 4; // MSADPCM_BITS_PER_SAMPLE == 4 + + // wFormatTag must be TAG_PCM (2 bits can only represent 4 different values) + return (wBitsPerSample == BITDEPTH_16) ? 16u : 8u; + } + + DWORD BlockAlign() const noexcept + { + switch (wFormatTag) + { + case TAG_PCM: + return wBlockAlign; + + case TAG_XMA: + return (nChannels * 16 / 8); // XMA_OUTPUT_SAMPLE_BITS = 16 + + case TAG_ADPCM: + return (wBlockAlign + ADPCM_BLOCKALIGN_CONVERSION_OFFSET) * nChannels; + + case TAG_WMA: + { + static const uint32_t aWMABlockAlign[] = + { + 929, + 1487, + 1280, + 2230, + 8917, + 8192, + 4459, + 5945, + 2304, + 1536, + 1485, + 1008, + 2731, + 4096, + 6827, + 5462, + 1280 + }; + + uint32_t dwBlockAlignIndex = wBlockAlign & 0x1F; + if (dwBlockAlignIndex < _countof(aWMABlockAlign)) + return aWMABlockAlign[dwBlockAlignIndex]; + } + break; + } + + return 0; + } + + DWORD AvgBytesPerSec() const noexcept + { + switch (wFormatTag) + { + case TAG_PCM: + return nSamplesPerSec * wBlockAlign; + + case TAG_XMA: + return nSamplesPerSec * BlockAlign(); + + case TAG_ADPCM: + { + uint32_t blockAlign = BlockAlign(); + uint32_t samplesPerAdpcmBlock = AdpcmSamplesPerBlock(); + return blockAlign * nSamplesPerSec / samplesPerAdpcmBlock; + } + + case TAG_WMA: + { + static const uint32_t aWMAAvgBytesPerSec[] = + { + 12000, + 24000, + 4000, + 6000, + 8000, + 20000, + 2500 + }; + // bitrate = entry * 8 + + uint32_t dwBytesPerSecIndex = wBlockAlign >> 5; + if (dwBytesPerSecIndex < _countof(aWMAAvgBytesPerSec)) + return aWMAAvgBytesPerSec[dwBytesPerSecIndex]; + } + break; + } + + return 0; + } + + DWORD AdpcmSamplesPerBlock() const noexcept + { + uint32_t nBlockAlign = 
(wBlockAlign + ADPCM_BLOCKALIGN_CONVERSION_OFFSET) * nChannels; + return nBlockAlign * 2 / uint32_t(nChannels) - 12; + } + + void AdpcmFillCoefficientTable(ADPCMWAVEFORMAT *fmt) const noexcept + { + // These are fixed since we are always using MS ADPCM + fmt->wNumCoef = 7 /* MSADPCM_NUM_COEFFICIENTS */; + + static ADPCMCOEFSET aCoef[7] = { { 256, 0}, {512, -256}, {0,0}, {192,64}, {240,0}, {460, -208}, {392,-232} }; + memcpy(&fmt->aCoef, aCoef, sizeof(aCoef)); + } + }; + + struct BANKDATA + { + static const size_t BANKNAME_LENGTH = 64; + + static const uint32_t TYPE_BUFFER = 0x00000000; + static const uint32_t TYPE_STREAMING = 0x00000001; + static const uint32_t TYPE_MASK = 0x00000001; + + static const uint32_t FLAGS_ENTRYNAMES = 0x00010000; + static const uint32_t FLAGS_COMPACT = 0x00020000; + static const uint32_t FLAGS_SYNC_DISABLED = 0x00040000; + static const uint32_t FLAGS_SEEKTABLES = 0x00080000; + static const uint32_t FLAGS_MASK = 0x000F0000; + + uint32_t dwFlags; // Bank flags + uint32_t dwEntryCount; // Number of entries in the bank + char szBankName[BANKNAME_LENGTH]; // Bank friendly name + uint32_t dwEntryMetaDataElementSize; // Size of each entry meta-data element, in bytes + uint32_t dwEntryNameElementSize; // Size of each entry name element, in bytes + uint32_t dwAlignment; // Entry alignment, in bytes + MINIWAVEFORMAT CompactFormat; // Format data for compact bank + FILETIME BuildTime; // Build timestamp + + void BigEndian() noexcept + { + dwFlags = _byteswap_ulong(dwFlags); + dwEntryCount = _byteswap_ulong(dwEntryCount); + dwEntryMetaDataElementSize = _byteswap_ulong(dwEntryMetaDataElementSize); + dwEntryNameElementSize = _byteswap_ulong(dwEntryNameElementSize); + dwAlignment = _byteswap_ulong(dwAlignment); + CompactFormat.BigEndian(); + BuildTime.dwLowDateTime = _byteswap_ulong(BuildTime.dwLowDateTime); + BuildTime.dwHighDateTime = _byteswap_ulong(BuildTime.dwHighDateTime); + } + }; + + struct ENTRY + { + static const uint32_t FLAGS_READAHEAD = 
0x00000001; // Enable stream read-ahead + static const uint32_t FLAGS_LOOPCACHE = 0x00000002; // One or more looping sounds use this wave + static const uint32_t FLAGS_REMOVELOOPTAIL = 0x00000004;// Remove data after the end of the loop region + static const uint32_t FLAGS_IGNORELOOP = 0x00000008; // Used internally when the loop region can't be used + static const uint32_t FLAGS_MASK = 0x00000008; + + union + { + struct + { + // Entry flags + uint32_t dwFlags : 4; + + // Duration of the wave, in units of one sample. + // For instance, a ten second long wave sampled + // at 48KHz would have a duration of 480,000. + // This value is not affected by the number of + // channels, the number of bits per sample, or the + // compression format of the wave. + uint32_t Duration : 28; + }; + uint32_t dwFlagsAndDuration; + }; + + MINIWAVEFORMAT Format; // Entry format. + REGION PlayRegion; // Region within the wave data segment that contains this entry. + SAMPLEREGION LoopRegion; // Region within the wave data (in samples) that should loop. 
+ + void BigEndian() noexcept + { + dwFlagsAndDuration = _byteswap_ulong(dwFlagsAndDuration); + Format.BigEndian(); + PlayRegion.BigEndian(); + LoopRegion.BigEndian(); + } + }; + + struct ENTRYCOMPACT + { + uint32_t dwOffset : 21; // Data offset, in multiplies of the bank alignment + uint32_t dwLengthDeviation : 11; // Data length deviation, in bytes + + void BigEndian() noexcept + { + *reinterpret_cast(this) = _byteswap_ulong(*reinterpret_cast(this)); + } + + void ComputeLocations(DWORD& offset, DWORD& length, uint32_t index, const HEADER& header, const BANKDATA& data, const ENTRYCOMPACT* entries) const noexcept + { + offset = dwOffset * data.dwAlignment; + + if (index < (data.dwEntryCount - 1)) + { + length = (entries[index + 1].dwOffset * data.dwAlignment) - offset - dwLengthDeviation; + } + else + { + length = header.Segments[HEADER::SEGIDX_ENTRYWAVEDATA].dwLength - offset - dwLengthDeviation; + } + } + + static uint32_t GetDuration(DWORD length, const BANKDATA& data, const uint32_t* seekTable) noexcept + { + switch (data.CompactFormat.wFormatTag) + { + case MINIWAVEFORMAT::TAG_ADPCM: + { + uint32_t duration = (length / data.CompactFormat.BlockAlign()) * data.CompactFormat.AdpcmSamplesPerBlock(); + uint32_t partial = length % data.CompactFormat.BlockAlign(); + if (partial) + { + if (partial >= (7u * data.CompactFormat.nChannels)) + duration += (partial * 2 / data.CompactFormat.nChannels - 12); + } + return duration; + } + + case MINIWAVEFORMAT::TAG_WMA: + if (seekTable) + { + uint32_t seekCount = *seekTable; + if (seekCount > 0) + { + return seekTable[seekCount] / uint32_t(2 * data.CompactFormat.nChannels); + } + } + return 0; + + case MINIWAVEFORMAT::TAG_XMA: + if (seekTable) + { + uint32_t seekCount = *seekTable; + if (seekCount > 0) + { + return seekTable[seekCount]; + } + } + return 0; + + default: + return uint32_t((uint64_t(length) * 8) + / (uint64_t(data.CompactFormat.BitsPerSample()) * uint64_t(data.CompactFormat.nChannels))); + } + } + }; + +#pragma 
pack(pop) + + inline const uint32_t* FindSeekTable(uint32_t index, const uint8_t* seekTable, const HEADER& header, const BANKDATA& data) noexcept + { + if (!seekTable || index >= data.dwEntryCount) + return nullptr; + + uint32_t seekSize = header.Segments[HEADER::SEGIDX_SEEKTABLES].dwLength; + + if ((index * sizeof(uint32_t)) > seekSize) + return nullptr; + + auto table = reinterpret_cast(seekTable); + uint32_t offset = table[index]; + if (offset == uint32_t(-1)) + return nullptr; + + offset += sizeof(uint32_t) * data.dwEntryCount; + + if (offset > seekSize) + return nullptr; + + return reinterpret_cast(seekTable + offset); + } +} + +static_assert(sizeof(REGION) == 8, "Mismatch with xact3wb.h"); +static_assert(sizeof(SAMPLEREGION) == 8, "Mismatch with xact3wb.h"); +static_assert(sizeof(HEADER) == 52, "Mismatch with xact3wb.h"); +static_assert(sizeof(ENTRY) == 24, "Mismatch with xact3wb.h"); +static_assert(sizeof(MINIWAVEFORMAT) == 4, "Mismatch with xact3wb.h"); +static_assert(sizeof(ENTRY) == 24, "Mismatch with xact3wb.h"); +static_assert(sizeof(ENTRYCOMPACT) == 4, "Mismatch with xact3wb.h"); +static_assert(sizeof(BANKDATA) == 96, "Mismatch with xact3wb.h"); + +using namespace DirectX; + +//-------------------------------------------------------------------------------------- +class WaveBankReader::Impl +{ +public: + Impl() noexcept : + m_async(INVALID_HANDLE_VALUE), + m_request{}, + m_prepared(false), + m_header{}, + m_data{} + #ifdef DIRECTX_ENABLE_XMA2 + , m_xmaMemory(nullptr) + #endif + { + } + + Impl(Impl&&) = default; + Impl& operator= (Impl&&) = default; + + Impl(Impl const&) = delete; + Impl& operator= (Impl const&) = delete; + + ~Impl() { Close(); } + + HRESULT Open(_In_z_ const wchar_t* szFileName) noexcept(false); + void Close() noexcept; + + HRESULT GetFormat(_In_ uint32_t index, _Out_writes_bytes_(maxsize) WAVEFORMATEX* pFormat, _In_ size_t maxsize) const noexcept; + + HRESULT GetWaveData(_In_ uint32_t index, _Outptr_ const uint8_t** pData, _Out_ 
uint32_t& dataSize) const noexcept; + + HRESULT GetSeekTable(_In_ uint32_t index, _Out_ const uint32_t** pData, _Out_ uint32_t& dataCount, _Out_ uint32_t& tag) const noexcept; + + HRESULT GetMetadata(_In_ uint32_t index, _Out_ Metadata& metadata) const noexcept; + + bool UpdatePrepared() noexcept; + + void Clear() noexcept + { + memset(&m_header, 0, sizeof(HEADER)); + memset(&m_data, 0, sizeof(BANKDATA)); + + m_names.clear(); + m_entries.reset(); + m_seekData.reset(); + m_waveData.reset(); + + #ifdef DIRECTX_ENABLE_XMA2 + if (m_xmaMemory) + { + ApuFree(m_xmaMemory); + m_xmaMemory = nullptr; + } + #endif + } + + HANDLE m_async; + ScopedHandle m_event; + OVERLAPPED m_request; + bool m_prepared; + + HEADER m_header; + BANKDATA m_data; + std::map m_names; + +private: + std::unique_ptr m_entries; + std::unique_ptr m_seekData; + std::unique_ptr m_waveData; + +#ifdef DIRECTX_ENABLE_XMA2 +public: + void* m_xmaMemory; +#endif +}; + + +_Use_decl_annotations_ +HRESULT WaveBankReader::Impl::Open(const wchar_t* szFileName) noexcept(false) +{ + Close(); + Clear(); + + m_prepared = false; + + m_event.reset(CreateEventEx(nullptr, nullptr, CREATE_EVENT_MANUAL_RESET, EVENT_MODIFY_STATE | SYNCHRONIZE)); + if (!m_event) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) + CREATEFILE2_EXTENDED_PARAMETERS params = { sizeof(CREATEFILE2_EXTENDED_PARAMETERS), 0, 0, 0, {}, nullptr }; + params.dwFileAttributes = FILE_ATTRIBUTE_NORMAL; + params.dwFileFlags = FILE_FLAG_OVERLAPPED | FILE_FLAG_SEQUENTIAL_SCAN; + ScopedHandle hFile(safe_handle(CreateFile2(szFileName, + GENERIC_READ, + FILE_SHARE_READ, + OPEN_EXISTING, + ¶ms))); +#else + ScopedHandle hFile(safe_handle(CreateFileW(szFileName, + GENERIC_READ, + FILE_SHARE_READ, + nullptr, + OPEN_EXISTING, + FILE_FLAG_OVERLAPPED | FILE_FLAG_SEQUENTIAL_SCAN, + nullptr))); +#endif + + if (!hFile) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + // Read and verify header + OVERLAPPED request = {}; + 
request.hEvent = m_event.get(); + + bool wait = false; + if (!ReadFile(hFile.get(), &m_header, sizeof(m_header), nullptr, &request)) + { + DWORD error = GetLastError(); + if (error != ERROR_IO_PENDING) + return HRESULT_FROM_WIN32(error); + wait = true; + } + + DWORD bytes; +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) + BOOL result = GetOverlappedResultEx(hFile.get(), &request, &bytes, INFINITE, FALSE); +#else + if (wait) + (void)WaitForSingleObject(m_event.get(), INFINITE); + + BOOL result = GetOverlappedResult(hFile.get(), &request, &bytes, FALSE); +#endif + + if (!result || (bytes != sizeof(m_header))) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + if (m_header.dwSignature != HEADER::SIGNATURE && m_header.dwSignature != HEADER::BE_SIGNATURE) + { + return E_FAIL; + } + + bool be = (m_header.dwSignature == HEADER::BE_SIGNATURE); + if (be) + { + DebugTrace("INFO: \"%ls\" is a big-endian (Xbox 360) wave bank\n", szFileName); + m_header.BigEndian(); + } + + if (m_header.dwHeaderVersion != HEADER::VERSION) + { + return E_FAIL; + } + + // Load bank data + memset(&request, 0, sizeof(request)); + request.Offset = m_header.Segments[HEADER::SEGIDX_BANKDATA].dwOffset; + request.hEvent = m_event.get(); + + wait = false; + if (!ReadFile(hFile.get(), &m_data, sizeof(m_data), nullptr, &request)) + { + DWORD error = GetLastError(); + if (error != ERROR_IO_PENDING) + return HRESULT_FROM_WIN32(error); + wait = true; + } + +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) + result = GetOverlappedResultEx(hFile.get(), &request, &bytes, INFINITE, FALSE); +#else + if (wait) + (void)WaitForSingleObject(m_event.get(), INFINITE); + + result = GetOverlappedResult(hFile.get(), &request, &bytes, FALSE); +#endif + + if (!result || (bytes != sizeof(m_data))) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + if (be) + m_data.BigEndian(); + + if (!m_data.dwEntryCount) + { + return HRESULT_FROM_WIN32(ERROR_NO_DATA); + } + + if (m_data.dwFlags & BANKDATA::TYPE_STREAMING) + { + if 
(m_data.dwAlignment < ALIGNMENT_DVD) + return E_FAIL; + if (m_data.dwAlignment % DVD_SECTOR_SIZE) + return E_FAIL; + } + else if (m_data.dwAlignment < ALIGNMENT_MIN) + { + return E_FAIL; + } + + if (m_data.dwFlags & BANKDATA::FLAGS_COMPACT) + { + if (m_data.dwEntryMetaDataElementSize != sizeof(ENTRYCOMPACT)) + { + return E_FAIL; + } + + if (m_header.Segments[HEADER::SEGIDX_ENTRYWAVEDATA].dwLength > (MAX_COMPACT_DATA_SEGMENT_SIZE * m_data.dwAlignment)) + { + // Data segment is too large to be valid compact wavebank + return E_FAIL; + } + } + else + { + if (m_data.dwEntryMetaDataElementSize != sizeof(ENTRY)) + { + return E_FAIL; + } + } + + DWORD metadataBytes = m_header.Segments[HEADER::SEGIDX_ENTRYMETADATA].dwLength; + if (metadataBytes != (m_data.dwEntryCount * m_data.dwEntryMetaDataElementSize)) + { + return E_FAIL; + } + + // Load names + DWORD namesBytes = m_header.Segments[HEADER::SEGIDX_ENTRYNAMES].dwLength; + if (namesBytes > 0) + { + if (namesBytes >= (m_data.dwEntryNameElementSize * m_data.dwEntryCount)) + { + std::unique_ptr temp(new (std::nothrow) char[namesBytes]); + if (!temp) + return E_OUTOFMEMORY; + + memset(&request, 0, sizeof(request)); + request.Offset = m_header.Segments[HEADER::SEGIDX_ENTRYNAMES].dwOffset; + request.hEvent = m_event.get(); + + wait = false; + if (!ReadFile(hFile.get(), temp.get(), namesBytes, nullptr, &request)) + { + DWORD error = GetLastError(); + if (error != ERROR_IO_PENDING) + return HRESULT_FROM_WIN32(error); + wait = true; + } + + #if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) + result = GetOverlappedResultEx(hFile.get(), &request, &bytes, INFINITE, FALSE); + #else + if (wait) + (void)WaitForSingleObject(m_event.get(), INFINITE); + + result = GetOverlappedResult(hFile.get(), &request, &bytes, FALSE); + #endif + + if (!result || (namesBytes != bytes)) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + for (uint32_t j = 0; j < m_data.dwEntryCount; ++j) + { + DWORD n = m_data.dwEntryNameElementSize * j; + + char name[64] = 
{}; + strncpy_s(name, &temp[n], sizeof(name)); + + m_names[name] = j; + } + } + } + + // Load entries + if (m_data.dwFlags & BANKDATA::FLAGS_COMPACT) + { + m_entries.reset(reinterpret_cast(new (std::nothrow) ENTRYCOMPACT[m_data.dwEntryCount])); + } + else + { + m_entries.reset(reinterpret_cast(new (std::nothrow) ENTRY[m_data.dwEntryCount])); + } + if (!m_entries) + return E_OUTOFMEMORY; + + memset(&request, 0, sizeof(request)); + request.Offset = m_header.Segments[HEADER::SEGIDX_ENTRYMETADATA].dwOffset; + request.hEvent = m_event.get(); + + wait = false; + if (!ReadFile(hFile.get(), m_entries.get(), metadataBytes, nullptr, &request)) + { + DWORD error = GetLastError(); + if (error != ERROR_IO_PENDING) + return HRESULT_FROM_WIN32(error); + wait = true; + } + +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) + result = GetOverlappedResultEx(hFile.get(), &request, &bytes, INFINITE, FALSE); +#else + if (wait) + (void)WaitForSingleObject(m_event.get(), INFINITE); + + result = GetOverlappedResult(hFile.get(), &request, &bytes, FALSE); +#endif + + if (!result || (metadataBytes != bytes)) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + if (be) + { + if (m_data.dwFlags & BANKDATA::FLAGS_COMPACT) + { + auto ptr = reinterpret_cast(m_entries.get()); + for (size_t j = 0; j < m_data.dwEntryCount; ++j, ++ptr) + ptr->BigEndian(); + } + else + { + auto ptr = reinterpret_cast(m_entries.get()); + for (size_t j = 0; j < m_data.dwEntryCount; ++j, ++ptr) + ptr->BigEndian(); + } + } + + // Load seek tables (XMA2 / xWMA) + DWORD seekLen = m_header.Segments[HEADER::SEGIDX_SEEKTABLES].dwLength; + if (seekLen > 0) + { + m_seekData.reset(new (std::nothrow) uint8_t[seekLen]); + if (!m_seekData) + return E_OUTOFMEMORY; + + memset(&request, 0, sizeof(OVERLAPPED)); + request.Offset = m_header.Segments[HEADER::SEGIDX_SEEKTABLES].dwOffset; + request.hEvent = m_event.get(); + + wait = false; + if (!ReadFile(hFile.get(), m_seekData.get(), seekLen, nullptr, &request)) + { + DWORD error = 
GetLastError(); + if (error != ERROR_IO_PENDING) + return HRESULT_FROM_WIN32(error); + wait = true; + } + + #if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) + result = GetOverlappedResultEx(hFile.get(), &request, &bytes, INFINITE, FALSE); + #else + if (wait) + (void)WaitForSingleObject(m_event.get(), INFINITE); + + result = GetOverlappedResult(hFile.get(), &request, &bytes, FALSE); + #endif + + if (!result || (seekLen != bytes)) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + if (be) + { + auto ptr = reinterpret_cast(m_seekData.get()); + for (size_t j = 0; j < seekLen; j += 4, ++ptr) + { + *ptr = _byteswap_ulong(*ptr); + } + } + } + + DWORD waveLen = m_header.Segments[HEADER::SEGIDX_ENTRYWAVEDATA].dwLength; + if (!waveLen) + { + return HRESULT_FROM_WIN32(ERROR_NO_DATA); + } + + if (m_data.dwFlags & BANKDATA::TYPE_STREAMING) + { + // If streaming, reopen without buffering + hFile.reset(); + + #if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) + CREATEFILE2_EXTENDED_PARAMETERS params2 = { sizeof(CREATEFILE2_EXTENDED_PARAMETERS), 0, 0, 0, {}, nullptr }; + params2.dwFileAttributes = FILE_ATTRIBUTE_NORMAL; + params2.dwFileFlags = FILE_FLAG_OVERLAPPED | FILE_FLAG_NO_BUFFERING; + m_async = CreateFile2(szFileName, + GENERIC_READ, + FILE_SHARE_READ, + OPEN_EXISTING, + ¶ms2); + #else + m_async = CreateFileW(szFileName, + GENERIC_READ, + FILE_SHARE_READ, + nullptr, + OPEN_EXISTING, + FILE_FLAG_OVERLAPPED | FILE_FLAG_NO_BUFFERING, + nullptr); + #endif + + if (m_async == INVALID_HANDLE_VALUE) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + m_prepared = true; + } + else + { + // If in-memory, kick off read of wave data + void* dest = nullptr; + + #ifdef DIRECTX_ENABLE_XMA2 + bool xma = false; + if (m_data.dwFlags & BANKDATA::FLAGS_COMPACT) + { + if (m_data.CompactFormat.wFormatTag == MINIWAVEFORMAT::TAG_XMA) + xma = true; + } + else + { + for (uint32_t j = 0; j < m_data.dwEntryCount; ++j) + { + auto& entry = reinterpret_cast(m_entries.get())[j]; + if (entry.Format.wFormatTag == 
MINIWAVEFORMAT::TAG_XMA) + { + xma = true; + break; + } + } + } + + if (xma) + { + HRESULT hr = ApuAlloc(&m_xmaMemory, nullptr, waveLen, SHAPE_XMA_INPUT_BUFFER_ALIGNMENT); + if (FAILED(hr)) + { + DebugTrace("ERROR: ApuAlloc failed. Did you allocate a large enough heap with ApuCreateHeap for all your XMA wave data?\n"); + return hr; + } + + dest = m_xmaMemory; + } + else + #endif // XMA2 + { + m_waveData.reset(new (std::nothrow) uint8_t[waveLen]); + if (!m_waveData) + return E_OUTOFMEMORY; + + dest = m_waveData.get(); + } + + memset(&m_request, 0, sizeof(OVERLAPPED)); + m_request.Offset = m_header.Segments[HEADER::SEGIDX_ENTRYWAVEDATA].dwOffset; + m_request.hEvent = m_event.get(); + + if (!ReadFile(hFile.get(), dest, waveLen, nullptr, &m_request)) + { + DWORD error = GetLastError(); + if (error != ERROR_IO_PENDING) + return HRESULT_FROM_WIN32(error); + } + else + { + m_prepared = true; + memset(&m_request, 0, sizeof(OVERLAPPED)); + } + + m_async = hFile.release(); + } + + return S_OK; +} + + +void WaveBankReader::Impl::Close() noexcept +{ + if (m_async != INVALID_HANDLE_VALUE) + { + if (m_request.hEvent) + { + DWORD bytes; + #if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) + (void)GetOverlappedResultEx(m_async, &m_request, &bytes, INFINITE, FALSE); + #else + (void)WaitForSingleObject(m_request.hEvent, INFINITE); + + (void)GetOverlappedResult(m_async, &m_request, &bytes, FALSE); + #endif + } + + CloseHandle(m_async); + m_async = INVALID_HANDLE_VALUE; + } + m_event.reset(); + +#ifdef DIRECTX_ENABLE_XMA2 + if (m_xmaMemory) + { + ApuFree(m_xmaMemory); + m_xmaMemory = nullptr; + } +#endif +} + + +_Use_decl_annotations_ +HRESULT WaveBankReader::Impl::GetFormat(uint32_t index, WAVEFORMATEX* pFormat, size_t maxsize) const noexcept +{ + if (!pFormat || !maxsize) + return E_INVALIDARG; + + if (index >= m_data.dwEntryCount || !m_entries) + { + return E_FAIL; + } + + auto& miniFmt = (m_data.dwFlags & BANKDATA::FLAGS_COMPACT) ? 
m_data.CompactFormat : (reinterpret_cast(m_entries.get())[index].Format); + + switch (miniFmt.wFormatTag) + { + case MINIWAVEFORMAT::TAG_PCM: + if (maxsize < sizeof(PCMWAVEFORMAT)) + return HRESULT_FROM_WIN32(ERROR_MORE_DATA); + + pFormat->wFormatTag = WAVE_FORMAT_PCM; + + if (maxsize >= sizeof(WAVEFORMATEX)) + { + pFormat->cbSize = 0; + } + break; + + case MINIWAVEFORMAT::TAG_ADPCM: + if (maxsize < (sizeof(WAVEFORMATEX) + 32 /*MSADPCM_FORMAT_EXTRA_BYTES*/)) + return HRESULT_FROM_WIN32(ERROR_MORE_DATA); + + pFormat->wFormatTag = WAVE_FORMAT_ADPCM; + pFormat->cbSize = 32 /*MSADPCM_FORMAT_EXTRA_BYTES*/; + { + auto adpcmFmt = reinterpret_cast(pFormat); + adpcmFmt->wSamplesPerBlock = static_cast(miniFmt.AdpcmSamplesPerBlock()); + miniFmt.AdpcmFillCoefficientTable(adpcmFmt); + } + break; + + case MINIWAVEFORMAT::TAG_WMA: + if (maxsize < sizeof(WAVEFORMATEX)) + return HRESULT_FROM_WIN32(ERROR_MORE_DATA); + + pFormat->wFormatTag = static_cast((miniFmt.wBitsPerSample & 0x1) ? WAVE_FORMAT_WMAUDIO3 : WAVE_FORMAT_WMAUDIO2); + pFormat->cbSize = 0; + break; + + case MINIWAVEFORMAT::TAG_XMA: // XMA2 is supported by Xbox One + #ifdef DIRECTX_ENABLE_XMA2 + if (maxsize < sizeof(XMA2WAVEFORMATEX)) + return HRESULT_FROM_WIN32(ERROR_MORE_DATA); + + pFormat->wFormatTag = WAVE_FORMAT_XMA2; + pFormat->cbSize = sizeof(XMA2WAVEFORMATEX) - sizeof(WAVEFORMATEX); + { + auto xmaFmt = reinterpret_cast(pFormat); + + xmaFmt->NumStreams = static_cast((miniFmt.nChannels + 1) / 2); + xmaFmt->BytesPerBlock = 65536 /* XACT_FIXED_XMA_BLOCK_SIZE */; + xmaFmt->EncoderVersion = 4 /* XMAENCODER_VERSION_XMA2 */; + + auto seekTable = FindSeekTable(index, m_seekData.get(), m_header, m_data); + if (seekTable) + { + xmaFmt->BlockCount = static_cast(*seekTable); + } + else + { + xmaFmt->BlockCount = 0; + } + + switch (miniFmt.nChannels) + { + case 1: xmaFmt->ChannelMask = SPEAKER_MONO; break; + case 2: xmaFmt->ChannelMask = SPEAKER_STEREO; break; + case 3: xmaFmt->ChannelMask = SPEAKER_2POINT1; break; + case 4: 
xmaFmt->ChannelMask = SPEAKER_QUAD; break; + case 5: xmaFmt->ChannelMask = SPEAKER_4POINT1; break; + case 6: xmaFmt->ChannelMask = SPEAKER_5POINT1; break; + case 7: xmaFmt->ChannelMask = SPEAKER_5POINT1 | SPEAKER_BACK_CENTER; break; + case 8: xmaFmt->ChannelMask = SPEAKER_7POINT1; break; + default: xmaFmt->ChannelMask = DWORD(-1); break; + } + + if (m_data.dwFlags & BANKDATA::FLAGS_COMPACT) + { + auto& entry = reinterpret_cast(m_entries.get())[index]; + + DWORD dwOffset, dwLength; + entry.ComputeLocations(dwOffset, dwLength, index, m_header, m_data, reinterpret_cast(m_entries.get())); + + xmaFmt->SamplesEncoded = entry.GetDuration(dwLength, m_data, seekTable); + + xmaFmt->PlayBegin = xmaFmt->PlayLength = + xmaFmt->LoopBegin = xmaFmt->LoopLength = xmaFmt->LoopCount = 0; + } + else + { + auto& entry = reinterpret_cast(m_entries.get())[index]; + + xmaFmt->SamplesEncoded = entry.Duration; + xmaFmt->PlayBegin = 0; + xmaFmt->PlayLength = entry.PlayRegion.dwLength; + + if (entry.LoopRegion.dwTotalSamples > 0) + { + xmaFmt->LoopBegin = entry.LoopRegion.dwStartSample; + xmaFmt->LoopLength = entry.LoopRegion.dwTotalSamples; + xmaFmt->LoopCount = 0xff /* XACTLOOPCOUNT_INFINITE */; + } + else + { + xmaFmt->LoopBegin = xmaFmt->LoopLength = xmaFmt->LoopCount = 0; + } + } + } + break; + #else + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + #endif + + default: + return E_FAIL; + } + + pFormat->nChannels = miniFmt.nChannels; + pFormat->wBitsPerSample = miniFmt.BitsPerSample(); + pFormat->nBlockAlign = static_cast(miniFmt.BlockAlign()); + pFormat->nSamplesPerSec = miniFmt.nSamplesPerSec; + pFormat->nAvgBytesPerSec = miniFmt.AvgBytesPerSec(); + + return S_OK; +} + + +_Use_decl_annotations_ +HRESULT WaveBankReader::Impl::GetWaveData(uint32_t index, const uint8_t** pData, uint32_t& dataSize) const noexcept +{ + if (!pData) + return E_INVALIDARG; + + if (index >= m_data.dwEntryCount || !m_entries) + { + return E_FAIL; + } + +#ifdef DIRECTX_ENABLE_XMA2 + const uint8_t* waveData = 
(m_xmaMemory) ? reinterpret_cast(m_xmaMemory) : m_waveData.get(); +#else + const uint8_t* waveData = m_waveData.get(); +#endif + + if (!waveData) + return E_FAIL; + + if (m_data.dwFlags & BANKDATA::TYPE_STREAMING) + { + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + if (!m_prepared) + { + return HRESULT_FROM_WIN32(ERROR_IO_INCOMPLETE); + } + + if (m_data.dwFlags & BANKDATA::FLAGS_COMPACT) + { + auto& entry = reinterpret_cast(m_entries.get())[index]; + + DWORD dwOffset, dwLength; + entry.ComputeLocations(dwOffset, dwLength, index, m_header, m_data, reinterpret_cast(m_entries.get())); + + if ((uint64_t(dwOffset) + uint64_t(dwLength)) > uint64_t(m_header.Segments[HEADER::SEGIDX_ENTRYWAVEDATA].dwLength)) + { + return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + *pData = &waveData[dwOffset]; + dataSize = dwLength; + } + else + { + auto& entry = reinterpret_cast(m_entries.get())[index]; + + if ((uint64_t(entry.PlayRegion.dwOffset) + uint64_t(entry.PlayRegion.dwLength)) > uint64_t(m_header.Segments[HEADER::SEGIDX_ENTRYWAVEDATA].dwLength)) + { + return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + *pData = &waveData[entry.PlayRegion.dwOffset]; + dataSize = entry.PlayRegion.dwLength; + } + + return S_OK; +} + + +_Use_decl_annotations_ +HRESULT WaveBankReader::Impl::GetSeekTable(uint32_t index, const uint32_t** pData, uint32_t& dataCount, uint32_t& tag) const noexcept +{ + if (!pData) + return E_INVALIDARG; + + *pData = nullptr; + dataCount = 0; + tag = 0; + + if (index >= m_data.dwEntryCount || !m_entries) + { + return E_FAIL; + } + + if (!m_seekData) + return S_OK; + + auto& miniFmt = (m_data.dwFlags & BANKDATA::FLAGS_COMPACT) ? m_data.CompactFormat : (reinterpret_cast(m_entries.get())[index].Format); + + switch (miniFmt.wFormatTag) + { + case MINIWAVEFORMAT::TAG_WMA: + tag = static_cast((miniFmt.wBitsPerSample & 0x1) ? 
WAVE_FORMAT_WMAUDIO3 : WAVE_FORMAT_WMAUDIO2); + break; + + case MINIWAVEFORMAT::TAG_XMA: + tag = 0x166 /* WAVE_FORMAT_XMA2 */; + break; + + default: + return S_OK; + } + + auto seekTable = FindSeekTable(index, m_seekData.get(), m_header, m_data); + if (!seekTable) + return S_OK; + + dataCount = *seekTable; + *pData = seekTable + 1; + + return S_OK; +} + + +_Use_decl_annotations_ +HRESULT WaveBankReader::Impl::GetMetadata(uint32_t index, Metadata& metadata) const noexcept +{ + if (index >= m_data.dwEntryCount || !m_entries) + { + return E_FAIL; + } + + if (m_data.dwFlags & BANKDATA::FLAGS_COMPACT) + { + auto& entry = reinterpret_cast(m_entries.get())[index]; + + DWORD dwOffset, dwLength; + entry.ComputeLocations(dwOffset, dwLength, index, m_header, m_data, reinterpret_cast(m_entries.get())); + + auto seekTable = FindSeekTable(index, m_seekData.get(), m_header, m_data); + metadata.duration = entry.GetDuration(dwLength, m_data, seekTable); + metadata.loopStart = metadata.loopLength = 0; + metadata.offsetBytes = dwOffset; + metadata.lengthBytes = dwLength; + } + else + { + auto& entry = reinterpret_cast(m_entries.get())[index]; + + metadata.duration = entry.Duration; + metadata.loopStart = entry.LoopRegion.dwStartSample; + metadata.loopLength = entry.LoopRegion.dwTotalSamples; + metadata.offsetBytes = entry.PlayRegion.dwOffset; + metadata.lengthBytes = entry.PlayRegion.dwLength; + } + + if (m_data.dwFlags & BANKDATA::TYPE_STREAMING) + { + uint64_t offset = uint64_t(metadata.offsetBytes) + uint64_t(m_header.Segments[HEADER::SEGIDX_ENTRYWAVEDATA].dwOffset); + if (offset > UINT32_MAX) + return HRESULT_FROM_WIN32(ERROR_ARITHMETIC_OVERFLOW); + + metadata.offsetBytes = static_cast(offset); + } + + return S_OK; +} + + +bool WaveBankReader::Impl::UpdatePrepared() noexcept +{ + if (m_prepared) + return true; + + if (m_async == INVALID_HANDLE_VALUE) + return false; + + if (m_request.hEvent) + { + + #if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) + DWORD bytes; + BOOL result = 
GetOverlappedResultEx(m_async, &m_request, &bytes, 0, FALSE); + #else + bool result = HasOverlappedIoCompleted(&m_request); + #endif + if (result) + { + m_prepared = true; + + memset(&m_request, 0, sizeof(OVERLAPPED)); + } + } + + return m_prepared; +} + + + +//-------------------------------------------------------------------------------------- +WaveBankReader::WaveBankReader() noexcept(false) : + pImpl(std::make_unique()) +{ +} + + +WaveBankReader::~WaveBankReader() +{ +} + + +_Use_decl_annotations_ +HRESULT WaveBankReader::Open(const wchar_t* szFileName) noexcept +{ + return pImpl->Open(szFileName); +} + + +_Use_decl_annotations_ +uint32_t WaveBankReader::Find(const char* name) const +{ + auto it = pImpl->m_names.find(name); + if (it != pImpl->m_names.cend()) + { + return it->second; + } + + return uint32_t(-1); +} + + +bool WaveBankReader::IsPrepared() noexcept +{ + if (pImpl->m_prepared) + return true; + + return pImpl->UpdatePrepared(); +} + + +void WaveBankReader::WaitOnPrepare() noexcept +{ + if (pImpl->m_prepared) + return; + + if (pImpl->m_request.hEvent) + { + (void)WaitForSingleObjectEx(pImpl->m_request.hEvent, INFINITE, FALSE); + + pImpl->UpdatePrepared(); + } +} + + +bool WaveBankReader::HasNames() const noexcept +{ + return !pImpl->m_names.empty(); +} + + +bool WaveBankReader::IsStreamingBank() const noexcept +{ + return (pImpl->m_data.dwFlags & BANKDATA::TYPE_STREAMING) != 0; +} + + +#ifdef DIRECTX_ENABLE_XMA2 +bool WaveBankReader::HasXMA() const noexcept +{ + return (pImpl->m_xmaMemory != nullptr); +} +#endif + + +const char* WaveBankReader::BankName() const noexcept +{ + return pImpl->m_data.szBankName; +} + + +uint32_t WaveBankReader::Count() const noexcept +{ + return pImpl->m_data.dwEntryCount; +} + + +uint32_t WaveBankReader::BankAudioSize() const noexcept +{ + return pImpl->m_header.Segments[HEADER::SEGIDX_ENTRYWAVEDATA].dwLength; +} + + +_Use_decl_annotations_ +HRESULT WaveBankReader::GetFormat(uint32_t index, WAVEFORMATEX* pFormat, size_t 
maxsize) const noexcept +{ + return pImpl->GetFormat(index, pFormat, maxsize); +} + + +_Use_decl_annotations_ +HRESULT WaveBankReader::GetWaveData(uint32_t index, const uint8_t** pData, uint32_t& dataSize) const noexcept +{ + return pImpl->GetWaveData(index, pData, dataSize); +} + + +_Use_decl_annotations_ +HRESULT WaveBankReader::GetSeekTable(uint32_t index, const uint32_t** pData, uint32_t& dataCount, uint32_t& tag) const noexcept +{ + return pImpl->GetSeekTable(index, pData, dataCount, tag); +} + + +_Use_decl_annotations_ +HRESULT WaveBankReader::GetMetadata(uint32_t index, Metadata& metadata) const noexcept +{ + return pImpl->GetMetadata(index, metadata); +} + + +HANDLE WaveBankReader::GetAsyncHandle() const noexcept +{ + return (pImpl->m_data.dwFlags & BANKDATA::TYPE_STREAMING) ? pImpl->m_async : INVALID_HANDLE_VALUE; +} diff --git a/Sdk/External/DirectXTK/Audio/WaveBankReader.h b/Sdk/External/DirectXTK/Audio/WaveBankReader.h new file mode 100644 index 0000000..d0adedc --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/WaveBankReader.h @@ -0,0 +1,81 @@ +//-------------------------------------------------------------------------------------- +// File: WaveBankReader.h +// +// Functions for loading audio data from Wave Banks +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//------------------------------------------------------------------------------------- + +#pragma once + +#include +#include + +#include +#include + + +namespace DirectX +{ + class WaveBankReader + { + public: + WaveBankReader() noexcept(false); + + WaveBankReader(WaveBankReader&&) = default; + WaveBankReader& operator= (WaveBankReader&&) = default; + + WaveBankReader(WaveBankReader const&) = delete; + WaveBankReader& operator= (WaveBankReader const&) = delete; + + ~WaveBankReader(); + + HRESULT Open(_In_z_ const wchar_t* szFileName) noexcept; + + uint32_t Find(_In_z_ const char* name) const; + + bool IsPrepared() noexcept; + void WaitOnPrepare() noexcept; + + bool HasNames() const noexcept; + bool IsStreamingBank() const noexcept; + + #if (defined(_XBOX_ONE) && defined(_TITLE)) || defined(_GAMING_XBOX) + bool HasXMA() const noexcept; + #endif + + const char* BankName() const noexcept; + + uint32_t Count() const noexcept; + + uint32_t BankAudioSize() const noexcept; + + HRESULT GetFormat(_In_ uint32_t index, _Out_writes_bytes_(maxsize) WAVEFORMATEX* pFormat, _In_ size_t maxsize) const noexcept; + + HRESULT GetWaveData(_In_ uint32_t index, _Outptr_ const uint8_t** pData, _Out_ uint32_t& dataSize) const noexcept; + + HRESULT GetSeekTable(_In_ uint32_t index, _Out_ const uint32_t** pData, _Out_ uint32_t& dataCount, _Out_ uint32_t& tag) const noexcept; + + HANDLE GetAsyncHandle() const noexcept; + + struct Metadata + { + uint32_t duration; + uint32_t loopStart; + uint32_t loopLength; + uint32_t offsetBytes; + uint32_t lengthBytes; + }; + HRESULT GetMetadata(_In_ uint32_t index, _Out_ Metadata& metadata) const noexcept; + + private: + // Private implementation. 
+ class Impl; + + std::unique_ptr pImpl; + }; +} diff --git a/Sdk/External/DirectXTK/Audio/packages.config b/Sdk/External/DirectXTK/Audio/packages.config new file mode 100644 index 0000000..1f8347f --- /dev/null +++ b/Sdk/External/DirectXTK/Audio/packages.config @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/CMakeLists.txt b/Sdk/External/DirectXTK/CMakeLists.txt new file mode 100644 index 0000000..bc64afd --- /dev/null +++ b/Sdk/External/DirectXTK/CMakeLists.txt @@ -0,0 +1,234 @@ +# DirectX Tool Kit for DirectX 11 +# +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# +# http://go.microsoft.com/fwlink/?LinkId=248929 + +cmake_minimum_required (VERSION 3.11) + +project (DirectXTK LANGUAGES CXX) + +option(BUILD_XAUDIO_WIN10 "Build for XAudio 2.9" OFF) +option(BUILD_XAUDIO_WIN8 "Build for XAudio 2.8" ON) + +option(ENABLE_CODE_ANALYSIS "Use Static Code Analysis on build" OFF) + +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/CMake") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/CMake") +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/CMake") + +set(LIBRARY_SOURCES + Inc/BufferHelpers.h + Inc/CommonStates.h + Inc/DDSTextureLoader.h + Inc/DirectXHelpers.h + Inc/Effects.h + Inc/GamePad.h + Inc/GeometricPrimitive.h + Inc/GraphicsMemory.h + Inc/Keyboard.h + Inc/Model.h + Inc/Mouse.h + Inc/PostProcess.h + Inc/PrimitiveBatch.h + Inc/ScreenGrab.h + Inc/SimpleMath.h + Inc/SimpleMath.inl + Inc/SpriteBatch.h + Inc/SpriteFont.h + Inc/VertexTypes.h + Inc/WICTextureLoader.h + Src/AlignedNew.h + Src/AlphaTestEffect.cpp + Src/BasicEffect.cpp + Src/BasicPostProcess.cpp + Src/Bezier.h + Src/BinaryReader.cpp + Src/BinaryReader.h + Src/BufferHelpers.cpp + Src/CommonStates.cpp + Src/dds.h + Src/DDSTextureLoader.cpp + Src/DebugEffect.cpp + Src/DemandCreate.h + 
Src/DGSLEffect.cpp + Src/DGSLEffectFactory.cpp + Src/DirectXHelpers.cpp + Src/DualPostProcess.cpp + Src/DualTextureEffect.cpp + Src/EffectCommon.cpp + Src/EffectCommon.h + Src/EffectFactory.cpp + Src/EnvironmentMapEffect.cpp + Src/GamePad.cpp + Src/GeometricPrimitive.cpp + Src/Geometry.h + Src/Geometry.cpp + Src/GraphicsMemory.cpp + Src/Keyboard.cpp + Src/LoaderHelpers.h + Src/Model.cpp + Src/ModelLoadCMO.cpp + Src/ModelLoadSDKMESH.cpp + Src/ModelLoadVBO.cpp + Src/Mouse.cpp + Src/NormalMapEffect.cpp + Src/PBREffect.cpp + Src/PBREffectFactory.cpp + Src/pch.h + Src/PlatformHelpers.h + Src/PrimitiveBatch.cpp + Src/ScreenGrab.cpp + Src/SDKMesh.h + Src/SharedResourcePool.h + Src/SimpleMath.cpp + Src/SkinnedEffect.cpp + Src/SpriteBatch.cpp + Src/SpriteFont.cpp + Src/TeapotData.inc + Src/ToneMapPostProcess.cpp + Src/vbo.h + Src/VertexTypes.cpp + Src/WICTextureLoader.cpp) + +set(SHADER_SOURCES + Src/Shaders/AlphaTestEffect.fx + Src/Shaders/BasicEffect.fx + Src/Shaders/Common.fxh + Src/Shaders/DebugEffect.fx + Src/Shaders/DGSLEffect.fx + Src/Shaders/DGSLLambert.hlsl + Src/Shaders/DGSLPhong.hlsl + Src/Shaders/DGSLUnlit.hlsl + Src/Shaders/DualTextureEffect.fx + Src/Shaders/EnvironmentMapEffect.fx + Src/Shaders/Lighting.fxh + Src/Shaders/NormalMapEffect.fx + Src/Shaders/PBRCommon.fxh + Src/Shaders/PBREffect.fx + Src/Shaders/PixelPacking_Velocity.hlsli + Src/Shaders/PostProcess.fx + Src/Shaders/SkinnedEffect.fx + Src/Shaders/SpriteEffect.fx + Src/Shaders/Structures.fxh + Src/Shaders/ToneMap.fx + Src/Shaders/Utilities.fxh) + +if((BUILD_XAUDIO_WIN10) OR (BUILD_XAUDIO_WIN8)) + set(LIBRARY_SOURCES ${LIBRARY_SOURCES} + Inc/Audio.h + Audio/AudioEngine.cpp + Audio/DynamicSoundEffectInstance.cpp + Audio/SoundCommon.cpp + Audio/SoundCommon.h + Audio/SoundEffect.cpp + Audio/SoundEffectInstance.cpp + Audio/SoundStreamInstance.cpp + Audio/WaveBank.cpp + Audio/WaveBankReader.cpp + Audio/WaveBankReader.h + Audio/WAVFileReader.cpp + Audio/WAVFileReader.h) +endif() + 
+add_library(${PROJECT_NAME} STATIC ${LIBRARY_SOURCES} Src/Shaders/Compiled/SpriteEffect_SpriteVertexShader.inc) + +if (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.16") + target_precompile_headers(${PROJECT_NAME} PRIVATE Src/pch.h) +endif() + +add_custom_command( + OUTPUT "${PROJECT_SOURCE_DIR}/Src/Shaders/Compiled/SpriteEffect_SpriteVertexShader.inc" + MAIN_DEPENDENCY "${PROJECT_SOURCE_DIR}/Src/Shaders/CompileShaders.cmd" + DEPENDS ${SHADER_SOURCES} + COMMENT "Generating HLSL shaders..." + COMMAND "CompileShaders.cmd" + WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}/Src/Shaders" + USES_TERMINAL) + +source_group(Audio REGULAR_EXPRESSION Audio/*.*) +source_group(Inc REGULAR_EXPRESSION Inc/*.*) +source_group(Src REGULAR_EXPRESSION Src/*.*) + +target_include_directories(${PROJECT_NAME} PUBLIC Inc) +target_include_directories(${PROJECT_NAME} PRIVATE Src) + +if((BUILD_XAUDIO_WIN10) OR (BUILD_XAUDIO_WIN8)) + target_include_directories(${PROJECT_NAME} PRIVATE Audio) +endif() + +if(MSVC) + # Use max Warning Level + string(REPLACE "/W3 " "/Wall " CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) + string(REPLACE "/W3 " "/Wall " CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) + string(REPLACE "/W3 " "/Wall " CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) + + # Model uses dynamic_cast, so we need /GR (Enable RTTI) + + # Library needs /EHsc (Enable C++ exceptions) +endif() + +add_executable(xwbtool + xwbtool/xwbtool.cpp + Audio/WAVFileReader.cpp + Audio/WAVFileReader.h) +target_include_directories(xwbtool PRIVATE Audio Src) +target_link_libraries(xwbtool version.lib) +source_group(xwbtool REGULAR_EXPRESSION XWBTool/*.*) + +if(MSVC) + target_compile_options(${PROJECT_NAME} PRIVATE /fp:fast) + target_compile_options(xwbtool PRIVATE /fp:fast) + + if(ENABLE_CODE_ANALYSIS) + target_compile_options(${PROJECT_NAME} PRIVATE /analyze) + target_compile_options(xwbtool PRIVATE /analyze) + endif() + + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 19.26) + target_compile_options(${PROJECT_NAME} 
PRIVATE /Zc:preprocessor /wd5104 /wd5105) + target_compile_options(xwbtool PRIVATE /Zc:preprocessor /wd5104 /wd5105) + endif() + + if (${CMAKE_SIZEOF_VOID_P} EQUAL "4") + target_compile_options(${PROJECT_NAME} PRIVATE /arch:SSE2) + target_compile_options(xwbtool PRIVATE /arch:SSE2) + endif() +endif() + +if ( CMAKE_CXX_COMPILER_ID MATCHES "Clang" ) + set(WarningsLib "-Wpedantic" "-Wextra") + target_compile_options(${PROJECT_NAME} PRIVATE ${WarningsLib}) + + set(WarningsEXE ${WarningsLib} "-Wno-c++98-compat" "-Wno-c++98-compat-pedantic" + "-Wno-double-promotion" "-Wno-exit-time-destructors" "-Wno-gnu-anonymous-struct" + "-Wno-missing-prototypes" "-Wno-nested-anon-types" "-Wno-unused-const-variable") + target_compile_options(xwbtool PRIVATE ${WarningsEXE}) +endif() +if ( CMAKE_CXX_COMPILER_ID MATCHES "MSVC" ) + target_compile_options(${PROJECT_NAME} PRIVATE /permissive- /JMC- /Zc:__cplusplus) + target_compile_options(xwbtool PRIVATE /permissive- /Zc:__cplusplus) + + set(WarningsEXE "/wd4365" "/wd4710" "/wd4820" "/wd5039" "/wd5045") + target_compile_options(xwbtool PRIVATE ${WarningsEXE}) +endif() + +if(WIN32) + target_compile_definitions(${PROJECT_NAME} PRIVATE _UNICODE UNICODE) + target_compile_definitions(xwbtool PRIVATE _UNICODE UNICODE _WIN32_WINNT=0x0601) + + if(BUILD_XAUDIO_WIN10) + target_compile_definitions(${PROJECT_NAME} PRIVATE _WIN32_WINNT=0x0A00) + elseif(BUILD_XAUDIO_WIN8) + target_compile_definitions(${PROJECT_NAME} PRIVATE _WIN32_WINNT=0x0602) + else() + target_compile_definitions(${PROJECT_NAME} PRIVATE _WIN32_WINNT=0x0601) + endif() +endif() + +set_property(DIRECTORY PROPERTY VS_STARTUP_PROJECT xwbtool) diff --git a/Sdk/External/DirectXTK/CMakeSettings.json b/Sdk/External/DirectXTK/CMakeSettings.json new file mode 100644 index 0000000..323627d --- /dev/null +++ b/Sdk/External/DirectXTK/CMakeSettings.json @@ -0,0 +1,64 @@ +{ + "configurations": [ + { + "name": "x86-Clang-Debug", + "generator": "Ninja", + "configurationType": "Debug", + "buildRoot": 
"${projectDir}\\out\\build\\${name}", + "installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeCommandArgs": "", + "buildCommandArgs": "-v", + "ctestCommandArgs": "", + "inheritEnvironments": [ "clang_cl_x86" ], + "variables": [] + }, + { + "name": "x86-Clang-Release", + "generator": "Ninja", + "configurationType": "RelWithDebInfo", + "buildRoot": "${projectDir}\\out\\build\\${name}", + "installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeCommandArgs": "", + "buildCommandArgs": "-v", + "ctestCommandArgs": "", + "inheritEnvironments": [ "clang_cl_x86" ], + "variables": [] + }, + { + "name": "x64-Clang-Debug", + "generator": "Ninja", + "configurationType": "Debug", + "buildRoot": "${projectDir}\\out\\build\\${name}", + "installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeCommandArgs": "", + "buildCommandArgs": "-v", + "ctestCommandArgs": "", + "inheritEnvironments": [ "clang_cl_x64" ], + "variables": [] + }, + { + "name": "x64-Clang-Release", + "generator": "Ninja", + "configurationType": "RelWithDebInfo", + "buildRoot": "${projectDir}\\out\\build\\${name}", + "installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeCommandArgs": "", + "buildCommandArgs": "-v", + "ctestCommandArgs": "", + "inheritEnvironments": [ "clang_cl_x64" ], + "variables": [] + }, + { + "name": "x64-Debug", + "generator": "Ninja", + "configurationType": "Debug", + "buildRoot": "${projectDir}\\out\\build\\${name}", + "installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeCommandArgs": "", + "buildCommandArgs": "-v", + "ctestCommandArgs": "", + "inheritEnvironments": [ "msvc_x64_x64" ], + "variables": [] + } + ] +} \ No newline at end of file diff --git a/Sdk/External/DirectXTK/DirectXTK_Desktop_2017.sln b/Sdk/External/DirectXTK/DirectXTK_Desktop_2017.sln new file mode 100644 index 0000000..fd96704 --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Desktop_2017.sln @@ -0,0 +1,84 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual 
Studio 15 +VisualStudioVersion = 15.0.27703.2000 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DirectXTK_Desktop_2017", "DirectXTK_Desktop_2017.vcxproj", "{E0B52AE7-E160-4D32-BF3F-910B785E5A8E}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MakeSpriteFont", "MakeSpriteFont\MakeSpriteFont.csproj", "{7329B02D-C504-482A-A156-181D48CE493C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DirectXTKAudio_Desktop_2017_Win8", "Audio\DirectXTKAudio_Desktop_2017_Win8.vcxproj", "{4F150A30-CECB-49D1-8283-6A3F57438CF5}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "XWBTool_Desktop_2017", "XWBTool\XWBTool_Desktop_2017.vcxproj", "{C7AB4186-54B2-4244-A533-77494763EA1D}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{0317D9F7-1BFB-4422-8B2F-670E7956F12D}" + ProjectSection(SolutionItems) = preProject + .editorconfig = .editorconfig + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Mixed Platforms = Debug|Mixed Platforms + Debug|x86 = Debug|x86 + Debug|x64 = Debug|x64 + Release|Mixed Platforms = Release|Mixed Platforms + Release|x86 = Release|x86 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|Mixed Platforms.Build.0 = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x86.ActiveCfg = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x86.Build.0 = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x64.ActiveCfg = Debug|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x64.Build.0 = Debug|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|Mixed Platforms.ActiveCfg = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|Mixed 
Platforms.Build.0 = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x86.ActiveCfg = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x86.Build.0 = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x64.ActiveCfg = Release|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x64.Build.0 = Release|x64 + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x86.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x86.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x64.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x64.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x86.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x86.Build.0 = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x64.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x64.Build.0 = Release|Any CPU + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|Mixed Platforms.Build.0 = Debug|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x86.ActiveCfg = Debug|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x86.Build.0 = Debug|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x64.ActiveCfg = Debug|x64 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x64.Build.0 = Debug|x64 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|Mixed Platforms.ActiveCfg = Release|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|Mixed Platforms.Build.0 = Release|Win32 + 
{4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x86.ActiveCfg = Release|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x86.Build.0 = Release|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x64.ActiveCfg = Release|x64 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x64.Build.0 = Release|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|Mixed Platforms.Build.0 = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x86.ActiveCfg = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x86.Build.0 = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x64.ActiveCfg = Debug|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x64.Build.0 = Debug|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|Mixed Platforms.ActiveCfg = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|Mixed Platforms.Build.0 = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x86.ActiveCfg = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x86.Build.0 = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x64.ActiveCfg = Release|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {A26A07B7-AF41-47A4-B21F-8C772153EAA4} + EndGlobalSection +EndGlobal diff --git a/Sdk/External/DirectXTK/DirectXTK_Desktop_2017.vcxproj b/Sdk/External/DirectXTK/DirectXTK_Desktop_2017.vcxproj new file mode 100644 index 0000000..06aecc7 --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Desktop_2017.vcxproj @@ -0,0 +1,342 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Create 
+ Create + Create + Create + + + + + + + + + + + + + + + + + + + + + + + + + + Document + + + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + + + Document + + + + + Document + + + Document + + + + + Document + + + + + Document + + + + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E} + Win32Proj + DirectXTK + 10.0.17763.0 + x64 + + + + StaticLibrary + true + v141 + Unicode + + + StaticLibrary + true + v141 + Unicode + + + StaticLibrary + false + v141 + Unicode + + + StaticLibrary + false + v141 + Unicode + + + + + + + + + + + + + + + + + + + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + DirectXTK + + + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + DirectXTK + + + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + DirectXTK + + + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + DirectXTK + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0601;_WIN7_PLATFORM_UPDATE;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + false + + + Windows + true + + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0601;_WIN7_PLATFORM_UPDATE;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;%(AdditionalIncludeDirectories) + Fast + StreamingSIMDExtensions2 + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + false + + + Windows + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0601;_WIN7_PLATFORM_UPDATE;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + 
$(ProjectDir)Inc;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + + + Windows + true + true + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0601;_WIN7_PLATFORM_UPDATE;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;%(AdditionalIncludeDirectories) + Fast + StreamingSIMDExtensions2 + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + + + Windows + true + true + true + + + + + + + + <_ATGFXCPath>$(WindowsSDK_ExecutablePath_x64.Split(';')[0]) + <_ATGFXCPath>$(_ATGFXCPath.Replace("x64","")) + <_ATGFXCPath Condition="'$(_ATGFXCPath)' != '' and !HasTrailingSlash('$(_ATGFXCPath)')">$(_ATGFXCPath)\ + + + + <_ATGFXCPath /> + + + + + <_ATGShaderHeaders Include="$(ProjectDir)src/Shaders/Compiled/*.inc" Exclude="$(ProjectDir)src/Shaders/Compiled/*Xbox*.inc" /> + <_ATGShaderSymbols Include="$(ProjectDir)src/Shaders/Compiled/*.pdb" Exclude="$(ProjectDir)src/Shaders/Compiled/*Xbox*.pdb" /> + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/DirectXTK_Desktop_2017.vcxproj.filters b/Sdk/External/DirectXTK/DirectXTK_Desktop_2017.vcxproj.filters new file mode 100644 index 0000000..bd8bc30 --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Desktop_2017.vcxproj.filters @@ -0,0 +1,315 @@ + + + + + {771f5f80-d173-49c3-8afb-790e8f7cb0ce} + + + {c52e19b6-8703-49a1-9b36-101a05b4745d} + + + {e07ba3bc-b69f-4ded-9ffa-3b47427e9fef} + + + {28e934ed-a6d7-4c30-b064-c72c50c99626} + + + {28d5fa16-99e2-471c-8cd8-2020e81f0024} + + + {34676173-31f5-4435-b90d-a0ad4f7d163c} + + + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Src + + + Inc + + + Src + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + 
Src\Shared + + + Src\Shared + + + Inc + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Inc + + + Inc + + + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shared + + + Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders\Shared + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win10.sln b/Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win10.sln new file mode 100644 index 0000000..a2ebffa --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win10.sln @@ -0,0 +1,84 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.27703.2000 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DirectXTK_Desktop_2017_Win10", "DirectXTK_Desktop_2017_Win10.vcxproj", "{E0B52AE7-E160-4D32-BF3F-910B785E5A8E}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MakeSpriteFont", "MakeSpriteFont\MakeSpriteFont.csproj", "{7329B02D-C504-482A-A156-181D48CE493C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "XWBTool_Desktop_2017", "XWBTool\XWBTool_Desktop_2017.vcxproj", 
"{C7AB4186-54B2-4244-A533-77494763EA1D}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{BD5A62C9-FE7B-4491-82C2-BD46EA64D1C8}" + ProjectSection(SolutionItems) = preProject + .editorconfig = .editorconfig + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|ARM64 = Debug|ARM64 + Debug|Mixed Platforms = Debug|Mixed Platforms + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|ARM64 = Release|ARM64 + Release|Mixed Platforms = Release|Mixed Platforms + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|ARM64.Build.0 = Debug|ARM64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|Mixed Platforms.Build.0 = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x64.ActiveCfg = Debug|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x64.Build.0 = Debug|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x86.ActiveCfg = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x86.Build.0 = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|ARM64.ActiveCfg = Release|ARM64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|ARM64.Build.0 = Release|ARM64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|Mixed Platforms.ActiveCfg = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|Mixed Platforms.Build.0 = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x64.ActiveCfg = Release|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x64.Build.0 = Release|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x86.ActiveCfg = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x86.Build.0 = Release|Win32 + 
{7329B02D-C504-482A-A156-181D48CE493C}.Debug|ARM64.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|ARM64.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x64.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x64.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x86.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x86.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|ARM64.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|ARM64.Build.0 = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x64.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x64.Build.0 = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x86.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x86.Build.0 = Release|Any CPU + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|ARM64.Build.0 = Debug|ARM64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|Mixed Platforms.Build.0 = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x64.ActiveCfg = Debug|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x64.Build.0 = Debug|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x86.ActiveCfg = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x86.Build.0 = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|ARM64.ActiveCfg = Release|ARM64 
+ {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|ARM64.Build.0 = Release|ARM64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|Mixed Platforms.ActiveCfg = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|Mixed Platforms.Build.0 = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x64.ActiveCfg = Release|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x64.Build.0 = Release|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x86.ActiveCfg = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {0F7A0290-0AF9-47AB-A91D-1346B9433995} + EndGlobalSection +EndGlobal diff --git a/Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win10.vcxproj b/Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win10.vcxproj new file mode 100644 index 0000000..416daa0 --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win10.vcxproj @@ -0,0 +1,434 @@ + + + + + Debug + ARM64 + + + Debug + Win32 + + + Debug + x64 + + + Release + ARM64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Create + Create + Create + Create + Create + Create + + + + + + + + + + + + + + + + + + + + + + + + + + Document + + + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + + + Document + + + + + Document + + + Document + + + + + Document + + + + + Document + + + + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E} + Win32Proj + DirectXTK + 10.0.17763.0 + x64 + + + + StaticLibrary + true + v141 + Unicode + + + StaticLibrary + true + v141 + Unicode + + + StaticLibrary + true + v141 + Unicode + + + StaticLibrary + false + v141 + Unicode + + + 
StaticLibrary + false + v141 + Unicode + + + StaticLibrary + false + v141 + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + Bin\Desktop_2017_Win10\$(Platform)\$(Configuration)\ + Bin\Desktop_2017_Win10\$(Platform)\$(Configuration)\ + DirectXTK + + + Bin\Desktop_2017_Win10\$(Platform)\$(Configuration)\ + Bin\Desktop_2017_Win10\$(Platform)\$(Configuration)\ + DirectXTK + + + Bin\Desktop_2017_Win10\$(Platform)\$(Configuration)\ + Bin\Desktop_2017_Win10\$(Platform)\$(Configuration)\ + DirectXTK + + + Bin\Desktop_2017_Win10\$(Platform)\$(Configuration)\ + Bin\Desktop_2017_Win10\$(Platform)\$(Configuration)\ + DirectXTK + + + Bin\Desktop_2017_Win10\$(Platform)\$(Configuration)\ + Bin\Desktop_2017_Win10\$(Platform)\$(Configuration)\ + DirectXTK + + + Bin\Desktop_2017_Win10\$(Platform)\$(Configuration)\ + Bin\Desktop_2017_Win10\$(Platform)\$(Configuration)\ + DirectXTK + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0A00;_WIN7_PLATFORM_UPDATE;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + false + + + Windows + true + + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0A00;_WIN7_PLATFORM_UPDATE;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + false + + + Windows + true + + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0A00;_WIN7_PLATFORM_UPDATE;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + StreamingSIMDExtensions2 + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus /Zc:twoPhase- 
%(AdditionalOptions) + false + + + Windows + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0A00;_WIN7_PLATFORM_UPDATE;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + + + Windows + true + true + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0A00;_WIN7_PLATFORM_UPDATE;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + + + Windows + true + true + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0A00;_WIN7_PLATFORM_UPDATE;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + StreamingSIMDExtensions2 + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus /Zc:twoPhase- %(AdditionalOptions) + + + Windows + true + true + true + + + + + + + + <_ATGFXCPath>$(WindowsSDK_ExecutablePath_x64.Split(';')[0]) + <_ATGFXCPath>$(_ATGFXCPath.Replace("x64","")) + <_ATGFXCPath Condition="'$(_ATGFXCPath)' != '' and !HasTrailingSlash('$(_ATGFXCPath)')">$(_ATGFXCPath)\ + + + + <_ATGFXCPath /> + + + + + <_ATGShaderHeaders Include="$(ProjectDir)src/Shaders/Compiled/*.inc" Exclude="$(ProjectDir)src/Shaders/Compiled/*Xbox*.inc" /> + <_ATGShaderSymbols Include="$(ProjectDir)src/Shaders/Compiled/*.pdb" Exclude="$(ProjectDir)src/Shaders/Compiled/*Xbox*.pdb" /> + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win10.vcxproj.filters b/Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win10.vcxproj.filters new file mode 100644 index 0000000..bcd441b --- /dev/null 
+++ b/Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win10.vcxproj.filters @@ -0,0 +1,357 @@ + + + + + {771f5f80-d173-49c3-8afb-790e8f7cb0ce} + + + {c52e19b6-8703-49a1-9b36-101a05b4745d} + + + {e07ba3bc-b69f-4ded-9ffa-3b47427e9fef} + + + {28e934ed-a6d7-4c30-b064-c72c50c99626} + + + {28d5fa16-99e2-471c-8cd8-2020e81f0024} + + + {34676173-31f5-4435-b90d-a0ad4f7d163c} + + + {5d47b946-09b9-4477-934f-ca45fba9845d} + + + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Src + + + Src + + + Inc + + + Src + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Audio + + + Audio + + + Audio + + + Audio + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Inc + + + Inc + + + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Src + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Audio + + + Src + + + Src + + + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shared + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders\Shared + + + + \ No newline at end of file diff --git 
a/Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win7.sln b/Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win7.sln new file mode 100644 index 0000000..51d85e8 --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Desktop_2017_Win7.sln @@ -0,0 +1,73 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.1000 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DirectXTK_Desktop_2017", "DirectXTK_Desktop_2017.vcxproj", "{E0B52AE7-E160-4D32-BF3F-910B785E5A8E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DirectXTKAudio_Desktop_2017_Win7", "Audio\DirectXTKAudio_Desktop_2017_Win7.vcxproj", "{4F150A30-CECB-49D1-8283-6A3F57438CF5}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MakeSpriteFont", "MakeSpriteFont\MakeSpriteFont.csproj", "{7329B02D-C504-482A-A156-181D48CE493C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "xwbtool_Desktop_2017", "XWBTool\xwbtool_Desktop_2017.vcxproj", "{C7AB4186-54B2-4244-A533-77494763EA1D}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|Any CPU.ActiveCfg = Debug|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x64.ActiveCfg = Debug|x64 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x64.Build.0 = Debug|x64 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x86.ActiveCfg = Debug|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x86.Build.0 = Debug|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|Any CPU.ActiveCfg = Release|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x64.ActiveCfg = Release|x64 + 
{4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x64.Build.0 = Release|x64 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x86.ActiveCfg = Release|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x86.Build.0 = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|Any CPU.ActiveCfg = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x64.ActiveCfg = Debug|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x64.Build.0 = Debug|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x86.ActiveCfg = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x86.Build.0 = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|Any CPU.ActiveCfg = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x64.ActiveCfg = Release|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x64.Build.0 = Release|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x86.ActiveCfg = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x86.Build.0 = Release|Win32 + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x64.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x64.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x86.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x86.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|Any CPU.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|Any CPU.Build.0 = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x64.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x64.Build.0 = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x86.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x86.Build.0 = Release|Any CPU + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|Any 
CPU.ActiveCfg = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x64.ActiveCfg = Debug|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x64.Build.0 = Debug|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x86.ActiveCfg = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x86.Build.0 = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|Any CPU.ActiveCfg = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x64.ActiveCfg = Release|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x64.Build.0 = Release|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x86.ActiveCfg = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {3B774DEE-1D2A-4EEF-B8FA-75BDC1DA64DE} + EndGlobalSection +EndGlobal diff --git a/Sdk/External/DirectXTK/DirectXTK_Desktop_2019.sln b/Sdk/External/DirectXTK/DirectXTK_Desktop_2019.sln new file mode 100644 index 0000000..3f4f0bd --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Desktop_2019.sln @@ -0,0 +1,84 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 15.0.27703.2000 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DirectXTK_Desktop_2019", "DirectXTK_Desktop_2019.vcxproj", "{E0B52AE7-E160-4D32-BF3F-910B785E5A8E}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MakeSpriteFont", "MakeSpriteFont\MakeSpriteFont.csproj", "{7329B02D-C504-482A-A156-181D48CE493C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DirectXTKAudio_Desktop_2019_Win8", "Audio\DirectXTKAudio_Desktop_2019_Win8.vcxproj", "{4F150A30-CECB-49D1-8283-6A3F57438CF5}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "XWBTool_Desktop_2019", 
"XWBTool\XWBTool_Desktop_2019.vcxproj", "{C7AB4186-54B2-4244-A533-77494763EA1D}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{0317D9F7-1BFB-4422-8B2F-670E7956F12D}" + ProjectSection(SolutionItems) = preProject + .editorconfig = .editorconfig + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Mixed Platforms = Debug|Mixed Platforms + Debug|x86 = Debug|x86 + Debug|x64 = Debug|x64 + Release|Mixed Platforms = Release|Mixed Platforms + Release|x86 = Release|x86 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|Mixed Platforms.Build.0 = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x86.ActiveCfg = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x86.Build.0 = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x64.ActiveCfg = Debug|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x64.Build.0 = Debug|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|Mixed Platforms.ActiveCfg = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|Mixed Platforms.Build.0 = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x86.ActiveCfg = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x86.Build.0 = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x64.ActiveCfg = Release|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x64.Build.0 = Release|x64 + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x86.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x86.Build.0 = Debug|Any CPU + 
{7329B02D-C504-482A-A156-181D48CE493C}.Debug|x64.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x64.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x86.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x86.Build.0 = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x64.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x64.Build.0 = Release|Any CPU + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|Mixed Platforms.Build.0 = Debug|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x86.ActiveCfg = Debug|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x86.Build.0 = Debug|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x64.ActiveCfg = Debug|x64 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x64.Build.0 = Debug|x64 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|Mixed Platforms.ActiveCfg = Release|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|Mixed Platforms.Build.0 = Release|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x86.ActiveCfg = Release|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x86.Build.0 = Release|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x64.ActiveCfg = Release|x64 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x64.Build.0 = Release|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|Mixed Platforms.Build.0 = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x86.ActiveCfg = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x86.Build.0 = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x64.ActiveCfg = Debug|x64 
+ {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x64.Build.0 = Debug|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|Mixed Platforms.ActiveCfg = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|Mixed Platforms.Build.0 = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x86.ActiveCfg = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x86.Build.0 = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x64.ActiveCfg = Release|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {A26A07B7-AF41-47A4-B21F-8C772153EAA4} + EndGlobalSection +EndGlobal diff --git a/Sdk/External/DirectXTK/DirectXTK_Desktop_2019.vcxproj b/Sdk/External/DirectXTK/DirectXTK_Desktop_2019.vcxproj new file mode 100644 index 0000000..c5b1fa8 --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Desktop_2019.vcxproj @@ -0,0 +1,346 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Create + Create + Create + Create + + + + + + + + + + + + + + + + + + + + + + + + + + Document + + + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + + + Document + + + + + Document + + + Document + + + + + Document + + + + + Document + + + + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E} + Win32Proj + DirectXTK + 10.0 + x64 + + + + StaticLibrary + true + v142 + Unicode + + + StaticLibrary + true + v142 + Unicode + + + StaticLibrary + false + v142 + Unicode + + + StaticLibrary + false + v142 + Unicode + + + + + + + + + + + + + + + + + + + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + 
Bin\Desktop_2019\$(Platform)\$(Configuration)\ + DirectXTK + + + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + DirectXTK + + + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + DirectXTK + + + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + DirectXTK + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0601;_WIN7_PLATFORM_UPDATE;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus %(AdditionalOptions) + false + 4711;5045;26812 + + + Windows + true + + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0601;_WIN7_PLATFORM_UPDATE;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;%(AdditionalIncludeDirectories) + Fast + StreamingSIMDExtensions2 + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus %(AdditionalOptions) + false + 4711;5045;26812 + + + Windows + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0601;_WIN7_PLATFORM_UPDATE;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus %(AdditionalOptions) + 4711;5045;26812 + + + Windows + true + true + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0601;_WIN7_PLATFORM_UPDATE;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;%(AdditionalIncludeDirectories) + Fast + StreamingSIMDExtensions2 + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus %(AdditionalOptions) + 4711;5045;26812 + + + Windows + true + true + true + + + + + + + + 
<_ATGFXCPath>$(WindowsSDK_ExecutablePath_x64.Split(';')[0]) + <_ATGFXCPath>$(_ATGFXCPath.Replace("x64","")) + <_ATGFXCPath Condition="'$(_ATGFXCPath)' != '' and !HasTrailingSlash('$(_ATGFXCPath)')">$(_ATGFXCPath)\ + + + + <_ATGFXCPath /> + + + + + <_ATGShaderHeaders Include="$(ProjectDir)src/Shaders/Compiled/*.inc" Exclude="$(ProjectDir)src/Shaders/Compiled/*Xbox*.inc" /> + <_ATGShaderSymbols Include="$(ProjectDir)src/Shaders/Compiled/*.pdb" Exclude="$(ProjectDir)src/Shaders/Compiled/*Xbox*.pdb" /> + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/DirectXTK_Desktop_2019.vcxproj.filters b/Sdk/External/DirectXTK/DirectXTK_Desktop_2019.vcxproj.filters new file mode 100644 index 0000000..bd8bc30 --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Desktop_2019.vcxproj.filters @@ -0,0 +1,315 @@ + + + + + {771f5f80-d173-49c3-8afb-790e8f7cb0ce} + + + {c52e19b6-8703-49a1-9b36-101a05b4745d} + + + {e07ba3bc-b69f-4ded-9ffa-3b47427e9fef} + + + {28e934ed-a6d7-4c30-b064-c72c50c99626} + + + {28d5fa16-99e2-471c-8cd8-2020e81f0024} + + + {34676173-31f5-4435-b90d-a0ad4f7d163c} + + + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Src + + + Inc + + + Src + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Inc + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Inc + + + Inc + + + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + + + 
Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shared + + + Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders\Shared + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win10.sln b/Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win10.sln new file mode 100644 index 0000000..7ee5aab --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win10.sln @@ -0,0 +1,84 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29609.76 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DirectXTK_Desktop_2019_Win10", "DirectXTK_Desktop_2019_Win10.vcxproj", "{E0B52AE7-E160-4D32-BF3F-910B785E5A8E}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MakeSpriteFont", "MakeSpriteFont\MakeSpriteFont.csproj", "{7329B02D-C504-482A-A156-181D48CE493C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "XWBTool_Desktop_2019", "XWBTool\XWBTool_Desktop_2019.vcxproj", "{C7AB4186-54B2-4244-A533-77494763EA1D}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{BD5A62C9-FE7B-4491-82C2-BD46EA64D1C8}" + ProjectSection(SolutionItems) = preProject + .editorconfig = .editorconfig + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|ARM64 = Debug|ARM64 + Debug|Mixed Platforms = Debug|Mixed Platforms + Debug|x86 = Debug|x86 + Debug|x64 = Debug|x64 + Release|ARM64 = Release|ARM64 + Release|Mixed Platforms = Release|Mixed Platforms + Release|x86 = Release|x86 + Release|x64 = 
Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|ARM64.Build.0 = Debug|ARM64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|Mixed Platforms.Build.0 = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x86.ActiveCfg = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x86.Build.0 = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x64.ActiveCfg = Debug|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x64.Build.0 = Debug|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|ARM64.ActiveCfg = Release|ARM64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|ARM64.Build.0 = Release|ARM64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|Mixed Platforms.ActiveCfg = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|Mixed Platforms.Build.0 = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x86.ActiveCfg = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x86.Build.0 = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x64.ActiveCfg = Release|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x64.Build.0 = Release|x64 + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|ARM64.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|ARM64.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x86.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x86.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x64.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x64.Build.0 = Debug|Any CPU + 
{7329B02D-C504-482A-A156-181D48CE493C}.Release|ARM64.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|ARM64.Build.0 = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x86.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x86.Build.0 = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x64.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x64.Build.0 = Release|Any CPU + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|ARM64.Build.0 = Debug|ARM64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|Mixed Platforms.Build.0 = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x86.ActiveCfg = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x86.Build.0 = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x64.ActiveCfg = Debug|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x64.Build.0 = Debug|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|ARM64.ActiveCfg = Release|ARM64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|ARM64.Build.0 = Release|ARM64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|Mixed Platforms.ActiveCfg = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|Mixed Platforms.Build.0 = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x86.ActiveCfg = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x86.Build.0 = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x64.ActiveCfg = Release|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution 
+ HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {0F7A0290-0AF9-47AB-A91D-1346B9433995} + EndGlobalSection +EndGlobal diff --git a/Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win10.vcxproj b/Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win10.vcxproj new file mode 100644 index 0000000..c251fc2 --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win10.vcxproj @@ -0,0 +1,440 @@ + + + + + Debug + ARM64 + + + Debug + Win32 + + + Debug + x64 + + + Release + ARM64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Create + Create + Create + Create + Create + Create + + + + + + + + + + + + + + + + + + + + + + + + + + Document + + + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + + + Document + + + + + Document + + + Document + + + + + Document + + + + + Document + + + + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E} + Win32Proj + DirectXTK + 10.0 + x64 + + + + StaticLibrary + true + v142 + Unicode + + + StaticLibrary + true + v142 + Unicode + + + StaticLibrary + true + v142 + Unicode + + + StaticLibrary + false + v142 + Unicode + + + StaticLibrary + false + v142 + Unicode + + + StaticLibrary + false + v142 + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + Bin\Desktop_2019_Win10\$(Platform)\$(Configuration)\ + Bin\Desktop_2019_Win10\$(Platform)\$(Configuration)\ + DirectXTK + + + Bin\Desktop_2019_Win10\$(Platform)\$(Configuration)\ + Bin\Desktop_2019_Win10\$(Platform)\$(Configuration)\ + DirectXTK + + + Bin\Desktop_2019_Win10\$(Platform)\$(Configuration)\ + Bin\Desktop_2019_Win10\$(Platform)\$(Configuration)\ + DirectXTK + + + Bin\Desktop_2019_Win10\$(Platform)\$(Configuration)\ + Bin\Desktop_2019_Win10\$(Platform)\$(Configuration)\ + DirectXTK + + + 
Bin\Desktop_2019_Win10\$(Platform)\$(Configuration)\ + Bin\Desktop_2019_Win10\$(Platform)\$(Configuration)\ + DirectXTK + + + Bin\Desktop_2019_Win10\$(Platform)\$(Configuration)\ + Bin\Desktop_2019_Win10\$(Platform)\$(Configuration)\ + DirectXTK + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0A00;_WIN7_PLATFORM_UPDATE;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus %(AdditionalOptions) + false + 4711;5045;26812 + + + Windows + true + + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0A00;_WIN7_PLATFORM_UPDATE;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus %(AdditionalOptions) + false + 4711;5045;26812 + + + Windows + true + + + + + Use + EnableAllWarnings + Disabled + _WIN32_WINNT=0x0A00;_WIN7_PLATFORM_UPDATE;WIN32;_DEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + StreamingSIMDExtensions2 + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus %(AdditionalOptions) + false + 4711;5045;26812 + + + Windows + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0A00;_WIN7_PLATFORM_UPDATE;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus %(AdditionalOptions) + 4711;5045;26812 + + + Windows + true + true + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0A00;_WIN7_PLATFORM_UPDATE;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + 
$(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus %(AdditionalOptions) + 4711;5045;26812 + + + Windows + true + true + true + + + + + EnableAllWarnings + Use + MaxSpeed + _WIN32_WINNT=0x0A00;_WIN7_PLATFORM_UPDATE;WIN32;NDEBUG;_LIB;_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + pch.h + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + StreamingSIMDExtensions2 + $(IntDir)$(TargetName).pdb + true + true + /Zc:__cplusplus %(AdditionalOptions) + 4711;5045;26812 + + + Windows + true + true + true + + + + + + + + <_ATGFXCPath>$(WindowsSDK_ExecutablePath_x64.Split(';')[0]) + <_ATGFXCPath>$(_ATGFXCPath.Replace("x64","")) + <_ATGFXCPath Condition="'$(_ATGFXCPath)' != '' and !HasTrailingSlash('$(_ATGFXCPath)')">$(_ATGFXCPath)\ + + + + <_ATGFXCPath /> + + + + + <_ATGShaderHeaders Include="$(ProjectDir)src/Shaders/Compiled/*.inc" Exclude="$(ProjectDir)src/Shaders/Compiled/*Xbox*.inc" /> + <_ATGShaderSymbols Include="$(ProjectDir)src/Shaders/Compiled/*.pdb" Exclude="$(ProjectDir)src/Shaders/Compiled/*Xbox*.pdb" /> + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win10.vcxproj.filters b/Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win10.vcxproj.filters new file mode 100644 index 0000000..bcd441b --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win10.vcxproj.filters @@ -0,0 +1,357 @@ + + + + + {771f5f80-d173-49c3-8afb-790e8f7cb0ce} + + + {c52e19b6-8703-49a1-9b36-101a05b4745d} + + + {e07ba3bc-b69f-4ded-9ffa-3b47427e9fef} + + + {28e934ed-a6d7-4c30-b064-c72c50c99626} + + + {28d5fa16-99e2-471c-8cd8-2020e81f0024} + + + {34676173-31f5-4435-b90d-a0ad4f7d163c} + + + {5d47b946-09b9-4477-934f-ca45fba9845d} + + + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Src + + + Src + + + Inc + + + Src + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Audio + + + Audio + + + 
Audio + + + Audio + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Inc + + + Inc + + + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Src + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Audio + + + Src + + + Src + + + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shared + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders\Shared + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win7.sln b/Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win7.sln new file mode 100644 index 0000000..1759867 --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Desktop_2019_Win7.sln @@ -0,0 +1,73 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 15.0.28307.1000 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DirectXTK_Desktop_2019", "DirectXTK_Desktop_2019.vcxproj", "{E0B52AE7-E160-4D32-BF3F-910B785E5A8E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = 
"DirectXTKAudio_Desktop_2019_Win7", "Audio\DirectXTKAudio_Desktop_2019_Win7.vcxproj", "{4F150A30-CECB-49D1-8283-6A3F57438CF5}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MakeSpriteFont", "MakeSpriteFont\MakeSpriteFont.csproj", "{7329B02D-C504-482A-A156-181D48CE493C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "xwbtool_Desktop_2019", "XWBTool\xwbtool_Desktop_2019.vcxproj", "{C7AB4186-54B2-4244-A533-77494763EA1D}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|Any CPU.ActiveCfg = Debug|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x64.ActiveCfg = Debug|x64 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x64.Build.0 = Debug|x64 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x86.ActiveCfg = Debug|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Debug|x86.Build.0 = Debug|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|Any CPU.ActiveCfg = Release|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x64.ActiveCfg = Release|x64 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x64.Build.0 = Release|x64 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x86.ActiveCfg = Release|Win32 + {4F150A30-CECB-49D1-8283-6A3F57438CF5}.Release|x86.Build.0 = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|Any CPU.ActiveCfg = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x64.ActiveCfg = Debug|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x64.Build.0 = Debug|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x86.ActiveCfg = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Debug|x86.Build.0 = Debug|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|Any CPU.ActiveCfg = 
Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x64.ActiveCfg = Release|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x64.Build.0 = Release|x64 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x86.ActiveCfg = Release|Win32 + {E0B52AE7-E160-4D32-BF3F-910B785E5A8E}.Release|x86.Build.0 = Release|Win32 + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x64.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x64.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x86.ActiveCfg = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Debug|x86.Build.0 = Debug|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|Any CPU.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|Any CPU.Build.0 = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x64.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x64.Build.0 = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x86.ActiveCfg = Release|Any CPU + {7329B02D-C504-482A-A156-181D48CE493C}.Release|x86.Build.0 = Release|Any CPU + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|Any CPU.ActiveCfg = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x64.ActiveCfg = Debug|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x64.Build.0 = Debug|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x86.ActiveCfg = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Debug|x86.Build.0 = Debug|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|Any CPU.ActiveCfg = Release|Win32 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x64.ActiveCfg = Release|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x64.Build.0 = Release|x64 + {C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x86.ActiveCfg = Release|Win32 + 
{C7AB4186-54B2-4244-A533-77494763EA1D}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {3B774DEE-1D2A-4EEF-B8FA-75BDC1DA64DE} + EndGlobalSection +EndGlobal diff --git a/Sdk/External/DirectXTK/DirectXTK_Windows10_2017.sln b/Sdk/External/DirectXTK/DirectXTK_Windows10_2017.sln new file mode 100644 index 0000000..f634eea --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Windows10_2017.sln @@ -0,0 +1,48 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.27703.2000 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DirectXTK", "DirectXTK_Windows10_2017.vcxproj", "{F4776924-619C-42C7-88B2-82C947CCC9E7}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{88140C29-E1F8-41E0-9126-6912B6A713BA}" + ProjectSection(SolutionItems) = preProject + .editorconfig = .editorconfig + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|ARM = Debug|ARM + Debug|ARM64 = Debug|ARM64 + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|ARM = Release|ARM + Release|ARM64 = Release|ARM64 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|ARM.ActiveCfg = Debug|ARM + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|ARM.Build.0 = Debug|ARM + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|ARM64.Build.0 = Debug|ARM64 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|x64.ActiveCfg = Debug|x64 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|x64.Build.0 = Debug|x64 + 
{F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|x86.ActiveCfg = Debug|Win32 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|x86.Build.0 = Debug|Win32 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|ARM.ActiveCfg = Release|ARM + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|ARM.Build.0 = Release|ARM + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|ARM64.ActiveCfg = Release|ARM64 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|ARM64.Build.0 = Release|ARM64 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|x64.ActiveCfg = Release|x64 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|x64.Build.0 = Release|x64 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|x86.ActiveCfg = Release|Win32 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {FFD89A12-8620-47B9-A00E-6FD1824BFAC5} + EndGlobalSection +EndGlobal diff --git a/Sdk/External/DirectXTK/DirectXTK_Windows10_2017.vcxproj b/Sdk/External/DirectXTK/DirectXTK_Windows10_2017.vcxproj new file mode 100644 index 0000000..dd9ff2f --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Windows10_2017.vcxproj @@ -0,0 +1,527 @@ + + + + + Debug + ARM + + + Debug + ARM64 + + + Debug + Win32 + + + Debug + x64 + + + Release + ARM + + + Release + ARM64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Create + Create + Create + Create + Create + Create + Create + Create + + + + + + + + + + + + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + + + Document + + + + + Document + + + + + Document + + + + + Document + + + + + Document + + + + 
{f4776924-619c-42c7-88b2-82c947ccc9e7} + StaticLibrary + DirectXTK + DirectXTK + en-US + 14.0 + true + Windows Store + 10.0.17763.0 + 10.0.14393.0 + 10.0 + x64 + + + + StaticLibrary + true + v141 + + + StaticLibrary + true + v141 + + + StaticLibrary + true + v141 + + + StaticLibrary + true + v141 + + + StaticLibrary + false + v141 + + + StaticLibrary + false + v141 + + + StaticLibrary + false + v141 + + + StaticLibrary + false + v141 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + DirectXTK + false + + + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + DirectXTK + false + + + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + DirectXTK + false + + + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + DirectXTK + false + + + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + DirectXTK + false + + + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + DirectXTK + false + + + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + DirectXTK + false + + + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + Bin\Windows10_2017\$(Platform)\$(Configuration)\ + DirectXTK + false + + + + Use + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + StreamingSIMDExtensions2 + $(IntDir)$(TargetName).pdb + EnableAllWarnings + _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + false + + + Console + false + false + + + + + Use + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + StreamingSIMDExtensions2 + $(IntDir)$(TargetName).pdb 
+ EnableAllWarnings + _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + + + Console + false + false + + + + + Use + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + EnableAllWarnings + _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + false + + + Console + false + false + + + + + Use + + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + EnableAllWarnings + _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + false + + + Console + false + false + + + + + Use + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + EnableAllWarnings + _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + + + Console + false + false + + + + + Use + + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + EnableAllWarnings + _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + + + Console + false + false + + + + + Use + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + EnableAllWarnings + _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + false + + + Console + false + false + + + + + Use + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + EnableAllWarnings + _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + + + Console + false + false + + + + + + + + 
<_ATGFXCPath>$(WindowsSDK_ExecutablePath_x64.Split(';')[0]) + <_ATGFXCPath>$(_ATGFXCPath.Replace("x64","")) + <_ATGFXCPath Condition="'$(_ATGFXCPath)' != '' and !HasTrailingSlash('$(_ATGFXCPath)')">$(_ATGFXCPath)\ + + + + <_ATGFXCPath /> + + + + + <_ATGShaderHeaders Include="$(ProjectDir)src/Shaders/Compiled/*.inc" Exclude="$(ProjectDir)src/Shaders/Compiled/*Xbox*.inc" /> + <_ATGShaderSymbols Include="$(ProjectDir)src/Shaders/Compiled/*.pdb" Exclude="$(ProjectDir)src/Shaders/Compiled/*Xbox*.pdb" /> + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/DirectXTK_Windows10_2017.vcxproj.filters b/Sdk/External/DirectXTK/DirectXTK_Windows10_2017.vcxproj.filters new file mode 100644 index 0000000..83f64fb --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Windows10_2017.vcxproj.filters @@ -0,0 +1,357 @@ + + + + + {a77af43b-f2ab-4dcc-b84e-70909b198d8a} + + + {4a81ebd8-dd1a-46fb-ad14-8b57d8e92774} + + + {a872f54e-e97f-4e14-a946-da034ce61f99} + + + {e536bb5b-5908-4d5a-b629-6a73cf2fc9ca} + + + {68d47991-3b63-4a17-a705-680374a426f2} + + + {b5728d91-918a-4481-8e6f-8e793da9ee2c} + + + {020af8ad-d3a0-41bc-bc05-b1d0a5d2a85f} + + + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Src + + + Src + + + Audio + + + Audio + + + Audio + + + Audio + + + Inc + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Inc + + + Inc + + + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Inc\Shared + + + Src\Shared + + + Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + 
Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders\Shared + + + + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Src + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Audio + + + Src + + + Src + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/DirectXTK_Windows10_2019.sln b/Sdk/External/DirectXTK/DirectXTK_Windows10_2019.sln new file mode 100644 index 0000000..41fb205 --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Windows10_2019.sln @@ -0,0 +1,48 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 15.0.27703.2000 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DirectXTK", "DirectXTK_Windows10_2019.vcxproj", "{F4776924-619C-42C7-88B2-82C947CCC9E7}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{88140C29-E1F8-41E0-9126-6912B6A713BA}" + ProjectSection(SolutionItems) = preProject + .editorconfig = .editorconfig + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|ARM = Debug|ARM + Debug|ARM64 = Debug|ARM64 + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|ARM = Release|ARM + Release|ARM64 = Release|ARM64 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|ARM.ActiveCfg = Debug|ARM + 
{F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|ARM.Build.0 = Debug|ARM + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|ARM64.Build.0 = Debug|ARM64 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|x64.ActiveCfg = Debug|x64 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|x64.Build.0 = Debug|x64 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|x86.ActiveCfg = Debug|Win32 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Debug|x86.Build.0 = Debug|Win32 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|ARM.ActiveCfg = Release|ARM + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|ARM.Build.0 = Release|ARM + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|ARM64.ActiveCfg = Release|ARM64 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|ARM64.Build.0 = Release|ARM64 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|x64.ActiveCfg = Release|x64 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|x64.Build.0 = Release|x64 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|x86.ActiveCfg = Release|Win32 + {F4776924-619C-42C7-88B2-82C947CCC9E7}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {FFD89A12-8620-47B9-A00E-6FD1824BFAC5} + EndGlobalSection +EndGlobal diff --git a/Sdk/External/DirectXTK/DirectXTK_Windows10_2019.vcxproj b/Sdk/External/DirectXTK/DirectXTK_Windows10_2019.vcxproj new file mode 100644 index 0000000..21868ca --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Windows10_2019.vcxproj @@ -0,0 +1,535 @@ + + + + + Debug + ARM + + + Debug + ARM64 + + + Debug + Win32 + + + Debug + x64 + + + Release + ARM + + + Release + ARM64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ Create + Create + Create + Create + Create + Create + Create + Create + + + + + + + + + + + + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + + + Document + + + + + Document + + + + + Document + + + + + Document + + + + + Document + + + + {f4776924-619c-42c7-88b2-82c947ccc9e7} + StaticLibrary + DirectXTK + DirectXTK + en-US + 14.0 + true + Windows Store + 10.0 + 10.0.14393.0 + 10.0 + x64 + + + + StaticLibrary + true + v142 + + + StaticLibrary + true + v142 + + + StaticLibrary + true + v142 + + + StaticLibrary + true + v142 + + + StaticLibrary + false + v142 + + + StaticLibrary + false + v142 + + + StaticLibrary + false + v142 + + + StaticLibrary + false + v142 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + DirectXTK + false + + + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + DirectXTK + false + + + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + DirectXTK + false + + + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + DirectXTK + false + + + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + DirectXTK + false + + + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + DirectXTK + false + + + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + DirectXTK + false + + + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + Bin\Windows10_2019\$(Platform)\$(Configuration)\ + DirectXTK + false + + + + Use + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + StreamingSIMDExtensions2 + 
$(IntDir)$(TargetName).pdb + EnableAllWarnings + _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + false + 4711;5045;26812 + + + Console + false + false + + + + + Use + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + StreamingSIMDExtensions2 + $(IntDir)$(TargetName).pdb + EnableAllWarnings + _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + 4711;5045;26812 + + + Console + false + false + + + + + Use + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + EnableAllWarnings + _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + false + 4711;5045;26812 + + + Console + false + false + + + + + Use + + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + EnableAllWarnings + _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + false + 4711;5045;26812 + + + Console + false + false + + + + + Use + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + EnableAllWarnings + _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + 4711;5045;26812 + + + Console + false + false + + + + + Use + + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + EnableAllWarnings + _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + 4711;5045;26812 + + + Console + false + false + + + + + Use + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + EnableAllWarnings + 
_CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + false + 4711;5045;26812 + + + Console + false + false + + + + + Use + + false + true + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + Fast + $(IntDir)$(TargetName).pdb + EnableAllWarnings + _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS;%(PreprocessorDefinitions) + true + /Zc:__cplusplus %(AdditionalOptions) + 4711;5045;26812 + + + Console + false + false + + + + + + + + <_ATGFXCPath>$(WindowsSDK_ExecutablePath_x64.Split(';')[0]) + <_ATGFXCPath>$(_ATGFXCPath.Replace("x64","")) + <_ATGFXCPath Condition="'$(_ATGFXCPath)' != '' and !HasTrailingSlash('$(_ATGFXCPath)')">$(_ATGFXCPath)\ + + + + <_ATGFXCPath /> + + + + + <_ATGShaderHeaders Include="$(ProjectDir)src/Shaders/Compiled/*.inc" Exclude="$(ProjectDir)src/Shaders/Compiled/*Xbox*.inc" /> + <_ATGShaderSymbols Include="$(ProjectDir)src/Shaders/Compiled/*.pdb" Exclude="$(ProjectDir)src/Shaders/Compiled/*Xbox*.pdb" /> + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/DirectXTK_Windows10_2019.vcxproj.filters b/Sdk/External/DirectXTK/DirectXTK_Windows10_2019.vcxproj.filters new file mode 100644 index 0000000..83f64fb --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_Windows10_2019.vcxproj.filters @@ -0,0 +1,357 @@ + + + + + {a77af43b-f2ab-4dcc-b84e-70909b198d8a} + + + {4a81ebd8-dd1a-46fb-ad14-8b57d8e92774} + + + {a872f54e-e97f-4e14-a946-da034ce61f99} + + + {e536bb5b-5908-4d5a-b629-6a73cf2fc9ca} + + + {68d47991-3b63-4a17-a705-680374a426f2} + + + {b5728d91-918a-4481-8e6f-8e793da9ee2c} + + + {020af8ad-d3a0-41bc-bc05-b1d0a5d2a85f} + + + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Src + + + Src + + + Audio + + + Audio + + + Audio + + + Audio + + + Inc + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + 
Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Inc + + + Inc + + + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Inc\Shared + + + Src\Shared + + + Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders\Shared + + + + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Src + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Audio + + + Src + + + Src + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/DirectXTK_XboxOneXDK_2017.sln b/Sdk/External/DirectXTK/DirectXTK_XboxOneXDK_2017.sln new file mode 100644 index 0000000..0af0a72 --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_XboxOneXDK_2017.sln @@ -0,0 +1,33 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.27703.2000 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DirectXTK_XboxOneXDK_2017", "DirectXTK_XboxOneXDK_2017.vcxproj", "{4FDFDCCD-EE2E-415C-B458-1E8FDB0D85F2}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{991F09BD-CC92-491D-B166-2DBEAF4C0420}" + ProjectSection(SolutionItems) = preProject + .editorconfig = .editorconfig + 
EndProjectSection +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Durango = Debug|Durango + Profile|Durango = Profile|Durango + Release|Durango = Release|Durango + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {4FDFDCCD-EE2E-415C-B458-1E8FDB0D85F2}.Debug|Durango.ActiveCfg = Debug|Durango + {4FDFDCCD-EE2E-415C-B458-1E8FDB0D85F2}.Debug|Durango.Build.0 = Debug|Durango + {4FDFDCCD-EE2E-415C-B458-1E8FDB0D85F2}.Profile|Durango.ActiveCfg = Profile|Durango + {4FDFDCCD-EE2E-415C-B458-1E8FDB0D85F2}.Profile|Durango.Build.0 = Profile|Durango + {4FDFDCCD-EE2E-415C-B458-1E8FDB0D85F2}.Release|Durango.ActiveCfg = Release|Durango + {4FDFDCCD-EE2E-415C-B458-1E8FDB0D85F2}.Release|Durango.Build.0 = Release|Durango + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {406725DF-706A-46B2-BCAA-47445FD39BC6} + EndGlobalSection +EndGlobal diff --git a/Sdk/External/DirectXTK/DirectXTK_XboxOneXDK_2017.vcxproj b/Sdk/External/DirectXTK/DirectXTK_XboxOneXDK_2017.vcxproj new file mode 100644 index 0000000..0335244 --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_XboxOneXDK_2017.vcxproj @@ -0,0 +1,351 @@ + + + + + Release + Durango + + + Profile + Durango + + + Debug + Durango + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Create + Create + Create + + + + + + + + + + + + + + + + + + + + + + + + + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + + + Document + + + + + Document + + + Document + + + + + Document + + + + + Document + + + + DirectXTK + {4fdfdccd-ee2e-415c-b458-1e8fdb0d85f2} + en-US + Win32Proj + title + + v141 + 14.0 + Native + + + + StaticLibrary + v141 + 
false + Unicode + false + false + + + StaticLibrary + v141 + false + Unicode + false + false + + + StaticLibrary + v141 + true + Unicode + false + false + + + + + + + + + + + + + + + + $(Console_SdkLibPath);$(Console_SdkWindowsMetadataPath) + $(Console_SdkLibPath) + $(Console_SdkLibPath);$(Console_SdkWindowsMetadataPath) + $(Console_SdkIncludeRoot) + $(Console_SdkRoot)bin;$(VCInstallDir)bin\x86_amd64;$(VCInstallDir)bin;$(WindowsSDK_ExecutablePath_x86);$(VSInstallDir)Common7\Tools\bin;$(VSInstallDir)Common7\tools;$(VSInstallDir)Common7\ide;$(ProgramFiles)\HTML Help Workshop;$(MSBuildToolsPath32);$(FxCopDir);$(PATH); + Bin\XboxOneXDK_2017\$(Platform)\$(Configuration)\ + Bin\XboxOneXDK_2017\$(Platform)\$(Configuration)\ + DirectXTK + + + $(Console_SdkLibPath);$(Console_SdkWindowsMetadataPath) + $(Console_SdkLibPath) + $(Console_SdkLibPath);$(Console_SdkWindowsMetadataPath) + $(Console_SdkIncludeRoot) + $(Console_SdkRoot)bin;$(VCInstallDir)bin\x86_amd64;$(VCInstallDir)bin;$(WindowsSDK_ExecutablePath_x86);$(VSInstallDir)Common7\Tools\bin;$(VSInstallDir)Common7\tools;$(VSInstallDir)Common7\ide;$(ProgramFiles)\HTML Help Workshop;$(MSBuildToolsPath32);$(FxCopDir);$(PATH); + Bin\XboxOneXDK_2017\$(Platform)\$(Configuration)\ + Bin\XboxOneXDK_2017\$(Platform)\$(Configuration)\ + DirectXTK + + + $(Console_SdkLibPath);$(Console_SdkWindowsMetadataPath) + $(Console_SdkLibPath) + $(Console_SdkLibPath);$(Console_SdkWindowsMetadataPath) + $(Console_SdkIncludeRoot) + $(Console_SdkRoot)bin;$(VCInstallDir)bin\x86_amd64;$(VCInstallDir)bin;$(WindowsSDK_ExecutablePath_x86);$(VSInstallDir)Common7\Tools\bin;$(VSInstallDir)Common7\tools;$(VSInstallDir)Common7\ide;$(ProgramFiles)\HTML Help Workshop;$(MSBuildToolsPath32);$(FxCopDir);$(PATH); + Bin\XboxOneXDK_2017\$(Platform)\$(Configuration)\ + Bin\XboxOneXDK_2017\$(Platform)\$(Configuration)\ + DirectXTK + + + + d3d11_x.lib;combase.lib;kernelx.lib;toolhelpx.lib;uuid.lib; + + + true + Windows + true + true + false + + + Use + pch.h + + + 
MaxSpeed + true + NDEBUG;__WRL_NO_DEFAULT_LIB__;_LIB;%(PreprocessorDefinitions) + EnableAllWarnings + true + true + false + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + $(IntDir)$(TargetName).pdb + /Zc:twoPhase- %(AdditionalOptions) + + + + + pixEvt.lib;d3d11_x.lib;combase.lib;kernelx.lib;toolhelpx.lib;uuid.lib; + + + true + Windows + true + true + false + + + Use + pch.h + + + MaxSpeed + true + NDEBUG;__WRL_NO_DEFAULT_LIB__;_LIB;PROFILE;%(PreprocessorDefinitions) + EnableAllWarnings + true + true + false + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + $(IntDir)$(TargetName).pdb + /Zc:twoPhase- %(AdditionalOptions) + + + + + d3d11_x.lib;combase.lib;kernelx.lib;toolhelpx.lib;uuid.lib; + Windows + true + false + + + pch.h + Use + false + + + EnableAllWarnings + Disabled + true + _DEBUG;__WRL_NO_DEFAULT_LIB__;_LIB;%(PreprocessorDefinitions) + false + $(ProjectDir)Inc;$(ProjectDir)Src;%(AdditionalIncludeDirectories) + $(IntDir)$(TargetName).pdb + /Zc:twoPhase- %(AdditionalOptions) + false + + + + + + + + + + + <_ATGShaderHeaders Include="$(ProjectDir)src/Shaders/Compiled/XboxOne*.inc" /> + <_ATGShaderSymbols Include="$(ProjectDir)src/Shaders/Compiled/XboxOne*.pdb" /> + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/DirectXTK_XboxOneXDK_2017.vcxproj.filters b/Sdk/External/DirectXTK/DirectXTK_XboxOneXDK_2017.vcxproj.filters new file mode 100644 index 0000000..7cdb7dc --- /dev/null +++ b/Sdk/External/DirectXTK/DirectXTK_XboxOneXDK_2017.vcxproj.filters @@ -0,0 +1,365 @@ + + + + + {37d8d298-8ff7-4b16-9de2-1e8e5b943795} + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {1d7c612f-bf24-4368-a247-46d28920c4a7} + + + {9fa25651-7bb5-42a1-b589-06be42c55e5b} + + + {bb66477f-d2e3-4eed-9c13-e86060685825} + + + {a6ee428c-85f6-4fea-979a-8c22779cd9e7} + + + + + Audio + + + Audio + + + Audio + + + Audio 
+ + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Inc + + + Src + + + Src + + + Inc + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Inc\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Inc + + + Inc + + + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Audio + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src\Shared + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Src + + + Audio + + + Src + + + Src + + + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shared + + + Inc\Shared + + + Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders\Shared + + + Src\Shaders + + + Src\Shaders\Shared + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/HISTORY.md b/Sdk/External/DirectXTK/HISTORY.md new file mode 100644 index 0000000..5bb294c --- /dev/null +++ b/Sdk/External/DirectXTK/HISTORY.md @@ -0,0 +1,456 @@ +# DirectX Tool Kit for DirectX 11 + +http://go.microsoft.com/fwlink/?LinkId=248929 + +Release available for download on [GitHub](https://github.com/microsoft/DirectXTK/releases) + +## Release History + +### September 30, 2020 +* GamePad class updated with ``c_MostRecent`` constant for ``-1`` 
player index special behavior +* Fixed bug in WICTextureLoader that resulted in ``WINCODEC_ERR_INSUFFICIENTBUFFER`` for some resize requests +* Fixed ``.wav`` file reading of MIDILoop chunk +* Minor code cleanup + +### August 15, 2020 +* EnvironmentMapEffect now supports cubemaps, spherical, and dual-parabola environment maps +* Code review and project updates + +### July 2, 2020 +* Improved SpriteFont drawing performance in Debug builds +* Regenerated shaders using Windows 10 May 2020 Update SDK (19041) +* Code cleanup for some new VC++ 16.7 warnings and static code analysis +* CMake updates + +### June 1, 2020 +* Added BufferHelpers header with functions **CreateStaticBuffer** / **CreateTextureFromMemory**, and the **ConstantBuffer** helper class +* Added **IsPowerOf2** and **CreateInputLayoutFromEffect** helpers to DirectXHelpers +* Converted to typed enum bitmask flags (see release notes for details on this potential *breaking change*) + + ``AUDIO_ENGINE_FLAGS``, ``ModelLoaderFlags``, ``SOUND_EFFECT_INSTANCE_FLAGS``, and ``WIC_LOADER_FLAGS`` +* WICTextureLoader for ``PNG`` codec now checks ``gAMA`` chunk to determine colorspace if the ``sRGB`` chunk is not found for legacy sRGB detection. +* ``WIC_LOADER_SRGB_DEFAULT`` flag added when loading image via WIC without explicit colorspace metadata +* Retired XAudio 2.7 for *DirectX Tool Kit for Audio*. Use XAudio 2.9, XAudio 2.8, or XAudio2Redist instead. 
+* CMake project updates + +### May 10, 2020 +* WICTextureLoader updated with new loader flags: ``FORCE_RGBA32``, ``FIT_POW2``, and ``MAKE_SQUARE`` +* SimpleMath no longer forces use of d3d11.h or d3d12.h (can be used with d3d9.h for example) +* *DirectX Tool Kit for Audio* updated with **SoundStreamInstance** class for async I/O playback from XACT-style streaming wavebanks +* Code cleanup +* xwbtool: Updated with ``-l`` switch for case-sensitive file systems + +### April 3, 2020 +* SpriteFont **MeasureString** / **MeasureDrawBounds** fixes for !ignoreWhitespace +* Code review (``constexpr`` / ``noexcept`` usage) +* CMake updated for PCH usage with 3.16 or later + +### February 24, 2020 +* *breaking change* **Model::CreateFromxxx** changed to use ModelLoaderFlags instead of default bool parameters +* DirectX Tool Kit for Audio updated to support XAudio2Redist NuGet +* Added ``ignoreWhitespace`` defaulted parameter to SpriteFont Measure methods +* Fixed encoding issue with Utilities.fxh +* Code and project cleanup +* Retired VS 2015 projects +* xwbtool: Changed ``-n`` switch to a more safe ``-y`` switch + +### December 17, 2019 +* Added ARM64 platform to VS 2019 Win32 desktop Win10 project +* Added Vector ``operator/`` by float scalar to SimpleMath +* Updated CMake project +* Code cleanup + +### October 17, 2019 +* Added optional ``forceSRGB`` parameter to **SaveWICTextureToFile** +* GamePad updated to report VID/PID (when supported) +* Minor code cleanup + +### August 21, 2019 +* Added xwbtool to CMake project +* Minor code cleanup + +### June 30, 2019 +* Additional validation for Ex texture loaders +* Clang/LLVM warning cleanup +* Renamed ``DirectXTK_Windows10.vcxproj`` to ``_Windows10_2017.vcxproj`` +* Added VS 2019 UWP project + +### May 30, 2019 +* PBREffect updated with additional set methods +* Added CMake project files +* Code cleanup + +### April 26, 2019 +* Added VS 2019 desktop projects +* Fixed guards w.r.t.
to windows.h usage in Keyboard/Mouse headers +* Added C++/WinRT **SetWindow** helper to Keyboard/Mouse +* Code cleanup for texture loaders +* Officially dropped Windows Vista support + +### February 7, 2019 +* Model now supports loading _SDKMESH v2_ models +* **PBREffectFactory** added to support PBR materials +* PBREffect and NormalMapEffect shaders updated to support ``BC5_UNORM`` compressed normal maps +* SpriteFont: **DrawString** overloads for UTF-8 chars in addition to UTF-16LE wide chars + +### November 16, 2018 +* VS 2017 updated for Windows 10 October 2018 Update SDK (17763) +* ARM64 platform configurations added to UWP projects +* Minor code review + +### October 31, 2018 +* Model loader for SDKMESH now attempts to use legacy DE3CN compressed normals + + This is an approximation only and emits a warning in debug builds + +### October 25, 2018 +* Use UTF-8 instead of ANSI for narrow strings +* Minor code review + +### August 17, 2018 +* Improved validation for 16k textures and other large resources +* Improved debug output for failed texture loads and screengrabs +* Updated for VS 2017 15.8 +* Code cleanup + +### July 3, 2018 +* ModelMeshPart **DrawInstanced** method added +* Code and project cleanup + +### May 31, 2018 +* VS 2017 updated for Windows 10 April 2018 Update SDK (17134) +* Regenerated shaders using Windows 10 April 2018 Update SDK (17134) + +### May 14, 2018 +* Updated for VS 2017 15.7 update warnings +* Code and project cleanup +* Retired VS 2013 projects + +### April 23, 2018 +* ``AlignUp``, ``AlignDown`` template functions in DirectXHelpers.h +* Mouse support for cursor visibility +* SimpleMath and VertexTypes updated with default copy and move ctors +* SimpleMath updates to use ``constexpr`` +* EffectFactory updated with **GetDevice** method +* PostProcess updated with 'big triangle' optimization +* Fix for ``CMO`` handling of skinning vertex data +* Code and project file cleanup +* xwbtool: Fixed Windows 7 compatibility issue + +### 
February 7, 2018 +* Mouse fix for cursor behavior when using Remote Desktop for Win32 +* Updated for a few more VS 2017 warnings +* Code cleanup + +### December 13, 2017 +* **PBREffect** and **DebugEffect** added +* **NormalMapEffect** no longer requires or uses explicit vertex tangents +* *breaking change* NormalMapEffect::SetBiasedVertexNormalsAndTangents renamed to **SetBiasedVertexNormals** +* PBREffect, DebugEffect, & NormalMapEffect all require Direct3D hardware feature level 10.0 or better +* **VertexType** typedef added to GeometricPrimitive as alias for VertexPositionNormalTexture +* Updated for VS 2017 15.5 update warnings +* Code cleanup + +### November 1, 2017 +* VS 2017 updated for Windows 10 Fall Creators Update SDK (16299) +* Regenerated shaders using Windows 10 Fall Creators Update SDK (16299) + +### September 22, 2017 +* Updated for VS 2017 15.3 update ``/permissive-`` changes +* **ScreenGrab** updated to use non-sRGB metadata for PNG +* Mouse use of ``WM_INPUT`` updated for Remote Desktop scenarios +* Fix for ``CMO`` load issue when no materials are defined +* xwbtool: added ``-flist`` option + +### July 28, 2017 +* Fix for WIC writer when codec target format requires a palette +* Code cleanup + +### June 21, 2017 +* Post-processing support with the **BasicPostProcess**, **DualPostProcess**, and **ToneMapPostProcess** classes +* SDKMESH loader fix when loading legacy files with all zero materials +* DirectXTK for Audio: Minor fixes for environmental audio +* Minor code cleanup + +### April 24, 2017 +* VS 2017 project updates +* Regenerated shaders using Windows 10 Creators Update SDK (15063) +* Fixed **NormalMapEffect** shader selection for specular texture usage +* Fixed **AudioEngine** enumeration when using Single Threaded Apartment (STA) +* Fixed bug with **GamePad** (Windows.Gaming.Input) when no user bound + +### April 7, 2017 +* VS 2017 updated for Windows Creators Update SDK (15063) +* XboxDDSTextureLoader updates + +### February 10, 2017 
+* **GamePad** now supports special value of ``-1`` for 'most recently connected controller' +* WIC format 40bppCMYKAlpha should be converted to RGBA8 rather than RGBA16 +* DDS support for L8A8 with bitcount 8 rather than 16 +* Minor code cleanup + +### December 5, 2016 +* Mouse and Keyboard classes updated with **IsConnected** method +* Windows10 project ``/ZW`` switch removed to support use in C++/WinRT projection apps +* VS 2017 RC projects added +* Minor code cleanup + +### October 6, 2016 +* SDKMESH loader and BasicEffects support for compressed vertex normals with biasing +* WICTextureLoader Ex bool forceSRGB parameter is now a **WIC_LOADER_FLAGS** flag +* Minor code cleanup + +### September 15, 2016 +* Minor code cleanup +* xwbtool: added wildcard support for input filename and optional ``-r`` switch for recursive search + +### September 1, 2016 +* Added ``forceSRGB`` optional parameter to SpriteFont ctor +* EffectFactory method **EnableForceSRGB** added +* DGSLEffect now defaults to diffuse/alpha of 1 +* Removed problematic ABI::Windows::Foundation::Rect interop for SimpleMath +* Minor code cleanup + +### August 4, 2016 +* Regenerated shaders using Windows 10 Anniversary Update SDK (14393) + +### August 2, 2016 +* Updated for VS 2015 Update 3 and Windows 10 SDK (14393) + +### August 1, 2016 +* GamePad capabilities information updated for Universal Windows and Xbox One platforms +* Specular falloff lighting computation fix in shaders + +### July 18, 2016 +* **NormalMapEffect** for normal-map with optional specular map rendering +* **EnvironmentMapEffect** now supports per-pixel lighting +* Effects updated with **SetMatrices** and **SetColorAndAlpha** methods +* SimpleMath: improved interop with DirectXMath constants +* Minor code cleanup + +### June 30, 2016 +* **MeasureDrawString** added to SpriteFont; bad fix to MeasureString reverted +* GamePad tracker updated to track emulated buttons (i.e. 
leftStickUp) +* EffectFactory **SetDirectory** now checks current working directory (CWD) as well +* *breaking change* must include ``<d3d11.h>`` before including ``<DirectXHelpers.h>`` +* Code refactor for sharing some files with DirectX 12 version +* Minor code cleanup + +### May 31, 2016 +* Added **VertexPosition** and **VertexPositionDualTexture** to VertexTypes +* Xbox One platform fix for PrimitiveBatch +* CompileShader script updated to build external pdbs +* Code cleanup + +### April 26, 2016 +* Added **Rectangle** class to SimpleMath +* Fix for SDKMESH loader when loading models with 'extra' texture coordinate sets +* Made SimpleMath's Viewport **ComputeTitleSafeArea** less conservative +* Added view/menu aliases to GamePad::ButtonStateTracker for Xbox One Controller naming +* Retired Windows phone 8.0 projects and obsolete adapter code +* Minor code and project file cleanup + +### February 23, 2016 +* Fixed width computation bug in **SpriteFont::MeasureString** +* Fix to clean up partial or zero-length image files on failed write +* Fix to WaveBankReader for UWP platform +* Retired VS 2012 projects +* Xbox One platform updates +* Minor code and project file cleanup + +### January 5, 2016 +* Xbox One platform updates +* *breaking change* Need to add use of **GraphicsMemory** class to Xbox One titles +* Minor code cleanup + +### November 30, 2015 +* SimpleMath improvements including Viewport class +* Fixed bug with **Keyboard** for ``OpenBracket`` and later VK codes +* Fixed bug with **Mouse** that reset the scrollwheel on app activate +* ``MakeSpriteFont`` updated with ``/FastPack`` and ``/FeatureLevel`` switches +* Updated for VS 2015 Update 1 and Windows 10 SDK (10586) + +### October 30, 2015 +* DirectXTK for Audio 3D updates +* *breaking change* emitters/listeners now use RH coordinates by default +* **GeometricPrimitive** support for custom geometry +* SimpleMath Matrix class improvements +* DDS support for legacy bumpmap formats (V8U8, Q8W8V8U8, V16U16) +* Mouse fix for WinRT 
implementation with multiple buttons pressed +* Wireframe **CommonStates** no longer does backface culling +* Xbox One platform updates +* Minor code cleanup + +### August 18, 2015 +* Xbox One platform updates + +### July 29, 2015 +* Added **CreateBox** method to GeometricPrimitive +* Added ``invertn`` optional parameter to **CreateSphere** +* Updates for Keyboard, Mouse class +* Fixed bug when loading older SDKMESH models +* Updated for VS 2015 and Windows 10 SDK RTM +* Retired VS 2010 and Windows Store 8.0 projects + +### July 1, 2015 +* Added **Keyboard**, **Mouse** class +* Support for loading pre-lit models with SDKMESH +* **GamePad** implemented using ``Windows.Gaming.Input`` for Windows 10 +* DirectXTK for Audio updates for xWMA support with XAudio 2.9 +* Added **FindGlyph** and **GetSpriteSheet** methods to SpriteFont + +### March 27, 2015 +* Added projects for Windows apps Technical Preview +* GamePad temporarily uses 'null' device for universal Windows application platform + +### February 25, 2015 +* DirectXTK for Audio updates + + *breaking change* pitch now defined as -1 to 1 with 0 as the default + + One-shot Play method with volume, pitch, and pan + + **GetMasterVolume** / **SetMasterVolume** method for AudioEngine + + Fix for compact wavebank validation + + Improved voice cleanup and shutdown +* Minor code cleanup and C++11 ``=default``/``=delete`` usage + +### January 26, 2015 +* GamePad class: emulate ``XInputEnable`` behavior for XInput 9.1.0 +* DirectXTK for Audio fix for Stop followed by Play doing a proper restart +* DirectXTK for Audio fix when using XAudio 2.7 on a system with no audio device +* Updates for Xbox One platform support +* Minor code cleanup and C99 ``printf`` string conformance + +### November 24, 2014 +* SimpleMath fix for Matrix ``operator !=`` +* DirectXTK for Audio workaround for XAudio 2.7 on Windows 7 problem +* Updates for Windows phone 8.1 platform support +* Updates for Visual Studio 2015 Technical Preview +* Minor code 
cleanup + +### October 28, 2014 +* Model support for loading from ``VBO`` files +* Model render now sets samplers on slots 0,1 by default for dual-texture effects +* Updates for Xbox One platform support +* Minor code cleanup + +### September 5, 2014 +* **GamePad** class: gamepad controller helper using XInput on Windows, IGamepad for Xbox One +* SimpleMath updates; Matrix billboard methods; *breaking change*: Matrix::Identity() -> Matrix::Identity +* SpriteBatch new optional **SetViewport** method +* SpriteFont fix for white-space character rendering optimization +* DDSTextureLoader fix for auto-gen mipmaps for volume textures +* Explicit calling-convention annotation for public headers +* Updates for Xbox One platform support +* Minor code and project cleanup + +### July 15, 2014 +* DirectXTK for Audio and XWBTool fixes +* Updates to Xbox One platform support + +### April 3, 2014 +* Windows phone 8.1 platform support + +### February 24, 2014 +* DirectXHelper: new utility header with **MapGuard** and public version of **SetDebugObjectName** template +* DDSTextureLoader: Optional support for auto-gen mipmaps +* DDSTextureLoader/ScreenGrab: support for Direct3D 11 video formats including legacy "YUY2" DDS files +* GeometricPrimitive: Handedness fix for tetrahedron, octahedron, dodecahedron, and icosahedron +* ``SpriteBatch::SetRotation(DXGI_MODE_ROTATION_UNSPECIFIED)`` to disable viewport matrix +* XboxDDSTextureLoader: optional forceSRGB parameter + +### January 24, 2014 +* DirectXTK for Audio updated with voice management and optional mastering volume limiter +* Added orientation rotation support to **SpriteBatch** +* Fixed a resource leak with ``GetDefaultTexture`` used by some Effects +* Code cleanup (removed ``DXGI_1_2_FORMATS`` control define; d2d1.h workaround not needed; ScopedObject typedef removed) + +### December 24, 2013 +* Added **DirectX Tool Kit for Audio** using XAudio2 +* Xbox One platform support +* ``MakeSpriteFont`` tool updated with more progress 
feedback when capturing large fonts +* Minor updates for ``SDKMESH`` Model loader +* Fixed bug in ``CMO`` Model loader when handling multiple textures +* Improved debugging output + +### October 28, 2013 +* Updated for Visual Studio 2013 and Windows 8.1 SDK RTM +* Added **DGSLEffect**, **DGSLEffectFactory**, **VertexPositionNormalTangentColorTexture**, and **VertexPositionNormalTangentColorTextureSkinning** +* Model loading and effect factories support loading skinned models +* ``MakeSpriteFont`` now has a smooth vs. sharp antialiasing option: /sharp +* Model loading from ``CMOs`` now handles UV transforms for texture coordinates +* A number of small fixes for **EffectFactory** +* Minor code and project cleanup +* Added ``NO_D3D11_DEBUG_NAME`` compilation define to control population of Direct3D debug layer names for debug builds + +### July 1, 2013 +* VS 2013 Preview projects added and updates for DirectXMath 3.05 ``__vectorcall`` +* Added use of sRGB WIC metadata for ``JPEG``, ``PNG``, and ``TIFF`` +* SaveToWIC functions updated with new optional setCustomProps parameter and error check with optional targetFormat + +### May 30, 2013 +* Added more **GeometricPrimitives**: Cone, Tetrahedron, Octahedron, Dodecahedron, Icosahedron +* Updated to support loading new metadata from DDS files (if present) +* Fixed bug with loading of WIC 32bpp RGBE format images +* Fixed bug when skipping mipmaps in a 1D or 2D array texture DDS file + +### February 22, 2013 +* Added **SimpleMath** header +* Fixed bug that prevented properly overriding EffectFactory::CreateTexture +* Fixed forceSRGB logic in DDSTextureLoader and WICTextureLoader +* Break circular reference chains when using SpriteBatch with a setCustomShaders lambda +* Updated projects with ``/fp:fast`` for all configs, ``/arch:SSE2`` for Win32 configs +* Sensibly named .pdb output files +* Added ``WIC_USE_FACTORY_PROXY`` build option (uses WindowsCodecs.dll entrypoint rather than CoCreateInstance) + +### January 25, 2013 
+* **GeometricPrimitive** support for left-handed coordinates and drawing with custom effects +* Model, ModelMesh, and ModelMeshPart added with loading of rigid non-animating models from .CMO and .SDKMESH files +* EffectFactory helper class added + +### December 11, 2012 +* Ex versions of **DDSTextureLoader** and **WICTextureLoader** +* Removed use of ATL's ``CComPtr`` in favor of WRL's ``ComPtr`` for all platforms to support VS Express editions +* Updated VS 2010 project for official 'property sheet' integration for Windows 8.0 SDK +* Minor fix to **CommonStates** for Feature Level 9.1 +* Tweaked AlphaTestEffect.cpp to work around ARM NEON compiler codegen bug +* Added dxguid.lib as a default library for Debug builds to resolve GUID link issues + +### November 15, 2012 +* Added support for WIC2 when available on Windows 8 and Windows 7 with KB 2670838 +* Cleaned up warning level 4 warnings + +### October 30, 2012 +* Added project files for Windows phone 8 + +### October 12, 2012 +* Added **PrimitiveBatch** for drawing user primitives +* Debug object names for all D3D resources (for PIX and debug layer leak reporting) + +### October 2, 2012 +* Added **ScreenGrab** module +* Added **CreateGeoSphere** for drawing a geodesic sphere +* Put DDSTextureLoader and WICTextureLoader into the DirectX C++ namespace + +### September 7, 2012 +* Renamed project files for better naming consistency +* Updated WICTextureLoader for Windows 8 96bpp floating-point formats +* Win32 desktop projects updated to use Windows Vista (0x0600) rather than Windows 7 (0x0601) APIs +* Tweaked SpriteBatch.cpp to workaround ARM NEON compiler codegen bug + +### May 31, 2012 +* Updated Windows Store project for Visual Studio 2012 Release Candidate changes +* Cleaned up x64 Debug configuration warnings and switched to use ``_DEBUG`` instead of ``DEBUG`` +* Minor fix for DDSTextureLoader's retry fallback that can happen with 10level9 feature levels + +### May 2, 2012 +* Added **SpriteFont** 
implementation and the MakeSpriteFont utility + +### March 29, 2012 +* WICTextureLoader updated with Windows 8 WIC native pixel formats + +### March 6, 2012 +* Fix for too much temp memory used by WICTextureLoader +* Add separate Visual Studio 11 projects for Desktop vs. Windows Store builds + +### March 5, 2012 +* Bug fix for SpriteBatch with batches > 2048 + +### February 24, 2012 +* Original release diff --git a/Sdk/External/DirectXTK/Inc/Audio.h b/Sdk/External/DirectXTK/Inc/Audio.h new file mode 100644 index 0000000..e892e01 --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/Audio.h @@ -0,0 +1,754 @@ +//-------------------------------------------------------------------------------------- +// File: Audio.h +// +// DirectXTK for Audio header +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#pragma once + +#include +#include +#include + +#if (defined(_XBOX_ONE) && defined(_TITLE)) || defined(_GAMING_XBOX) +#include +#pragma comment(lib,"acphal.lib") +#endif + +#ifndef XAUDIO2_HELPER_FUNCTIONS +#define XAUDIO2_HELPER_FUNCTIONS +#endif + +#if defined(USING_XAUDIO2_REDIST) || (_WIN32_WINNT >= 0x0A00 /*_WIN32_WINNT_WIN10*/) || defined(_XBOX_ONE) +#define USING_XAUDIO2_9 +#elif (_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/) +#define USING_XAUDIO2_8 +#elif (_WIN32_WINNT >= 0x0601 /*_WIN32_WINNT_WIN7*/) +#error Windows 7 SP1 requires the XAudio2Redist NuGet package https://aka.ms/xaudio2redist +#else +#error DirectX Tool Kit for Audio not supported on this platform +#endif + +#include +#include +#include +#include + +#ifndef USING_XAUDIO2_REDIST +#if defined(USING_XAUDIO2_8) && defined(NTDDI_WIN10) && !defined(_M_IX86) +// The xaudio2_8.lib in the Windows 10 SDK for x86 is incorrectly annotated as __cdecl instead of __stdcall, 
so avoid using it in this case. +#pragma comment(lib,"xaudio2_8.lib") +#else +#pragma comment(lib,"xaudio2.lib") +#endif +#endif + +#include + + +#include +#include +#include +#include +#include + + +namespace DirectX +{ + class SoundEffectInstance; + class SoundStreamInstance; + + //---------------------------------------------------------------------------------- + struct AudioStatistics + { + size_t playingOneShots; // Number of one-shot sounds currently playing + size_t playingInstances; // Number of sound effect instances currently playing + size_t allocatedInstances; // Number of SoundEffectInstance allocated + size_t allocatedVoices; // Number of XAudio2 voices allocated (standard, 3D, one-shots, and idle one-shots) + size_t allocatedVoices3d; // Number of XAudio2 voices allocated for 3D + size_t allocatedVoicesOneShot; // Number of XAudio2 voices allocated for one-shot sounds + size_t allocatedVoicesIdle; // Number of XAudio2 voices allocated for one-shot sounds but not currently in use + size_t audioBytes; // Total wave data (in bytes) in SoundEffects and in-memory WaveBanks +#if (defined(_XBOX_ONE) && defined(_TITLE)) || defined(_GAMING_XBOX) + size_t xmaAudioBytes; // Total wave data (in bytes) in SoundEffects and in-memory WaveBanks allocated with ApuAlloc +#endif + size_t streamingBytes; // Total size of streaming buffers (in bytes) in streaming WaveBanks + }; + + + //---------------------------------------------------------------------------------- + class IVoiceNotify + { + public: + virtual ~IVoiceNotify() = default; + + IVoiceNotify(const IVoiceNotify&) = delete; + IVoiceNotify& operator=(const IVoiceNotify&) = delete; + + IVoiceNotify(IVoiceNotify&&) = default; + IVoiceNotify& operator=(IVoiceNotify&&) = default; + + virtual void __cdecl OnBufferEnd() = 0; + // Notification that a voice buffer has finished + // Note this is called from XAudio2's worker thread, so it should perform very minimal and thread-safe operations + + virtual void __cdecl 
OnCriticalError() = 0; + // Notification that the audio engine encountered a critical error + + virtual void __cdecl OnReset() = 0; + // Notification of an audio engine reset + + virtual void __cdecl OnUpdate() = 0; + // Notification of an audio engine per-frame update (opt-in) + + virtual void __cdecl OnDestroyEngine() noexcept = 0; + // Notification that the audio engine is being destroyed + + virtual void __cdecl OnTrim() = 0; + // Notification of a request to trim the voice pool + + virtual void __cdecl GatherStatistics(AudioStatistics& stats) const = 0; + // Contribute to statistics request + + virtual void __cdecl OnDestroyParent() noexcept = 0; + // Optional notification used by some objects + + protected: + IVoiceNotify() = default; + }; + + //---------------------------------------------------------------------------------- + enum AUDIO_ENGINE_FLAGS : uint32_t + { + AudioEngine_Default = 0x0, + + AudioEngine_EnvironmentalReverb = 0x1, + AudioEngine_ReverbUseFilters = 0x2, + AudioEngine_UseMasteringLimiter = 0x4, + + AudioEngine_Debug = 0x10000, + AudioEngine_ThrowOnNoAudioHW = 0x20000, + AudioEngine_DisableVoiceReuse = 0x40000, + }; + + enum SOUND_EFFECT_INSTANCE_FLAGS : uint32_t + { + SoundEffectInstance_Default = 0x0, + + SoundEffectInstance_Use3D = 0x1, + SoundEffectInstance_ReverbUseFilters = 0x2, + SoundEffectInstance_NoSetPitch = 0x4, + + SoundEffectInstance_UseRedirectLFE = 0x10000, + }; + + enum AUDIO_ENGINE_REVERB : unsigned int + { + Reverb_Off, + Reverb_Default, + Reverb_Generic, + Reverb_Forest, + Reverb_PaddedCell, + Reverb_Room, + Reverb_Bathroom, + Reverb_LivingRoom, + Reverb_StoneRoom, + Reverb_Auditorium, + Reverb_ConcertHall, + Reverb_Cave, + Reverb_Arena, + Reverb_Hangar, + Reverb_CarpetedHallway, + Reverb_Hallway, + Reverb_StoneCorridor, + Reverb_Alley, + Reverb_City, + Reverb_Mountains, + Reverb_Quarry, + Reverb_Plain, + Reverb_ParkingLot, + Reverb_SewerPipe, + Reverb_Underwater, + Reverb_SmallRoom, + Reverb_MediumRoom, + 
Reverb_LargeRoom, + Reverb_MediumHall, + Reverb_LargeHall, + Reverb_Plate, + Reverb_MAX + }; + + enum SoundState + { + STOPPED = 0, + PLAYING, + PAUSED + }; + + + //---------------------------------------------------------------------------------- + class AudioEngine + { + public: + explicit AudioEngine( + AUDIO_ENGINE_FLAGS flags = AudioEngine_Default, + _In_opt_ const WAVEFORMATEX* wfx = nullptr, + _In_opt_z_ const wchar_t* deviceId = nullptr, + AUDIO_STREAM_CATEGORY category = AudioCategory_GameEffects) noexcept(false); + + AudioEngine(AudioEngine&& moveFrom) noexcept; + AudioEngine& operator= (AudioEngine&& moveFrom) noexcept; + + AudioEngine(AudioEngine const&) = delete; + AudioEngine& operator= (AudioEngine const&) = delete; + + virtual ~AudioEngine(); + + bool __cdecl Update(); + // Performs per-frame processing for the audio engine, returns false if in 'silent mode' + + bool __cdecl Reset(_In_opt_ const WAVEFORMATEX* wfx = nullptr, _In_opt_z_ const wchar_t* deviceId = nullptr); + // Reset audio engine from critical error/silent mode using a new device; can also 'migrate' the graph + // Returns true if successfully reset, false if in 'silent mode' due to no default device + // Note: One shots are lost, all SoundEffectInstances are in the STOPPED state after successful reset + + void __cdecl Suspend() noexcept; + void __cdecl Resume(); + // Suspend/resumes audio processing (i.e. 
global pause/resume) + + float __cdecl GetMasterVolume() const noexcept; + void __cdecl SetMasterVolume(float volume); + // Master volume property for all sounds + + void __cdecl SetReverb(AUDIO_ENGINE_REVERB reverb); + void __cdecl SetReverb(_In_opt_ const XAUDIO2FX_REVERB_PARAMETERS* native); + // Sets environmental reverb for 3D positional audio (if active) + + void __cdecl SetMasteringLimit(int release, int loudness); + // Sets the mastering volume limiter properties (if active) + + AudioStatistics __cdecl GetStatistics() const; + // Gathers audio engine statistics + + WAVEFORMATEXTENSIBLE __cdecl GetOutputFormat() const noexcept; + // Returns the format consumed by the mastering voice (which is the same as the device output if defaults are used) + + uint32_t __cdecl GetChannelMask() const noexcept; + // Returns the output channel mask + + unsigned int __cdecl GetOutputChannels() const noexcept; + // Returns the number of output channels + + bool __cdecl IsAudioDevicePresent() const noexcept; + // Returns true if the audio graph is operating normally, false if in 'silent mode' + + bool __cdecl IsCriticalError() const noexcept; + // Returns true if the audio graph is halted due to a critical error (which also places the engine into 'silent mode') + + // Voice pool management. 
+ void __cdecl SetDefaultSampleRate(int sampleRate); + // Sample rate for voices in the reuse pool (defaults to 44100) + + void __cdecl SetMaxVoicePool(size_t maxOneShots, size_t maxInstances); + // Maximum number of voices to allocate for one-shots and instances + // Note: one-shots over this limit are ignored; too many instance voices throws an exception + + void __cdecl TrimVoicePool(); + // Releases any currently unused voices + + // Internal-use functions + void __cdecl AllocateVoice(_In_ const WAVEFORMATEX* wfx, + SOUND_EFFECT_INSTANCE_FLAGS flags, bool oneshot, _Outptr_result_maybenull_ IXAudio2SourceVoice** voice); + + void __cdecl DestroyVoice(_In_ IXAudio2SourceVoice* voice) noexcept; + // Should only be called for instance voices, not one-shots + + void __cdecl RegisterNotify(_In_ IVoiceNotify* notify, bool usesUpdate); + void __cdecl UnregisterNotify(_In_ IVoiceNotify* notify, bool usesOneShots, bool usesUpdate); + + // XAudio2 interface access + IXAudio2* __cdecl GetInterface() const noexcept; + IXAudio2MasteringVoice* __cdecl GetMasterVoice() const noexcept; + IXAudio2SubmixVoice* __cdecl GetReverbVoice() const noexcept; + X3DAUDIO_HANDLE& __cdecl Get3DHandle() const noexcept; + + // Static functions + struct RendererDetail + { + std::wstring deviceId; + std::wstring description; + }; + + static std::vector __cdecl GetRendererDetails(); + // Returns a list of valid audio endpoint devices + + private: + // Private implementation. 
+ class Impl; + std::unique_ptr pImpl; + }; + + + //---------------------------------------------------------------------------------- + class WaveBank + { + public: + WaveBank(_In_ AudioEngine* engine, _In_z_ const wchar_t* wbFileName); + + WaveBank(WaveBank&& moveFrom) noexcept; + WaveBank& operator= (WaveBank&& moveFrom) noexcept; + + WaveBank(WaveBank const&) = delete; + WaveBank& operator= (WaveBank const&) = delete; + + virtual ~WaveBank(); + + void __cdecl Play(unsigned int index); + void __cdecl Play(unsigned int index, float volume, float pitch, float pan); + + void __cdecl Play(_In_z_ const char* name); + void __cdecl Play(_In_z_ const char* name, float volume, float pitch, float pan); + + std::unique_ptr __cdecl CreateInstance(unsigned int index, + SOUND_EFFECT_INSTANCE_FLAGS flags = SoundEffectInstance_Default); + std::unique_ptr __cdecl CreateInstance(_In_z_ const char* name, + SOUND_EFFECT_INSTANCE_FLAGS flags = SoundEffectInstance_Default); + + std::unique_ptr __cdecl CreateStreamInstance(unsigned int index, + SOUND_EFFECT_INSTANCE_FLAGS flags = SoundEffectInstance_Default); + std::unique_ptr __cdecl CreateStreamInstance(_In_z_ const char* name, + SOUND_EFFECT_INSTANCE_FLAGS flags = SoundEffectInstance_Default); + + bool __cdecl IsPrepared() const noexcept; + bool __cdecl IsInUse() const noexcept; + bool __cdecl IsStreamingBank() const noexcept; + + size_t __cdecl GetSampleSizeInBytes(unsigned int index) const noexcept; + // Returns size of wave audio data + + size_t __cdecl GetSampleDuration(unsigned int index) const noexcept; + // Returns the duration in samples + + size_t __cdecl GetSampleDurationMS(unsigned int index) const noexcept; + // Returns the duration in milliseconds + + const WAVEFORMATEX* __cdecl GetFormat(unsigned int index, _Out_writes_bytes_(maxsize) WAVEFORMATEX* wfx, size_t maxsize) const noexcept; + + int __cdecl Find(_In_z_ const char* name) const; + +#ifdef USING_XAUDIO2_9 + bool __cdecl FillSubmitBuffer(unsigned int index, 
_Out_ XAUDIO2_BUFFER& buffer, _Out_ XAUDIO2_BUFFER_WMA& wmaBuffer) const; +#else + void __cdecl FillSubmitBuffer(unsigned int index, _Out_ XAUDIO2_BUFFER& buffer) const; +#endif + + void __cdecl UnregisterInstance(_In_ IVoiceNotify* instance); + + HANDLE __cdecl GetAsyncHandle() const noexcept; + + bool __cdecl GetPrivateData(unsigned int index, _Out_writes_bytes_(datasize) void* data, size_t datasize); + + private: + // Private implementation. + class Impl; + + std::unique_ptr pImpl; + }; + + + //---------------------------------------------------------------------------------- + class SoundEffect + { + public: + SoundEffect(_In_ AudioEngine* engine, _In_z_ const wchar_t* waveFileName); + + SoundEffect(_In_ AudioEngine* engine, _Inout_ std::unique_ptr& wavData, + _In_ const WAVEFORMATEX* wfx, _In_reads_bytes_(audioBytes) const uint8_t* startAudio, size_t audioBytes); + + SoundEffect(_In_ AudioEngine* engine, _Inout_ std::unique_ptr& wavData, + _In_ const WAVEFORMATEX* wfx, _In_reads_bytes_(audioBytes) const uint8_t* startAudio, size_t audioBytes, + uint32_t loopStart, uint32_t loopLength); + +#ifdef USING_XAUDIO2_9 + + SoundEffect(_In_ AudioEngine* engine, _Inout_ std::unique_ptr& wavData, + _In_ const WAVEFORMATEX* wfx, _In_reads_bytes_(audioBytes) const uint8_t* startAudio, size_t audioBytes, + _In_reads_(seekCount) const uint32_t* seekTable, size_t seekCount); + +#endif + + SoundEffect(SoundEffect&& moveFrom) noexcept; + SoundEffect& operator= (SoundEffect&& moveFrom) noexcept; + + SoundEffect(SoundEffect const&) = delete; + SoundEffect& operator= (SoundEffect const&) = delete; + + virtual ~SoundEffect(); + + void __cdecl Play(); + void __cdecl Play(float volume, float pitch, float pan); + + std::unique_ptr __cdecl CreateInstance(SOUND_EFFECT_INSTANCE_FLAGS flags = SoundEffectInstance_Default); + + bool __cdecl IsInUse() const noexcept; + + size_t __cdecl GetSampleSizeInBytes() const noexcept; + // Returns size of wave audio data + + size_t __cdecl 
GetSampleDuration() const noexcept; + // Returns the duration in samples + + size_t __cdecl GetSampleDurationMS() const noexcept; + // Returns the duration in milliseconds + + const WAVEFORMATEX* __cdecl GetFormat() const noexcept; + +#ifdef USING_XAUDIO2_9 + bool __cdecl FillSubmitBuffer(_Out_ XAUDIO2_BUFFER& buffer, _Out_ XAUDIO2_BUFFER_WMA& wmaBuffer) const; +#else + void __cdecl FillSubmitBuffer(_Out_ XAUDIO2_BUFFER& buffer) const; +#endif + + void __cdecl UnregisterInstance(_In_ IVoiceNotify* instance); + + private: + // Private implementation. + class Impl; + + std::unique_ptr pImpl; + }; + + + //---------------------------------------------------------------------------------- + struct AudioListener : public X3DAUDIO_LISTENER + { + AudioListener() noexcept + { + memset(this, 0, sizeof(X3DAUDIO_LISTENER)); + + OrientFront.z = -1.f; + + OrientTop.y = 1.f; + } + + void XM_CALLCONV SetPosition(FXMVECTOR v) noexcept + { + XMStoreFloat3(reinterpret_cast(&Position), v); + } + void __cdecl SetPosition(const XMFLOAT3& pos) noexcept + { + Position.x = pos.x; + Position.y = pos.y; + Position.z = pos.z; + } + + void XM_CALLCONV SetVelocity(FXMVECTOR v) noexcept + { + XMStoreFloat3(reinterpret_cast(&Velocity), v); + } + void __cdecl SetVelocity(const XMFLOAT3& vel) noexcept + { + Velocity.x = vel.x; + Velocity.y = vel.y; + Velocity.z = vel.z; + } + + void XM_CALLCONV SetOrientation(FXMVECTOR forward, FXMVECTOR up) noexcept + { + XMStoreFloat3(reinterpret_cast(&OrientFront), forward); + XMStoreFloat3(reinterpret_cast(&OrientTop), up); + } + void __cdecl SetOrientation(const XMFLOAT3& forward, const XMFLOAT3& up) noexcept + { + OrientFront.x = forward.x; OrientTop.x = up.x; + OrientFront.y = forward.y; OrientTop.y = up.y; + OrientFront.z = forward.z; OrientTop.z = up.z; + } + + void XM_CALLCONV SetOrientationFromQuaternion(FXMVECTOR quat) noexcept + { + XMVECTOR forward = XMVector3Rotate(g_XMIdentityR2, quat); + XMStoreFloat3(reinterpret_cast(&OrientFront), forward); + + 
XMVECTOR up = XMVector3Rotate(g_XMIdentityR1, quat); + XMStoreFloat3(reinterpret_cast(&OrientTop), up); + } + + void XM_CALLCONV Update(FXMVECTOR newPos, XMVECTOR upDir, float dt) noexcept + // Updates velocity and orientation by tracking changes in position over time... + { + if (dt > 0.f) + { + XMVECTOR lastPos = XMLoadFloat3(reinterpret_cast(&Position)); + + XMVECTOR vDelta = XMVectorSubtract(newPos, lastPos); + XMVECTOR vt = XMVectorReplicate(dt); + XMVECTOR v = XMVectorDivide(vDelta, vt); + XMStoreFloat3(reinterpret_cast(&Velocity), v); + + vDelta = XMVector3Normalize(vDelta); + XMStoreFloat3(reinterpret_cast(&OrientFront), vDelta); + + v = XMVector3Cross(upDir, vDelta); + v = XMVector3Normalize(v); + + v = XMVector3Cross(vDelta, v); + v = XMVector3Normalize(v); + XMStoreFloat3(reinterpret_cast(&OrientTop), v); + + XMStoreFloat3(reinterpret_cast(&Position), newPos); + } + } + }; + + + //---------------------------------------------------------------------------------- + struct AudioEmitter : public X3DAUDIO_EMITTER + { + float EmitterAzimuths[XAUDIO2_MAX_AUDIO_CHANNELS]; + + AudioEmitter() noexcept : + EmitterAzimuths{} + { + memset(this, 0, sizeof(X3DAUDIO_EMITTER)); + + OrientFront.z = -1.f; + + OrientTop.y = + ChannelRadius = + CurveDistanceScaler = + DopplerScaler = 1.f; + + ChannelCount = 1; + pChannelAzimuths = EmitterAzimuths; + + InnerRadiusAngle = X3DAUDIO_PI / 4.0f; + } + + void XM_CALLCONV SetPosition(FXMVECTOR v) noexcept + { + XMStoreFloat3(reinterpret_cast(&Position), v); + } + void __cdecl SetPosition(const XMFLOAT3& pos) noexcept + { + Position.x = pos.x; + Position.y = pos.y; + Position.z = pos.z; + } + + void XM_CALLCONV SetVelocity(FXMVECTOR v) noexcept + { + XMStoreFloat3(reinterpret_cast(&Velocity), v); + } + void __cdecl SetVelocity(const XMFLOAT3& vel) noexcept + { + Velocity.x = vel.x; + Velocity.y = vel.y; + Velocity.z = vel.z; + } + + void XM_CALLCONV SetOrientation(FXMVECTOR forward, FXMVECTOR up) noexcept + { + 
XMStoreFloat3(reinterpret_cast(&OrientFront), forward); + XMStoreFloat3(reinterpret_cast(&OrientTop), up); + } + void __cdecl SetOrientation(const XMFLOAT3& forward, const XMFLOAT3& up) noexcept + { + OrientFront.x = forward.x; OrientTop.x = up.x; + OrientFront.y = forward.y; OrientTop.y = up.y; + OrientFront.z = forward.z; OrientTop.z = up.z; + } + + void XM_CALLCONV SetOrientationFromQuaternion(FXMVECTOR quat) noexcept + { + XMVECTOR forward = XMVector3Rotate(g_XMIdentityR2, quat); + XMStoreFloat3(reinterpret_cast(&OrientFront), forward); + + XMVECTOR up = XMVector3Rotate(g_XMIdentityR1, quat); + XMStoreFloat3(reinterpret_cast(&OrientTop), up); + } + + void XM_CALLCONV Update(FXMVECTOR newPos, XMVECTOR upDir, float dt) noexcept + // Updates velocity and orientation by tracking changes in position over time... + { + if (dt > 0.f) + { + XMVECTOR lastPos = XMLoadFloat3(reinterpret_cast(&Position)); + + XMVECTOR vDelta = XMVectorSubtract(newPos, lastPos); + XMVECTOR vt = XMVectorReplicate(dt); + XMVECTOR v = XMVectorDivide(vDelta, vt); + XMStoreFloat3(reinterpret_cast(&Velocity), v); + + vDelta = XMVector3Normalize(vDelta); + XMStoreFloat3(reinterpret_cast(&OrientFront), vDelta); + + v = XMVector3Cross(upDir, vDelta); + v = XMVector3Normalize(v); + + v = XMVector3Cross(vDelta, v); + v = XMVector3Normalize(v); + XMStoreFloat3(reinterpret_cast(&OrientTop), v); + + XMStoreFloat3(reinterpret_cast(&Position), newPos); + } + } + }; + + + //---------------------------------------------------------------------------------- + class SoundEffectInstance + { + public: + SoundEffectInstance(SoundEffectInstance&& moveFrom) noexcept; + SoundEffectInstance& operator= (SoundEffectInstance&& moveFrom) noexcept; + + SoundEffectInstance(SoundEffectInstance const&) = delete; + SoundEffectInstance& operator= (SoundEffectInstance const&) = delete; + + virtual ~SoundEffectInstance(); + + void __cdecl Play(bool loop = false); + void __cdecl Stop(bool immediate = true) noexcept; + void 
__cdecl Pause() noexcept; + void __cdecl Resume(); + + void __cdecl SetVolume(float volume); + void __cdecl SetPitch(float pitch); + void __cdecl SetPan(float pan); + + void __cdecl Apply3D(const AudioListener& listener, const AudioEmitter& emitter, bool rhcoords = true); + + bool __cdecl IsLooped() const noexcept; + + SoundState __cdecl GetState() noexcept; + + IVoiceNotify* __cdecl GetVoiceNotify() const noexcept; + + private: + // Private implementation. + class Impl; + + std::unique_ptr pImpl; + + // Private constructors + SoundEffectInstance(_In_ AudioEngine* engine, _In_ SoundEffect* effect, SOUND_EFFECT_INSTANCE_FLAGS flags); + SoundEffectInstance(_In_ AudioEngine* engine, _In_ WaveBank* effect, unsigned int index, SOUND_EFFECT_INSTANCE_FLAGS flags); + + friend std::unique_ptr __cdecl SoundEffect::CreateInstance(SOUND_EFFECT_INSTANCE_FLAGS); + friend std::unique_ptr __cdecl WaveBank::CreateInstance(unsigned int, SOUND_EFFECT_INSTANCE_FLAGS); + }; + + + //---------------------------------------------------------------------------------- + class SoundStreamInstance + { + public: + SoundStreamInstance(SoundStreamInstance&& moveFrom) noexcept; + SoundStreamInstance& operator= (SoundStreamInstance&& moveFrom) noexcept; + + SoundStreamInstance(SoundStreamInstance const&) = delete; + SoundStreamInstance& operator= (SoundStreamInstance const&) = delete; + + virtual ~SoundStreamInstance(); + + void __cdecl Play(bool loop = false); + void __cdecl Stop(bool immediate = true) noexcept; + void __cdecl Pause() noexcept; + void __cdecl Resume(); + + void __cdecl SetVolume(float volume); + void __cdecl SetPitch(float pitch); + void __cdecl SetPan(float pan); + + void __cdecl Apply3D(const AudioListener& listener, const AudioEmitter& emitter, bool rhcoords = true); + + bool __cdecl IsLooped() const noexcept; + + SoundState __cdecl GetState() noexcept; + + IVoiceNotify* __cdecl GetVoiceNotify() const noexcept; + + private: + // Private implementation. 
+ class Impl; + + std::unique_ptr pImpl; + + // Private constructors + SoundStreamInstance(_In_ AudioEngine* engine, _In_ WaveBank* effect, unsigned int index, SOUND_EFFECT_INSTANCE_FLAGS flags); + + friend std::unique_ptr __cdecl WaveBank::CreateStreamInstance(unsigned int, SOUND_EFFECT_INSTANCE_FLAGS); + }; + + + //---------------------------------------------------------------------------------- + class DynamicSoundEffectInstance + { + public: + DynamicSoundEffectInstance(_In_ AudioEngine* engine, + _In_opt_ std::function bufferNeeded, + int sampleRate, int channels, int sampleBits = 16, + SOUND_EFFECT_INSTANCE_FLAGS flags = SoundEffectInstance_Default); + DynamicSoundEffectInstance(DynamicSoundEffectInstance&& moveFrom) noexcept; + DynamicSoundEffectInstance& operator= (DynamicSoundEffectInstance&& moveFrom) noexcept; + + DynamicSoundEffectInstance(DynamicSoundEffectInstance const&) = delete; + DynamicSoundEffectInstance& operator= (DynamicSoundEffectInstance const&) = delete; + + virtual ~DynamicSoundEffectInstance(); + + void __cdecl Play(); + void __cdecl Stop(bool immediate = true) noexcept; + void __cdecl Pause() noexcept; + void __cdecl Resume(); + + void __cdecl SetVolume(float volume); + void __cdecl SetPitch(float pitch); + void __cdecl SetPan(float pan); + + void __cdecl Apply3D(const AudioListener& listener, const AudioEmitter& emitter, bool rhcoords = true); + + void __cdecl SubmitBuffer(_In_reads_bytes_(audioBytes) const uint8_t* pAudioData, size_t audioBytes); + void __cdecl SubmitBuffer(_In_reads_bytes_(audioBytes) const uint8_t* pAudioData, uint32_t offset, size_t audioBytes); + + SoundState __cdecl GetState() noexcept; + + size_t __cdecl GetSampleDuration(size_t bytes) const noexcept; + // Returns duration in samples of a buffer of a given size + + size_t __cdecl GetSampleDurationMS(size_t bytes) const noexcept; + // Returns duration in milliseconds of a buffer of a given size + + size_t __cdecl GetSampleSizeInBytes(uint64_t duration) const 
noexcept; + // Returns size of a buffer for a duration given in milliseconds + + int __cdecl GetPendingBufferCount() const noexcept; + + const WAVEFORMATEX* __cdecl GetFormat() const noexcept; + + private: + // Private implementation. + class Impl; + + std::unique_ptr pImpl; + }; + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" +#endif + + DEFINE_ENUM_FLAG_OPERATORS(AUDIO_ENGINE_FLAGS); + DEFINE_ENUM_FLAG_OPERATORS(SOUND_EFFECT_INSTANCE_FLAGS); + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +} diff --git a/Sdk/External/DirectXTK/Inc/BufferHelpers.h b/Sdk/External/DirectXTK/Inc/BufferHelpers.h new file mode 100644 index 0000000..5e07ae0 --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/BufferHelpers.h @@ -0,0 +1,160 @@ +//-------------------------------------------------------------------------------------- +// File: BufferHelpers.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_XBOX_ONE) && defined(_TITLE) +#include +#include "GraphicsMemory.h" +#else +#include +#endif + +#include + +#include + + +namespace DirectX +{ + // Helpers for creating initialized Direct3D buffer resources. 
+ HRESULT __cdecl CreateStaticBuffer(_In_ ID3D11Device* device, + _In_reads_bytes_(count* stride) const void* ptr, + size_t count, + size_t stride, + unsigned int bindFlags, + _COM_Outptr_ ID3D11Buffer** pBuffer) noexcept; + + template + HRESULT CreateStaticBuffer(_In_ ID3D11Device* device, + _In_reads_(count) T const* data, + size_t count, + unsigned int bindFlags, + _COM_Outptr_ ID3D11Buffer** pBuffer) noexcept + { + return CreateStaticBuffer(device, data, count, sizeof(T), bindFlags, pBuffer); + } + + template + HRESULT CreateStaticBuffer(_In_ ID3D11Device* device, + T const& data, + unsigned int bindFlags, + _COM_Outptr_ ID3D11Buffer** pBuffer) noexcept + { + return CreateStaticBuffer(device, data.data(), data.size(), sizeof(typename T::value_type), bindFlags, pBuffer); + } + + // Helpers for creating texture from memory arrays. + HRESULT __cdecl CreateTextureFromMemory(_In_ ID3D11Device* device, + size_t width, + DXGI_FORMAT format, + const D3D11_SUBRESOURCE_DATA& initData, + _COM_Outptr_opt_ ID3D11Texture1D** texture, + _COM_Outptr_opt_ ID3D11ShaderResourceView** textureView, + unsigned int bindFlags = D3D11_BIND_SHADER_RESOURCE) noexcept; + + HRESULT __cdecl CreateTextureFromMemory(_In_ ID3D11Device* device, + size_t width, size_t height, + DXGI_FORMAT format, + const D3D11_SUBRESOURCE_DATA& initData, + _COM_Outptr_opt_ ID3D11Texture2D** texture, + _COM_Outptr_opt_ ID3D11ShaderResourceView** textureView, + unsigned int bindFlags = D3D11_BIND_SHADER_RESOURCE) noexcept; + + HRESULT __cdecl CreateTextureFromMemory( +#if defined(_XBOX_ONE) && defined(_TITLE) + _In_ ID3D11DeviceX* d3dDeviceX, + _In_ ID3D11DeviceContextX* d3dContextX, +#else + _In_ ID3D11Device* device, + _In_ ID3D11DeviceContext* d3dContext, +#endif + size_t width, size_t height, + DXGI_FORMAT format, + const D3D11_SUBRESOURCE_DATA& initData, + _COM_Outptr_opt_ ID3D11Texture2D** texture, + _COM_Outptr_opt_ ID3D11ShaderResourceView** textureView) noexcept; + + HRESULT __cdecl 
CreateTextureFromMemory(_In_ ID3D11Device* device, + size_t width, size_t height, size_t depth, + DXGI_FORMAT format, + const D3D11_SUBRESOURCE_DATA& initData, + _COM_Outptr_opt_ ID3D11Texture3D** texture, + _COM_Outptr_opt_ ID3D11ShaderResourceView** textureView, + unsigned int bindFlags = D3D11_BIND_SHADER_RESOURCE) noexcept; + + // Strongly typed wrapper around a Direct3D constant buffer. + namespace Internal + { + // Base class, not to be used directly: clients should access this via the derived PrimitiveBatch. + class ConstantBufferBase + { + protected: + void __cdecl CreateBuffer(_In_ ID3D11Device* device, size_t bytes, _Outptr_ ID3D11Buffer** pBuffer); + }; + } + + template + class ConstantBuffer : public Internal::ConstantBufferBase + { + public: + // Constructor. + ConstantBuffer() = default; + explicit ConstantBuffer(_In_ ID3D11Device* device) noexcept(false) + { + CreateBuffer(device, sizeof(T), mConstantBuffer.GetAddressOf()); + } + + ConstantBuffer(ConstantBuffer&&) = default; + ConstantBuffer& operator= (ConstantBuffer&&) = default; + + ConstantBuffer(ConstantBuffer const&) = delete; + ConstantBuffer& operator= (ConstantBuffer const&) = delete; + + void Create(_In_ ID3D11Device* device) + { + CreateBuffer(device, sizeof(T), mConstantBuffer.ReleaseAndGetAddressOf()); + } + + // Writes new data into the constant buffer. 
+#if defined(_XBOX_ONE) && defined(_TITLE) + void __cdecl SetData(_In_ ID3D11DeviceContext* deviceContext, T const& value, void** grfxMemory) + { + assert(grfxMemory != nullptr); + + void* ptr = GraphicsMemory::Get().Allocate(deviceContext, sizeof(T), 64); + assert(ptr != nullptr); + + *(T*)ptr = value; + + *grfxMemory = ptr; + } +#else + + void __cdecl SetData(_In_ ID3D11DeviceContext* deviceContext, T const& value) noexcept + { + assert(mConstantBuffer); + + D3D11_MAPPED_SUBRESOURCE mappedResource; + if (SUCCEEDED(deviceContext->Map(mConstantBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource))) + { + *static_cast(mappedResource.pData) = value; + + deviceContext->Unmap(mConstantBuffer.Get(), 0); + } + } +#endif // _XBOX_ONE && _TITLE + + // Looks up the underlying D3D constant buffer. + ID3D11Buffer* GetBuffer() const noexcept { return mConstantBuffer.Get(); } + + private: + Microsoft::WRL::ComPtr mConstantBuffer; + }; +} diff --git a/Sdk/External/DirectXTK/Inc/CommonStates.h b/Sdk/External/DirectXTK/Inc/CommonStates.h new file mode 100644 index 0000000..c1208dd --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/CommonStates.h @@ -0,0 +1,66 @@ +//-------------------------------------------------------------------------------------- +// File: CommonStates.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_XBOX_ONE) && defined(_TITLE) +#include +#else +#include +#endif + +#include + + +namespace DirectX +{ + class CommonStates + { + public: + explicit CommonStates(_In_ ID3D11Device* device); + CommonStates(CommonStates&& moveFrom) noexcept; + CommonStates& operator= (CommonStates&& moveFrom) noexcept; + + CommonStates(CommonStates const&) = delete; + CommonStates& operator= (CommonStates const&) = delete; + + virtual ~CommonStates(); + + // Blend states. + ID3D11BlendState* __cdecl Opaque() const; + ID3D11BlendState* __cdecl AlphaBlend() const; + ID3D11BlendState* __cdecl Additive() const; + ID3D11BlendState* __cdecl NonPremultiplied() const; + + // Depth stencil states. + ID3D11DepthStencilState* __cdecl DepthNone() const; + ID3D11DepthStencilState* __cdecl DepthDefault() const; + ID3D11DepthStencilState* __cdecl DepthRead() const; + + // Rasterizer states. + ID3D11RasterizerState* __cdecl CullNone() const; + ID3D11RasterizerState* __cdecl CullClockwise() const; + ID3D11RasterizerState* __cdecl CullCounterClockwise() const; + ID3D11RasterizerState* __cdecl Wireframe() const; + + // Sampler states. + ID3D11SamplerState* __cdecl PointWrap() const; + ID3D11SamplerState* __cdecl PointClamp() const; + ID3D11SamplerState* __cdecl LinearWrap() const; + ID3D11SamplerState* __cdecl LinearClamp() const; + ID3D11SamplerState* __cdecl AnisotropicWrap() const; + ID3D11SamplerState* __cdecl AnisotropicClamp() const; + + private: + // Private implementation. 
+ class Impl; + + std::shared_ptr pImpl; + }; +} diff --git a/Sdk/External/DirectXTK/Inc/DDSTextureLoader.h b/Sdk/External/DirectXTK/Inc/DDSTextureLoader.h new file mode 100644 index 0000000..4d39c22 --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/DDSTextureLoader.h @@ -0,0 +1,157 @@ +//-------------------------------------------------------------------------------------- +// File: DDSTextureLoader.h +// +// Functions for loading a DDS texture and creating a Direct3D runtime resource for it +// +// Note these functions are useful as a light-weight runtime loader for DDS files. For +// a full-featured DDS file reader, writer, and texture processing pipeline see +// the 'Texconv' sample and the 'DirectXTex' library. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248926 +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_XBOX_ONE) && defined(_TITLE) +#include +#else +#include +#endif + +#include + + +namespace DirectX +{ +#ifndef DDS_ALPHA_MODE_DEFINED +#define DDS_ALPHA_MODE_DEFINED + enum DDS_ALPHA_MODE : uint32_t + { + DDS_ALPHA_MODE_UNKNOWN = 0, + DDS_ALPHA_MODE_STRAIGHT = 1, + DDS_ALPHA_MODE_PREMULTIPLIED = 2, + DDS_ALPHA_MODE_OPAQUE = 3, + DDS_ALPHA_MODE_CUSTOM = 4, + }; +#endif + + // Standard version + HRESULT __cdecl CreateDDSTextureFromMemory( + _In_ ID3D11Device* d3dDevice, + _In_reads_bytes_(ddsDataSize) const uint8_t* ddsData, + _In_ size_t ddsDataSize, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView, + _In_ size_t maxsize = 0, + _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr) noexcept; + + HRESULT __cdecl CreateDDSTextureFromFile( + _In_ ID3D11Device* d3dDevice, + _In_z_ const wchar_t* szFileName, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView, 
+ _In_ size_t maxsize = 0, + _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr) noexcept; + + // Standard version with optional auto-gen mipmap support + HRESULT __cdecl CreateDDSTextureFromMemory( + #if defined(_XBOX_ONE) && defined(_TITLE) + _In_ ID3D11DeviceX* d3dDevice, + _In_opt_ ID3D11DeviceContextX* d3dContext, + #else + _In_ ID3D11Device* d3dDevice, + _In_opt_ ID3D11DeviceContext* d3dContext, + #endif + _In_reads_bytes_(ddsDataSize) const uint8_t* ddsData, + _In_ size_t ddsDataSize, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView, + _In_ size_t maxsize = 0, + _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr) noexcept; + + HRESULT __cdecl CreateDDSTextureFromFile( + #if defined(_XBOX_ONE) && defined(_TITLE) + _In_ ID3D11DeviceX* d3dDevice, + _In_opt_ ID3D11DeviceContextX* d3dContext, + #else + _In_ ID3D11Device* d3dDevice, + _In_opt_ ID3D11DeviceContext* d3dContext, + #endif + _In_z_ const wchar_t* szFileName, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView, + _In_ size_t maxsize = 0, + _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr) noexcept; + + // Extended version + HRESULT __cdecl CreateDDSTextureFromMemoryEx( + _In_ ID3D11Device* d3dDevice, + _In_reads_bytes_(ddsDataSize) const uint8_t* ddsData, + _In_ size_t ddsDataSize, + _In_ size_t maxsize, + _In_ D3D11_USAGE usage, + _In_ unsigned int bindFlags, + _In_ unsigned int cpuAccessFlags, + _In_ unsigned int miscFlags, + _In_ bool forceSRGB, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView, + _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr) noexcept; + + HRESULT __cdecl CreateDDSTextureFromFileEx( + _In_ ID3D11Device* d3dDevice, + _In_z_ const wchar_t* szFileName, + _In_ size_t maxsize, + _In_ D3D11_USAGE usage, + _In_ unsigned int bindFlags, + _In_ unsigned int cpuAccessFlags, + _In_ unsigned int miscFlags, + _In_ bool forceSRGB, + _Outptr_opt_ ID3D11Resource** texture, + 
_Outptr_opt_ ID3D11ShaderResourceView** textureView, + _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr) noexcept; + + // Extended version with optional auto-gen mipmap support + HRESULT __cdecl CreateDDSTextureFromMemoryEx( + #if defined(_XBOX_ONE) && defined(_TITLE) + _In_ ID3D11DeviceX* d3dDevice, + _In_opt_ ID3D11DeviceContextX* d3dContext, + #else + _In_ ID3D11Device* d3dDevice, + _In_opt_ ID3D11DeviceContext* d3dContext, + #endif + _In_reads_bytes_(ddsDataSize) const uint8_t* ddsData, + _In_ size_t ddsDataSize, + _In_ size_t maxsize, + _In_ D3D11_USAGE usage, + _In_ unsigned int bindFlags, + _In_ unsigned int cpuAccessFlags, + _In_ unsigned int miscFlags, + _In_ bool forceSRGB, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView, + _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr) noexcept; + + HRESULT __cdecl CreateDDSTextureFromFileEx( + #if defined(_XBOX_ONE) && defined(_TITLE) + _In_ ID3D11DeviceX* d3dDevice, + _In_opt_ ID3D11DeviceContextX* d3dContext, + #else + _In_ ID3D11Device* d3dDevice, + _In_opt_ ID3D11DeviceContext* d3dContext, + #endif + _In_z_ const wchar_t* szFileName, + _In_ size_t maxsize, + _In_ D3D11_USAGE usage, + _In_ unsigned int bindFlags, + _In_ unsigned int cpuAccessFlags, + _In_ unsigned int miscFlags, + _In_ bool forceSRGB, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView, + _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr) noexcept; +} diff --git a/Sdk/External/DirectXTK/Inc/DirectXHelpers.h b/Sdk/External/DirectXTK/Inc/DirectXHelpers.h new file mode 100644 index 0000000..3191948 --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/DirectXHelpers.h @@ -0,0 +1,197 @@ +//-------------------------------------------------------------------------------------- +// File: DirectXHelpers.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_XBOX_ONE) && defined(_TITLE) +#include +#else +#include +#endif + +#if !defined(NO_D3D11_DEBUG_NAME) && ( defined(_DEBUG) || defined(PROFILE) ) +#if !defined(_XBOX_ONE) || !defined(_TITLE) +#pragma comment(lib,"dxguid.lib") +#endif +#endif + +#ifndef IID_GRAPHICS_PPV_ARGS +#define IID_GRAPHICS_PPV_ARGS(x) IID_PPV_ARGS(x) +#endif + +#include +#include + +#include + +// +// The core Direct3D headers provide the following helper C++ classes +// CD3D11_RECT +// CD3D11_BOX +// CD3D11_DEPTH_STENCIL_DESC +// CD3D11_BLEND_DESC, CD3D11_BLEND_DESC1 +// CD3D11_RASTERIZER_DESC, CD3D11_RASTERIZER_DESC1 +// CD3D11_BUFFER_DESC +// CD3D11_TEXTURE1D_DESC +// CD3D11_TEXTURE2D_DESC +// CD3D11_TEXTURE3D_DESC +// CD3D11_SHADER_RESOURCE_VIEW_DESC +// CD3D11_RENDER_TARGET_VIEW_DESC +// CD3D11_VIEWPORT +// CD3D11_DEPTH_STENCIL_VIEW_DESC +// CD3D11_UNORDERED_ACCESS_VIEW_DESC +// CD3D11_SAMPLER_DESC +// CD3D11_QUERY_DESC +// CD3D11_COUNTER_DESC +// + + +namespace DirectX +{ + class IEffect; + + // simliar to std::lock_guard for exception-safe Direct3D resource locking + class MapGuard : public D3D11_MAPPED_SUBRESOURCE + { + public: + MapGuard(_In_ ID3D11DeviceContext* context, + _In_ ID3D11Resource *resource, + _In_ unsigned int subresource, + _In_ D3D11_MAP mapType, + _In_ unsigned int mapFlags) noexcept(false) + : mContext(context), mResource(resource), mSubresource(subresource) + { + HRESULT hr = mContext->Map(resource, subresource, mapType, mapFlags, this); + if (FAILED(hr)) + { + throw std::exception(); + } + } + + MapGuard(MapGuard&&) = default; + MapGuard& operator= (MapGuard&&) = default; + + MapGuard(MapGuard const&) = delete; + MapGuard& operator= (MapGuard const&) = delete; + + ~MapGuard() + { + mContext->Unmap(mResource, mSubresource); + } + + uint8_t* get() const noexcept + { + return static_cast(pData); 
+ } + uint8_t* get(size_t slice) const noexcept + { + return static_cast(pData) + (slice * DepthPitch); + } + + uint8_t* scanline(size_t row) const noexcept + { + return static_cast(pData) + (row * RowPitch); + } + uint8_t* scanline(size_t slice, size_t row) const noexcept + { + return static_cast(pData) + (slice * DepthPitch) + (row * RowPitch); + } + + private: + ID3D11DeviceContext* mContext; + ID3D11Resource* mResource; + unsigned int mSubresource; + }; + + + // Helper sets a D3D resource name string (used by PIX and debug layer leak reporting). + template + inline void SetDebugObjectName(_In_ ID3D11DeviceChild* resource, _In_z_ const char(&name)[TNameLength]) noexcept + { +#if !defined(NO_D3D11_DEBUG_NAME) && ( defined(_DEBUG) || defined(PROFILE) ) +#if defined(_XBOX_ONE) && defined(_TITLE) + wchar_t wname[MAX_PATH]; + int result = MultiByteToWideChar(CP_UTF8, 0, name, TNameLength, wname, MAX_PATH); + if (result > 0) + { + resource->SetName(wname); + } +#else + resource->SetPrivateData(WKPDID_D3DDebugObjectName, TNameLength - 1, name); +#endif +#else + UNREFERENCED_PARAMETER(resource); + UNREFERENCED_PARAMETER(name); +#endif + } + + template + inline void SetDebugObjectName(_In_ ID3D11DeviceChild* resource, _In_z_ const wchar_t(&name)[TNameLength]) + { +#if !defined(NO_D3D11_DEBUG_NAME) && ( defined(_DEBUG) || defined(PROFILE) ) +#if defined(_XBOX_ONE) && defined(_TITLE) + resource->SetName(name); +#else + char aname[MAX_PATH]; + int result = WideCharToMultiByte(CP_UTF8, 0, name, TNameLength, aname, MAX_PATH, nullptr, nullptr); + if (result > 0) + { + resource->SetPrivateData(WKPDID_D3DDebugObjectName, TNameLength - 1, aname); + } +#endif +#else + UNREFERENCED_PARAMETER(resource); + UNREFERENCED_PARAMETER(name); +#endif + } + + // Helper to check for power-of-2 + template + constexpr bool IsPowerOf2(T x) noexcept { return ((x != 0) && !(x & (x - 1))); } + + // Helpers for aligning values by a power of 2 + template + inline T AlignDown(T size, size_t alignment) 
noexcept + { + if (alignment > 0) + { + assert(((alignment - 1) & alignment) == 0); + auto mask = static_cast(alignment - 1); + return size & ~mask; + } + return size; + } + + template + inline T AlignUp(T size, size_t alignment) noexcept + { + if (alignment > 0) + { + assert(((alignment - 1) & alignment) == 0); + auto mask = static_cast(alignment - 1); + return (size + mask) & ~mask; + } + return size; + } + + // Helper for creating a Direct3D input layout to match a shader from an IEffect + HRESULT __cdecl CreateInputLayoutFromEffect(_In_ ID3D11Device* device, + _In_ IEffect* effect, + _In_reads_(count) const D3D11_INPUT_ELEMENT_DESC* desc, + size_t count, + _COM_Outptr_ ID3D11InputLayout** pInputLayout) noexcept; + + template + HRESULT CreateInputLayoutFromEffect(_In_ ID3D11Device* device, + _In_ IEffect* effect, + _COM_Outptr_ ID3D11InputLayout** pInputLayout) noexcept + { + return CreateInputLayoutFromEffect(device, effect, T::InputElements, T::InputElementCount, pInputLayout); + } +} diff --git a/Sdk/External/DirectXTK/Inc/Effects.h b/Sdk/External/DirectXTK/Inc/Effects.h new file mode 100644 index 0000000..b224a03 --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/Effects.h @@ -0,0 +1,938 @@ +//-------------------------------------------------------------------------------------- +// File: Effects.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_XBOX_ONE) && defined(_TITLE) +#include +#else +#include +#endif + +#include +#include + + +namespace DirectX +{ + //---------------------------------------------------------------------------------- + // Abstract interface representing any effect which can be applied onto a D3D device context. 
+ class IEffect + { + public: + virtual ~IEffect() = default; + + IEffect(const IEffect&) = delete; + IEffect& operator=(const IEffect&) = delete; + + IEffect(IEffect&&) = delete; + IEffect& operator=(IEffect&&) = delete; + + virtual void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) = 0; + + virtual void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) = 0; + + protected: + IEffect() = default; + }; + + + // Abstract interface for effects with world, view, and projection matrices. + class IEffectMatrices + { + public: + virtual ~IEffectMatrices() = default; + + IEffectMatrices(const IEffectMatrices&) = delete; + IEffectMatrices& operator=(const IEffectMatrices&) = delete; + + IEffectMatrices(IEffectMatrices&&) = delete; + IEffectMatrices& operator=(IEffectMatrices&&) = delete; + + virtual void XM_CALLCONV SetWorld(FXMMATRIX value) = 0; + virtual void XM_CALLCONV SetView(FXMMATRIX value) = 0; + virtual void XM_CALLCONV SetProjection(FXMMATRIX value) = 0; + virtual void XM_CALLCONV SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection); + + protected: + IEffectMatrices() = default; + }; + + + // Abstract interface for effects which support directional lighting. 
+ class IEffectLights + { + public: + virtual ~IEffectLights() = default; + + IEffectLights(const IEffectLights&) = delete; + IEffectLights& operator=(const IEffectLights&) = delete; + + IEffectLights(IEffectLights&&) = delete; + IEffectLights& operator=(IEffectLights&&) = delete; + + virtual void __cdecl SetLightingEnabled(bool value) = 0; + virtual void __cdecl SetPerPixelLighting(bool value) = 0; + virtual void XM_CALLCONV SetAmbientLightColor(FXMVECTOR value) = 0; + + virtual void __cdecl SetLightEnabled(int whichLight, bool value) = 0; + virtual void XM_CALLCONV SetLightDirection(int whichLight, FXMVECTOR value) = 0; + virtual void XM_CALLCONV SetLightDiffuseColor(int whichLight, FXMVECTOR value) = 0; + virtual void XM_CALLCONV SetLightSpecularColor(int whichLight, FXMVECTOR value) = 0; + + virtual void __cdecl EnableDefaultLighting() = 0; + + static constexpr int MaxDirectionalLights = 3; + + protected: + IEffectLights() = default; + }; + + + // Abstract interface for effects which support fog. 
+ class IEffectFog + { + public: + virtual ~IEffectFog() = default; + + IEffectFog(const IEffectFog&) = delete; + IEffectFog& operator=(const IEffectFog&) = delete; + + IEffectFog(IEffectFog&&) = delete; + IEffectFog& operator=(IEffectFog&&) = delete; + + virtual void __cdecl SetFogEnabled(bool value) = 0; + virtual void __cdecl SetFogStart(float value) = 0; + virtual void __cdecl SetFogEnd(float value) = 0; + virtual void XM_CALLCONV SetFogColor(FXMVECTOR value) = 0; + + protected: + IEffectFog() = default; + }; + + + // Abstract interface for effects which support skinning + class IEffectSkinning + { + public: + virtual ~IEffectSkinning() = default; + + IEffectSkinning(const IEffectSkinning&) = delete; + IEffectSkinning& operator=(const IEffectSkinning&) = delete; + + IEffectSkinning(IEffectSkinning&&) = delete; + IEffectSkinning& operator=(IEffectSkinning&&) = delete; + + virtual void __cdecl SetWeightsPerVertex(int value) = 0; + virtual void __cdecl SetBoneTransforms(_In_reads_(count) XMMATRIX const* value, size_t count) = 0; + virtual void __cdecl ResetBoneTransforms() = 0; + + static constexpr int MaxBones = 72; + + protected: + IEffectSkinning() = default; + }; + + //---------------------------------------------------------------------------------- + // Built-in shader supports optional texture mapping, vertex coloring, directional lighting, and fog. + class BasicEffect : public IEffect, public IEffectMatrices, public IEffectLights, public IEffectFog + { + public: + explicit BasicEffect(_In_ ID3D11Device* device); + BasicEffect(BasicEffect&& moveFrom) noexcept; + BasicEffect& operator= (BasicEffect&& moveFrom) noexcept; + + BasicEffect(BasicEffect const&) = delete; + BasicEffect& operator= (BasicEffect const&) = delete; + + ~BasicEffect() override; + + // IEffect methods. 
+ void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) override; + + void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) override; + + // Camera settings. + void XM_CALLCONV SetWorld(FXMMATRIX value) override; + void XM_CALLCONV SetView(FXMMATRIX value) override; + void XM_CALLCONV SetProjection(FXMMATRIX value) override; + void XM_CALLCONV SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) override; + + // Material settings. + void XM_CALLCONV SetDiffuseColor(FXMVECTOR value); + void XM_CALLCONV SetEmissiveColor(FXMVECTOR value); + void XM_CALLCONV SetSpecularColor(FXMVECTOR value); + void __cdecl SetSpecularPower(float value); + void __cdecl DisableSpecular(); + void __cdecl SetAlpha(float value); + void XM_CALLCONV SetColorAndAlpha(FXMVECTOR value); + + // Light settings. + void __cdecl SetLightingEnabled(bool value) override; + void __cdecl SetPerPixelLighting(bool value) override; + void XM_CALLCONV SetAmbientLightColor(FXMVECTOR value) override; + + void __cdecl SetLightEnabled(int whichLight, bool value) override; + void XM_CALLCONV SetLightDirection(int whichLight, FXMVECTOR value) override; + void XM_CALLCONV SetLightDiffuseColor(int whichLight, FXMVECTOR value) override; + void XM_CALLCONV SetLightSpecularColor(int whichLight, FXMVECTOR value) override; + + void __cdecl EnableDefaultLighting() override; + + // Fog settings. + void __cdecl SetFogEnabled(bool value) override; + void __cdecl SetFogStart(float value) override; + void __cdecl SetFogEnd(float value) override; + void XM_CALLCONV SetFogColor(FXMVECTOR value) override; + + // Vertex color setting. + void __cdecl SetVertexColorEnabled(bool value); + + // Texture setting. + void __cdecl SetTextureEnabled(bool value); + void __cdecl SetTexture(_In_opt_ ID3D11ShaderResourceView* value); + + // Normal compression settings. + void __cdecl SetBiasedVertexNormals(bool value); + + private: + // Private implementation. 
+ class Impl; + + std::unique_ptr pImpl; + }; + + + + // Built-in shader supports per-pixel alpha testing. + class AlphaTestEffect : public IEffect, public IEffectMatrices, public IEffectFog + { + public: + explicit AlphaTestEffect(_In_ ID3D11Device* device); + AlphaTestEffect(AlphaTestEffect&& moveFrom) noexcept; + AlphaTestEffect& operator= (AlphaTestEffect&& moveFrom) noexcept; + + AlphaTestEffect(AlphaTestEffect const&) = delete; + AlphaTestEffect& operator= (AlphaTestEffect const&) = delete; + + ~AlphaTestEffect() override; + + // IEffect methods. + void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) override; + + void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) override; + + // Camera settings. + void XM_CALLCONV SetWorld(FXMMATRIX value) override; + void XM_CALLCONV SetView(FXMMATRIX value) override; + void XM_CALLCONV SetProjection(FXMMATRIX value) override; + void XM_CALLCONV SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) override; + + // Material settings. + void XM_CALLCONV SetDiffuseColor(FXMVECTOR value); + void __cdecl SetAlpha(float value); + void XM_CALLCONV SetColorAndAlpha(FXMVECTOR value); + + // Fog settings. + void __cdecl SetFogEnabled(bool value) override; + void __cdecl SetFogStart(float value) override; + void __cdecl SetFogEnd(float value) override; + void XM_CALLCONV SetFogColor(FXMVECTOR value) override; + + // Vertex color setting. + void __cdecl SetVertexColorEnabled(bool value); + + // Texture setting. + void __cdecl SetTexture(_In_opt_ ID3D11ShaderResourceView* value); + + // Alpha test settings. + void __cdecl SetAlphaFunction(D3D11_COMPARISON_FUNC value); + void __cdecl SetReferenceAlpha(int value); + + private: + // Private implementation. + class Impl; + + std::unique_ptr pImpl; + }; + + + + // Built-in shader supports two layer multitexturing (eg. for lightmaps or detail textures). 
+ class DualTextureEffect : public IEffect, public IEffectMatrices, public IEffectFog + { + public: + explicit DualTextureEffect(_In_ ID3D11Device* device); + DualTextureEffect(DualTextureEffect&& moveFrom) noexcept; + DualTextureEffect& operator= (DualTextureEffect&& moveFrom) noexcept; + + DualTextureEffect(DualTextureEffect const&) = delete; + DualTextureEffect& operator= (DualTextureEffect const&) = delete; + + ~DualTextureEffect() override; + + // IEffect methods. + void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) override; + + void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) override; + + // Camera settings. + void XM_CALLCONV SetWorld(FXMMATRIX value) override; + void XM_CALLCONV SetView(FXMMATRIX value) override; + void XM_CALLCONV SetProjection(FXMMATRIX value) override; + void XM_CALLCONV SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) override; + + // Material settings. + void XM_CALLCONV SetDiffuseColor(FXMVECTOR value); + void __cdecl SetAlpha(float value); + void XM_CALLCONV SetColorAndAlpha(FXMVECTOR value); + + // Fog settings. + void __cdecl SetFogEnabled(bool value) override; + void __cdecl SetFogStart(float value) override; + void __cdecl SetFogEnd(float value) override; + void XM_CALLCONV SetFogColor(FXMVECTOR value) override; + + // Vertex color setting. + void __cdecl SetVertexColorEnabled(bool value); + + // Texture settings. + void __cdecl SetTexture(_In_opt_ ID3D11ShaderResourceView* value); + void __cdecl SetTexture2(_In_opt_ ID3D11ShaderResourceView* value); + + private: + // Private implementation. + class Impl; + + std::unique_ptr pImpl; + }; + + + + // Built-in shader supports cubic environment mapping. 
+    class EnvironmentMapEffect : public IEffect, public IEffectMatrices, public IEffectLights, public IEffectFog
+    {
+    public:
+        enum Mapping
+        {
+            Mapping_Cube = 0,       // Cubic environment map
+            Mapping_Sphere,         // Spherical environment map
+            Mapping_DualParabola,   // Dual-parabola environment map (requires Feature Level 10.0)
+        };
+
+        explicit EnvironmentMapEffect(_In_ ID3D11Device* device);
+        EnvironmentMapEffect(EnvironmentMapEffect&& moveFrom) noexcept;
+        EnvironmentMapEffect& operator= (EnvironmentMapEffect&& moveFrom) noexcept;
+
+        EnvironmentMapEffect(EnvironmentMapEffect const&) = delete;
+        EnvironmentMapEffect& operator= (EnvironmentMapEffect const&) = delete;
+
+        ~EnvironmentMapEffect() override;
+
+        // IEffect methods.
+        void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) override;
+
+        void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) override;
+
+        // Camera settings.
+        void XM_CALLCONV SetWorld(FXMMATRIX value) override;
+        void XM_CALLCONV SetView(FXMMATRIX value) override;
+        void XM_CALLCONV SetProjection(FXMMATRIX value) override;
+        void XM_CALLCONV SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) override;
+
+        // Material settings.
+        void XM_CALLCONV SetDiffuseColor(FXMVECTOR value);
+        void XM_CALLCONV SetEmissiveColor(FXMVECTOR value);
+        void __cdecl SetAlpha(float value);
+        void XM_CALLCONV SetColorAndAlpha(FXMVECTOR value);
+
+        // Light settings.
+        void XM_CALLCONV SetAmbientLightColor(FXMVECTOR value) override;
+
+        void __cdecl SetPerPixelLighting(bool value) override;
+        void __cdecl SetLightEnabled(int whichLight, bool value) override;
+        void XM_CALLCONV SetLightDirection(int whichLight, FXMVECTOR value) override;
+        void XM_CALLCONV SetLightDiffuseColor(int whichLight, FXMVECTOR value) override;
+
+        void __cdecl EnableDefaultLighting() override;
+
+        // Fog settings.
+        void __cdecl SetFogEnabled(bool value) override;
+        void __cdecl SetFogStart(float value) override;
+        void __cdecl SetFogEnd(float value) override;
+        void XM_CALLCONV SetFogColor(FXMVECTOR value) override;
+
+        // Texture setting.
+        void __cdecl SetTexture(_In_opt_ ID3D11ShaderResourceView* value);
+        void __cdecl SetEnvironmentMap(_In_opt_ ID3D11ShaderResourceView* value);
+
+        // Environment map settings.
+        void __cdecl SetMode(Mapping mapping);
+        void __cdecl SetEnvironmentMapAmount(float value);
+        void XM_CALLCONV SetEnvironmentMapSpecular(FXMVECTOR value);
+        void __cdecl SetFresnelFactor(float value);
+
+        // Normal compression settings.
+        void __cdecl SetBiasedVertexNormals(bool value);
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl; // owns the forward-declared Impl
+
+        // Unsupported interface methods.
+        void __cdecl SetLightingEnabled(bool value) override;
+        void XM_CALLCONV SetLightSpecularColor(int whichLight, FXMVECTOR value) override;
+    };
+
+
+
+    // Built-in shader supports skinned animation.
+    class SkinnedEffect : public IEffect, public IEffectMatrices, public IEffectLights, public IEffectFog, public IEffectSkinning
+    {
+    public:
+        explicit SkinnedEffect(_In_ ID3D11Device* device);
+        SkinnedEffect(SkinnedEffect&& moveFrom) noexcept;
+        SkinnedEffect& operator= (SkinnedEffect&& moveFrom) noexcept;
+
+        SkinnedEffect(SkinnedEffect const&) = delete;
+        SkinnedEffect& operator= (SkinnedEffect const&) = delete;
+
+        ~SkinnedEffect() override;
+
+        // IEffect methods.
+        void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) override;
+
+        void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) override;
+
+        // Camera settings.
+        void XM_CALLCONV SetWorld(FXMMATRIX value) override;
+        void XM_CALLCONV SetView(FXMMATRIX value) override;
+        void XM_CALLCONV SetProjection(FXMMATRIX value) override;
+        void XM_CALLCONV SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) override;
+
+        // Material settings.
+        void XM_CALLCONV SetDiffuseColor(FXMVECTOR value);
+        void XM_CALLCONV SetEmissiveColor(FXMVECTOR value);
+        void XM_CALLCONV SetSpecularColor(FXMVECTOR value);
+        void __cdecl SetSpecularPower(float value);
+        void __cdecl DisableSpecular();
+        void __cdecl SetAlpha(float value);
+        void XM_CALLCONV SetColorAndAlpha(FXMVECTOR value);
+
+        // Light settings.
+        void __cdecl SetPerPixelLighting(bool value) override;
+        void XM_CALLCONV SetAmbientLightColor(FXMVECTOR value) override;
+
+        void __cdecl SetLightEnabled(int whichLight, bool value) override;
+        void XM_CALLCONV SetLightDirection(int whichLight, FXMVECTOR value) override;
+        void XM_CALLCONV SetLightDiffuseColor(int whichLight, FXMVECTOR value) override;
+        void XM_CALLCONV SetLightSpecularColor(int whichLight, FXMVECTOR value) override;
+
+        void __cdecl EnableDefaultLighting() override;
+
+        // Fog settings.
+        void __cdecl SetFogEnabled(bool value) override;
+        void __cdecl SetFogStart(float value) override;
+        void __cdecl SetFogEnd(float value) override;
+        void XM_CALLCONV SetFogColor(FXMVECTOR value) override;
+
+        // Texture setting.
+        void __cdecl SetTexture(_In_opt_ ID3D11ShaderResourceView* value);
+
+        // Animation settings.
+        void __cdecl SetWeightsPerVertex(int value) override;
+        void __cdecl SetBoneTransforms(_In_reads_(count) XMMATRIX const* value, size_t count) override;
+        void __cdecl ResetBoneTransforms() override;
+
+        // Normal compression settings.
+        void __cdecl SetBiasedVertexNormals(bool value);
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl; // owns the forward-declared Impl
+
+        // Unsupported interface method.
+        void __cdecl SetLightingEnabled(bool value) override;
+    };
+
+    //----------------------------------------------------------------------------------
+    // Built-in effect for Visual Studio Shader Designer (DGSL) shaders
+    class DGSLEffect : public IEffect, public IEffectMatrices, public IEffectLights, public IEffectSkinning
+    {
+    public:
+        explicit DGSLEffect(_In_ ID3D11Device* device, _In_opt_ ID3D11PixelShader* pixelShader = nullptr,
+            _In_ bool enableSkinning = false);
+        DGSLEffect(DGSLEffect&& moveFrom) noexcept;
+        DGSLEffect& operator= (DGSLEffect&& moveFrom) noexcept;
+
+        DGSLEffect(DGSLEffect const&) = delete;
+        DGSLEffect& operator= (DGSLEffect const&) = delete;
+
+        ~DGSLEffect() override;
+
+        // IEffect methods.
+        void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) override;
+
+        void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) override;
+
+        // Camera settings.
+        void XM_CALLCONV SetWorld(FXMMATRIX value) override;
+        void XM_CALLCONV SetView(FXMMATRIX value) override;
+        void XM_CALLCONV SetProjection(FXMMATRIX value) override;
+        void XM_CALLCONV SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) override;
+
+        // Material settings.
+        void XM_CALLCONV SetAmbientColor(FXMVECTOR value);
+        void XM_CALLCONV SetDiffuseColor(FXMVECTOR value);
+        void XM_CALLCONV SetEmissiveColor(FXMVECTOR value);
+        void XM_CALLCONV SetSpecularColor(FXMVECTOR value);
+        void __cdecl SetSpecularPower(float value);
+        void __cdecl DisableSpecular();
+        void __cdecl SetAlpha(float value);
+        void XM_CALLCONV SetColorAndAlpha(FXMVECTOR value);
+
+        // Additional settings.
+        void XM_CALLCONV SetUVTransform(FXMMATRIX value);
+        void __cdecl SetViewport(float width, float height);
+        void __cdecl SetTime(float time);
+        void __cdecl SetAlphaDiscardEnable(bool value);
+
+        // Light settings.
+        void __cdecl SetLightingEnabled(bool value) override;
+        void XM_CALLCONV SetAmbientLightColor(FXMVECTOR value) override;
+
+        void __cdecl SetLightEnabled(int whichLight, bool value) override;
+        void XM_CALLCONV SetLightDirection(int whichLight, FXMVECTOR value) override;
+        void XM_CALLCONV SetLightDiffuseColor(int whichLight, FXMVECTOR value) override;
+        void XM_CALLCONV SetLightSpecularColor(int whichLight, FXMVECTOR value) override;
+
+        void __cdecl EnableDefaultLighting() override;
+
+        static constexpr int MaxDirectionalLights = 4;
+
+        // Vertex color setting.
+        void __cdecl SetVertexColorEnabled(bool value);
+
+        // Texture settings.
+        void __cdecl SetTextureEnabled(bool value);
+        void __cdecl SetTexture(_In_opt_ ID3D11ShaderResourceView* value);
+        void __cdecl SetTexture(int whichTexture, _In_opt_ ID3D11ShaderResourceView* value);
+
+        static constexpr int MaxTextures = 8;
+
+        // Animation setting.
+        void __cdecl SetWeightsPerVertex(int value) override;
+        void __cdecl SetBoneTransforms(_In_reads_(count) XMMATRIX const* value, size_t count) override;
+        void __cdecl ResetBoneTransforms() override;
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl; // owns the forward-declared Impl
+
+        // Unsupported interface methods.
+        void __cdecl SetPerPixelLighting(bool value) override;
+    };
+
+    //----------------------------------------------------------------------------------
+    // Built-in shader extends BasicEffect with normal maps and optional specular maps
+    class NormalMapEffect : public IEffect, public IEffectMatrices, public IEffectLights, public IEffectFog
+    {
+    public:
+        explicit NormalMapEffect(_In_ ID3D11Device* device);
+        NormalMapEffect(NormalMapEffect&& moveFrom) noexcept;
+        NormalMapEffect& operator= (NormalMapEffect&& moveFrom) noexcept;
+
+        NormalMapEffect(NormalMapEffect const&) = delete;
+        NormalMapEffect& operator= (NormalMapEffect const&) = delete;
+
+        ~NormalMapEffect() override;
+
+        // IEffect methods.
+        void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) override;
+
+        void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) override;
+
+        // Camera settings.
+        void XM_CALLCONV SetWorld(FXMMATRIX value) override;
+        void XM_CALLCONV SetView(FXMMATRIX value) override;
+        void XM_CALLCONV SetProjection(FXMMATRIX value) override;
+        void XM_CALLCONV SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) override;
+
+        // Material settings.
+        void XM_CALLCONV SetDiffuseColor(FXMVECTOR value);
+        void XM_CALLCONV SetEmissiveColor(FXMVECTOR value);
+        void XM_CALLCONV SetSpecularColor(FXMVECTOR value);
+        void __cdecl SetSpecularPower(float value);
+        void __cdecl DisableSpecular();
+        void __cdecl SetAlpha(float value);
+        void XM_CALLCONV SetColorAndAlpha(FXMVECTOR value);
+
+        // Light settings.
+        void XM_CALLCONV SetAmbientLightColor(FXMVECTOR value) override;
+
+        void __cdecl SetLightEnabled(int whichLight, bool value) override;
+        void XM_CALLCONV SetLightDirection(int whichLight, FXMVECTOR value) override;
+        void XM_CALLCONV SetLightDiffuseColor(int whichLight, FXMVECTOR value) override;
+        void XM_CALLCONV SetLightSpecularColor(int whichLight, FXMVECTOR value) override;
+
+        void __cdecl EnableDefaultLighting() override;
+
+        // Fog settings.
+        void __cdecl SetFogEnabled(bool value) override;
+        void __cdecl SetFogStart(float value) override;
+        void __cdecl SetFogEnd(float value) override;
+        void XM_CALLCONV SetFogColor(FXMVECTOR value) override;
+
+        // Vertex color setting.
+        void __cdecl SetVertexColorEnabled(bool value);
+
+        // Texture setting - albedo, normal and specular intensity
+        void __cdecl SetTexture(_In_opt_ ID3D11ShaderResourceView* value);
+        void __cdecl SetNormalTexture(_In_opt_ ID3D11ShaderResourceView* value);
+        void __cdecl SetSpecularTexture(_In_opt_ ID3D11ShaderResourceView* value);
+
+        // Normal compression settings.
+        void __cdecl SetBiasedVertexNormals(bool value);
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl; // owns the forward-declared Impl
+
+        // Unsupported interface methods.
+        void __cdecl SetLightingEnabled(bool value) override;
+        void __cdecl SetPerPixelLighting(bool value) override;
+    };
+
+    //----------------------------------------------------------------------------------
+    // Built-in shader for Physically-Based Rendering (Roughness/Metalness) with Image-based lighting
+    class PBREffect : public IEffect, public IEffectMatrices, public IEffectLights
+    {
+    public:
+        explicit PBREffect(_In_ ID3D11Device* device);
+        PBREffect(PBREffect&& moveFrom) noexcept;
+        PBREffect& operator= (PBREffect&& moveFrom) noexcept;
+
+        PBREffect(PBREffect const&) = delete;
+        PBREffect& operator= (PBREffect const&) = delete;
+
+        ~PBREffect() override;
+
+        // IEffect methods.
+        void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) override;
+
+        void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) override;
+
+        // Camera settings.
+        void XM_CALLCONV SetWorld(FXMMATRIX value) override;
+        void XM_CALLCONV SetView(FXMMATRIX value) override;
+        void XM_CALLCONV SetProjection(FXMMATRIX value) override;
+        void XM_CALLCONV SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) override;
+
+        // Light settings.
+        void __cdecl SetLightEnabled(int whichLight, bool value) override;
+        void XM_CALLCONV SetLightDirection(int whichLight, FXMVECTOR value) override;
+        void XM_CALLCONV SetLightDiffuseColor(int whichLight, FXMVECTOR value) override;
+
+        void __cdecl EnableDefaultLighting() override;
+
+        // PBR Settings.
+        void __cdecl SetAlpha(float value);
+        void XM_CALLCONV SetConstantAlbedo(FXMVECTOR value);
+        void __cdecl SetConstantMetallic(float value);
+        void __cdecl SetConstantRoughness(float value);
+
+        // Texture settings.
+        void __cdecl SetAlbedoTexture(_In_opt_ ID3D11ShaderResourceView* value);
+        void __cdecl SetNormalTexture(_In_opt_ ID3D11ShaderResourceView* value);
+        void __cdecl SetRMATexture(_In_opt_ ID3D11ShaderResourceView* value);
+
+        void __cdecl SetEmissiveTexture(_In_opt_ ID3D11ShaderResourceView* value);
+
+        void __cdecl SetSurfaceTextures(
+            _In_opt_ ID3D11ShaderResourceView* albedo,
+            _In_opt_ ID3D11ShaderResourceView* normal,
+            _In_opt_ ID3D11ShaderResourceView* roughnessMetallicAmbientOcclusion);
+
+        void __cdecl SetIBLTextures(
+            _In_opt_ ID3D11ShaderResourceView* radiance,
+            int numRadianceMips,
+            _In_opt_ ID3D11ShaderResourceView* irradiance);
+
+        // Normal compression settings.
+        void __cdecl SetBiasedVertexNormals(bool value);
+
+        // Velocity buffer settings.
+        void __cdecl SetVelocityGeneration(bool value);
+
+        // Render target size, required for velocity buffer output.
+        void __cdecl SetRenderTargetSizeInPixels(int width, int height);
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl; // owns the forward-declared Impl
+
+        // Unsupported interface methods.
+        void __cdecl SetLightingEnabled(bool value) override;
+        void __cdecl SetPerPixelLighting(bool value) override;
+        void XM_CALLCONV SetAmbientLightColor(FXMVECTOR value) override;
+        void XM_CALLCONV SetLightSpecularColor(int whichLight, FXMVECTOR value) override;
+    };
+
+    //----------------------------------------------------------------------------------
+    // Built-in shader for debug visualization of normals, tangents, etc.
+    class DebugEffect : public IEffect, public IEffectMatrices
+    {
+    public:
+        enum Mode
+        {
+            Mode_Default = 0,   // Hemispherical ambient lighting
+            Mode_Normals,       // RGB normals
+            Mode_Tangents,      // RGB tangents
+            Mode_BiTangents,    // RGB bi-tangents
+        };
+
+        explicit DebugEffect(_In_ ID3D11Device* device);
+        DebugEffect(DebugEffect&& moveFrom) noexcept;
+        DebugEffect& operator= (DebugEffect&& moveFrom) noexcept;
+
+        DebugEffect(DebugEffect const&) = delete;
+        DebugEffect& operator= (DebugEffect const&) = delete;
+
+        ~DebugEffect() override;
+
+        // IEffect methods.
+        void __cdecl Apply(_In_ ID3D11DeviceContext* deviceContext) override;
+
+        void __cdecl GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) override;
+
+        // Camera settings.
+        void XM_CALLCONV SetWorld(FXMMATRIX value) override;
+        void XM_CALLCONV SetView(FXMMATRIX value) override;
+        void XM_CALLCONV SetProjection(FXMMATRIX value) override;
+        void XM_CALLCONV SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) override;
+
+        // Debug Settings.
+        void __cdecl SetMode(Mode debugMode);
+        void XM_CALLCONV SetHemisphericalAmbientColor(FXMVECTOR upper, FXMVECTOR lower);
+        void __cdecl SetAlpha(float value);
+
+        // Vertex color setting.
+        void __cdecl SetVertexColorEnabled(bool value);
+
+        // Normal compression settings.
+        void __cdecl SetBiasedVertexNormals(bool value);
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl; // owns the forward-declared Impl
+    };
+
+    //----------------------------------------------------------------------------------
+    // Abstract interface to factory for sharing effects and texture resources
+    class IEffectFactory
+    {
+    public:
+        virtual ~IEffectFactory() = default;
+
+        IEffectFactory(const IEffectFactory&) = delete;
+        IEffectFactory& operator=(const IEffectFactory&) = delete;
+
+        IEffectFactory(IEffectFactory&&) = delete;
+        IEffectFactory& operator=(IEffectFactory&&) = delete;
+
+        struct EffectInfo
+        {
+            const wchar_t* name;
+            bool perVertexColor;
+            bool enableSkinning;
+            bool enableDualTexture;
+            bool enableNormalMaps;
+            bool biasedVertexNormals;
+            float specularPower;
+            float alpha;
+            XMFLOAT3 ambientColor;
+            XMFLOAT3 diffuseColor;
+            XMFLOAT3 specularColor;
+            XMFLOAT3 emissiveColor;
+            const wchar_t* diffuseTexture;
+            const wchar_t* specularTexture;
+            const wchar_t* normalTexture;
+            const wchar_t* emissiveTexture;
+
+            EffectInfo() noexcept :
+                name(nullptr),
+                perVertexColor(false),
+                enableSkinning(false),
+                enableDualTexture(false),
+                enableNormalMaps(false),
+                biasedVertexNormals(false),
+                specularPower(0),
+                alpha(0),
+                ambientColor(0, 0, 0),
+                diffuseColor(0, 0, 0),
+                specularColor(0, 0, 0),
+                emissiveColor(0, 0, 0),
+                diffuseTexture(nullptr),
+                specularTexture(nullptr),
+                normalTexture(nullptr),
+                emissiveTexture(nullptr)
+            {}
+        };
+
+        virtual std::shared_ptr<IEffect> __cdecl CreateEffect(_In_ const EffectInfo& info, _In_opt_ ID3D11DeviceContext* deviceContext) = 0;
+
+        virtual void __cdecl CreateTexture(_In_z_ const wchar_t* name, _In_opt_ ID3D11DeviceContext* deviceContext, _Outptr_ ID3D11ShaderResourceView** textureView) = 0;
+
+    protected:
+        IEffectFactory() = default;
+    };
+
+
+    // Factory for sharing effects and texture resources
+    class EffectFactory : public IEffectFactory
+    {
+    public:
+        explicit EffectFactory(_In_ ID3D11Device* device);
+        EffectFactory(EffectFactory&& moveFrom) noexcept;
+        EffectFactory& operator= (EffectFactory&& moveFrom) noexcept;
+
+        EffectFactory(EffectFactory const&) = delete;
+        EffectFactory& operator= (EffectFactory const&) = delete;
+
+        ~EffectFactory() override;
+
+        // IEffectFactory methods.
+        std::shared_ptr<IEffect> __cdecl CreateEffect(_In_ const EffectInfo& info, _In_opt_ ID3D11DeviceContext* deviceContext) override;
+        void __cdecl CreateTexture(_In_z_ const wchar_t* name, _In_opt_ ID3D11DeviceContext* deviceContext, _Outptr_ ID3D11ShaderResourceView** textureView) override;
+
+        // Settings.
+        void __cdecl ReleaseCache();
+
+        void __cdecl SetSharing(bool enabled) noexcept;
+
+        void __cdecl EnableNormalMapEffect(bool enabled) noexcept;
+        void __cdecl EnableForceSRGB(bool forceSRGB) noexcept;
+
+        void __cdecl SetDirectory(_In_opt_z_ const wchar_t* path) noexcept;
+
+        // Properties.
+        ID3D11Device* GetDevice() const noexcept;
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::shared_ptr<Impl> pImpl; // shared ownership of the forward-declared Impl
+    };
+
+
+    // Factory for Physically Based Rendering (PBR)
+    class PBREffectFactory : public IEffectFactory
+    {
+    public:
+        explicit PBREffectFactory(_In_ ID3D11Device* device);
+        PBREffectFactory(PBREffectFactory&& moveFrom) noexcept;
+        PBREffectFactory& operator= (PBREffectFactory&& moveFrom) noexcept;
+
+        PBREffectFactory(PBREffectFactory const&) = delete;
+        PBREffectFactory& operator= (PBREffectFactory const&) = delete;
+
+        ~PBREffectFactory() override;
+
+        // IEffectFactory methods.
+        std::shared_ptr<IEffect> __cdecl CreateEffect(_In_ const EffectInfo& info, _In_opt_ ID3D11DeviceContext* deviceContext) override;
+        void __cdecl CreateTexture(_In_z_ const wchar_t* name, _In_opt_ ID3D11DeviceContext* deviceContext, _Outptr_ ID3D11ShaderResourceView** textureView) override;
+
+        // Settings.
+        void __cdecl ReleaseCache();
+
+        void __cdecl SetSharing(bool enabled) noexcept;
+
+        void __cdecl EnableForceSRGB(bool forceSRGB) noexcept;
+
+        void __cdecl SetDirectory(_In_opt_z_ const wchar_t* path) noexcept;
+
+        // Properties.
+        ID3D11Device* GetDevice() const noexcept;
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::shared_ptr<Impl> pImpl; // shared ownership of the forward-declared Impl
+    };
+
+
+    // Factory for sharing Visual Studio Shader Designer (DGSL) shaders and texture resources
+    class DGSLEffectFactory : public IEffectFactory
+    {
+    public:
+        explicit DGSLEffectFactory(_In_ ID3D11Device* device);
+        DGSLEffectFactory(DGSLEffectFactory&& moveFrom) noexcept;
+        DGSLEffectFactory& operator= (DGSLEffectFactory&& moveFrom) noexcept;
+
+        DGSLEffectFactory(DGSLEffectFactory const&) = delete;
+        DGSLEffectFactory& operator= (DGSLEffectFactory const&) = delete;
+
+        ~DGSLEffectFactory() override;
+
+        // IEffectFactory methods.
+        std::shared_ptr<IEffect> __cdecl CreateEffect(_In_ const EffectInfo& info, _In_opt_ ID3D11DeviceContext* deviceContext) override;
+        void __cdecl CreateTexture(_In_z_ const wchar_t* name, _In_opt_ ID3D11DeviceContext* deviceContext, _Outptr_ ID3D11ShaderResourceView** textureView) override;
+
+        // DGSL methods.
+        struct DGSLEffectInfo : public EffectInfo
+        {
+            static constexpr int BaseTextureOffset = 4;
+
+            const wchar_t* textures[DGSLEffect::MaxTextures - BaseTextureOffset];
+            const wchar_t* pixelShader;
+
+            DGSLEffectInfo() noexcept :
+                EffectInfo(),
+                textures{},
+                pixelShader(nullptr)
+            {}
+        };
+
+        virtual std::shared_ptr<DGSLEffect> __cdecl CreateDGSLEffect(_In_ const DGSLEffectInfo& info, _In_opt_ ID3D11DeviceContext* deviceContext);
+
+        virtual void __cdecl CreatePixelShader(_In_z_ const wchar_t* shader, _Outptr_ ID3D11PixelShader** pixelShader);
+
+        // Settings.
+        void __cdecl ReleaseCache();
+
+        void __cdecl SetSharing(bool enabled) noexcept;
+
+        void __cdecl EnableForceSRGB(bool forceSRGB) noexcept;
+
+        void __cdecl SetDirectory(_In_opt_z_ const wchar_t* path) noexcept;
+
+        // Properties.
+        ID3D11Device* GetDevice() const noexcept;
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::shared_ptr<Impl> pImpl; // shared ownership of the forward-declared Impl
+    };
+}
diff --git a/Sdk/External/DirectXTK/Inc/GamePad.h b/Sdk/External/DirectXTK/Inc/GamePad.h
new file mode 100644
index 0000000..c9e81ad
--- /dev/null
+++ b/Sdk/External/DirectXTK/Inc/GamePad.h
@@ -0,0 +1,303 @@
+//--------------------------------------------------------------------------------------
+// File: GamePad.h
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+// http://go.microsoft.com/fwlink/?LinkID=615561
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#if (_WIN32_WINNT < 0x0A00 /*_WIN32_WINNT_WIN10*/) || defined(_GAMING_DESKTOP)
+#ifndef _XBOX_ONE
+#if !defined(WINAPI_FAMILY) || (WINAPI_FAMILY != WINAPI_FAMILY_PHONE_APP)
+#if (_WIN32_WINNT >= 0x0602 /*_WIN32_WINNT_WIN8*/ )
+#pragma comment(lib,"xinput.lib")
+#else
+#pragma comment(lib,"xinput9_1_0.lib")
+#endif
+#endif
+#endif
+#endif
+
+#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_GAMES)
+interface IGameInputDevice;
+#endif
+
+#include <cstdint>
+#include <memory>
+
+#if (_WIN32_WINNT >= 0x0A00 /*_WIN32_WINNT_WIN10*/) && !defined(_GAMING_DESKTOP)
+#pragma comment(lib,"runtimeobject.lib")
+#include <string>
+#endif
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunknown-pragmas"
+#endif
+
+
+namespace DirectX
+{
+    class GamePad
+    {
+    public:
+        GamePad() noexcept(false);
+        GamePad(GamePad&& moveFrom) noexcept;
+        GamePad& operator= (GamePad&& moveFrom) noexcept;
+
+        GamePad(GamePad const&) = delete;
+        GamePad& operator=(GamePad const&) = delete;
+
+        virtual ~GamePad();
+
+    #if ((_WIN32_WINNT >= 0x0A00 /*_WIN32_WINNT_WIN10*/) && !defined(_GAMING_DESKTOP)) || defined(_XBOX_ONE)
+        static constexpr int MAX_PLAYER_COUNT = 8;
+    #else
+        static constexpr int MAX_PLAYER_COUNT = 4;
+    #endif
+
+        static constexpr int c_MostRecent = -1;
+
+    #if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_GAMES)
+        static constexpr int c_MergedInput = -2;
+    #endif
+
+        enum DeadZone
+        {
+            DEAD_ZONE_INDEPENDENT_AXES = 0,
+            DEAD_ZONE_CIRCULAR,
+            DEAD_ZONE_NONE,
+        };
+
+        struct Buttons
+        {
+            bool a;
+            bool b;
+            bool x;
+            bool y;
+            bool leftStick;
+            bool rightStick;
+            bool leftShoulder;
+            bool rightShoulder;
+            union
+            {
+                bool back;
+                bool view;
+            };
+            union
+            {
+                bool start;
+                bool menu;
+            };
+        };
+
+        struct DPad
+        {
+            bool up;
+            bool down;
+            bool right;
+            bool left;
+        };
+
+        struct ThumbSticks
+        {
+            float leftX;
+            float leftY;
+            float rightX;
+            float rightY;
+        };
+
+        struct Triggers
+        {
+            float left;
+            float right;
+        };
+
+        struct State
+        {
+            bool connected;
+            uint64_t packet;
+            Buttons buttons;
+            DPad dpad;
+            ThumbSticks thumbSticks;
+            Triggers triggers;
+
+            bool __cdecl IsConnected() const noexcept { return connected; }
+
+            // Is the button pressed currently?
+            bool __cdecl IsAPressed() const noexcept { return buttons.a; }
+            bool __cdecl IsBPressed() const noexcept { return buttons.b; }
+            bool __cdecl IsXPressed() const noexcept { return buttons.x; }
+            bool __cdecl IsYPressed() const noexcept { return buttons.y; }
+
+            bool __cdecl IsLeftStickPressed() const noexcept { return buttons.leftStick; }
+            bool __cdecl IsRightStickPressed() const noexcept { return buttons.rightStick; }
+
+            bool __cdecl IsLeftShoulderPressed() const noexcept { return buttons.leftShoulder; }
+            bool __cdecl IsRightShoulderPressed() const noexcept { return buttons.rightShoulder; }
+
+            bool __cdecl IsBackPressed() const noexcept { return buttons.back; }
+            bool __cdecl IsViewPressed() const noexcept { return buttons.view; }
+            bool __cdecl IsStartPressed() const noexcept { return buttons.start; }
+            bool __cdecl IsMenuPressed() const noexcept { return buttons.menu; }
+
+            bool __cdecl IsDPadDownPressed() const noexcept { return dpad.down; }
+            bool __cdecl IsDPadUpPressed() const noexcept { return dpad.up; }
+            bool __cdecl IsDPadLeftPressed() const noexcept { return dpad.left; }
+            bool __cdecl IsDPadRightPressed() const noexcept { return dpad.right; }
+
+            bool __cdecl IsLeftThumbStickUp() const noexcept { return (thumbSticks.leftY > 0.5f) != 0; }
+            bool __cdecl IsLeftThumbStickDown() const noexcept { return (thumbSticks.leftY < -0.5f) != 0; }
+            bool __cdecl IsLeftThumbStickLeft() const noexcept { return (thumbSticks.leftX < -0.5f) != 0; }
+            bool __cdecl IsLeftThumbStickRight() const noexcept { return (thumbSticks.leftX > 0.5f) != 0; }
+
+            bool __cdecl IsRightThumbStickUp() const noexcept { return (thumbSticks.rightY > 0.5f) != 0; }
+            bool __cdecl IsRightThumbStickDown() const noexcept { return (thumbSticks.rightY < -0.5f) != 0; }
+            bool __cdecl IsRightThumbStickLeft() const noexcept { return (thumbSticks.rightX < -0.5f) != 0; }
+            bool __cdecl IsRightThumbStickRight() const noexcept { return (thumbSticks.rightX > 0.5f) != 0; }
+
+            bool __cdecl IsLeftTriggerPressed() const noexcept { return (triggers.left > 0.5f) != 0; }
+            bool __cdecl IsRightTriggerPressed() const noexcept { return (triggers.right > 0.5f) != 0; }
+        };
+
+        struct Capabilities
+        {
+            enum Type
+            {
+                UNKNOWN = 0,
+                GAMEPAD,
+                WHEEL,
+                ARCADE_STICK,
+                FLIGHT_STICK,
+                DANCE_PAD,
+                GUITAR,
+                GUITAR_ALTERNATE,
+                DRUM_KIT,
+                GUITAR_BASS = 11,
+                ARCADE_PAD = 19,
+            };
+
+            bool connected;
+            Type gamepadType;
+        #if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_GAMES)
+            APP_LOCAL_DEVICE_ID id;
+        #elif (_WIN32_WINNT >= 0x0A00 /*_WIN32_WINNT_WIN10*/) && !defined(_GAMING_DESKTOP)
+            std::wstring id;
+        #else
+            uint64_t id;
+        #endif
+            uint16_t vid;
+            uint16_t pid;
+
+            Capabilities() noexcept : connected(false), gamepadType(UNKNOWN), id{}, vid(0), pid(0) {}
+
+            bool __cdecl IsConnected() const noexcept { return connected; }
+        };
+
+        class ButtonStateTracker
+        {
+        public:
+            enum ButtonState
+            {
+                UP = 0,         // Button is up
+                HELD = 1,       // Button is held down
+                RELEASED = 2,   // Button was just released
+                PRESSED = 3,    // Button was just pressed
+            };
+
+            ButtonState a;
+            ButtonState b;
+            ButtonState x;
+            ButtonState y;
+
+            ButtonState leftStick;
+            ButtonState rightStick;
+
+            ButtonState leftShoulder;
+            ButtonState rightShoulder;
+
+            union
+            {
+                ButtonState back;
+                ButtonState view;
+            };
+
+            union
+            {
+                ButtonState start;
+                ButtonState menu;
+            };
+
+            ButtonState dpadUp;
+            ButtonState dpadDown;
+            ButtonState dpadLeft;
+            ButtonState dpadRight;
+
+            ButtonState leftStickUp;
+            ButtonState leftStickDown;
+            ButtonState leftStickLeft;
+            ButtonState leftStickRight;
+
+            ButtonState rightStickUp;
+            ButtonState rightStickDown;
+            ButtonState rightStickLeft;
+            ButtonState rightStickRight;
+
+            ButtonState leftTrigger;
+            ButtonState rightTrigger;
+
+        #pragma prefast(suppress: 26495, "Reset() performs the initialization")
+            ButtonStateTracker() noexcept { Reset(); }
+
+            void __cdecl Update(const State& state) noexcept;
+
+            void __cdecl Reset() noexcept;
+
+            State __cdecl GetLastState() const noexcept { return lastState; }
+
+        private:
+            State lastState;
+        };
+
+        // Retrieve the current state of the gamepad of the associated player index
+        State __cdecl GetState(int player, DeadZone deadZoneMode = DEAD_ZONE_INDEPENDENT_AXES);
+
+        // Retrieve the current capabilities of the gamepad of the associated player index
+        Capabilities __cdecl GetCapabilities(int player);
+
+        // Set the vibration motor speeds of the gamepad
+        bool __cdecl SetVibration(int player, float leftMotor, float rightMotor, float leftTrigger = 0.f, float rightTrigger = 0.f) noexcept;
+
+        // Handle suspending/resuming
+        void __cdecl Suspend() noexcept;
+        void __cdecl Resume() noexcept;
+
+    #if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_GAMES)
+        void __cdecl RegisterEvents(void* ctrlChanged) noexcept;
+    #elif ((_WIN32_WINNT >= 0x0A00 /*_WIN32_WINNT_WIN10*/ ) && !defined(_GAMING_DESKTOP)) || defined(_XBOX_ONE)
+        void __cdecl RegisterEvents(void* ctrlChanged, void* userChanged) noexcept;
+    #endif
+
+    #if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_GAMES)
+        // Underlying device access
+        void __cdecl GetDevice(int player, _Outptr_ IGameInputDevice** device) noexcept;
+    #endif
+
+        // Singleton
+        static GamePad& __cdecl Get();
+
+    private:
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl; // owns the forward-declared Impl
+    };
+}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
diff --git a/Sdk/External/DirectXTK/Inc/GeometricPrimitive.h b/Sdk/External/DirectXTK/Inc/GeometricPrimitive.h
new file mode 100644
index 0000000..55f4f87
--- /dev/null
+++ b/Sdk/External/DirectXTK/Inc/GeometricPrimitive.h
@@ -0,0 +1,89 @@
+//--------------------------------------------------------------------------------------
+// File: GeometricPrimitive.h
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "VertexTypes.h"
+
+#include <DirectXColors.h>
+#include <functional>
+#include <memory>
+#include <vector>
+
+
+namespace DirectX
+{
+    class IEffect;
+
+    class GeometricPrimitive
+    {
+    public:
+        GeometricPrimitive(GeometricPrimitive&&) = default;
+        GeometricPrimitive& operator= (GeometricPrimitive&&) = default;
+
+        GeometricPrimitive(GeometricPrimitive const&) = delete;
+        GeometricPrimitive& operator= (GeometricPrimitive const&) = delete;
+
+        using VertexType = VertexPositionNormalTexture;
+
+        virtual ~GeometricPrimitive();
+
+        // Factory methods.
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateCube(_In_ ID3D11DeviceContext* deviceContext, float size = 1, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateBox(_In_ ID3D11DeviceContext* deviceContext, const XMFLOAT3& size, bool rhcoords = true, bool invertn = false);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateSphere(_In_ ID3D11DeviceContext* deviceContext, float diameter = 1, size_t tessellation = 16, bool rhcoords = true, bool invertn = false);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateGeoSphere(_In_ ID3D11DeviceContext* deviceContext, float diameter = 1, size_t tessellation = 3, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateCylinder(_In_ ID3D11DeviceContext* deviceContext, float height = 1, float diameter = 1, size_t tessellation = 32, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateCone(_In_ ID3D11DeviceContext* deviceContext, float diameter = 1, float height = 1, size_t tessellation = 32, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateTorus(_In_ ID3D11DeviceContext* deviceContext, float diameter = 1, float thickness = 0.333f, size_t tessellation = 32, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateTetrahedron(_In_ ID3D11DeviceContext* deviceContext, float size = 1, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateOctahedron(_In_ ID3D11DeviceContext* deviceContext, float size = 1, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateDodecahedron(_In_ ID3D11DeviceContext* deviceContext, float size = 1, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateIcosahedron(_In_ ID3D11DeviceContext* deviceContext, float size = 1, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateTeapot(_In_ ID3D11DeviceContext* deviceContext, float size = 1, size_t tessellation = 8, bool rhcoords = true);
+        static std::unique_ptr<GeometricPrimitive> __cdecl CreateCustom(_In_ ID3D11DeviceContext* deviceContext, const std::vector<VertexType>& vertices, const std::vector<uint16_t>& indices);
+
+        static void __cdecl CreateCube(std::vector<VertexType>& vertices, std::vector<uint16_t>& indices, float size = 1, bool rhcoords = true);
+        static void __cdecl CreateBox(std::vector<VertexType>& vertices, std::vector<uint16_t>& indices, const XMFLOAT3& size, bool rhcoords = true, bool invertn = false);
+        static void __cdecl CreateSphere(std::vector<VertexType>& vertices, std::vector<uint16_t>& indices, float diameter = 1, size_t tessellation = 16, bool rhcoords = true, bool invertn = false);
+        static void __cdecl CreateGeoSphere(std::vector<VertexType>& vertices, std::vector<uint16_t>& indices, float diameter = 1, size_t tessellation = 3, bool rhcoords = true);
+        static void __cdecl CreateCylinder(std::vector<VertexType>& vertices, std::vector<uint16_t>& indices, float height = 1, float diameter = 1, size_t tessellation = 32, bool rhcoords = true);
+        static void __cdecl CreateCone(std::vector<VertexType>& vertices, std::vector<uint16_t>& indices, float diameter = 1, float height = 1, size_t tessellation = 32, bool rhcoords = true);
+        static void __cdecl CreateTorus(std::vector<VertexType>& vertices, std::vector<uint16_t>& indices, float diameter = 1, float thickness = 0.333f, size_t tessellation = 32, bool rhcoords = true);
+        static void __cdecl CreateTetrahedron(std::vector<VertexType>& vertices, std::vector<uint16_t>& indices, float size = 1, bool rhcoords = true);
+        static void __cdecl CreateOctahedron(std::vector<VertexType>& vertices, std::vector<uint16_t>& indices, float size = 1, bool rhcoords = true);
+        static void __cdecl CreateDodecahedron(std::vector<VertexType>& vertices, std::vector<uint16_t>& indices, float size = 1, bool rhcoords = true);
+        static void __cdecl CreateIcosahedron(std::vector<VertexType>& vertices, std::vector<uint16_t>& indices, float size = 1, bool rhcoords = true);
+        static void __cdecl CreateTeapot(std::vector<VertexType>& vertices, std::vector<uint16_t>& indices, float size = 1, size_t tessellation = 8, bool rhcoords = true);
+
+        // Draw the primitive.
+        void XM_CALLCONV Draw(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection,
+            FXMVECTOR color = Colors::White,
+            _In_opt_ ID3D11ShaderResourceView* texture = nullptr,
+            bool wireframe = false,
+            _In_opt_ std::function<void __cdecl()> setCustomState = nullptr) const;
+
+        // Draw the primitive using a custom effect.
+        void __cdecl Draw(_In_ IEffect* effect,
+            _In_ ID3D11InputLayout* inputLayout,
+            bool alpha = false, bool wireframe = false,
+            _In_opt_ std::function<void __cdecl()> setCustomState = nullptr) const;
+
+        // Create input layout for drawing with a custom effect.
+        void __cdecl CreateInputLayout(_In_ IEffect* effect, _Outptr_ ID3D11InputLayout** inputLayout) const;
+
+    private:
+        GeometricPrimitive() noexcept(false);
+
+        // Private implementation.
+        class Impl;
+
+        std::unique_ptr<Impl> pImpl; // owns the forward-declared Impl
+    };
+}
diff --git a/Sdk/External/DirectXTK/Inc/GraphicsMemory.h b/Sdk/External/DirectXTK/Inc/GraphicsMemory.h
new file mode 100644
index 0000000..d7885c8
--- /dev/null
+++ b/Sdk/External/DirectXTK/Inc/GraphicsMemory.h
@@ -0,0 +1,52 @@
+//--------------------------------------------------------------------------------------
+// File: GraphicsMemory.h
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_XBOX_ONE) && defined(_TITLE) +#include +#else +#include +#endif + +#include + + +namespace DirectX +{ + class GraphicsMemory + { + public: + #if defined(_XBOX_ONE) && defined(_TITLE) + GraphicsMemory(_In_ ID3D11DeviceX* device, unsigned int backBufferCount = 2); + #else + GraphicsMemory(_In_ ID3D11Device* device, unsigned int backBufferCount = 2); + #endif + GraphicsMemory(GraphicsMemory&& moveFrom) noexcept; + GraphicsMemory& operator= (GraphicsMemory&& moveFrom) noexcept; + + GraphicsMemory(GraphicsMemory const&) = delete; + GraphicsMemory& operator=(GraphicsMemory const&) = delete; + + virtual ~GraphicsMemory(); + + void* __cdecl Allocate(_In_opt_ ID3D11DeviceContext* context, size_t size, int alignment); + + void __cdecl Commit(); + + // Singleton + static GraphicsMemory& __cdecl Get(); + + private: + // Private implementation. + class Impl; + + std::unique_ptr pImpl; + }; +} diff --git a/Sdk/External/DirectXTK/Inc/Keyboard.h b/Sdk/External/DirectXTK/Inc/Keyboard.h new file mode 100644 index 0000000..044360c --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/Keyboard.h @@ -0,0 +1,496 @@ +//-------------------------------------------------------------------------------------- +// File: Keyboard.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#pragma once + +#include +#include + +#if (defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP)) || (defined(_XBOX_ONE) && defined(_TITLE)) +namespace ABI { namespace Windows { namespace UI { namespace Core { struct ICoreWindow; } } } } +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunknown-pragmas" +#endif + + +namespace DirectX +{ + class Keyboard + { + public: + Keyboard() noexcept(false); + Keyboard(Keyboard&& moveFrom) noexcept; + Keyboard& operator= (Keyboard&& moveFrom) noexcept; + + Keyboard(Keyboard const&) = delete; + Keyboard& operator=(Keyboard const&) = delete; + + virtual ~Keyboard(); + + enum Keys : unsigned char + { + None = 0, + + Back = 0x8, + Tab = 0x9, + + Enter = 0xd, + + Pause = 0x13, + CapsLock = 0x14, + Kana = 0x15, + + Kanji = 0x19, + + Escape = 0x1b, + ImeConvert = 0x1c, + ImeNoConvert = 0x1d, + + Space = 0x20, + PageUp = 0x21, + PageDown = 0x22, + End = 0x23, + Home = 0x24, + Left = 0x25, + Up = 0x26, + Right = 0x27, + Down = 0x28, + Select = 0x29, + Print = 0x2a, + Execute = 0x2b, + PrintScreen = 0x2c, + Insert = 0x2d, + Delete = 0x2e, + Help = 0x2f, + D0 = 0x30, + D1 = 0x31, + D2 = 0x32, + D3 = 0x33, + D4 = 0x34, + D5 = 0x35, + D6 = 0x36, + D7 = 0x37, + D8 = 0x38, + D9 = 0x39, + + A = 0x41, + B = 0x42, + C = 0x43, + D = 0x44, + E = 0x45, + F = 0x46, + G = 0x47, + H = 0x48, + I = 0x49, + J = 0x4a, + K = 0x4b, + L = 0x4c, + M = 0x4d, + N = 0x4e, + O = 0x4f, + P = 0x50, + Q = 0x51, + R = 0x52, + S = 0x53, + T = 0x54, + U = 0x55, + V = 0x56, + W = 0x57, + X = 0x58, + Y = 0x59, + Z = 0x5a, + LeftWindows = 0x5b, + RightWindows = 0x5c, + Apps = 0x5d, + + Sleep = 0x5f, + NumPad0 = 0x60, + NumPad1 = 0x61, + NumPad2 = 0x62, + NumPad3 = 0x63, + NumPad4 = 0x64, + NumPad5 = 0x65, + NumPad6 = 0x66, 
+ NumPad7 = 0x67, + NumPad8 = 0x68, + NumPad9 = 0x69, + Multiply = 0x6a, + Add = 0x6b, + Separator = 0x6c, + Subtract = 0x6d, + + Decimal = 0x6e, + Divide = 0x6f, + F1 = 0x70, + F2 = 0x71, + F3 = 0x72, + F4 = 0x73, + F5 = 0x74, + F6 = 0x75, + F7 = 0x76, + F8 = 0x77, + F9 = 0x78, + F10 = 0x79, + F11 = 0x7a, + F12 = 0x7b, + F13 = 0x7c, + F14 = 0x7d, + F15 = 0x7e, + F16 = 0x7f, + F17 = 0x80, + F18 = 0x81, + F19 = 0x82, + F20 = 0x83, + F21 = 0x84, + F22 = 0x85, + F23 = 0x86, + F24 = 0x87, + + NumLock = 0x90, + Scroll = 0x91, + + LeftShift = 0xa0, + RightShift = 0xa1, + LeftControl = 0xa2, + RightControl = 0xa3, + LeftAlt = 0xa4, + RightAlt = 0xa5, + BrowserBack = 0xa6, + BrowserForward = 0xa7, + BrowserRefresh = 0xa8, + BrowserStop = 0xa9, + BrowserSearch = 0xaa, + BrowserFavorites = 0xab, + BrowserHome = 0xac, + VolumeMute = 0xad, + VolumeDown = 0xae, + VolumeUp = 0xaf, + MediaNextTrack = 0xb0, + MediaPreviousTrack = 0xb1, + MediaStop = 0xb2, + MediaPlayPause = 0xb3, + LaunchMail = 0xb4, + SelectMedia = 0xb5, + LaunchApplication1 = 0xb6, + LaunchApplication2 = 0xb7, + + OemSemicolon = 0xba, + OemPlus = 0xbb, + OemComma = 0xbc, + OemMinus = 0xbd, + OemPeriod = 0xbe, + OemQuestion = 0xbf, + OemTilde = 0xc0, + + OemOpenBrackets = 0xdb, + OemPipe = 0xdc, + OemCloseBrackets = 0xdd, + OemQuotes = 0xde, + Oem8 = 0xdf, + + OemBackslash = 0xe2, + + ProcessKey = 0xe5, + + OemCopy = 0xf2, + OemAuto = 0xf3, + OemEnlW = 0xf4, + + Attn = 0xf6, + Crsel = 0xf7, + Exsel = 0xf8, + EraseEof = 0xf9, + Play = 0xfa, + Zoom = 0xfb, + + Pa1 = 0xfd, + OemClear = 0xfe, + }; + + struct State + { + bool Reserved0 : 8; + bool Back : 1; // VK_BACK, 0x8 + bool Tab : 1; // VK_TAB, 0x9 + bool Reserved1 : 3; + bool Enter : 1; // VK_RETURN, 0xD + bool Reserved2 : 2; + bool Reserved3 : 3; + bool Pause : 1; // VK_PAUSE, 0x13 + bool CapsLock : 1; // VK_CAPITAL, 0x14 + bool Kana : 1; // VK_KANA, 0x15 + bool Reserved4 : 2; + bool Reserved5 : 1; + bool Kanji : 1; // VK_KANJI, 0x19 + bool Reserved6 : 1; + 
bool Escape : 1; // VK_ESCAPE, 0x1B + bool ImeConvert : 1; // VK_CONVERT, 0x1C + bool ImeNoConvert : 1; // VK_NONCONVERT, 0x1D + bool Reserved7 : 2; + bool Space : 1; // VK_SPACE, 0x20 + bool PageUp : 1; // VK_PRIOR, 0x21 + bool PageDown : 1; // VK_NEXT, 0x22 + bool End : 1; // VK_END, 0x23 + bool Home : 1; // VK_HOME, 0x24 + bool Left : 1; // VK_LEFT, 0x25 + bool Up : 1; // VK_UP, 0x26 + bool Right : 1; // VK_RIGHT, 0x27 + bool Down : 1; // VK_DOWN, 0x28 + bool Select : 1; // VK_SELECT, 0x29 + bool Print : 1; // VK_PRINT, 0x2A + bool Execute : 1; // VK_EXECUTE, 0x2B + bool PrintScreen : 1; // VK_SNAPSHOT, 0x2C + bool Insert : 1; // VK_INSERT, 0x2D + bool Delete : 1; // VK_DELETE, 0x2E + bool Help : 1; // VK_HELP, 0x2F + bool D0 : 1; // 0x30 + bool D1 : 1; // 0x31 + bool D2 : 1; // 0x32 + bool D3 : 1; // 0x33 + bool D4 : 1; // 0x34 + bool D5 : 1; // 0x35 + bool D6 : 1; // 0x36 + bool D7 : 1; // 0x37 + bool D8 : 1; // 0x38 + bool D9 : 1; // 0x39 + bool Reserved8 : 6; + bool Reserved9 : 1; + bool A : 1; // 0x41 + bool B : 1; // 0x42 + bool C : 1; // 0x43 + bool D : 1; // 0x44 + bool E : 1; // 0x45 + bool F : 1; // 0x46 + bool G : 1; // 0x47 + bool H : 1; // 0x48 + bool I : 1; // 0x49 + bool J : 1; // 0x4A + bool K : 1; // 0x4B + bool L : 1; // 0x4C + bool M : 1; // 0x4D + bool N : 1; // 0x4E + bool O : 1; // 0x4F + bool P : 1; // 0x50 + bool Q : 1; // 0x51 + bool R : 1; // 0x52 + bool S : 1; // 0x53 + bool T : 1; // 0x54 + bool U : 1; // 0x55 + bool V : 1; // 0x56 + bool W : 1; // 0x57 + bool X : 1; // 0x58 + bool Y : 1; // 0x59 + bool Z : 1; // 0x5A + bool LeftWindows : 1; // VK_LWIN, 0x5B + bool RightWindows : 1; // VK_RWIN, 0x5C + bool Apps : 1; // VK_APPS, 0x5D + bool Reserved10 : 1; + bool Sleep : 1; // VK_SLEEP, 0x5F + bool NumPad0 : 1; // VK_NUMPAD0, 0x60 + bool NumPad1 : 1; // VK_NUMPAD1, 0x61 + bool NumPad2 : 1; // VK_NUMPAD2, 0x62 + bool NumPad3 : 1; // VK_NUMPAD3, 0x63 + bool NumPad4 : 1; // VK_NUMPAD4, 0x64 + bool NumPad5 : 1; // VK_NUMPAD5, 0x65 + bool 
NumPad6 : 1; // VK_NUMPAD6, 0x66 + bool NumPad7 : 1; // VK_NUMPAD7, 0x67 + bool NumPad8 : 1; // VK_NUMPAD8, 0x68 + bool NumPad9 : 1; // VK_NUMPAD9, 0x69 + bool Multiply : 1; // VK_MULTIPLY, 0x6A + bool Add : 1; // VK_ADD, 0x6B + bool Separator : 1; // VK_SEPARATOR, 0x6C + bool Subtract : 1; // VK_SUBTRACT, 0x6D + bool Decimal : 1; // VK_DECIMAL, 0x6E + bool Divide : 1; // VK_DIVIDE, 0x6F + bool F1 : 1; // VK_F1, 0x70 + bool F2 : 1; // VK_F2, 0x71 + bool F3 : 1; // VK_F3, 0x72 + bool F4 : 1; // VK_F4, 0x73 + bool F5 : 1; // VK_F5, 0x74 + bool F6 : 1; // VK_F6, 0x75 + bool F7 : 1; // VK_F7, 0x76 + bool F8 : 1; // VK_F8, 0x77 + bool F9 : 1; // VK_F9, 0x78 + bool F10 : 1; // VK_F10, 0x79 + bool F11 : 1; // VK_F11, 0x7A + bool F12 : 1; // VK_F12, 0x7B + bool F13 : 1; // VK_F13, 0x7C + bool F14 : 1; // VK_F14, 0x7D + bool F15 : 1; // VK_F15, 0x7E + bool F16 : 1; // VK_F16, 0x7F + bool F17 : 1; // VK_F17, 0x80 + bool F18 : 1; // VK_F18, 0x81 + bool F19 : 1; // VK_F19, 0x82 + bool F20 : 1; // VK_F20, 0x83 + bool F21 : 1; // VK_F21, 0x84 + bool F22 : 1; // VK_F22, 0x85 + bool F23 : 1; // VK_F23, 0x86 + bool F24 : 1; // VK_F24, 0x87 + bool Reserved11 : 8; + bool NumLock : 1; // VK_NUMLOCK, 0x90 + bool Scroll : 1; // VK_SCROLL, 0x91 + bool Reserved12 : 6; + bool Reserved13 : 8; + bool LeftShift : 1; // VK_LSHIFT, 0xA0 + bool RightShift : 1; // VK_RSHIFT, 0xA1 + bool LeftControl : 1; // VK_LCONTROL, 0xA2 + bool RightControl : 1; // VK_RCONTROL, 0xA3 + bool LeftAlt : 1; // VK_LMENU, 0xA4 + bool RightAlt : 1; // VK_RMENU, 0xA5 + bool BrowserBack : 1; // VK_BROWSER_BACK, 0xA6 + bool BrowserForward : 1; // VK_BROWSER_FORWARD, 0xA7 + bool BrowserRefresh : 1; // VK_BROWSER_REFRESH, 0xA8 + bool BrowserStop : 1; // VK_BROWSER_STOP, 0xA9 + bool BrowserSearch : 1; // VK_BROWSER_SEARCH, 0xAA + bool BrowserFavorites : 1; // VK_BROWSER_FAVORITES, 0xAB + bool BrowserHome : 1; // VK_BROWSER_HOME, 0xAC + bool VolumeMute : 1; // VK_VOLUME_MUTE, 0xAD + bool VolumeDown : 1; // VK_VOLUME_DOWN,
0xAE + bool VolumeUp : 1; // VK_VOLUME_UP, 0xAF + bool MediaNextTrack : 1; // VK_MEDIA_NEXT_TRACK, 0xB0 + bool MediaPreviousTrack : 1;// VK_MEDIA_PREV_TRACK, 0xB1 + bool MediaStop : 1; // VK_MEDIA_STOP, 0xB2 + bool MediaPlayPause : 1; // VK_MEDIA_PLAY_PAUSE, 0xB3 + bool LaunchMail : 1; // VK_LAUNCH_MAIL, 0xB4 + bool SelectMedia : 1; // VK_LAUNCH_MEDIA_SELECT, 0xB5 + bool LaunchApplication1 : 1;// VK_LAUNCH_APP1, 0xB6 + bool LaunchApplication2 : 1;// VK_LAUNCH_APP2, 0xB7 + bool Reserved14 : 2; + bool OemSemicolon : 1; // VK_OEM_1, 0xBA + bool OemPlus : 1; // VK_OEM_PLUS, 0xBB + bool OemComma : 1; // VK_OEM_COMMA, 0xBC + bool OemMinus : 1; // VK_OEM_MINUS, 0xBD + bool OemPeriod : 1; // VK_OEM_PERIOD, 0xBE + bool OemQuestion : 1; // VK_OEM_2, 0xBF + bool OemTilde : 1; // VK_OEM_3, 0xC0 + bool Reserved15 : 7; + bool Reserved16 : 8; + bool Reserved17 : 8; + bool Reserved18 : 3; + bool OemOpenBrackets : 1; // VK_OEM_4, 0xDB + bool OemPipe : 1; // VK_OEM_5, 0xDC + bool OemCloseBrackets : 1; // VK_OEM_6, 0xDD + bool OemQuotes : 1; // VK_OEM_7, 0xDE + bool Oem8 : 1; // VK_OEM_8, 0xDF + bool Reserved19 : 2; + bool OemBackslash : 1; // VK_OEM_102, 0xE2 + bool Reserved20 : 2; + bool ProcessKey : 1; // VK_PROCESSKEY, 0xE5 + bool Reserved21 : 2; + bool Reserved22 : 8; + bool Reserved23 : 2; + bool OemCopy : 1; // 0XF2 + bool OemAuto : 1; // 0xF3 + bool OemEnlW : 1; // 0xF4 + bool Reserved24 : 1; + bool Attn : 1; // VK_ATTN, 0xF6 + bool Crsel : 1; // VK_CRSEL, 0xF7 + bool Exsel : 1; // VK_EXSEL, 0xF8 + bool EraseEof : 1; // VK_EREOF, 0xF9 + bool Play : 1; // VK_PLAY, 0xFA + bool Zoom : 1; // VK_ZOOM, 0xFB + bool Reserved25 : 1; + bool Pa1 : 1; // VK_PA1, 0xFD + bool OemClear : 1; // VK_OEM_CLEAR, 0xFE + bool Reserved26: 1; + + bool __cdecl IsKeyDown(Keys key) const noexcept + { + if (key <= 0xfe) + { + auto ptr = reinterpret_cast(this); + unsigned int bf = 1u << (key & 0x1f); + return (ptr[(key >> 5)] & bf) != 0; + } + return false; + } + + bool __cdecl IsKeyUp(Keys key) const 
noexcept + { + if (key <= 0xfe) + { + auto ptr = reinterpret_cast(this); + unsigned int bf = 1u << (key & 0x1f); + return (ptr[(key >> 5)] & bf) == 0; + } + return false; + } + }; + + class KeyboardStateTracker + { + public: + State released; + State pressed; + + #pragma prefast(suppress: 26495, "Reset() performs the initialization") + KeyboardStateTracker() noexcept { Reset(); } + + void __cdecl Update(const State& state) noexcept; + + void __cdecl Reset() noexcept; + + bool __cdecl IsKeyPressed(Keys key) const noexcept { return pressed.IsKeyDown(key); } + bool __cdecl IsKeyReleased(Keys key) const noexcept { return released.IsKeyDown(key); } + + State __cdecl GetLastState() const noexcept { return lastState; } + + public: + State lastState; + }; + + // Retrieve the current state of the keyboard + State __cdecl GetState() const; + + // Reset the keyboard state + void __cdecl Reset() noexcept; + + // Feature detection + bool __cdecl IsConnected() const; + + #if (!defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP)) && defined(WM_USER) + static void __cdecl ProcessMessage(UINT message, WPARAM wParam, LPARAM lParam); + #endif + + #if (defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP)) || (defined(_XBOX_ONE) && defined(_TITLE)) + void __cdecl SetWindow(ABI::Windows::UI::Core::ICoreWindow* window); + #ifdef __cplusplus_winrt + void __cdecl SetWindow(Windows::UI::Core::CoreWindow^ window) + { + // See https://msdn.microsoft.com/en-us/library/hh755802.aspx + SetWindow(reinterpret_cast(window)); + } + #endif + #ifdef CPPWINRT_VERSION + void __cdecl SetWindow(winrt::Windows::UI::Core::CoreWindow window) + { + // See https://docs.microsoft.com/en-us/windows/uwp/cpp-and-winrt-apis/interop-winrt-abi + SetWindow(reinterpret_cast(winrt::get_abi(window))); + } + #endif + #endif // WINAPI_FAMILY == WINAPI_FAMILY_APP + + // Singleton + static Keyboard& __cdecl Get(); + + private: + // Private implementation. 
+ class Impl; + + std::unique_ptr pImpl; + }; +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif diff --git a/Sdk/External/DirectXTK/Inc/Model.h b/Sdk/External/DirectXTK/Inc/Model.h new file mode 100644 index 0000000..a2a44e1 --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/Model.h @@ -0,0 +1,221 @@ +//-------------------------------------------------------------------------------------- +// File: Model.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_XBOX_ONE) && defined(_TITLE) +#include +#else +#include +#include +#endif + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + + +namespace DirectX +{ + class IEffect; + class IEffectFactory; + class CommonStates; + class ModelMesh; + + //---------------------------------------------------------------------------------- + // Model loading options + enum ModelLoaderFlags : uint32_t + { + ModelLoader_Clockwise = 0x0, + ModelLoader_CounterClockwise = 0x1, + ModelLoader_PremultipledAlpha = 0x2, + ModelLoader_MaterialColorsSRGB = 0x4, + ModelLoader_AllowLargeModels = 0x8, + }; + + //---------------------------------------------------------------------------------- + // Each mesh part is a submesh with a single effect + class ModelMeshPart + { + public: + ModelMeshPart() noexcept; + + ModelMeshPart(ModelMeshPart&&) = default; + ModelMeshPart& operator= (ModelMeshPart&&) = default; + + ModelMeshPart(ModelMeshPart const&) = default; + ModelMeshPart& operator= (ModelMeshPart const&) = default; + + virtual ~ModelMeshPart(); + + uint32_t indexCount; + uint32_t startIndex; + int32_t vertexOffset; + uint32_t vertexStride; + D3D_PRIMITIVE_TOPOLOGY primitiveType; + DXGI_FORMAT indexFormat; + Microsoft::WRL::ComPtr inputLayout; + Microsoft::WRL::ComPtr 
indexBuffer; + Microsoft::WRL::ComPtr vertexBuffer; + std::shared_ptr effect; + std::shared_ptr> vbDecl; + bool isAlpha; + + using Collection = std::vector>; + + // Draw mesh part with custom effect + void __cdecl Draw( + _In_ ID3D11DeviceContext* deviceContext, + _In_ IEffect* ieffect, + _In_ ID3D11InputLayout* iinputLayout, + _In_opt_ std::function setCustomState = nullptr) const; + + void __cdecl DrawInstanced( + _In_ ID3D11DeviceContext* deviceContext, + _In_ IEffect* ieffect, + _In_ ID3D11InputLayout* iinputLayout, + uint32_t instanceCount, + uint32_t startInstanceLocation = 0, + _In_opt_ std::function setCustomState = nullptr) const; + + // Create input layout for drawing with a custom effect. + void __cdecl CreateInputLayout(_In_ ID3D11Device* device, _In_ IEffect* ieffect, _Outptr_ ID3D11InputLayout** iinputLayout) const; + + // Change effect used by part and regenerate input layout (be sure to call Model::Modified as well) + void __cdecl ModifyEffect(_In_ ID3D11Device* device, _In_ std::shared_ptr& ieffect, bool isalpha = false); + }; + + + //---------------------------------------------------------------------------------- + // A mesh consists of one or more model mesh parts + class ModelMesh + { + public: + ModelMesh() noexcept; + + ModelMesh(ModelMesh&&) = default; + ModelMesh& operator= (ModelMesh&&) = default; + + ModelMesh(ModelMesh const&) = default; + ModelMesh& operator= (ModelMesh const&) = default; + + virtual ~ModelMesh(); + + BoundingSphere boundingSphere; + BoundingBox boundingBox; + ModelMeshPart::Collection meshParts; + std::wstring name; + bool ccw; + bool pmalpha; + + using Collection = std::vector>; + + // Setup states for drawing mesh + void __cdecl PrepareForRendering(_In_ ID3D11DeviceContext* deviceContext, const CommonStates& states, bool alpha = false, bool wireframe = false) const; + + // Draw the mesh + void XM_CALLCONV Draw( + _In_ ID3D11DeviceContext* deviceContext, + FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection, + bool 
alpha = false, + _In_opt_ std::function setCustomState = nullptr) const; + }; + + + //---------------------------------------------------------------------------------- + // A model consists of one or more meshes + class Model + { + public: + Model() = default; + + Model(Model&&) = default; + Model& operator= (Model&&) = default; + + Model(Model const&) = default; + Model& operator= (Model const&) = default; + + virtual ~Model(); + + ModelMesh::Collection meshes; + std::wstring name; + + // Draw all the meshes in the model + void XM_CALLCONV Draw( + _In_ ID3D11DeviceContext* deviceContext, + const CommonStates& states, + FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection, + bool wireframe = false, + _In_opt_ std::function setCustomState = nullptr) const; + + // Notify model that effects, parts list, or mesh list has changed + void __cdecl Modified() noexcept { mEffectCache.clear(); } + + // Update all effects used by the model + void __cdecl UpdateEffects(_In_ std::function setEffect); + + // Loads a model from a Visual Studio Starter Kit .CMO file + static std::unique_ptr __cdecl CreateFromCMO( + _In_ ID3D11Device* device, + _In_reads_bytes_(dataSize) const uint8_t* meshData, size_t dataSize, + _In_ IEffectFactory& fxFactory, + ModelLoaderFlags flags = ModelLoader_CounterClockwise); + static std::unique_ptr __cdecl CreateFromCMO( + _In_ ID3D11Device* device, + _In_z_ const wchar_t* szFileName, + _In_ IEffectFactory& fxFactory, + ModelLoaderFlags flags = ModelLoader_CounterClockwise); + + // Loads a model from a DirectX SDK .SDKMESH file + static std::unique_ptr __cdecl CreateFromSDKMESH( + _In_ ID3D11Device* device, + _In_reads_bytes_(dataSize) const uint8_t* meshData, _In_ size_t dataSize, + _In_ IEffectFactory& fxFactory, + ModelLoaderFlags flags = ModelLoader_Clockwise); + static std::unique_ptr __cdecl CreateFromSDKMESH( + _In_ ID3D11Device* device, + _In_z_ const wchar_t* szFileName, + _In_ IEffectFactory& fxFactory, + ModelLoaderFlags flags = 
ModelLoader_Clockwise); + + // Loads a model from a .VBO file + static std::unique_ptr __cdecl CreateFromVBO( + _In_ ID3D11Device* device, + _In_reads_bytes_(dataSize) const uint8_t* meshData, _In_ size_t dataSize, + _In_opt_ std::shared_ptr ieffect = nullptr, + ModelLoaderFlags flags = ModelLoader_Clockwise); + static std::unique_ptr __cdecl CreateFromVBO( + _In_ ID3D11Device* device, + _In_z_ const wchar_t* szFileName, + _In_opt_ std::shared_ptr ieffect = nullptr, + ModelLoaderFlags flags = ModelLoader_Clockwise); + + private: + std::set mEffectCache; + }; + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" +#endif + + DEFINE_ENUM_FLAG_OPERATORS(ModelLoaderFlags); + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +} diff --git a/Sdk/External/DirectXTK/Inc/Mouse.h b/Sdk/External/DirectXTK/Inc/Mouse.h new file mode 100644 index 0000000..51aa360 --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/Mouse.h @@ -0,0 +1,147 @@ +//-------------------------------------------------------------------------------------- +// File: Mouse.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#pragma once + +#include + +#if (defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP)) || (defined(_XBOX_ONE) && defined(_TITLE) && (_XDK_VER >= 0x42D907D1)) +namespace ABI { namespace Windows { namespace UI { namespace Core { struct ICoreWindow; } } } } +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunknown-pragmas" +#endif + + +namespace DirectX +{ + class Mouse + { + public: + Mouse() noexcept(false); + Mouse(Mouse&& moveFrom) noexcept; + Mouse& operator= (Mouse&& moveFrom) noexcept; + + Mouse(Mouse const&) = delete; + Mouse& operator=(Mouse const&) = delete; + + virtual ~Mouse(); + + enum Mode + { + MODE_ABSOLUTE = 0, + MODE_RELATIVE, + }; + + struct State + { + bool leftButton; + bool middleButton; + bool rightButton; + bool xButton1; + bool xButton2; + int x; + int y; + int scrollWheelValue; + Mode positionMode; + }; + + class ButtonStateTracker + { + public: + enum ButtonState + { + UP = 0, // Button is up + HELD = 1, // Button is held down + RELEASED = 2, // Button was just released + PRESSED = 3, // Button was just pressed + }; + + ButtonState leftButton; + ButtonState middleButton; + ButtonState rightButton; + ButtonState xButton1; + ButtonState xButton2; + + #pragma prefast(suppress: 26495, "Reset() performs the initialization") + ButtonStateTracker() noexcept { Reset(); } + + void __cdecl Update(const State& state) noexcept; + + void __cdecl Reset() noexcept; + + State __cdecl GetLastState() const noexcept { return lastState; } + + private: + State lastState; + }; + + // Retrieve the current state of the mouse + State __cdecl GetState() const; + + // Resets the accumulated scroll wheel value + void __cdecl ResetScrollWheelValue() noexcept; + + // Sets mouse mode (defaults to absolute) + void __cdecl
SetMode(Mode mode); + + // Feature detection + bool __cdecl IsConnected() const; + + // Cursor visibility + bool __cdecl IsVisible() const noexcept; + void __cdecl SetVisible(bool visible); + + #ifdef WM_USER + #if !defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP) + void __cdecl SetWindow(HWND window); + static void __cdecl ProcessMessage(UINT message, WPARAM wParam, LPARAM lParam); + #elif (WINAPI_FAMILY == WINAPI_FAMILY_GAMES) + static void __cdecl ProcessMessage(UINT message, WPARAM wParam, LPARAM lParam); + static void __cdecl SetResolution(bool use4k); + #endif + #endif + + #if (defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP)) || (defined(_XBOX_ONE) && defined(_TITLE) && (_XDK_VER >= 0x42D907D1)) + void __cdecl SetWindow(ABI::Windows::UI::Core::ICoreWindow* window); + #ifdef __cplusplus_winrt + void __cdecl SetWindow(Windows::UI::Core::CoreWindow^ window) + { + // See https://msdn.microsoft.com/en-us/library/hh755802.aspx + SetWindow(reinterpret_cast(window)); + } + #endif + #ifdef CPPWINRT_VERSION + void __cdecl SetWindow(winrt::Windows::UI::Core::CoreWindow window) + { + // See https://docs.microsoft.com/en-us/windows/uwp/cpp-and-winrt-apis/interop-winrt-abi + SetWindow(reinterpret_cast(winrt::get_abi(window))); + } + #endif + + static void __cdecl SetDpi(float dpi); + #endif // WINAPI_FAMILY == WINAPI_FAMILY_APP + + // Singleton + static Mouse& __cdecl Get(); + + private: + // Private implementation. + class Impl; + + std::unique_ptr pImpl; + }; +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif diff --git a/Sdk/External/DirectXTK/Inc/PostProcess.h b/Sdk/External/DirectXTK/Inc/PostProcess.h new file mode 100644 index 0000000..deba9a1 --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/PostProcess.h @@ -0,0 +1,209 @@ +//-------------------------------------------------------------------------------------- +// File: PostProcess.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_XBOX_ONE) && defined(_TITLE) +#include +#else +#include +#endif + +#include +#include +#include + + +namespace DirectX +{ + //---------------------------------------------------------------------------------- + // Abstract interface representing a post-process pass + class IPostProcess + { + public: + virtual ~IPostProcess() = default; + + IPostProcess(const IPostProcess&) = delete; + IPostProcess& operator=(const IPostProcess&) = delete; + + IPostProcess(IPostProcess&&) = delete; + IPostProcess& operator=(IPostProcess&&) = delete; + + virtual void __cdecl Process(_In_ ID3D11DeviceContext* deviceContext, + _In_opt_ std::function setCustomState = nullptr) = 0; + + protected: + IPostProcess() = default; + }; + + + //---------------------------------------------------------------------------------- + // Basic post-process + class BasicPostProcess : public IPostProcess + { + public: + enum Effect : unsigned int + { + Copy, + Monochrome, + Sepia, + DownScale_2x2, + DownScale_4x4, + GaussianBlur_5x5, + BloomExtract, + BloomBlur, + Effect_Max + }; + + explicit BasicPostProcess(_In_ ID3D11Device* device); + BasicPostProcess(BasicPostProcess&& moveFrom) noexcept; + BasicPostProcess& operator= (BasicPostProcess&& moveFrom) noexcept; + + BasicPostProcess(BasicPostProcess const&) = delete; + BasicPostProcess& operator= (BasicPostProcess const&) = delete; + + ~BasicPostProcess() override; + + // IPostProcess methods. 
+ void __cdecl Process( + _In_ ID3D11DeviceContext* deviceContext, + _In_opt_ std::function setCustomState = nullptr) override; + + // Shader control + void __cdecl SetEffect(Effect fx); + + // Properties + void __cdecl SetSourceTexture(_In_opt_ ID3D11ShaderResourceView* value); + + // Sets multiplier for GaussianBlur_5x5 + void __cdecl SetGaussianParameter(float multiplier); + + // Sets parameters for BloomExtract + void __cdecl SetBloomExtractParameter(float threshold); + + // Sets parameters for BloomBlur + void __cdecl SetBloomBlurParameters(bool horizontal, float size, float brightness); + + private: + // Private implementation. + class Impl; + + std::unique_ptr pImpl; + }; + + + //---------------------------------------------------------------------------------- + // Dual-texture post-process + class DualPostProcess : public IPostProcess + { + public: + enum Effect : unsigned int + { + Merge, + BloomCombine, + Effect_Max + }; + + explicit DualPostProcess(_In_ ID3D11Device* device); + DualPostProcess(DualPostProcess&& moveFrom) noexcept; + DualPostProcess& operator= (DualPostProcess&& moveFrom) noexcept; + + DualPostProcess(DualPostProcess const&) = delete; + DualPostProcess& operator= (DualPostProcess const&) = delete; + + ~DualPostProcess() override; + + // IPostProcess methods. + void __cdecl Process(_In_ ID3D11DeviceContext* deviceContext, + _In_opt_ std::function setCustomState = nullptr) override; + + // Shader control + void __cdecl SetEffect(Effect fx); + + // Properties + void __cdecl SetSourceTexture(_In_opt_ ID3D11ShaderResourceView* value); + void __cdecl SetSourceTexture2(_In_opt_ ID3D11ShaderResourceView* value); + + // Sets parameters for Merge + void __cdecl SetMergeParameters(float weight1, float weight2); + + // Sets parameters for BloomCombine + void __cdecl SetBloomCombineParameters(float bloom, float base, float bloomSaturation, float baseSaturation); + + private: + // Private implementation.
+ class Impl; + + std::unique_ptr pImpl; + }; + + + //---------------------------------------------------------------------------------- + // Tone-map post-process + class ToneMapPostProcess : public IPostProcess + { + public: + // Tone-mapping operator + enum Operator : unsigned int + { + None, // Pass-through + Saturate, // Clamp [0,1] + Reinhard, // x/(1+x) + ACESFilmic, + Operator_Max + }; + + // Electro-Optical Transfer Function (EOTF) + enum TransferFunction : unsigned int + { + Linear, // Pass-through + SRGB, // sRGB (Rec.709 and approximate sRGB display curve) + ST2084, // HDR10 (Rec.2020 color primaries and ST.2084 display curve) + TransferFunction_Max + }; + + explicit ToneMapPostProcess(_In_ ID3D11Device* device); + ToneMapPostProcess(ToneMapPostProcess&& moveFrom) noexcept; + ToneMapPostProcess& operator= (ToneMapPostProcess&& moveFrom) noexcept; + + ToneMapPostProcess(ToneMapPostProcess const&) = delete; + ToneMapPostProcess& operator= (ToneMapPostProcess const&) = delete; + + ~ToneMapPostProcess() override; + + // IPostProcess methods. + void __cdecl Process(_In_ ID3D11DeviceContext* deviceContext, + _In_opt_ std::function setCustomState = nullptr) override; + + // Shader control + void __cdecl SetOperator(Operator op); + + void __cdecl SetTransferFunction(TransferFunction func); + + #if defined(_XBOX_ONE) && defined(_TITLE) + // Uses Multiple Render Targets to generate both HDR10 and GameDVR SDR signals + void __cdecl SetMRTOutput(bool value = true); + #endif + + // Properties + void __cdecl SetHDRSourceTexture(_In_opt_ ID3D11ShaderResourceView* value); + + // Sets exposure value for LDR tonemap operators + void SetExposure(float exposureValue); + + // Sets ST.2084 parameter for how bright white should be in nits + void SetST2084Parameter(float paperWhiteNits); + + private: + // Private implementation. 
+ class Impl; + + std::unique_ptr pImpl; + }; +} diff --git a/Sdk/External/DirectXTK/Inc/PrimitiveBatch.h b/Sdk/External/DirectXTK/Inc/PrimitiveBatch.h new file mode 100644 index 0000000..9693188 --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/PrimitiveBatch.h @@ -0,0 +1,141 @@ +//-------------------------------------------------------------------------------------- +// File: PrimitiveBatch.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_XBOX_ONE) && defined(_TITLE) +#include +#else +#include +#endif + +#include +#include +#include + + +namespace DirectX +{ + namespace Internal + { + // Base class, not to be used directly: clients should access this via the derived PrimitiveBatch. + class PrimitiveBatchBase + { + protected: + PrimitiveBatchBase(_In_ ID3D11DeviceContext* deviceContext, size_t maxIndices, size_t maxVertices, size_t vertexSize); + PrimitiveBatchBase(PrimitiveBatchBase&& moveFrom) noexcept; + PrimitiveBatchBase& operator= (PrimitiveBatchBase&& moveFrom) noexcept; + + PrimitiveBatchBase(PrimitiveBatchBase const&) = delete; + PrimitiveBatchBase& operator= (PrimitiveBatchBase const&) = delete; + + virtual ~PrimitiveBatchBase(); + + public: + // Begin/End a batch of primitive drawing operations. + void __cdecl Begin(); + void __cdecl End(); + + protected: + // Internal, untyped drawing method. + void __cdecl Draw(D3D11_PRIMITIVE_TOPOLOGY topology, bool isIndexed, _In_opt_count_(indexCount) uint16_t const* indices, size_t indexCount, size_t vertexCount, _Out_ void** pMappedVertices); + + private: + // Private implementation. + class Impl; + + std::unique_ptr pImpl; + }; + } + + + // Template makes the API typesafe, eg. PrimitiveBatch. 
+ template + class PrimitiveBatch : public Internal::PrimitiveBatchBase + { + static const size_t DefaultBatchSize = 2048; + + public: + explicit PrimitiveBatch(_In_ ID3D11DeviceContext* deviceContext, size_t maxIndices = DefaultBatchSize * 3, size_t maxVertices = DefaultBatchSize) + : PrimitiveBatchBase(deviceContext, maxIndices, maxVertices, sizeof(TVertex)) + { } + + PrimitiveBatch(PrimitiveBatch&& moveFrom) noexcept + : PrimitiveBatchBase(std::move(moveFrom)) + { } + + PrimitiveBatch& operator= (PrimitiveBatch&& moveFrom) noexcept + { + PrimitiveBatchBase::operator=(std::move(moveFrom)); + return *this; + } + + PrimitiveBatch(PrimitiveBatch const&) = delete; + PrimitiveBatch& operator= (PrimitiveBatch const&) = delete; + + // Similar to the D3D9 API DrawPrimitiveUP. + void Draw(D3D11_PRIMITIVE_TOPOLOGY topology, _In_reads_(vertexCount) TVertex const* vertices, size_t vertexCount) + { + void* mappedVertices; + + PrimitiveBatchBase::Draw(topology, false, nullptr, 0, vertexCount, &mappedVertices); + + memcpy(mappedVertices, vertices, vertexCount * sizeof(TVertex)); + } + + + // Similar to the D3D9 API DrawIndexedPrimitiveUP. 
+ void DrawIndexed(D3D11_PRIMITIVE_TOPOLOGY topology, _In_reads_(indexCount) uint16_t const* indices, size_t indexCount, _In_reads_(vertexCount) TVertex const* vertices, size_t vertexCount) + { + void* mappedVertices; + + PrimitiveBatchBase::Draw(topology, true, indices, indexCount, vertexCount, &mappedVertices); + + memcpy(mappedVertices, vertices, vertexCount * sizeof(TVertex)); + } + + + void DrawLine(TVertex const& v1, TVertex const& v2) + { + TVertex* mappedVertices; + + PrimitiveBatchBase::Draw(D3D11_PRIMITIVE_TOPOLOGY_LINELIST, false, nullptr, 0, 2, reinterpret_cast(&mappedVertices)); + + mappedVertices[0] = v1; + mappedVertices[1] = v2; + } + + + void DrawTriangle(TVertex const& v1, TVertex const& v2, TVertex const& v3) + { + TVertex* mappedVertices; + + PrimitiveBatchBase::Draw(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST, false, nullptr, 0, 3, reinterpret_cast(&mappedVertices)); + + mappedVertices[0] = v1; + mappedVertices[1] = v2; + mappedVertices[2] = v3; + } + + + void DrawQuad(TVertex const& v1, TVertex const& v2, TVertex const& v3, TVertex const& v4) + { + static const uint16_t quadIndices[] = { 0, 1, 2, 0, 2, 3 }; + + TVertex* mappedVertices; + + PrimitiveBatchBase::Draw(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST, true, quadIndices, 6, 4, reinterpret_cast(&mappedVertices)); + + mappedVertices[0] = v1; + mappedVertices[1] = v2; + mappedVertices[2] = v3; + mappedVertices[3] = v4; + } + }; +} diff --git a/Sdk/External/DirectXTK/Inc/ScreenGrab.h b/Sdk/External/DirectXTK/Inc/ScreenGrab.h new file mode 100644 index 0000000..81f7801 --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/ScreenGrab.h @@ -0,0 +1,48 @@ + +//-------------------------------------------------------------------------------------- +// File: ScreenGrab.h +// +// Function for capturing a 2D texture and saving it to a file (aka a 'screenshot' +// when used on a Direct3D Render Target). +// +// Note these functions are useful as a light-weight runtime screen grabber. 
For +// full-featured texture capture, DDS writer, and texture processing pipeline, +// see the 'Texconv' sample and the 'DirectXTex' library. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248926 +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_XBOX_ONE) && defined(_TITLE) +#include +#else +#include +#endif + +#include +#include + +#pragma comment(lib,"uuid.lib") + + +namespace DirectX +{ + HRESULT __cdecl SaveDDSTextureToFile( + _In_ ID3D11DeviceContext* pContext, + _In_ ID3D11Resource* pSource, + _In_z_ const wchar_t* fileName) noexcept; + + HRESULT __cdecl SaveWICTextureToFile( + _In_ ID3D11DeviceContext* pContext, + _In_ ID3D11Resource* pSource, + _In_ REFGUID guidContainerFormat, + _In_z_ const wchar_t* fileName, + _In_opt_ const GUID* targetFormat = nullptr, + _In_opt_ std::function setCustomProps = nullptr, + _In_ bool forceSRGB = false); +} diff --git a/Sdk/External/DirectXTK/Inc/SimpleMath.h b/Sdk/External/DirectXTK/Inc/SimpleMath.h new file mode 100644 index 0000000..70c2ece --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/SimpleMath.h @@ -0,0 +1,1080 @@ +//------------------------------------------------------------------------------------- +// SimpleMath.h -- Simplified C++ Math wrapper for DirectXMath +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//------------------------------------------------------------------------------------- + +#pragma once + +#if !(defined(_XBOX_ONE) && defined(_TITLE)) && !defined(_GAMING_XBOX) +#include +#endif + +#include + +#include +#include + +#include +#include +#include + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wfloat-equal" +#endif + + +namespace DirectX +{ + namespace SimpleMath + { + struct Vector2; + struct Vector4; + struct Matrix; + struct Quaternion; + struct Plane; + + //------------------------------------------------------------------------------ + // 2D rectangle + struct Rectangle + { + long x; + long y; + long width; + long height; + + // Creators + Rectangle() noexcept : x(0), y(0), width(0), height(0) {} + constexpr Rectangle(long ix, long iy, long iw, long ih) noexcept : x(ix), y(iy), width(iw), height(ih) {} + explicit Rectangle(const RECT& rct) noexcept : x(rct.left), y(rct.top), width(rct.right - rct.left), height(rct.bottom - rct.top) {} + + Rectangle(const Rectangle&) = default; + Rectangle& operator=(const Rectangle&) = default; + + Rectangle(Rectangle&&) = default; + Rectangle& operator=(Rectangle&&) = default; + + operator RECT() noexcept { RECT rct; rct.left = x; rct.top = y; rct.right = (x + width); rct.bottom = (y + height); return rct; } + #ifdef __cplusplus_winrt + operator Windows::Foundation::Rect() noexcept { return Windows::Foundation::Rect(float(x), float(y), float(width), float(height)); } + #endif + + // Comparison operators + bool operator == (const Rectangle& r) const noexcept { return (x == r.x) && (y == r.y) && (width == r.width) && (height == r.height); } + bool operator == (const RECT& rct) const noexcept { return (x == rct.left) && (y == rct.top) && (width == (rct.right - rct.left)) && (height == (rct.bottom - rct.top)); } + + bool operator != (const Rectangle& r) const noexcept { 
return (x != r.x) || (y != r.y) || (width != r.width) || (height != r.height); } + bool operator != (const RECT& rct) const noexcept { return (x != rct.left) || (y != rct.top) || (width != (rct.right - rct.left)) || (height != (rct.bottom - rct.top)); } + + // Assignment operators + Rectangle& operator=(_In_ const RECT& rct) noexcept { x = rct.left; y = rct.top; width = (rct.right - rct.left); height = (rct.bottom - rct.top); return *this; } + + // Rectangle operations + Vector2 Location() const noexcept; + Vector2 Center() const noexcept; + + bool IsEmpty() const noexcept { return (width == 0 && height == 0 && x == 0 && y == 0); } + + bool Contains(long ix, long iy) const noexcept { return (x <= ix) && (ix < (x + width)) && (y <= iy) && (iy < (y + height)); } + bool Contains(const Vector2& point) const noexcept; + bool Contains(const Rectangle& r) const noexcept { return (x <= r.x) && ((r.x + r.width) <= (x + width)) && (y <= r.y) && ((r.y + r.height) <= (y + height)); } + bool Contains(const RECT& rct) const noexcept { return (x <= rct.left) && (rct.right <= (x + width)) && (y <= rct.top) && (rct.bottom <= (y + height)); } + + void Inflate(long horizAmount, long vertAmount) noexcept; + + bool Intersects(const Rectangle& r) const noexcept { return (r.x < (x + width)) && (x < (r.x + r.width)) && (r.y < (y + height)) && (y < (r.y + r.height)); } + bool Intersects(const RECT& rct) const noexcept { return (rct.left < (x + width)) && (x < rct.right) && (rct.top < (y + height)) && (y < rct.bottom); } + + void Offset(long ox, long oy) noexcept { x += ox; y += oy; } + + // Static functions + static Rectangle Intersect(const Rectangle& ra, const Rectangle& rb) noexcept; + static RECT Intersect(const RECT& rcta, const RECT& rctb) noexcept; + + static Rectangle Union(const Rectangle& ra, const Rectangle& rb) noexcept; + static RECT Union(const RECT& rcta, const RECT& rctb) noexcept; + }; + + //------------------------------------------------------------------------------ + 
// 2D vector + struct Vector2 : public XMFLOAT2 + { + Vector2() noexcept : XMFLOAT2(0.f, 0.f) {} + constexpr explicit Vector2(float ix) noexcept : XMFLOAT2(ix, ix) {} + constexpr Vector2(float ix, float iy) noexcept : XMFLOAT2(ix, iy) {} + explicit Vector2(_In_reads_(2) const float *pArray) noexcept : XMFLOAT2(pArray) {} + Vector2(FXMVECTOR V) noexcept { XMStoreFloat2(this, V); } + Vector2(const XMFLOAT2& V) noexcept { this->x = V.x; this->y = V.y; } + explicit Vector2(const XMVECTORF32& F) noexcept { this->x = F.f[0]; this->y = F.f[1]; } + + Vector2(const Vector2&) = default; + Vector2& operator=(const Vector2&) = default; + + Vector2(Vector2&&) = default; + Vector2& operator=(Vector2&&) = default; + + operator XMVECTOR() const noexcept { return XMLoadFloat2(this); } + + // Comparison operators + bool operator == (const Vector2& V) const noexcept; + bool operator != (const Vector2& V) const noexcept; + + // Assignment operators + Vector2& operator= (const XMVECTORF32& F) noexcept { x = F.f[0]; y = F.f[1]; return *this; } + Vector2& operator+= (const Vector2& V) noexcept; + Vector2& operator-= (const Vector2& V) noexcept; + Vector2& operator*= (const Vector2& V) noexcept; + Vector2& operator*= (float S) noexcept; + Vector2& operator/= (float S) noexcept; + + // Unary operators + Vector2 operator+ () const noexcept { return *this; } + Vector2 operator- () const noexcept { return Vector2(-x, -y); } + + // Vector operations + bool InBounds(const Vector2& Bounds) const noexcept; + + float Length() const noexcept; + float LengthSquared() const noexcept; + + float Dot(const Vector2& V) const noexcept; + void Cross(const Vector2& V, Vector2& result) const noexcept; + Vector2 Cross(const Vector2& V) const noexcept; + + void Normalize() noexcept; + void Normalize(Vector2& result) const noexcept; + + void Clamp(const Vector2& vmin, const Vector2& vmax) noexcept; + void Clamp(const Vector2& vmin, const Vector2& vmax, Vector2& result) const noexcept; + + // Static functions + 
static float Distance(const Vector2& v1, const Vector2& v2) noexcept; + static float DistanceSquared(const Vector2& v1, const Vector2& v2) noexcept; + + static void Min(const Vector2& v1, const Vector2& v2, Vector2& result) noexcept; + static Vector2 Min(const Vector2& v1, const Vector2& v2) noexcept; + + static void Max(const Vector2& v1, const Vector2& v2, Vector2& result) noexcept; + static Vector2 Max(const Vector2& v1, const Vector2& v2) noexcept; + + static void Lerp(const Vector2& v1, const Vector2& v2, float t, Vector2& result) noexcept; + static Vector2 Lerp(const Vector2& v1, const Vector2& v2, float t) noexcept; + + static void SmoothStep(const Vector2& v1, const Vector2& v2, float t, Vector2& result) noexcept; + static Vector2 SmoothStep(const Vector2& v1, const Vector2& v2, float t) noexcept; + + static void Barycentric(const Vector2& v1, const Vector2& v2, const Vector2& v3, float f, float g, Vector2& result) noexcept; + static Vector2 Barycentric(const Vector2& v1, const Vector2& v2, const Vector2& v3, float f, float g) noexcept; + + static void CatmullRom(const Vector2& v1, const Vector2& v2, const Vector2& v3, const Vector2& v4, float t, Vector2& result) noexcept; + static Vector2 CatmullRom(const Vector2& v1, const Vector2& v2, const Vector2& v3, const Vector2& v4, float t) noexcept; + + static void Hermite(const Vector2& v1, const Vector2& t1, const Vector2& v2, const Vector2& t2, float t, Vector2& result) noexcept; + static Vector2 Hermite(const Vector2& v1, const Vector2& t1, const Vector2& v2, const Vector2& t2, float t) noexcept; + + static void Reflect(const Vector2& ivec, const Vector2& nvec, Vector2& result) noexcept; + static Vector2 Reflect(const Vector2& ivec, const Vector2& nvec) noexcept; + + static void Refract(const Vector2& ivec, const Vector2& nvec, float refractionIndex, Vector2& result) noexcept; + static Vector2 Refract(const Vector2& ivec, const Vector2& nvec, float refractionIndex) noexcept; + + static void Transform(const 
Vector2& v, const Quaternion& quat, Vector2& result) noexcept; + static Vector2 Transform(const Vector2& v, const Quaternion& quat) noexcept; + + static void Transform(const Vector2& v, const Matrix& m, Vector2& result) noexcept; + static Vector2 Transform(const Vector2& v, const Matrix& m) noexcept; + static void Transform(_In_reads_(count) const Vector2* varray, size_t count, const Matrix& m, _Out_writes_(count) Vector2* resultArray) noexcept; + + static void Transform(const Vector2& v, const Matrix& m, Vector4& result) noexcept; + static void Transform(_In_reads_(count) const Vector2* varray, size_t count, const Matrix& m, _Out_writes_(count) Vector4* resultArray) noexcept; + + static void TransformNormal(const Vector2& v, const Matrix& m, Vector2& result) noexcept; + static Vector2 TransformNormal(const Vector2& v, const Matrix& m) noexcept; + static void TransformNormal(_In_reads_(count) const Vector2* varray, size_t count, const Matrix& m, _Out_writes_(count) Vector2* resultArray) noexcept; + + // Constants + static const Vector2 Zero; + static const Vector2 One; + static const Vector2 UnitX; + static const Vector2 UnitY; + }; + + // Binary operators + Vector2 operator+ (const Vector2& V1, const Vector2& V2) noexcept; + Vector2 operator- (const Vector2& V1, const Vector2& V2) noexcept; + Vector2 operator* (const Vector2& V1, const Vector2& V2) noexcept; + Vector2 operator* (const Vector2& V, float S) noexcept; + Vector2 operator/ (const Vector2& V1, const Vector2& V2) noexcept; + Vector2 operator/ (const Vector2& V, float S) noexcept; + Vector2 operator* (float S, const Vector2& V) noexcept; + + //------------------------------------------------------------------------------ + // 3D vector + struct Vector3 : public XMFLOAT3 + { + Vector3() noexcept : XMFLOAT3(0.f, 0.f, 0.f) {} + constexpr explicit Vector3(float ix) noexcept : XMFLOAT3(ix, ix, ix) {} + constexpr Vector3(float ix, float iy, float iz) noexcept : XMFLOAT3(ix, iy, iz) {} + explicit 
Vector3(_In_reads_(3) const float *pArray) noexcept : XMFLOAT3(pArray) {} + Vector3(FXMVECTOR V) noexcept { XMStoreFloat3(this, V); } + Vector3(const XMFLOAT3& V) noexcept { this->x = V.x; this->y = V.y; this->z = V.z; } + explicit Vector3(const XMVECTORF32& F) noexcept { this->x = F.f[0]; this->y = F.f[1]; this->z = F.f[2]; } + + Vector3(const Vector3&) = default; + Vector3& operator=(const Vector3&) = default; + + Vector3(Vector3&&) = default; + Vector3& operator=(Vector3&&) = default; + + operator XMVECTOR() const noexcept { return XMLoadFloat3(this); } + + // Comparison operators + bool operator == (const Vector3& V) const noexcept; + bool operator != (const Vector3& V) const noexcept; + + // Assignment operators + Vector3& operator= (const XMVECTORF32& F) noexcept { x = F.f[0]; y = F.f[1]; z = F.f[2]; return *this; } + Vector3& operator+= (const Vector3& V) noexcept; + Vector3& operator-= (const Vector3& V) noexcept; + Vector3& operator*= (const Vector3& V) noexcept; + Vector3& operator*= (float S) noexcept; + Vector3& operator/= (float S) noexcept; + + // Unary operators + Vector3 operator+ () const noexcept { return *this; } + Vector3 operator- () const noexcept; + + // Vector operations + bool InBounds(const Vector3& Bounds) const noexcept; + + float Length() const noexcept; + float LengthSquared() const noexcept; + + float Dot(const Vector3& V) const noexcept; + void Cross(const Vector3& V, Vector3& result) const noexcept; + Vector3 Cross(const Vector3& V) const noexcept; + + void Normalize() noexcept; + void Normalize(Vector3& result) const noexcept; + + void Clamp(const Vector3& vmin, const Vector3& vmax) noexcept; + void Clamp(const Vector3& vmin, const Vector3& vmax, Vector3& result) const noexcept; + + // Static functions + static float Distance(const Vector3& v1, const Vector3& v2) noexcept; + static float DistanceSquared(const Vector3& v1, const Vector3& v2) noexcept; + + static void Min(const Vector3& v1, const Vector3& v2, Vector3& result) 
noexcept; + static Vector3 Min(const Vector3& v1, const Vector3& v2) noexcept; + + static void Max(const Vector3& v1, const Vector3& v2, Vector3& result) noexcept; + static Vector3 Max(const Vector3& v1, const Vector3& v2) noexcept; + + static void Lerp(const Vector3& v1, const Vector3& v2, float t, Vector3& result) noexcept; + static Vector3 Lerp(const Vector3& v1, const Vector3& v2, float t) noexcept; + + static void SmoothStep(const Vector3& v1, const Vector3& v2, float t, Vector3& result) noexcept; + static Vector3 SmoothStep(const Vector3& v1, const Vector3& v2, float t) noexcept; + + static void Barycentric(const Vector3& v1, const Vector3& v2, const Vector3& v3, float f, float g, Vector3& result) noexcept; + static Vector3 Barycentric(const Vector3& v1, const Vector3& v2, const Vector3& v3, float f, float g) noexcept; + + static void CatmullRom(const Vector3& v1, const Vector3& v2, const Vector3& v3, const Vector3& v4, float t, Vector3& result) noexcept; + static Vector3 CatmullRom(const Vector3& v1, const Vector3& v2, const Vector3& v3, const Vector3& v4, float t) noexcept; + + static void Hermite(const Vector3& v1, const Vector3& t1, const Vector3& v2, const Vector3& t2, float t, Vector3& result) noexcept; + static Vector3 Hermite(const Vector3& v1, const Vector3& t1, const Vector3& v2, const Vector3& t2, float t) noexcept; + + static void Reflect(const Vector3& ivec, const Vector3& nvec, Vector3& result) noexcept; + static Vector3 Reflect(const Vector3& ivec, const Vector3& nvec) noexcept; + + static void Refract(const Vector3& ivec, const Vector3& nvec, float refractionIndex, Vector3& result) noexcept; + static Vector3 Refract(const Vector3& ivec, const Vector3& nvec, float refractionIndex) noexcept; + + static void Transform(const Vector3& v, const Quaternion& quat, Vector3& result) noexcept; + static Vector3 Transform(const Vector3& v, const Quaternion& quat) noexcept; + + static void Transform(const Vector3& v, const Matrix& m, Vector3& result) 
noexcept; + static Vector3 Transform(const Vector3& v, const Matrix& m) noexcept; + static void Transform(_In_reads_(count) const Vector3* varray, size_t count, const Matrix& m, _Out_writes_(count) Vector3* resultArray) noexcept; + + static void Transform(const Vector3& v, const Matrix& m, Vector4& result) noexcept; + static void Transform(_In_reads_(count) const Vector3* varray, size_t count, const Matrix& m, _Out_writes_(count) Vector4* resultArray) noexcept; + + static void TransformNormal(const Vector3& v, const Matrix& m, Vector3& result) noexcept; + static Vector3 TransformNormal(const Vector3& v, const Matrix& m) noexcept; + static void TransformNormal(_In_reads_(count) const Vector3* varray, size_t count, const Matrix& m, _Out_writes_(count) Vector3* resultArray) noexcept; + + // Constants + static const Vector3 Zero; + static const Vector3 One; + static const Vector3 UnitX; + static const Vector3 UnitY; + static const Vector3 UnitZ; + static const Vector3 Up; + static const Vector3 Down; + static const Vector3 Right; + static const Vector3 Left; + static const Vector3 Forward; + static const Vector3 Backward; + }; + + // Binary operators + Vector3 operator+ (const Vector3& V1, const Vector3& V2) noexcept; + Vector3 operator- (const Vector3& V1, const Vector3& V2) noexcept; + Vector3 operator* (const Vector3& V1, const Vector3& V2) noexcept; + Vector3 operator* (const Vector3& V, float S) noexcept; + Vector3 operator/ (const Vector3& V1, const Vector3& V2) noexcept; + Vector3 operator/ (const Vector3& V, float S) noexcept; + Vector3 operator* (float S, const Vector3& V) noexcept; + + //------------------------------------------------------------------------------ + // 4D vector + struct Vector4 : public XMFLOAT4 + { + Vector4() noexcept : XMFLOAT4(0.f, 0.f, 0.f, 0.f) {} + constexpr explicit Vector4(float ix) noexcept : XMFLOAT4(ix, ix, ix, ix) {} + constexpr Vector4(float ix, float iy, float iz, float iw) noexcept : XMFLOAT4(ix, iy, iz, iw) {} + explicit 
Vector4(_In_reads_(4) const float *pArray) noexcept : XMFLOAT4(pArray) {} + Vector4(FXMVECTOR V) noexcept { XMStoreFloat4(this, V); } + Vector4(const XMFLOAT4& V) noexcept { this->x = V.x; this->y = V.y; this->z = V.z; this->w = V.w; } + explicit Vector4(const XMVECTORF32& F) noexcept { this->x = F.f[0]; this->y = F.f[1]; this->z = F.f[2]; this->w = F.f[3]; } + + Vector4(const Vector4&) = default; + Vector4& operator=(const Vector4&) = default; + + Vector4(Vector4&&) = default; + Vector4& operator=(Vector4&&) = default; + + operator XMVECTOR() const noexcept { return XMLoadFloat4(this); } + + // Comparison operators + bool operator == (const Vector4& V) const noexcept; + bool operator != (const Vector4& V) const noexcept; + + // Assignment operators + Vector4& operator= (const XMVECTORF32& F) noexcept { x = F.f[0]; y = F.f[1]; z = F.f[2]; w = F.f[3]; return *this; } + Vector4& operator+= (const Vector4& V) noexcept; + Vector4& operator-= (const Vector4& V) noexcept; + Vector4& operator*= (const Vector4& V) noexcept; + Vector4& operator*= (float S) noexcept; + Vector4& operator/= (float S) noexcept; + + // Unary operators + Vector4 operator+ () const noexcept { return *this; } + Vector4 operator- () const noexcept; + + // Vector operations + bool InBounds(const Vector4& Bounds) const noexcept; + + float Length() const noexcept; + float LengthSquared() const noexcept; + + float Dot(const Vector4& V) const noexcept; + void Cross(const Vector4& v1, const Vector4& v2, Vector4& result) const noexcept; + Vector4 Cross(const Vector4& v1, const Vector4& v2) const noexcept; + + void Normalize() noexcept; + void Normalize(Vector4& result) const noexcept; + + void Clamp(const Vector4& vmin, const Vector4& vmax) noexcept; + void Clamp(const Vector4& vmin, const Vector4& vmax, Vector4& result) const noexcept; + + // Static functions + static float Distance(const Vector4& v1, const Vector4& v2) noexcept; + static float DistanceSquared(const Vector4& v1, const Vector4& v2) 
noexcept; + + static void Min(const Vector4& v1, const Vector4& v2, Vector4& result) noexcept; + static Vector4 Min(const Vector4& v1, const Vector4& v2) noexcept; + + static void Max(const Vector4& v1, const Vector4& v2, Vector4& result) noexcept; + static Vector4 Max(const Vector4& v1, const Vector4& v2) noexcept; + + static void Lerp(const Vector4& v1, const Vector4& v2, float t, Vector4& result) noexcept; + static Vector4 Lerp(const Vector4& v1, const Vector4& v2, float t) noexcept; + + static void SmoothStep(const Vector4& v1, const Vector4& v2, float t, Vector4& result) noexcept; + static Vector4 SmoothStep(const Vector4& v1, const Vector4& v2, float t) noexcept; + + static void Barycentric(const Vector4& v1, const Vector4& v2, const Vector4& v3, float f, float g, Vector4& result) noexcept; + static Vector4 Barycentric(const Vector4& v1, const Vector4& v2, const Vector4& v3, float f, float g) noexcept; + + static void CatmullRom(const Vector4& v1, const Vector4& v2, const Vector4& v3, const Vector4& v4, float t, Vector4& result) noexcept; + static Vector4 CatmullRom(const Vector4& v1, const Vector4& v2, const Vector4& v3, const Vector4& v4, float t) noexcept; + + static void Hermite(const Vector4& v1, const Vector4& t1, const Vector4& v2, const Vector4& t2, float t, Vector4& result) noexcept; + static Vector4 Hermite(const Vector4& v1, const Vector4& t1, const Vector4& v2, const Vector4& t2, float t) noexcept; + + static void Reflect(const Vector4& ivec, const Vector4& nvec, Vector4& result) noexcept; + static Vector4 Reflect(const Vector4& ivec, const Vector4& nvec) noexcept; + + static void Refract(const Vector4& ivec, const Vector4& nvec, float refractionIndex, Vector4& result) noexcept; + static Vector4 Refract(const Vector4& ivec, const Vector4& nvec, float refractionIndex) noexcept; + + static void Transform(const Vector2& v, const Quaternion& quat, Vector4& result) noexcept; + static Vector4 Transform(const Vector2& v, const Quaternion& quat) noexcept; 
+ + static void Transform(const Vector3& v, const Quaternion& quat, Vector4& result) noexcept; + static Vector4 Transform(const Vector3& v, const Quaternion& quat) noexcept; + + static void Transform(const Vector4& v, const Quaternion& quat, Vector4& result) noexcept; + static Vector4 Transform(const Vector4& v, const Quaternion& quat) noexcept; + + static void Transform(const Vector4& v, const Matrix& m, Vector4& result) noexcept; + static Vector4 Transform(const Vector4& v, const Matrix& m) noexcept; + static void Transform(_In_reads_(count) const Vector4* varray, size_t count, const Matrix& m, _Out_writes_(count) Vector4* resultArray) noexcept; + + // Constants + static const Vector4 Zero; + static const Vector4 One; + static const Vector4 UnitX; + static const Vector4 UnitY; + static const Vector4 UnitZ; + static const Vector4 UnitW; + }; + + // Binary operators + Vector4 operator+ (const Vector4& V1, const Vector4& V2) noexcept; + Vector4 operator- (const Vector4& V1, const Vector4& V2) noexcept; + Vector4 operator* (const Vector4& V1, const Vector4& V2) noexcept; + Vector4 operator* (const Vector4& V, float S) noexcept; + Vector4 operator/ (const Vector4& V1, const Vector4& V2) noexcept; + Vector4 operator/ (const Vector4& V, float S) noexcept; + Vector4 operator* (float S, const Vector4& V) noexcept; + + //------------------------------------------------------------------------------ + // 4x4 Matrix (assumes right-handed coordinates) + struct Matrix : public XMFLOAT4X4 + { + Matrix() noexcept + : XMFLOAT4X4(1.f, 0, 0, 0, + 0, 1.f, 0, 0, + 0, 0, 1.f, 0, + 0, 0, 0, 1.f) {} + constexpr Matrix(float m00, float m01, float m02, float m03, + float m10, float m11, float m12, float m13, + float m20, float m21, float m22, float m23, + float m30, float m31, float m32, float m33) noexcept + : XMFLOAT4X4(m00, m01, m02, m03, + m10, m11, m12, m13, + m20, m21, m22, m23, + m30, m31, m32, m33) {} + explicit Matrix(const Vector3& r0, const Vector3& r1, const Vector3& r2) 
noexcept + : XMFLOAT4X4(r0.x, r0.y, r0.z, 0, + r1.x, r1.y, r1.z, 0, + r2.x, r2.y, r2.z, 0, + 0, 0, 0, 1.f) {} + explicit Matrix(const Vector4& r0, const Vector4& r1, const Vector4& r2, const Vector4& r3) noexcept + : XMFLOAT4X4(r0.x, r0.y, r0.z, r0.w, + r1.x, r1.y, r1.z, r1.w, + r2.x, r2.y, r2.z, r2.w, + r3.x, r3.y, r3.z, r3.w) {} + Matrix(const XMFLOAT4X4& M) noexcept { memcpy_s(this, sizeof(float) * 16, &M, sizeof(XMFLOAT4X4)); } + Matrix(const XMFLOAT3X3& M) noexcept; + Matrix(const XMFLOAT4X3& M) noexcept; + + explicit Matrix(_In_reads_(16) const float *pArray) noexcept : XMFLOAT4X4(pArray) {} + Matrix(CXMMATRIX M) noexcept { XMStoreFloat4x4(this, M); } + + Matrix(const Matrix&) = default; + Matrix& operator=(const Matrix&) = default; + + Matrix(Matrix&&) = default; + Matrix& operator=(Matrix&&) = default; + + operator XMMATRIX() const noexcept { return XMLoadFloat4x4(this); } + + // Comparison operators + bool operator == (const Matrix& M) const noexcept; + bool operator != (const Matrix& M) const noexcept; + + // Assignment operators + Matrix& operator= (const XMFLOAT3X3& M) noexcept; + Matrix& operator= (const XMFLOAT4X3& M) noexcept; + Matrix& operator+= (const Matrix& M) noexcept; + Matrix& operator-= (const Matrix& M) noexcept; + Matrix& operator*= (const Matrix& M) noexcept; + Matrix& operator*= (float S) noexcept; + Matrix& operator/= (float S) noexcept; + + Matrix& operator/= (const Matrix& M) noexcept; + // Element-wise divide + + // Unary operators + Matrix operator+ () const noexcept { return *this; } + Matrix operator- () const noexcept; + + // Properties + Vector3 Up() const noexcept { return Vector3(_21, _22, _23); } + void Up(const Vector3& v) noexcept { _21 = v.x; _22 = v.y; _23 = v.z; } + + Vector3 Down() const noexcept { return Vector3(-_21, -_22, -_23); } + void Down(const Vector3& v) noexcept { _21 = -v.x; _22 = -v.y; _23 = -v.z; } + + Vector3 Right() const noexcept { return Vector3(_11, _12, _13); } + void Right(const Vector3& v) noexcept 
{ _11 = v.x; _12 = v.y; _13 = v.z; } + + Vector3 Left() const noexcept { return Vector3(-_11, -_12, -_13); } + void Left(const Vector3& v) noexcept { _11 = -v.x; _12 = -v.y; _13 = -v.z; } + + Vector3 Forward() const noexcept { return Vector3(-_31, -_32, -_33); } + void Forward(const Vector3& v) noexcept { _31 = -v.x; _32 = -v.y; _33 = -v.z; } + + Vector3 Backward() const noexcept { return Vector3(_31, _32, _33); } + void Backward(const Vector3& v) noexcept { _31 = v.x; _32 = v.y; _33 = v.z; } + + Vector3 Translation() const noexcept { return Vector3(_41, _42, _43); } + void Translation(const Vector3& v) noexcept { _41 = v.x; _42 = v.y; _43 = v.z; } + + // Matrix operations + bool Decompose(Vector3& scale, Quaternion& rotation, Vector3& translation) noexcept; + + Matrix Transpose() const noexcept; + void Transpose(Matrix& result) const noexcept; + + Matrix Invert() const noexcept; + void Invert(Matrix& result) const noexcept; + + float Determinant() const noexcept; + + // Static functions + static Matrix CreateBillboard( + const Vector3& object, const Vector3& cameraPosition, const Vector3& cameraUp, _In_opt_ const Vector3* cameraForward = nullptr) noexcept; + + static Matrix CreateConstrainedBillboard( + const Vector3& object, const Vector3& cameraPosition, const Vector3& rotateAxis, + _In_opt_ const Vector3* cameraForward = nullptr, _In_opt_ const Vector3* objectForward = nullptr) noexcept; + + static Matrix CreateTranslation(const Vector3& position) noexcept; + static Matrix CreateTranslation(float x, float y, float z) noexcept; + + static Matrix CreateScale(const Vector3& scales) noexcept; + static Matrix CreateScale(float xs, float ys, float zs) noexcept; + static Matrix CreateScale(float scale) noexcept; + + static Matrix CreateRotationX(float radians) noexcept; + static Matrix CreateRotationY(float radians) noexcept; + static Matrix CreateRotationZ(float radians) noexcept; + + static Matrix CreateFromAxisAngle(const Vector3& axis, float angle) noexcept; + + 
static Matrix CreatePerspectiveFieldOfView(float fov, float aspectRatio, float nearPlane, float farPlane) noexcept; + static Matrix CreatePerspective(float width, float height, float nearPlane, float farPlane) noexcept; + static Matrix CreatePerspectiveOffCenter(float left, float right, float bottom, float top, float nearPlane, float farPlane) noexcept; + static Matrix CreateOrthographic(float width, float height, float zNearPlane, float zFarPlane) noexcept; + static Matrix CreateOrthographicOffCenter(float left, float right, float bottom, float top, float zNearPlane, float zFarPlane) noexcept; + + static Matrix CreateLookAt(const Vector3& position, const Vector3& target, const Vector3& up) noexcept; + static Matrix CreateWorld(const Vector3& position, const Vector3& forward, const Vector3& up) noexcept; + + static Matrix CreateFromQuaternion(const Quaternion& quat) noexcept; + + static Matrix CreateFromYawPitchRoll(float yaw, float pitch, float roll) noexcept; + + static Matrix CreateShadow(const Vector3& lightDir, const Plane& plane) noexcept; + + static Matrix CreateReflection(const Plane& plane) noexcept; + + static void Lerp(const Matrix& M1, const Matrix& M2, float t, Matrix& result) noexcept; + static Matrix Lerp(const Matrix& M1, const Matrix& M2, float t) noexcept; + + static void Transform(const Matrix& M, const Quaternion& rotation, Matrix& result) noexcept; + static Matrix Transform(const Matrix& M, const Quaternion& rotation) noexcept; + + // Constants + static const Matrix Identity; + }; + + // Binary operators + Matrix operator+ (const Matrix& M1, const Matrix& M2) noexcept; + Matrix operator- (const Matrix& M1, const Matrix& M2) noexcept; + Matrix operator* (const Matrix& M1, const Matrix& M2) noexcept; + Matrix operator* (const Matrix& M, float S) noexcept; + Matrix operator/ (const Matrix& M, float S) noexcept; + Matrix operator/ (const Matrix& M1, const Matrix& M2) noexcept; + // Element-wise divide + Matrix operator* (float S, const Matrix& M) 
noexcept;
+
+
+        //-----------------------------------------------------------------------------
+        // Plane
+        // Four-float plane layered on XMFLOAT4. The accessors below expose (x, y, z)
+        // as the normal and w as D. Default-constructs to (0, 1, 0, 0).
+        struct Plane : public XMFLOAT4
+        {
+            Plane() noexcept : XMFLOAT4(0.f, 1.f, 0.f, 0.f) {}
+            constexpr Plane(float ix, float iy, float iz, float iw) noexcept : XMFLOAT4(ix, iy, iz, iw) {}
+            Plane(const Vector3& normal, float d) noexcept : XMFLOAT4(normal.x, normal.y, normal.z, d) {}
+            Plane(const Vector3& point1, const Vector3& point2, const Vector3& point3) noexcept;
+            Plane(const Vector3& point, const Vector3& normal) noexcept;
+            explicit Plane(const Vector4& v) noexcept : XMFLOAT4(v.x, v.y, v.z, v.w) {}
+            explicit Plane(_In_reads_(4) const float *pArray) noexcept : XMFLOAT4(pArray) {}
+            Plane(FXMVECTOR V) noexcept { XMStoreFloat4(this, V); }
+            Plane(const XMFLOAT4& p) noexcept { this->x = p.x; this->y = p.y; this->z = p.z; this->w = p.w; }
+            explicit Plane(const XMVECTORF32& F) noexcept { this->x = F.f[0]; this->y = F.f[1]; this->z = F.f[2]; this->w = F.f[3]; }
+
+            Plane(const Plane&) = default;
+            Plane& operator=(const Plane&) = default;
+
+            Plane(Plane&&) = default;
+            Plane& operator=(Plane&&) = default;
+
+            // Loads the four stored floats into a SIMD register value.
+            operator XMVECTOR() const noexcept { return XMLoadFloat4(this); }
+
+            // Comparison operators
+            bool operator == (const Plane& p) const noexcept;
+            bool operator != (const Plane& p) const noexcept;
+
+            // Assignment operators
+            Plane& operator= (const XMVECTORF32& F) noexcept { x = F.f[0]; y = F.f[1]; z = F.f[2]; w = F.f[3]; return *this; }
+
+            // Properties
+            // Getter/setter pairs share a name; the setter leaves the other component(s) untouched.
+            Vector3 Normal() const noexcept { return Vector3(x, y, z); }
+            void Normal(const Vector3& normal) noexcept { x = normal.x; y = normal.y; z = normal.z; }
+
+            float D() const noexcept { return w; }
+            void D(float d) noexcept { w = d; }
+
+            // Plane operations
+            // Overloads taking a 'result' reference are const and write there instead of to *this.
+            void Normalize() noexcept;
+            void Normalize(Plane& result) const noexcept;
+
+            float Dot(const Vector4& v) const noexcept;
+            float DotCoordinate(const Vector3& position) const noexcept;
+            float DotNormal(const Vector3& normal) const noexcept;
+
+            // Static functions
+            static void Transform(const Plane& plane, const Matrix& M, Plane& result) noexcept;
+            static Plane Transform(const Plane& plane, const Matrix& M) noexcept;
+
+            static void Transform(const Plane& plane, const Quaternion& rotation, Plane& result) noexcept;
+            static Plane Transform(const Plane& plane, const Quaternion& rotation) noexcept;
+            // Input quaternion must be the inverse transpose of the transformation
+        };
+
+        //------------------------------------------------------------------------------
+        // Quaternion
+        // Four-float quaternion layered on XMFLOAT4. Default-constructs to (0, 0, 0, 1);
+        // the (Vector3, float) constructor stores the vector in x/y/z and the scalar in w.
+        struct Quaternion : public XMFLOAT4
+        {
+            Quaternion() noexcept : XMFLOAT4(0, 0, 0, 1.f) {}
+            constexpr Quaternion(float ix, float iy, float iz, float iw) noexcept : XMFLOAT4(ix, iy, iz, iw) {}
+            Quaternion(const Vector3& v, float scalar) noexcept : XMFLOAT4(v.x, v.y, v.z, scalar) {}
+            explicit Quaternion(const Vector4& v) noexcept : XMFLOAT4(v.x, v.y, v.z, v.w) {}
+            explicit Quaternion(_In_reads_(4) const float *pArray) noexcept : XMFLOAT4(pArray) {}
+            Quaternion(FXMVECTOR V) noexcept { XMStoreFloat4(this, V); }
+            Quaternion(const XMFLOAT4& q) noexcept { this->x = q.x; this->y = q.y; this->z = q.z; this->w = q.w; }
+            explicit Quaternion(const XMVECTORF32& F) noexcept { this->x = F.f[0]; this->y = F.f[1]; this->z = F.f[2]; this->w = F.f[3]; }
+
+            Quaternion(const Quaternion&) = default;
+            Quaternion& operator=(const Quaternion&) = default;
+
+            Quaternion(Quaternion&&) = default;
+            Quaternion& operator=(Quaternion&&) = default;
+
+            // Loads the four stored floats into a SIMD register value.
+            operator XMVECTOR() const noexcept { return XMLoadFloat4(this); }
+
+            // Comparison operators
+            bool operator == (const Quaternion& q) const noexcept;
+            bool operator != (const Quaternion& q) const noexcept;
+
+            // Assignment operators
+            Quaternion& operator= (const XMVECTORF32& F) noexcept { x = F.f[0]; y = F.f[1]; z = F.f[2]; w = F.f[3]; return *this; }
+            Quaternion& operator+= (const Quaternion& q) noexcept;
+            Quaternion& operator-= (const Quaternion& q) noexcept;
+            Quaternion& operator*= (const Quaternion& q) noexcept;
+            Quaternion& operator*= (float S) noexcept;
+            Quaternion& operator/= (const Quaternion& q) noexcept;
+
+            // Unary operators
+            Quaternion operator+ () const noexcept { return *this; }
+            Quaternion operator- () const noexcept;
+
+            // Quaternion operations
+            // Overloads taking a 'result' reference are const and write there instead of to *this.
+            float Length() const noexcept;
+            float LengthSquared() const noexcept;
+
+            void Normalize() noexcept;
+            void Normalize(Quaternion& result) const noexcept;
+
+            void Conjugate() noexcept;
+            void Conjugate(Quaternion& result) const noexcept;
+
+            void Inverse(Quaternion& result) const noexcept;
+
+            float Dot(const Quaternion& Q) const noexcept;
+
+            // Static functions
+            static Quaternion CreateFromAxisAngle(const Vector3& axis, float angle) noexcept;
+            static Quaternion CreateFromYawPitchRoll(float yaw, float pitch, float roll) noexcept;
+            static Quaternion CreateFromRotationMatrix(const Matrix& M) noexcept;
+
+            static void Lerp(const Quaternion& q1, const Quaternion& q2, float t, Quaternion& result) noexcept;
+            static Quaternion Lerp(const Quaternion& q1, const Quaternion& q2, float t) noexcept;
+
+            static void Slerp(const Quaternion& q1, const Quaternion& q2, float t, Quaternion& result) noexcept;
+            static Quaternion Slerp(const Quaternion& q1, const Quaternion& q2, float t) noexcept;
+
+            static void Concatenate(const Quaternion& q1, const Quaternion& q2, Quaternion& result) noexcept;
+            static Quaternion Concatenate(const Quaternion& q1, const Quaternion& q2) noexcept;
+
+            // Constants
+            static const Quaternion Identity;
+        };
+
+        // Binary operators
+        Quaternion operator+ (const Quaternion& Q1, const Quaternion& Q2) noexcept;
+        Quaternion operator- (const Quaternion& Q1, const Quaternion& Q2) noexcept;
+        Quaternion operator* (const Quaternion& Q1, const Quaternion& Q2) noexcept;
+        Quaternion operator* (const Quaternion& Q, float S) noexcept;
+        Quaternion operator/ (const Quaternion& Q1, const Quaternion& Q2) noexcept;
+        Quaternion operator* (float S, const Quaternion& Q) noexcept;
+
+ //------------------------------------------------------------------------------ + // Color + struct Color : public XMFLOAT4 + { + Color() noexcept : XMFLOAT4(0, 0, 0, 1.f) {} + constexpr Color(float _r, float _g, float _b) noexcept : XMFLOAT4(_r, _g, _b, 1.f) {} + constexpr Color(float _r, float _g, float _b, float _a) noexcept : XMFLOAT4(_r, _g, _b, _a) {} + explicit Color(const Vector3& clr) noexcept : XMFLOAT4(clr.x, clr.y, clr.z, 1.f) {} + explicit Color(const Vector4& clr) noexcept : XMFLOAT4(clr.x, clr.y, clr.z, clr.w) {} + explicit Color(_In_reads_(4) const float *pArray) noexcept : XMFLOAT4(pArray) {} + Color(FXMVECTOR V) noexcept { XMStoreFloat4(this, V); } + Color(const XMFLOAT4& c) noexcept { this->x = c.x; this->y = c.y; this->z = c.z; this->w = c.w; } + explicit Color(const XMVECTORF32& F) noexcept { this->x = F.f[0]; this->y = F.f[1]; this->z = F.f[2]; this->w = F.f[3]; } + + explicit Color(const DirectX::PackedVector::XMCOLOR& Packed) noexcept; + // BGRA Direct3D 9 D3DCOLOR packed color + + explicit Color(const DirectX::PackedVector::XMUBYTEN4& Packed) noexcept; + // RGBA XNA Game Studio packed color + + Color(const Color&) = default; + Color& operator=(const Color&) = default; + + Color(Color&&) = default; + Color& operator=(Color&&) = default; + + operator XMVECTOR() const noexcept { return XMLoadFloat4(this); } + operator const float*() const noexcept { return reinterpret_cast(this); } + + // Comparison operators + bool operator == (const Color& c) const noexcept; + bool operator != (const Color& c) const noexcept; + + // Assignment operators + Color& operator= (const XMVECTORF32& F) noexcept { x = F.f[0]; y = F.f[1]; z = F.f[2]; w = F.f[3]; return *this; } + Color& operator= (const DirectX::PackedVector::XMCOLOR& Packed) noexcept; + Color& operator= (const DirectX::PackedVector::XMUBYTEN4& Packed) noexcept; + Color& operator+= (const Color& c) noexcept; + Color& operator-= (const Color& c) noexcept; + Color& operator*= (const Color& c) 
noexcept; + Color& operator*= (float S) noexcept; + Color& operator/= (const Color& c) noexcept; + + // Unary operators + Color operator+ () const noexcept { return *this; } + Color operator- () const noexcept; + + // Properties + float R() const noexcept { return x; } + void R(float r) noexcept { x = r; } + + float G() const noexcept { return y; } + void G(float g) noexcept { y = g; } + + float B() const noexcept { return z; } + void B(float b) noexcept { z = b; } + + float A() const noexcept { return w; } + void A(float a) noexcept { w = a; } + + // Color operations + DirectX::PackedVector::XMCOLOR BGRA() const noexcept; + DirectX::PackedVector::XMUBYTEN4 RGBA() const noexcept; + + Vector3 ToVector3() const noexcept; + Vector4 ToVector4() const noexcept; + + void Negate() noexcept; + void Negate(Color& result) const noexcept; + + void Saturate() noexcept; + void Saturate(Color& result) const noexcept; + + void Premultiply() noexcept; + void Premultiply(Color& result) const noexcept; + + void AdjustSaturation(float sat) noexcept; + void AdjustSaturation(float sat, Color& result) const noexcept; + + void AdjustContrast(float contrast) noexcept; + void AdjustContrast(float contrast, Color& result) const noexcept; + + // Static functions + static void Modulate(const Color& c1, const Color& c2, Color& result) noexcept; + static Color Modulate(const Color& c1, const Color& c2) noexcept; + + static void Lerp(const Color& c1, const Color& c2, float t, Color& result) noexcept; + static Color Lerp(const Color& c1, const Color& c2, float t) noexcept; + }; + + // Binary operators + Color operator+ (const Color& C1, const Color& C2) noexcept; + Color operator- (const Color& C1, const Color& C2) noexcept; + Color operator* (const Color& C1, const Color& C2) noexcept; + Color operator* (const Color& C, float S) noexcept; + Color operator/ (const Color& C1, const Color& C2) noexcept; + Color operator* (float S, const Color& C) noexcept; + + 
//------------------------------------------------------------------------------ + // Ray + class Ray + { + public: + Vector3 position; + Vector3 direction; + + Ray() noexcept : position(0, 0, 0), direction(0, 0, 1) {} + Ray(const Vector3& pos, const Vector3& dir) noexcept : position(pos), direction(dir) {} + + Ray(const Ray&) = default; + Ray& operator=(const Ray&) = default; + + Ray(Ray&&) = default; + Ray& operator=(Ray&&) = default; + + // Comparison operators + bool operator == (const Ray& r) const noexcept; + bool operator != (const Ray& r) const noexcept; + + // Ray operations + bool Intersects(const BoundingSphere& sphere, _Out_ float& Dist) const noexcept; + bool Intersects(const BoundingBox& box, _Out_ float& Dist) const noexcept; + bool Intersects(const Vector3& tri0, const Vector3& tri1, const Vector3& tri2, _Out_ float& Dist) const noexcept; + bool Intersects(const Plane& plane, _Out_ float& Dist) const noexcept; + }; + + //------------------------------------------------------------------------------ + // Viewport + class Viewport + { + public: + float x; + float y; + float width; + float height; + float minDepth; + float maxDepth; + + Viewport() noexcept : + x(0.f), y(0.f), width(0.f), height(0.f), minDepth(0.f), maxDepth(1.f) {} + constexpr Viewport(float ix, float iy, float iw, float ih, float iminz = 0.f, float imaxz = 1.f) noexcept : + x(ix), y(iy), width(iw), height(ih), minDepth(iminz), maxDepth(imaxz) {} + explicit Viewport(const RECT& rct) noexcept : + x(float(rct.left)), y(float(rct.top)), + width(float(rct.right - rct.left)), + height(float(rct.bottom - rct.top)), + minDepth(0.f), maxDepth(1.f) {} + + #if defined(__d3d11_h__) || defined(__d3d11_x_h__) + // Direct3D 11 interop + explicit Viewport(const D3D11_VIEWPORT& vp) noexcept : + x(vp.TopLeftX), y(vp.TopLeftY), + width(vp.Width), height(vp.Height), + minDepth(vp.MinDepth), maxDepth(vp.MaxDepth) {} + + operator D3D11_VIEWPORT() noexcept { return *reinterpret_cast(this); } + const 
D3D11_VIEWPORT* Get11() const noexcept { return reinterpret_cast(this); } + Viewport& operator= (const D3D11_VIEWPORT& vp) noexcept; + #endif + + #if defined(__d3d12_h__) || defined(__d3d12_x_h__) || defined(__XBOX_D3D12_X__) + // Direct3D 12 interop + explicit Viewport(const D3D12_VIEWPORT& vp) noexcept : + x(vp.TopLeftX), y(vp.TopLeftY), + width(vp.Width), height(vp.Height), + minDepth(vp.MinDepth), maxDepth(vp.MaxDepth) {} + + operator D3D12_VIEWPORT() noexcept { return *reinterpret_cast(this); } + const D3D12_VIEWPORT* Get12() const noexcept { return reinterpret_cast(this); } + Viewport& operator= (const D3D12_VIEWPORT& vp) noexcept; + #endif + + Viewport(const Viewport&) = default; + Viewport& operator=(const Viewport&) = default; + + Viewport(Viewport&&) = default; + Viewport& operator=(Viewport&&) = default; + + // Comparison operators + bool operator == (const Viewport& vp) const noexcept; + bool operator != (const Viewport& vp) const noexcept; + + // Assignment operators + Viewport& operator= (const RECT& rct) noexcept; + + // Viewport operations + float AspectRatio() const noexcept; + + Vector3 Project(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world) const noexcept; + void Project(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world, Vector3& result) const noexcept; + + Vector3 Unproject(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world) const noexcept; + void Unproject(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world, Vector3& result) const noexcept; + + // Static methods + static RECT __cdecl ComputeDisplayArea(DXGI_SCALING scaling, UINT backBufferWidth, UINT backBufferHeight, int outputWidth, int outputHeight) noexcept; + static RECT __cdecl ComputeTitleSafeArea(UINT backBufferWidth, UINT backBufferHeight) noexcept; + }; + + #include "SimpleMath.inl" + + } // namespace SimpleMath + +} // namespace DirectX + 
+//------------------------------------------------------------------------------ +// Support for SimpleMath and Standard C++ Library containers +namespace std +{ + + template<> struct less + { + bool operator()(const DirectX::SimpleMath::Rectangle& r1, const DirectX::SimpleMath::Rectangle& r2) const noexcept + { + return ((r1.x < r2.x) + || ((r1.x == r2.x) && (r1.y < r2.y)) + || ((r1.x == r2.x) && (r1.y == r2.y) && (r1.width < r2.width)) + || ((r1.x == r2.x) && (r1.y == r2.y) && (r1.width == r2.width) && (r1.height < r2.height))); + } + }; + + template<> struct less + { + bool operator()(const DirectX::SimpleMath::Vector2& V1, const DirectX::SimpleMath::Vector2& V2) const noexcept + { + return ((V1.x < V2.x) || ((V1.x == V2.x) && (V1.y < V2.y))); + } + }; + + template<> struct less + { + bool operator()(const DirectX::SimpleMath::Vector3& V1, const DirectX::SimpleMath::Vector3& V2) const noexcept + { + return ((V1.x < V2.x) + || ((V1.x == V2.x) && (V1.y < V2.y)) + || ((V1.x == V2.x) && (V1.y == V2.y) && (V1.z < V2.z))); + } + }; + + template<> struct less + { + bool operator()(const DirectX::SimpleMath::Vector4& V1, const DirectX::SimpleMath::Vector4& V2) const noexcept + { + return ((V1.x < V2.x) + || ((V1.x == V2.x) && (V1.y < V2.y)) + || ((V1.x == V2.x) && (V1.y == V2.y) && (V1.z < V2.z)) + || ((V1.x == V2.x) && (V1.y == V2.y) && (V1.z == V2.z) && (V1.w < V2.w))); + } + }; + + template<> struct less + { + bool operator()(const DirectX::SimpleMath::Matrix& M1, const DirectX::SimpleMath::Matrix& M2) const noexcept + { + if (M1._11 != M2._11) return M1._11 < M2._11; + if (M1._12 != M2._12) return M1._12 < M2._12; + if (M1._13 != M2._13) return M1._13 < M2._13; + if (M1._14 != M2._14) return M1._14 < M2._14; + if (M1._21 != M2._21) return M1._21 < M2._21; + if (M1._22 != M2._22) return M1._22 < M2._22; + if (M1._23 != M2._23) return M1._23 < M2._23; + if (M1._24 != M2._24) return M1._24 < M2._24; + if (M1._31 != M2._31) return M1._31 < M2._31; + if (M1._32 != 
M2._32) return M1._32 < M2._32; + if (M1._33 != M2._33) return M1._33 < M2._33; + if (M1._34 != M2._34) return M1._34 < M2._34; + if (M1._41 != M2._41) return M1._41 < M2._41; + if (M1._42 != M2._42) return M1._42 < M2._42; + if (M1._43 != M2._43) return M1._43 < M2._43; + if (M1._44 != M2._44) return M1._44 < M2._44; + + return false; + } + }; + + template<> struct less + { + bool operator()(const DirectX::SimpleMath::Plane& P1, const DirectX::SimpleMath::Plane& P2) const noexcept + { + return ((P1.x < P2.x) + || ((P1.x == P2.x) && (P1.y < P2.y)) + || ((P1.x == P2.x) && (P1.y == P2.y) && (P1.z < P2.z)) + || ((P1.x == P2.x) && (P1.y == P2.y) && (P1.z == P2.z) && (P1.w < P2.w))); + } + }; + + template<> struct less + { + bool operator()(const DirectX::SimpleMath::Quaternion& Q1, const DirectX::SimpleMath::Quaternion& Q2) const noexcept + { + return ((Q1.x < Q2.x) + || ((Q1.x == Q2.x) && (Q1.y < Q2.y)) + || ((Q1.x == Q2.x) && (Q1.y == Q2.y) && (Q1.z < Q2.z)) + || ((Q1.x == Q2.x) && (Q1.y == Q2.y) && (Q1.z == Q2.z) && (Q1.w < Q2.w))); + } + }; + + template<> struct less + { + bool operator()(const DirectX::SimpleMath::Color& C1, const DirectX::SimpleMath::Color& C2) const noexcept + { + return ((C1.x < C2.x) + || ((C1.x == C2.x) && (C1.y < C2.y)) + || ((C1.x == C2.x) && (C1.y == C2.y) && (C1.z < C2.z)) + || ((C1.x == C2.x) && (C1.y == C2.y) && (C1.z == C2.z) && (C1.w < C2.w))); + } + }; + + template<> struct less + { + bool operator()(const DirectX::SimpleMath::Ray& R1, const DirectX::SimpleMath::Ray& R2) const noexcept + { + if (R1.position.x != R2.position.x) return R1.position.x < R2.position.x; + if (R1.position.y != R2.position.y) return R1.position.y < R2.position.y; + if (R1.position.z != R2.position.z) return R1.position.z < R2.position.z; + + if (R1.direction.x != R2.direction.x) return R1.direction.x < R2.direction.x; + if (R1.direction.y != R2.direction.y) return R1.direction.y < R2.direction.y; + if (R1.direction.z != R2.direction.z) return R1.direction.z 
< R2.direction.z; + + return false; + } + }; + + template<> struct less + { + bool operator()(const DirectX::SimpleMath::Viewport& vp1, const DirectX::SimpleMath::Viewport& vp2) const noexcept + { + if (vp1.x != vp2.x) return (vp1.x < vp2.x); + if (vp1.y != vp2.y) return (vp1.y < vp2.y); + + if (vp1.width != vp2.width) return (vp1.width < vp2.width); + if (vp1.height != vp2.height) return (vp1.height < vp2.height); + + if (vp1.minDepth != vp2.minDepth) return (vp1.minDepth < vp2.minDepth); + if (vp1.maxDepth != vp2.maxDepth) return (vp1.maxDepth < vp2.maxDepth); + + return false; + } + }; + +} // namespace std + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif diff --git a/Sdk/External/DirectXTK/Inc/SimpleMath.inl b/Sdk/External/DirectXTK/Inc/SimpleMath.inl new file mode 100644 index 0000000..576d425 --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/SimpleMath.inl @@ -0,0 +1,3733 @@ +//------------------------------------------------------------------------------------- +// SimpleMath.inl -- Simplified C++ Math wrapper for DirectXMath +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//------------------------------------------------------------------------------------- + +#pragma once + +/**************************************************************************** +* +* Rectangle +* +****************************************************************************/ + +//------------------------------------------------------------------------------ +// Rectangle operations +//------------------------------------------------------------------------------ +inline Vector2 Rectangle::Location() const noexcept +{ + return Vector2(float(x), float(y)); +} + +inline Vector2 Rectangle::Center() const noexcept +{ + return Vector2(float(x) + (float(width) / 2.f), float(y) + (float(height) / 2.f)); +} + +inline bool Rectangle::Contains(const Vector2& point) const noexcept +{ + return (float(x) <= point.x) && (point.x < float(x + width)) && (float(y) <= point.y) && (point.y < float(y + height)); +} + +inline void Rectangle::Inflate(long horizAmount, long vertAmount) noexcept +{ + x -= horizAmount; + y -= vertAmount; + width += horizAmount; + height += vertAmount; +} + +//------------------------------------------------------------------------------ +// Static functions +//------------------------------------------------------------------------------ + +inline Rectangle Rectangle::Intersect(const Rectangle& ra, const Rectangle& rb) noexcept +{ + long righta = ra.x + ra.width; + long rightb = rb.x + rb.width; + + long bottoma = ra.y + ra.height; + long bottomb = rb.y + rb.height; + + long maxX = ra.x > rb.x ? ra.x : rb.x; + long maxY = ra.y > rb.y ? ra.y : rb.y; + + long minRight = righta < rightb ? righta : rightb; + long minBottom = bottoma < bottomb ? 
bottoma : bottomb; + + Rectangle result; + + if ((minRight > maxX) && (minBottom > maxY)) + { + result.x = maxX; + result.y = maxY; + result.width = minRight - maxX; + result.height = minBottom - maxY; + } + else + { + result.x = 0; + result.y = 0; + result.width = 0; + result.height = 0; + } + + return result; +} + +inline RECT Rectangle::Intersect(const RECT& rcta, const RECT& rctb) noexcept +{ + long maxX = rcta.left > rctb.left ? rcta.left : rctb.left; + long maxY = rcta.top > rctb.top ? rcta.top : rctb.top; + + long minRight = rcta.right < rctb.right ? rcta.right : rctb.right; + long minBottom = rcta.bottom < rctb.bottom ? rcta.bottom : rctb.bottom; + + RECT result; + + if ((minRight > maxX) && (minBottom > maxY)) + { + result.left = maxX; + result.top = maxY; + result.right = minRight; + result.bottom = minBottom; + } + else + { + result.left = 0; + result.top = 0; + result.right = 0; + result.bottom = 0; + } + + return result; +} + +inline Rectangle Rectangle::Union(const Rectangle& ra, const Rectangle& rb) noexcept +{ + long righta = ra.x + ra.width; + long rightb = rb.x + rb.width; + + long bottoma = ra.y + ra.height; + long bottomb = rb.y + rb.height; + + int minX = ra.x < rb.x ? ra.x : rb.x; + int minY = ra.y < rb.y ? ra.y : rb.y; + + int maxRight = righta > rightb ? righta : rightb; + int maxBottom = bottoma > bottomb ? bottoma : bottomb; + + Rectangle result; + result.x = minX; + result.y = minY; + result.width = maxRight - minX; + result.height = maxBottom - minY; + return result; +} + +inline RECT Rectangle::Union(const RECT& rcta, const RECT& rctb) noexcept +{ + RECT result; + result.left = rcta.left < rctb.left ? rcta.left : rctb.left; + result.top = rcta.top < rctb.top ? rcta.top : rctb.top; + result.right = rcta.right > rctb.right ? rcta.right : rctb.right; + result.bottom = rcta.bottom > rctb.bottom ? 
rcta.bottom : rctb.bottom; + return result; +} + + +/**************************************************************************** + * + * Vector2 + * + ****************************************************************************/ + +//------------------------------------------------------------------------------ +// Comparision operators +//------------------------------------------------------------------------------ + +inline bool Vector2::operator == (const Vector2& V) const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat2(this); + XMVECTOR v2 = XMLoadFloat2(&V); + return XMVector2Equal(v1, v2); +} + +inline bool Vector2::operator != (const Vector2& V) const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat2(this); + XMVECTOR v2 = XMLoadFloat2(&V); + return XMVector2NotEqual(v1, v2); +} + +//------------------------------------------------------------------------------ +// Assignment operators +//------------------------------------------------------------------------------ + +inline Vector2& Vector2::operator+= (const Vector2& V) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat2(this); + XMVECTOR v2 = XMLoadFloat2(&V); + XMVECTOR X = XMVectorAdd(v1, v2); + XMStoreFloat2(this, X); + return *this; +} + +inline Vector2& Vector2::operator-= (const Vector2& V) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat2(this); + XMVECTOR v2 = XMLoadFloat2(&V); + XMVECTOR X = XMVectorSubtract(v1, v2); + XMStoreFloat2(this, X); + return *this; +} + +inline Vector2& Vector2::operator*= (const Vector2& V) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat2(this); + XMVECTOR v2 = XMLoadFloat2(&V); + XMVECTOR X = XMVectorMultiply(v1, v2); + XMStoreFloat2(this, X); + return *this; +} + +inline Vector2& Vector2::operator*= (float S) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat2(this); + XMVECTOR X = XMVectorScale(v1, S); + XMStoreFloat2(this, X); + return *this; +} + +inline 
Vector2& Vector2::operator/= (float S) noexcept
+{
+    // Divides in place by scaling with the reciprocal of S; S is asserted non-zero.
+    using namespace DirectX;
+    assert(S != 0.0f);
+    const XMVECTOR self = XMLoadFloat2(this);
+    XMStoreFloat2(this, XMVectorScale(self, 1.f / S));
+    return *this;
+}
+
+//------------------------------------------------------------------------------
+// Binary operators
+//------------------------------------------------------------------------------
+
+// V1 + V2
+inline Vector2 operator+ (const Vector2& V1, const Vector2& V2) noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat2(&V1);
+    const XMVECTOR rhs = XMLoadFloat2(&V2);
+    Vector2 sum;
+    XMStoreFloat2(&sum, XMVectorAdd(lhs, rhs));
+    return sum;
+}
+
+// V1 - V2
+inline Vector2 operator- (const Vector2& V1, const Vector2& V2) noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat2(&V1);
+    const XMVECTOR rhs = XMLoadFloat2(&V2);
+    Vector2 difference;
+    XMStoreFloat2(&difference, XMVectorSubtract(lhs, rhs));
+    return difference;
+}
+
+// V1 * V2 through XMVectorMultiply
+inline Vector2 operator* (const Vector2& V1, const Vector2& V2) noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat2(&V1);
+    const XMVECTOR rhs = XMLoadFloat2(&V2);
+    Vector2 product;
+    XMStoreFloat2(&product, XMVectorMultiply(lhs, rhs));
+    return product;
+}
+
+// V * S
+inline Vector2 operator* (const Vector2& V, float S) noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR vec = XMLoadFloat2(&V);
+    Vector2 scaled;
+    XMStoreFloat2(&scaled, XMVectorScale(vec, S));
+    return scaled;
+}
+
+// V1 / V2 through XMVectorDivide
+inline Vector2 operator/ (const Vector2& V1, const Vector2& V2) noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR lhs = XMLoadFloat2(&V1);
+    const XMVECTOR rhs = XMLoadFloat2(&V2);
+    Vector2 quotient;
+    XMStoreFloat2(&quotient, XMVectorDivide(lhs, rhs));
+    return quotient;
+}
+
+// V / S, implemented as a scale by 1 / S
+inline Vector2 operator/ (const Vector2& V, float S) noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR vec = XMLoadFloat2(&V);
+    Vector2 scaled;
+    XMStoreFloat2(&scaled, XMVectorScale(vec, 1.f / S));
+    return scaled;
+}
+
+// S * V
+inline Vector2 operator* (float S, const Vector2& V) noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR vec = XMLoadFloat2(&V);
+    Vector2 scaled;
+    XMStoreFloat2(&scaled, XMVectorScale(vec, S));
+    return scaled;
+}
+
+//------------------------------------------------------------------------------
+// Vector operations
+//------------------------------------------------------------------------------
+
+// Forwards to XMVector2InBounds with this vector and Bounds.
+inline bool Vector2::InBounds(const Vector2& Bounds) const noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2(this);
+    const XMVECTOR limit = XMLoadFloat2(&Bounds);
+    return XMVector2InBounds(self, limit);
+}
+
+// Length, read from the X lane of XMVector2Length.
+inline float Vector2::Length() const noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2(this);
+    return XMVectorGetX(XMVector2Length(self));
+}
+
+// Squared length, read from the X lane of XMVector2LengthSq.
+inline float Vector2::LengthSquared() const noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2(this);
+    return XMVectorGetX(XMVector2LengthSq(self));
+}
+
+// Dot product with V, read from the X lane of XMVector2Dot.
+inline float Vector2::Dot(const Vector2& V) const noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2(this);
+    const XMVECTOR other = XMLoadFloat2(&V);
+    return XMVectorGetX(XMVector2Dot(self, other));
+}
+
+// Stores XMVector2Cross(this, V) into 'result'.
+inline void Vector2::Cross(const Vector2& V, Vector2& result) const noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2(this);
+    const XMVECTOR other = XMLoadFloat2(&V);
+    XMStoreFloat2(&result, XMVector2Cross(self, other));
+}
+
+// Returns XMVector2Cross(this, V) by value.
+inline Vector2 Vector2::Cross(const Vector2& V) const noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2(this);
+    const XMVECTOR other = XMLoadFloat2(&V);
+
+    Vector2 crossed;
+    XMStoreFloat2(&crossed, XMVector2Cross(self, other));
+    return crossed;
+}
+
+// Normalizes this vector in place.
+inline void Vector2::Normalize() noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2(this);
+    XMStoreFloat2(this, XMVector2Normalize(self));
+}
+
+// Writes the normalized vector to 'result'; *this is unchanged.
+inline void Vector2::Normalize(Vector2& result) const noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2(this);
+    XMStoreFloat2(&result, XMVector2Normalize(self));
+}
+
+// Clamps this vector in place between vmin and vmax.
+inline void Vector2::Clamp(const Vector2& vmin, const Vector2& vmax) noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2(this);
+    const XMVECTOR lower = XMLoadFloat2(&vmin);
+    const XMVECTOR upper = XMLoadFloat2(&vmax);
+    XMStoreFloat2(this, XMVectorClamp(self, lower, upper));
+}
+
+// Writes the clamped vector to 'result'; *this is unchanged.
+inline void Vector2::Clamp(const Vector2& vmin, const Vector2& vmax, Vector2& result) const noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR self = XMLoadFloat2(this);
+    const XMVECTOR lower = XMLoadFloat2(&vmin);
+    const XMVECTOR upper = XMLoadFloat2(&vmax);
+    XMStoreFloat2(&result, XMVectorClamp(self, lower, upper));
+}
+
+//------------------------------------------------------------------------------
+// Static functions
+//------------------------------------------------------------------------------
+
+// Length of (v2 - v1).
+inline float Vector2::Distance(const Vector2& v1, const Vector2& v2) noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR from = XMLoadFloat2(&v1);
+    const XMVECTOR to = XMLoadFloat2(&v2);
+    const XMVECTOR delta = XMVectorSubtract(to, from);
+    return XMVectorGetX(XMVector2Length(delta));
+}
+
+// Squared length of (v2 - v1).
+inline float Vector2::DistanceSquared(const Vector2& v1, const Vector2& v2) noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR from = XMLoadFloat2(&v1);
+    const XMVECTOR to = XMLoadFloat2(&v2);
+    const XMVECTOR delta = XMVectorSubtract(to, from);
+    return XMVectorGetX(XMVector2LengthSq(delta));
+}
+
+// Stores XMVectorMin(v1, v2) into 'result'.
+inline void Vector2::Min(const Vector2& v1, const Vector2& v2, Vector2& result) noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR a = XMLoadFloat2(&v1);
+    const XMVECTOR b = XMLoadFloat2(&v2);
+    XMStoreFloat2(&result, XMVectorMin(a, b));
+}
+
+// Returns XMVectorMin(v1, v2) by value.
+inline Vector2 Vector2::Min(const Vector2& v1, const Vector2& v2) noexcept
+{
+    using namespace DirectX;
+    const XMVECTOR a = XMLoadFloat2(&v1);
+    const XMVECTOR b = XMLoadFloat2(&v2);
+
+    Vector2 smallest;
+    XMStoreFloat2(&smallest, XMVectorMin(a, b));
+    return smallest;
+}
+
+inline void
Vector2::Max(const Vector2& v1, const Vector2& v2, Vector2& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat2(&v1); + XMVECTOR x2 = XMLoadFloat2(&v2); + XMVECTOR X = XMVectorMax(x1, x2); + XMStoreFloat2(&result, X); +} + +inline Vector2 Vector2::Max(const Vector2& v1, const Vector2& v2) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat2(&v1); + XMVECTOR x2 = XMLoadFloat2(&v2); + XMVECTOR X = XMVectorMax(x1, x2); + + Vector2 result; + XMStoreFloat2(&result, X); + return result; +} + +inline void Vector2::Lerp(const Vector2& v1, const Vector2& v2, float t, Vector2& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat2(&v1); + XMVECTOR x2 = XMLoadFloat2(&v2); + XMVECTOR X = XMVectorLerp(x1, x2, t); + XMStoreFloat2(&result, X); +} + +inline Vector2 Vector2::Lerp(const Vector2& v1, const Vector2& v2, float t) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat2(&v1); + XMVECTOR x2 = XMLoadFloat2(&v2); + XMVECTOR X = XMVectorLerp(x1, x2, t); + + Vector2 result; + XMStoreFloat2(&result, X); + return result; +} + +inline void Vector2::SmoothStep(const Vector2& v1, const Vector2& v2, float t, Vector2& result) noexcept +{ + using namespace DirectX; + t = (t > 1.0f) ? 1.0f : ((t < 0.0f) ? 0.0f : t); // Clamp value to 0 to 1 + t = t * t*(3.f - 2.f*t); + XMVECTOR x1 = XMLoadFloat2(&v1); + XMVECTOR x2 = XMLoadFloat2(&v2); + XMVECTOR X = XMVectorLerp(x1, x2, t); + XMStoreFloat2(&result, X); +} + +inline Vector2 Vector2::SmoothStep(const Vector2& v1, const Vector2& v2, float t) noexcept +{ + using namespace DirectX; + t = (t > 1.0f) ? 1.0f : ((t < 0.0f) ? 
0.0f : t); // Clamp value to 0 to 1 + t = t * t*(3.f - 2.f*t); + XMVECTOR x1 = XMLoadFloat2(&v1); + XMVECTOR x2 = XMLoadFloat2(&v2); + XMVECTOR X = XMVectorLerp(x1, x2, t); + + Vector2 result; + XMStoreFloat2(&result, X); + return result; +} + +inline void Vector2::Barycentric(const Vector2& v1, const Vector2& v2, const Vector2& v3, float f, float g, Vector2& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat2(&v1); + XMVECTOR x2 = XMLoadFloat2(&v2); + XMVECTOR x3 = XMLoadFloat2(&v3); + XMVECTOR X = XMVectorBaryCentric(x1, x2, x3, f, g); + XMStoreFloat2(&result, X); +} + +inline Vector2 Vector2::Barycentric(const Vector2& v1, const Vector2& v2, const Vector2& v3, float f, float g) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat2(&v1); + XMVECTOR x2 = XMLoadFloat2(&v2); + XMVECTOR x3 = XMLoadFloat2(&v3); + XMVECTOR X = XMVectorBaryCentric(x1, x2, x3, f, g); + + Vector2 result; + XMStoreFloat2(&result, X); + return result; +} + +inline void Vector2::CatmullRom(const Vector2& v1, const Vector2& v2, const Vector2& v3, const Vector2& v4, float t, Vector2& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat2(&v1); + XMVECTOR x2 = XMLoadFloat2(&v2); + XMVECTOR x3 = XMLoadFloat2(&v3); + XMVECTOR x4 = XMLoadFloat2(&v4); + XMVECTOR X = XMVectorCatmullRom(x1, x2, x3, x4, t); + XMStoreFloat2(&result, X); +} + +inline Vector2 Vector2::CatmullRom(const Vector2& v1, const Vector2& v2, const Vector2& v3, const Vector2& v4, float t) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat2(&v1); + XMVECTOR x2 = XMLoadFloat2(&v2); + XMVECTOR x3 = XMLoadFloat2(&v3); + XMVECTOR x4 = XMLoadFloat2(&v4); + XMVECTOR X = XMVectorCatmullRom(x1, x2, x3, x4, t); + + Vector2 result; + XMStoreFloat2(&result, X); + return result; +} + +inline void Vector2::Hermite(const Vector2& v1, const Vector2& t1, const Vector2& v2, const Vector2& t2, float t, Vector2& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = 
XMLoadFloat2(&v1); + XMVECTOR x2 = XMLoadFloat2(&t1); + XMVECTOR x3 = XMLoadFloat2(&v2); + XMVECTOR x4 = XMLoadFloat2(&t2); + XMVECTOR X = XMVectorHermite(x1, x2, x3, x4, t); + XMStoreFloat2(&result, X); +} + +inline Vector2 Vector2::Hermite(const Vector2& v1, const Vector2& t1, const Vector2& v2, const Vector2& t2, float t) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat2(&v1); + XMVECTOR x2 = XMLoadFloat2(&t1); + XMVECTOR x3 = XMLoadFloat2(&v2); + XMVECTOR x4 = XMLoadFloat2(&t2); + XMVECTOR X = XMVectorHermite(x1, x2, x3, x4, t); + + Vector2 result; + XMStoreFloat2(&result, X); + return result; +} + +inline void Vector2::Reflect(const Vector2& ivec, const Vector2& nvec, Vector2& result) noexcept +{ + using namespace DirectX; + XMVECTOR i = XMLoadFloat2(&ivec); + XMVECTOR n = XMLoadFloat2(&nvec); + XMVECTOR X = XMVector2Reflect(i, n); + XMStoreFloat2(&result, X); +} + +inline Vector2 Vector2::Reflect(const Vector2& ivec, const Vector2& nvec) noexcept +{ + using namespace DirectX; + XMVECTOR i = XMLoadFloat2(&ivec); + XMVECTOR n = XMLoadFloat2(&nvec); + XMVECTOR X = XMVector2Reflect(i, n); + + Vector2 result; + XMStoreFloat2(&result, X); + return result; +} + +inline void Vector2::Refract(const Vector2& ivec, const Vector2& nvec, float refractionIndex, Vector2& result) noexcept +{ + using namespace DirectX; + XMVECTOR i = XMLoadFloat2(&ivec); + XMVECTOR n = XMLoadFloat2(&nvec); + XMVECTOR X = XMVector2Refract(i, n, refractionIndex); + XMStoreFloat2(&result, X); +} + +inline Vector2 Vector2::Refract(const Vector2& ivec, const Vector2& nvec, float refractionIndex) noexcept +{ + using namespace DirectX; + XMVECTOR i = XMLoadFloat2(&ivec); + XMVECTOR n = XMLoadFloat2(&nvec); + XMVECTOR X = XMVector2Refract(i, n, refractionIndex); + + Vector2 result; + XMStoreFloat2(&result, X); + return result; +} + +inline void Vector2::Transform(const Vector2& v, const Quaternion& quat, Vector2& result) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = 
XMLoadFloat2(&v); + XMVECTOR q = XMLoadFloat4(&quat); + XMVECTOR X = XMVector3Rotate(v1, q); + XMStoreFloat2(&result, X); +} + +inline Vector2 Vector2::Transform(const Vector2& v, const Quaternion& quat) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat2(&v); + XMVECTOR q = XMLoadFloat4(&quat); + XMVECTOR X = XMVector3Rotate(v1, q); + + Vector2 result; + XMStoreFloat2(&result, X); + return result; +} + +inline void Vector2::Transform(const Vector2& v, const Matrix& m, Vector2& result) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat2(&v); + XMMATRIX M = XMLoadFloat4x4(&m); + XMVECTOR X = XMVector2TransformCoord(v1, M); + XMStoreFloat2(&result, X); +} + +inline Vector2 Vector2::Transform(const Vector2& v, const Matrix& m) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat2(&v); + XMMATRIX M = XMLoadFloat4x4(&m); + XMVECTOR X = XMVector2TransformCoord(v1, M); + + Vector2 result; + XMStoreFloat2(&result, X); + return result; +} + +_Use_decl_annotations_ +inline void Vector2::Transform(const Vector2* varray, size_t count, const Matrix& m, Vector2* resultArray) noexcept +{ + using namespace DirectX; + XMMATRIX M = XMLoadFloat4x4(&m); + XMVector2TransformCoordStream(resultArray, sizeof(XMFLOAT2), varray, sizeof(XMFLOAT2), count, M); +} + +inline void Vector2::Transform(const Vector2& v, const Matrix& m, Vector4& result) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat2(&v); + XMMATRIX M = XMLoadFloat4x4(&m); + XMVECTOR X = XMVector2Transform(v1, M); + XMStoreFloat4(&result, X); +} + +_Use_decl_annotations_ +inline void Vector2::Transform(const Vector2* varray, size_t count, const Matrix& m, Vector4* resultArray) noexcept +{ + using namespace DirectX; + XMMATRIX M = XMLoadFloat4x4(&m); + XMVector2TransformStream(resultArray, sizeof(XMFLOAT4), varray, sizeof(XMFLOAT2), count, M); +} + +inline void Vector2::TransformNormal(const Vector2& v, const Matrix& m, Vector2& result) noexcept +{ + using namespace 
DirectX; + XMVECTOR v1 = XMLoadFloat2(&v); + XMMATRIX M = XMLoadFloat4x4(&m); + XMVECTOR X = XMVector2TransformNormal(v1, M); + XMStoreFloat2(&result, X); +} + +inline Vector2 Vector2::TransformNormal(const Vector2& v, const Matrix& m) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat2(&v); + XMMATRIX M = XMLoadFloat4x4(&m); + XMVECTOR X = XMVector2TransformNormal(v1, M); + + Vector2 result; + XMStoreFloat2(&result, X); + return result; +} + +_Use_decl_annotations_ +inline void Vector2::TransformNormal(const Vector2* varray, size_t count, const Matrix& m, Vector2* resultArray) noexcept +{ + using namespace DirectX; + XMMATRIX M = XMLoadFloat4x4(&m); + XMVector2TransformNormalStream(resultArray, sizeof(XMFLOAT2), varray, sizeof(XMFLOAT2), count, M); +} + + +/**************************************************************************** + * + * Vector3 + * + ****************************************************************************/ + +//------------------------------------------------------------------------------ +// Comparision operators +//------------------------------------------------------------------------------ + +inline bool Vector3::operator == (const Vector3& V) const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR v2 = XMLoadFloat3(&V); + return XMVector3Equal(v1, v2); +} + +inline bool Vector3::operator != (const Vector3& V) const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR v2 = XMLoadFloat3(&V); + return XMVector3NotEqual(v1, v2); +} + +//------------------------------------------------------------------------------ +// Assignment operators +//------------------------------------------------------------------------------ + +inline Vector3& Vector3::operator+= (const Vector3& V) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR v2 = XMLoadFloat3(&V); + XMVECTOR X = XMVectorAdd(v1, v2); + XMStoreFloat3(this, X); + return 
*this; +} + +inline Vector3& Vector3::operator-= (const Vector3& V) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR v2 = XMLoadFloat3(&V); + XMVECTOR X = XMVectorSubtract(v1, v2); + XMStoreFloat3(this, X); + return *this; +} + +inline Vector3& Vector3::operator*= (const Vector3& V) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR v2 = XMLoadFloat3(&V); + XMVECTOR X = XMVectorMultiply(v1, v2); + XMStoreFloat3(this, X); + return *this; +} + +inline Vector3& Vector3::operator*= (float S) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR X = XMVectorScale(v1, S); + XMStoreFloat3(this, X); + return *this; +} + +inline Vector3& Vector3::operator/= (float S) noexcept +{ + using namespace DirectX; + assert(S != 0.0f); + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR X = XMVectorScale(v1, 1.f / S); + XMStoreFloat3(this, X); + return *this; +} + +//------------------------------------------------------------------------------ +// Urnary operators +//------------------------------------------------------------------------------ + +inline Vector3 Vector3::operator- () const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR X = XMVectorNegate(v1); + Vector3 R; + XMStoreFloat3(&R, X); + return R; +} + +//------------------------------------------------------------------------------ +// Binary operators +//------------------------------------------------------------------------------ + +inline Vector3 operator+ (const Vector3& V1, const Vector3& V2) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&V1); + XMVECTOR v2 = XMLoadFloat3(&V2); + XMVECTOR X = XMVectorAdd(v1, v2); + Vector3 R; + XMStoreFloat3(&R, X); + return R; +} + +inline Vector3 operator- (const Vector3& V1, const Vector3& V2) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&V1); + XMVECTOR v2 = XMLoadFloat3(&V2); + XMVECTOR X = 
XMVectorSubtract(v1, v2); + Vector3 R; + XMStoreFloat3(&R, X); + return R; +} + +inline Vector3 operator* (const Vector3& V1, const Vector3& V2) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&V1); + XMVECTOR v2 = XMLoadFloat3(&V2); + XMVECTOR X = XMVectorMultiply(v1, v2); + Vector3 R; + XMStoreFloat3(&R, X); + return R; +} + +inline Vector3 operator* (const Vector3& V, float S) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&V); + XMVECTOR X = XMVectorScale(v1, S); + Vector3 R; + XMStoreFloat3(&R, X); + return R; +} + +inline Vector3 operator/ (const Vector3& V1, const Vector3& V2) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&V1); + XMVECTOR v2 = XMLoadFloat3(&V2); + XMVECTOR X = XMVectorDivide(v1, v2); + Vector3 R; + XMStoreFloat3(&R, X); + return R; +} + +inline Vector3 operator/ (const Vector3& V, float S) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&V); + XMVECTOR X = XMVectorScale(v1, 1.f / S); + Vector3 R; + XMStoreFloat3(&R, X); + return R; +} + +inline Vector3 operator* (float S, const Vector3& V) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&V); + XMVECTOR X = XMVectorScale(v1, S); + Vector3 R; + XMStoreFloat3(&R, X); + return R; +} + +//------------------------------------------------------------------------------ +// Vector operations +//------------------------------------------------------------------------------ + +inline bool Vector3::InBounds(const Vector3& Bounds) const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR v2 = XMLoadFloat3(&Bounds); + return XMVector3InBounds(v1, v2); +} + +inline float Vector3::Length() const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR X = XMVector3Length(v1); + return XMVectorGetX(X); +} + +inline float Vector3::LengthSquared() const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR X = 
XMVector3LengthSq(v1); + return XMVectorGetX(X); +} + +inline float Vector3::Dot(const Vector3& V) const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR v2 = XMLoadFloat3(&V); + XMVECTOR X = XMVector3Dot(v1, v2); + return XMVectorGetX(X); +} + +inline void Vector3::Cross(const Vector3& V, Vector3& result) const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR v2 = XMLoadFloat3(&V); + XMVECTOR R = XMVector3Cross(v1, v2); + XMStoreFloat3(&result, R); +} + +inline Vector3 Vector3::Cross(const Vector3& V) const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR v2 = XMLoadFloat3(&V); + XMVECTOR R = XMVector3Cross(v1, v2); + + Vector3 result; + XMStoreFloat3(&result, R); + return result; +} + +inline void Vector3::Normalize() noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR X = XMVector3Normalize(v1); + XMStoreFloat3(this, X); +} + +inline void Vector3::Normalize(Vector3& result) const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR X = XMVector3Normalize(v1); + XMStoreFloat3(&result, X); +} + +inline void Vector3::Clamp(const Vector3& vmin, const Vector3& vmax) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR v2 = XMLoadFloat3(&vmin); + XMVECTOR v3 = XMLoadFloat3(&vmax); + XMVECTOR X = XMVectorClamp(v1, v2, v3); + XMStoreFloat3(this, X); +} + +inline void Vector3::Clamp(const Vector3& vmin, const Vector3& vmax, Vector3& result) const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(this); + XMVECTOR v2 = XMLoadFloat3(&vmin); + XMVECTOR v3 = XMLoadFloat3(&vmax); + XMVECTOR X = XMVectorClamp(v1, v2, v3); + XMStoreFloat3(&result, X); +} + +//------------------------------------------------------------------------------ +// Static functions +//------------------------------------------------------------------------------ + +inline float 
Vector3::Distance(const Vector3& v1, const Vector3& v2) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = XMLoadFloat3(&v2); + XMVECTOR V = XMVectorSubtract(x2, x1); + XMVECTOR X = XMVector3Length(V); + return XMVectorGetX(X); +} + +inline float Vector3::DistanceSquared(const Vector3& v1, const Vector3& v2) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = XMLoadFloat3(&v2); + XMVECTOR V = XMVectorSubtract(x2, x1); + XMVECTOR X = XMVector3LengthSq(V); + return XMVectorGetX(X); +} + +inline void Vector3::Min(const Vector3& v1, const Vector3& v2, Vector3& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = XMLoadFloat3(&v2); + XMVECTOR X = XMVectorMin(x1, x2); + XMStoreFloat3(&result, X); +} + +inline Vector3 Vector3::Min(const Vector3& v1, const Vector3& v2) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = XMLoadFloat3(&v2); + XMVECTOR X = XMVectorMin(x1, x2); + + Vector3 result; + XMStoreFloat3(&result, X); + return result; +} + +inline void Vector3::Max(const Vector3& v1, const Vector3& v2, Vector3& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = XMLoadFloat3(&v2); + XMVECTOR X = XMVectorMax(x1, x2); + XMStoreFloat3(&result, X); +} + +inline Vector3 Vector3::Max(const Vector3& v1, const Vector3& v2) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = XMLoadFloat3(&v2); + XMVECTOR X = XMVectorMax(x1, x2); + + Vector3 result; + XMStoreFloat3(&result, X); + return result; +} + +inline void Vector3::Lerp(const Vector3& v1, const Vector3& v2, float t, Vector3& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = XMLoadFloat3(&v2); + XMVECTOR X = XMVectorLerp(x1, x2, t); + XMStoreFloat3(&result, X); +} + +inline Vector3 Vector3::Lerp(const Vector3& v1, const Vector3& v2, 
float t) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = XMLoadFloat3(&v2); + XMVECTOR X = XMVectorLerp(x1, x2, t); + + Vector3 result; + XMStoreFloat3(&result, X); + return result; +} + +inline void Vector3::SmoothStep(const Vector3& v1, const Vector3& v2, float t, Vector3& result) noexcept +{ + using namespace DirectX; + t = (t > 1.0f) ? 1.0f : ((t < 0.0f) ? 0.0f : t); // Clamp value to 0 to 1 + t = t * t*(3.f - 2.f*t); + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = XMLoadFloat3(&v2); + XMVECTOR X = XMVectorLerp(x1, x2, t); + XMStoreFloat3(&result, X); +} + +inline Vector3 Vector3::SmoothStep(const Vector3& v1, const Vector3& v2, float t) noexcept +{ + using namespace DirectX; + t = (t > 1.0f) ? 1.0f : ((t < 0.0f) ? 0.0f : t); // Clamp value to 0 to 1 + t = t * t*(3.f - 2.f*t); + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = XMLoadFloat3(&v2); + XMVECTOR X = XMVectorLerp(x1, x2, t); + + Vector3 result; + XMStoreFloat3(&result, X); + return result; +} + +inline void Vector3::Barycentric(const Vector3& v1, const Vector3& v2, const Vector3& v3, float f, float g, Vector3& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = XMLoadFloat3(&v2); + XMVECTOR x3 = XMLoadFloat3(&v3); + XMVECTOR X = XMVectorBaryCentric(x1, x2, x3, f, g); + XMStoreFloat3(&result, X); +} + +inline Vector3 Vector3::Barycentric(const Vector3& v1, const Vector3& v2, const Vector3& v3, float f, float g) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = XMLoadFloat3(&v2); + XMVECTOR x3 = XMLoadFloat3(&v3); + XMVECTOR X = XMVectorBaryCentric(x1, x2, x3, f, g); + + Vector3 result; + XMStoreFloat3(&result, X); + return result; +} + +inline void Vector3::CatmullRom(const Vector3& v1, const Vector3& v2, const Vector3& v3, const Vector3& v4, float t, Vector3& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = 
XMLoadFloat3(&v2); + XMVECTOR x3 = XMLoadFloat3(&v3); + XMVECTOR x4 = XMLoadFloat3(&v4); + XMVECTOR X = XMVectorCatmullRom(x1, x2, x3, x4, t); + XMStoreFloat3(&result, X); +} + +inline Vector3 Vector3::CatmullRom(const Vector3& v1, const Vector3& v2, const Vector3& v3, const Vector3& v4, float t) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = XMLoadFloat3(&v2); + XMVECTOR x3 = XMLoadFloat3(&v3); + XMVECTOR x4 = XMLoadFloat3(&v4); + XMVECTOR X = XMVectorCatmullRom(x1, x2, x3, x4, t); + + Vector3 result; + XMStoreFloat3(&result, X); + return result; +} + +inline void Vector3::Hermite(const Vector3& v1, const Vector3& t1, const Vector3& v2, const Vector3& t2, float t, Vector3& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = XMLoadFloat3(&t1); + XMVECTOR x3 = XMLoadFloat3(&v2); + XMVECTOR x4 = XMLoadFloat3(&t2); + XMVECTOR X = XMVectorHermite(x1, x2, x3, x4, t); + XMStoreFloat3(&result, X); +} + +inline Vector3 Vector3::Hermite(const Vector3& v1, const Vector3& t1, const Vector3& v2, const Vector3& t2, float t) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat3(&v1); + XMVECTOR x2 = XMLoadFloat3(&t1); + XMVECTOR x3 = XMLoadFloat3(&v2); + XMVECTOR x4 = XMLoadFloat3(&t2); + XMVECTOR X = XMVectorHermite(x1, x2, x3, x4, t); + + Vector3 result; + XMStoreFloat3(&result, X); + return result; +} + +inline void Vector3::Reflect(const Vector3& ivec, const Vector3& nvec, Vector3& result) noexcept +{ + using namespace DirectX; + XMVECTOR i = XMLoadFloat3(&ivec); + XMVECTOR n = XMLoadFloat3(&nvec); + XMVECTOR X = XMVector3Reflect(i, n); + XMStoreFloat3(&result, X); +} + +inline Vector3 Vector3::Reflect(const Vector3& ivec, const Vector3& nvec) noexcept +{ + using namespace DirectX; + XMVECTOR i = XMLoadFloat3(&ivec); + XMVECTOR n = XMLoadFloat3(&nvec); + XMVECTOR X = XMVector3Reflect(i, n); + + Vector3 result; + XMStoreFloat3(&result, X); + return result; +} + +inline void 
Vector3::Refract(const Vector3& ivec, const Vector3& nvec, float refractionIndex, Vector3& result) noexcept +{ + using namespace DirectX; + XMVECTOR i = XMLoadFloat3(&ivec); + XMVECTOR n = XMLoadFloat3(&nvec); + XMVECTOR X = XMVector3Refract(i, n, refractionIndex); + XMStoreFloat3(&result, X); +} + +inline Vector3 Vector3::Refract(const Vector3& ivec, const Vector3& nvec, float refractionIndex) noexcept +{ + using namespace DirectX; + XMVECTOR i = XMLoadFloat3(&ivec); + XMVECTOR n = XMLoadFloat3(&nvec); + XMVECTOR X = XMVector3Refract(i, n, refractionIndex); + + Vector3 result; + XMStoreFloat3(&result, X); + return result; +} + +inline void Vector3::Transform(const Vector3& v, const Quaternion& quat, Vector3& result) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&v); + XMVECTOR q = XMLoadFloat4(&quat); + XMVECTOR X = XMVector3Rotate(v1, q); + XMStoreFloat3(&result, X); +} + +inline Vector3 Vector3::Transform(const Vector3& v, const Quaternion& quat) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&v); + XMVECTOR q = XMLoadFloat4(&quat); + XMVECTOR X = XMVector3Rotate(v1, q); + + Vector3 result; + XMStoreFloat3(&result, X); + return result; +} + +inline void Vector3::Transform(const Vector3& v, const Matrix& m, Vector3& result) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&v); + XMMATRIX M = XMLoadFloat4x4(&m); + XMVECTOR X = XMVector3TransformCoord(v1, M); + XMStoreFloat3(&result, X); +} + +inline Vector3 Vector3::Transform(const Vector3& v, const Matrix& m) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&v); + XMMATRIX M = XMLoadFloat4x4(&m); + XMVECTOR X = XMVector3TransformCoord(v1, M); + + Vector3 result; + XMStoreFloat3(&result, X); + return result; +} + +_Use_decl_annotations_ +inline void Vector3::Transform(const Vector3* varray, size_t count, const Matrix& m, Vector3* resultArray) noexcept +{ + using namespace DirectX; + XMMATRIX M = XMLoadFloat4x4(&m); + 
XMVector3TransformCoordStream(resultArray, sizeof(XMFLOAT3), varray, sizeof(XMFLOAT3), count, M); +} + +inline void Vector3::Transform(const Vector3& v, const Matrix& m, Vector4& result) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&v); + XMMATRIX M = XMLoadFloat4x4(&m); + XMVECTOR X = XMVector3Transform(v1, M); + XMStoreFloat4(&result, X); +} + +_Use_decl_annotations_ +inline void Vector3::Transform(const Vector3* varray, size_t count, const Matrix& m, Vector4* resultArray) noexcept +{ + using namespace DirectX; + XMMATRIX M = XMLoadFloat4x4(&m); + XMVector3TransformStream(resultArray, sizeof(XMFLOAT4), varray, sizeof(XMFLOAT3), count, M); +} + +inline void Vector3::TransformNormal(const Vector3& v, const Matrix& m, Vector3& result) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&v); + XMMATRIX M = XMLoadFloat4x4(&m); + XMVECTOR X = XMVector3TransformNormal(v1, M); + XMStoreFloat3(&result, X); +} + +inline Vector3 Vector3::TransformNormal(const Vector3& v, const Matrix& m) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&v); + XMMATRIX M = XMLoadFloat4x4(&m); + XMVECTOR X = XMVector3TransformNormal(v1, M); + + Vector3 result; + XMStoreFloat3(&result, X); + return result; +} + +_Use_decl_annotations_ +inline void Vector3::TransformNormal(const Vector3* varray, size_t count, const Matrix& m, Vector3* resultArray) noexcept +{ + using namespace DirectX; + XMMATRIX M = XMLoadFloat4x4(&m); + XMVector3TransformNormalStream(resultArray, sizeof(XMFLOAT3), varray, sizeof(XMFLOAT3), count, M); +} + + +/**************************************************************************** + * + * Vector4 + * + ****************************************************************************/ + +//------------------------------------------------------------------------------ +// Comparision operators +//------------------------------------------------------------------------------ + +inline bool Vector4::operator == (const 
Vector4& V) const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR v2 = XMLoadFloat4(&V); + return XMVector4Equal(v1, v2); +} + +inline bool Vector4::operator != (const Vector4& V) const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR v2 = XMLoadFloat4(&V); + return XMVector4NotEqual(v1, v2); +} + +//------------------------------------------------------------------------------ +// Assignment operators +//------------------------------------------------------------------------------ + +inline Vector4& Vector4::operator+= (const Vector4& V) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR v2 = XMLoadFloat4(&V); + XMVECTOR X = XMVectorAdd(v1, v2); + XMStoreFloat4(this, X); + return *this; +} + +inline Vector4& Vector4::operator-= (const Vector4& V) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR v2 = XMLoadFloat4(&V); + XMVECTOR X = XMVectorSubtract(v1, v2); + XMStoreFloat4(this, X); + return *this; +} + +inline Vector4& Vector4::operator*= (const Vector4& V) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR v2 = XMLoadFloat4(&V); + XMVECTOR X = XMVectorMultiply(v1, v2); + XMStoreFloat4(this, X); + return *this; +} + +inline Vector4& Vector4::operator*= (float S) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR X = XMVectorScale(v1, S); + XMStoreFloat4(this, X); + return *this; +} + +inline Vector4& Vector4::operator/= (float S) noexcept +{ + using namespace DirectX; + assert(S != 0.0f); + XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR X = XMVectorScale(v1, 1.f / S); + XMStoreFloat4(this, X); + return *this; +} + +//------------------------------------------------------------------------------ +// Urnary operators +//------------------------------------------------------------------------------ + +inline Vector4 Vector4::operator- () const noexcept 
+{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR X = XMVectorNegate(v1); + Vector4 R; + XMStoreFloat4(&R, X); + return R; +} + +//------------------------------------------------------------------------------ +// Binary operators +//------------------------------------------------------------------------------ + +inline Vector4 operator+ (const Vector4& V1, const Vector4& V2) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(&V1); + XMVECTOR v2 = XMLoadFloat4(&V2); + XMVECTOR X = XMVectorAdd(v1, v2); + Vector4 R; + XMStoreFloat4(&R, X); + return R; +} + +inline Vector4 operator- (const Vector4& V1, const Vector4& V2) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(&V1); + XMVECTOR v2 = XMLoadFloat4(&V2); + XMVECTOR X = XMVectorSubtract(v1, v2); + Vector4 R; + XMStoreFloat4(&R, X); + return R; +} + +inline Vector4 operator* (const Vector4& V1, const Vector4& V2) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(&V1); + XMVECTOR v2 = XMLoadFloat4(&V2); + XMVECTOR X = XMVectorMultiply(v1, v2); + Vector4 R; + XMStoreFloat4(&R, X); + return R; +} + +inline Vector4 operator* (const Vector4& V, float S) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(&V); + XMVECTOR X = XMVectorScale(v1, S); + Vector4 R; + XMStoreFloat4(&R, X); + return R; +} + +inline Vector4 operator/ (const Vector4& V1, const Vector4& V2) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(&V1); + XMVECTOR v2 = XMLoadFloat4(&V2); + XMVECTOR X = XMVectorDivide(v1, v2); + Vector4 R; + XMStoreFloat4(&R, X); + return R; +} + +inline Vector4 operator/ (const Vector4& V, float S) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(&V); + XMVECTOR X = XMVectorScale(v1, 1.f / S); + Vector4 R; + XMStoreFloat4(&R, X); + return R; +} + +inline Vector4 operator* (float S, const Vector4& V) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(&V); + XMVECTOR X = 
XMVectorScale(v1, S); + Vector4 R; + XMStoreFloat4(&R, X); + return R; +} + +//------------------------------------------------------------------------------ +// Vector operations +//------------------------------------------------------------------------------ + +inline bool Vector4::InBounds(const Vector4& Bounds) const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR v2 = XMLoadFloat4(&Bounds); + return XMVector4InBounds(v1, v2); +} + +inline float Vector4::Length() const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR X = XMVector4Length(v1); + return XMVectorGetX(X); +} + +inline float Vector4::LengthSquared() const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR X = XMVector4LengthSq(v1); + return XMVectorGetX(X); +} + +inline float Vector4::Dot(const Vector4& V) const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR v2 = XMLoadFloat4(&V); + XMVECTOR X = XMVector4Dot(v1, v2); + return XMVectorGetX(X); +} + +inline void Vector4::Cross(const Vector4& v1, const Vector4& v2, Vector4& result) const noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(this); + XMVECTOR x2 = XMLoadFloat4(&v1); + XMVECTOR x3 = XMLoadFloat4(&v2); + XMVECTOR R = XMVector4Cross(x1, x2, x3); + XMStoreFloat4(&result, R); +} + +inline Vector4 Vector4::Cross(const Vector4& v1, const Vector4& v2) const noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(this); + XMVECTOR x2 = XMLoadFloat4(&v1); + XMVECTOR x3 = XMLoadFloat4(&v2); + XMVECTOR R = XMVector4Cross(x1, x2, x3); + + Vector4 result; + XMStoreFloat4(&result, R); + return result; +} + +inline void Vector4::Normalize() noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR X = XMVector4Normalize(v1); + XMStoreFloat4(this, X); +} + +inline void Vector4::Normalize(Vector4& result) const noexcept +{ + using namespace DirectX; + 
XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR X = XMVector4Normalize(v1); + XMStoreFloat4(&result, X); +} + +inline void Vector4::Clamp(const Vector4& vmin, const Vector4& vmax) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR v2 = XMLoadFloat4(&vmin); + XMVECTOR v3 = XMLoadFloat4(&vmax); + XMVECTOR X = XMVectorClamp(v1, v2, v3); + XMStoreFloat4(this, X); +} + +inline void Vector4::Clamp(const Vector4& vmin, const Vector4& vmax, Vector4& result) const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(this); + XMVECTOR v2 = XMLoadFloat4(&vmin); + XMVECTOR v3 = XMLoadFloat4(&vmax); + XMVECTOR X = XMVectorClamp(v1, v2, v3); + XMStoreFloat4(&result, X); +} + +//------------------------------------------------------------------------------ +// Static functions +//------------------------------------------------------------------------------ + +inline float Vector4::Distance(const Vector4& v1, const Vector4& v2) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&v2); + XMVECTOR V = XMVectorSubtract(x2, x1); + XMVECTOR X = XMVector4Length(V); + return XMVectorGetX(X); +} + +inline float Vector4::DistanceSquared(const Vector4& v1, const Vector4& v2) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&v2); + XMVECTOR V = XMVectorSubtract(x2, x1); + XMVECTOR X = XMVector4LengthSq(V); + return XMVectorGetX(X); +} + +inline void Vector4::Min(const Vector4& v1, const Vector4& v2, Vector4& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&v2); + XMVECTOR X = XMVectorMin(x1, x2); + XMStoreFloat4(&result, X); +} + +inline Vector4 Vector4::Min(const Vector4& v1, const Vector4& v2) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&v2); + XMVECTOR X = XMVectorMin(x1, x2); + + Vector4 result; + 
XMStoreFloat4(&result, X); + return result; +} + +inline void Vector4::Max(const Vector4& v1, const Vector4& v2, Vector4& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&v2); + XMVECTOR X = XMVectorMax(x1, x2); + XMStoreFloat4(&result, X); +} + +inline Vector4 Vector4::Max(const Vector4& v1, const Vector4& v2) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&v2); + XMVECTOR X = XMVectorMax(x1, x2); + + Vector4 result; + XMStoreFloat4(&result, X); + return result; +} + +inline void Vector4::Lerp(const Vector4& v1, const Vector4& v2, float t, Vector4& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&v2); + XMVECTOR X = XMVectorLerp(x1, x2, t); + XMStoreFloat4(&result, X); +} + +inline Vector4 Vector4::Lerp(const Vector4& v1, const Vector4& v2, float t) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&v2); + XMVECTOR X = XMVectorLerp(x1, x2, t); + + Vector4 result; + XMStoreFloat4(&result, X); + return result; +} + +inline void Vector4::SmoothStep(const Vector4& v1, const Vector4& v2, float t, Vector4& result) noexcept +{ + using namespace DirectX; + t = (t > 1.0f) ? 1.0f : ((t < 0.0f) ? 0.0f : t); // Clamp value to 0 to 1 + t = t * t*(3.f - 2.f*t); + XMVECTOR x1 = XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&v2); + XMVECTOR X = XMVectorLerp(x1, x2, t); + XMStoreFloat4(&result, X); +} + +inline Vector4 Vector4::SmoothStep(const Vector4& v1, const Vector4& v2, float t) noexcept +{ + using namespace DirectX; + t = (t > 1.0f) ? 1.0f : ((t < 0.0f) ? 
0.0f : t); // Clamp value to 0 to 1 + t = t * t*(3.f - 2.f*t); + XMVECTOR x1 = XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&v2); + XMVECTOR X = XMVectorLerp(x1, x2, t); + + Vector4 result; + XMStoreFloat4(&result, X); + return result; +} + +inline void Vector4::Barycentric(const Vector4& v1, const Vector4& v2, const Vector4& v3, float f, float g, Vector4& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&v2); + XMVECTOR x3 = XMLoadFloat4(&v3); + XMVECTOR X = XMVectorBaryCentric(x1, x2, x3, f, g); + XMStoreFloat4(&result, X); +} + +inline Vector4 Vector4::Barycentric(const Vector4& v1, const Vector4& v2, const Vector4& v3, float f, float g) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&v2); + XMVECTOR x3 = XMLoadFloat4(&v3); + XMVECTOR X = XMVectorBaryCentric(x1, x2, x3, f, g); + + Vector4 result; + XMStoreFloat4(&result, X); + return result; +} + +inline void Vector4::CatmullRom(const Vector4& v1, const Vector4& v2, const Vector4& v3, const Vector4& v4, float t, Vector4& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&v2); + XMVECTOR x3 = XMLoadFloat4(&v3); + XMVECTOR x4 = XMLoadFloat4(&v4); + XMVECTOR X = XMVectorCatmullRom(x1, x2, x3, x4, t); + XMStoreFloat4(&result, X); +} + +inline Vector4 Vector4::CatmullRom(const Vector4& v1, const Vector4& v2, const Vector4& v3, const Vector4& v4, float t) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&v2); + XMVECTOR x3 = XMLoadFloat4(&v3); + XMVECTOR x4 = XMLoadFloat4(&v4); + XMVECTOR X = XMVectorCatmullRom(x1, x2, x3, x4, t); + + Vector4 result; + XMStoreFloat4(&result, X); + return result; +} + +inline void Vector4::Hermite(const Vector4& v1, const Vector4& t1, const Vector4& v2, const Vector4& t2, float t, Vector4& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = 
XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&t1); + XMVECTOR x3 = XMLoadFloat4(&v2); + XMVECTOR x4 = XMLoadFloat4(&t2); + XMVECTOR X = XMVectorHermite(x1, x2, x3, x4, t); + XMStoreFloat4(&result, X); +} + +inline Vector4 Vector4::Hermite(const Vector4& v1, const Vector4& t1, const Vector4& v2, const Vector4& t2, float t) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(&v1); + XMVECTOR x2 = XMLoadFloat4(&t1); + XMVECTOR x3 = XMLoadFloat4(&v2); + XMVECTOR x4 = XMLoadFloat4(&t2); + XMVECTOR X = XMVectorHermite(x1, x2, x3, x4, t); + + Vector4 result; + XMStoreFloat4(&result, X); + return result; +} + +inline void Vector4::Reflect(const Vector4& ivec, const Vector4& nvec, Vector4& result) noexcept +{ + using namespace DirectX; + XMVECTOR i = XMLoadFloat4(&ivec); + XMVECTOR n = XMLoadFloat4(&nvec); + XMVECTOR X = XMVector4Reflect(i, n); + XMStoreFloat4(&result, X); +} + +inline Vector4 Vector4::Reflect(const Vector4& ivec, const Vector4& nvec) noexcept +{ + using namespace DirectX; + XMVECTOR i = XMLoadFloat4(&ivec); + XMVECTOR n = XMLoadFloat4(&nvec); + XMVECTOR X = XMVector4Reflect(i, n); + + Vector4 result; + XMStoreFloat4(&result, X); + return result; +} + +inline void Vector4::Refract(const Vector4& ivec, const Vector4& nvec, float refractionIndex, Vector4& result) noexcept +{ + using namespace DirectX; + XMVECTOR i = XMLoadFloat4(&ivec); + XMVECTOR n = XMLoadFloat4(&nvec); + XMVECTOR X = XMVector4Refract(i, n, refractionIndex); + XMStoreFloat4(&result, X); +} + +inline Vector4 Vector4::Refract(const Vector4& ivec, const Vector4& nvec, float refractionIndex) noexcept +{ + using namespace DirectX; + XMVECTOR i = XMLoadFloat4(&ivec); + XMVECTOR n = XMLoadFloat4(&nvec); + XMVECTOR X = XMVector4Refract(i, n, refractionIndex); + + Vector4 result; + XMStoreFloat4(&result, X); + return result; +} + +inline void Vector4::Transform(const Vector2& v, const Quaternion& quat, Vector4& result) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = 
XMLoadFloat2(&v); + XMVECTOR q = XMLoadFloat4(&quat); + XMVECTOR X = XMVector3Rotate(v1, q); + X = XMVectorSelect(g_XMIdentityR3, X, g_XMSelect1110); // result.w = 1.f + XMStoreFloat4(&result, X); +} + +inline Vector4 Vector4::Transform(const Vector2& v, const Quaternion& quat) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat2(&v); + XMVECTOR q = XMLoadFloat4(&quat); + XMVECTOR X = XMVector3Rotate(v1, q); + X = XMVectorSelect(g_XMIdentityR3, X, g_XMSelect1110); // result.w = 1.f + + Vector4 result; + XMStoreFloat4(&result, X); + return result; +} + +inline void Vector4::Transform(const Vector3& v, const Quaternion& quat, Vector4& result) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&v); + XMVECTOR q = XMLoadFloat4(&quat); + XMVECTOR X = XMVector3Rotate(v1, q); + X = XMVectorSelect(g_XMIdentityR3, X, g_XMSelect1110); // result.w = 1.f + XMStoreFloat4(&result, X); +} + +inline Vector4 Vector4::Transform(const Vector3& v, const Quaternion& quat) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat3(&v); + XMVECTOR q = XMLoadFloat4(&quat); + XMVECTOR X = XMVector3Rotate(v1, q); + X = XMVectorSelect(g_XMIdentityR3, X, g_XMSelect1110); // result.w = 1.f + + Vector4 result; + XMStoreFloat4(&result, X); + return result; +} + +inline void Vector4::Transform(const Vector4& v, const Quaternion& quat, Vector4& result) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(&v); + XMVECTOR q = XMLoadFloat4(&quat); + XMVECTOR X = XMVector3Rotate(v1, q); + X = XMVectorSelect(v1, X, g_XMSelect1110); // result.w = v.w + XMStoreFloat4(&result, X); +} + +inline Vector4 Vector4::Transform(const Vector4& v, const Quaternion& quat) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(&v); + XMVECTOR q = XMLoadFloat4(&quat); + XMVECTOR X = XMVector3Rotate(v1, q); + X = XMVectorSelect(v1, X, g_XMSelect1110); // result.w = v.w + + Vector4 result; + XMStoreFloat4(&result, X); + return result; +} + +inline void 
Vector4::Transform(const Vector4& v, const Matrix& m, Vector4& result) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(&v); + XMMATRIX M = XMLoadFloat4x4(&m); + XMVECTOR X = XMVector4Transform(v1, M); + XMStoreFloat4(&result, X); +} + +inline Vector4 Vector4::Transform(const Vector4& v, const Matrix& m) noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(&v); + XMMATRIX M = XMLoadFloat4x4(&m); + XMVECTOR X = XMVector4Transform(v1, M); + + Vector4 result; + XMStoreFloat4(&result, X); + return result; +} + +_Use_decl_annotations_ +inline void Vector4::Transform(const Vector4* varray, size_t count, const Matrix& m, Vector4* resultArray) noexcept +{ + using namespace DirectX; + XMMATRIX M = XMLoadFloat4x4(&m); + XMVector4TransformStream(resultArray, sizeof(XMFLOAT4), varray, sizeof(XMFLOAT4), count, M); +} + + +/**************************************************************************** + * + * Matrix + * + ****************************************************************************/ + +//------------------------------------------------------------------------------ +// Comparision operators +//------------------------------------------------------------------------------ + +inline bool Matrix::operator == (const Matrix& M) const noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(reinterpret_cast(&_11)); + XMVECTOR x2 = XMLoadFloat4(reinterpret_cast(&_21)); + XMVECTOR x3 = XMLoadFloat4(reinterpret_cast(&_31)); + XMVECTOR x4 = XMLoadFloat4(reinterpret_cast(&_41)); + + XMVECTOR y1 = XMLoadFloat4(reinterpret_cast(&M._11)); + XMVECTOR y2 = XMLoadFloat4(reinterpret_cast(&M._21)); + XMVECTOR y3 = XMLoadFloat4(reinterpret_cast(&M._31)); + XMVECTOR y4 = XMLoadFloat4(reinterpret_cast(&M._41)); + + return (XMVector4Equal(x1, y1) + && XMVector4Equal(x2, y2) + && XMVector4Equal(x3, y3) + && XMVector4Equal(x4, y4)) != 0; +} + +inline bool Matrix::operator != (const Matrix& M) const noexcept +{ + using namespace DirectX; + 
XMVECTOR x1 = XMLoadFloat4(reinterpret_cast(&_11)); + XMVECTOR x2 = XMLoadFloat4(reinterpret_cast(&_21)); + XMVECTOR x3 = XMLoadFloat4(reinterpret_cast(&_31)); + XMVECTOR x4 = XMLoadFloat4(reinterpret_cast(&_41)); + + XMVECTOR y1 = XMLoadFloat4(reinterpret_cast(&M._11)); + XMVECTOR y2 = XMLoadFloat4(reinterpret_cast(&M._21)); + XMVECTOR y3 = XMLoadFloat4(reinterpret_cast(&M._31)); + XMVECTOR y4 = XMLoadFloat4(reinterpret_cast(&M._41)); + + return (XMVector4NotEqual(x1, y1) + || XMVector4NotEqual(x2, y2) + || XMVector4NotEqual(x3, y3) + || XMVector4NotEqual(x4, y4)) != 0; +} + +//------------------------------------------------------------------------------ +// Assignment operators +//------------------------------------------------------------------------------ + +inline Matrix::Matrix(const XMFLOAT3X3& M) noexcept +{ + _11 = M._11; _12 = M._12; _13 = M._13; _14 = 0.f; + _21 = M._21; _22 = M._22; _23 = M._23; _24 = 0.f; + _31 = M._31; _32 = M._32; _33 = M._33; _34 = 0.f; + _41 = 0.f; _42 = 0.f; _43 = 0.f; _44 = 1.f; +} + +inline Matrix::Matrix(const XMFLOAT4X3& M) noexcept +{ + _11 = M._11; _12 = M._12; _13 = M._13; _14 = 0.f; + _21 = M._21; _22 = M._22; _23 = M._23; _24 = 0.f; + _31 = M._31; _32 = M._32; _33 = M._33; _34 = 0.f; + _41 = M._41; _42 = M._42; _43 = M._43; _44 = 1.f; +} + +inline Matrix& Matrix::operator= (const XMFLOAT3X3& M) noexcept +{ + _11 = M._11; _12 = M._12; _13 = M._13; _14 = 0.f; + _21 = M._21; _22 = M._22; _23 = M._23; _24 = 0.f; + _31 = M._31; _32 = M._32; _33 = M._33; _34 = 0.f; + _41 = 0.f; _42 = 0.f; _43 = 0.f; _44 = 1.f; + return *this; +} + +inline Matrix& Matrix::operator= (const XMFLOAT4X3& M) noexcept +{ + _11 = M._11; _12 = M._12; _13 = M._13; _14 = 0.f; + _21 = M._21; _22 = M._22; _23 = M._23; _24 = 0.f; + _31 = M._31; _32 = M._32; _33 = M._33; _34 = 0.f; + _41 = M._41; _42 = M._42; _43 = M._43; _44 = 1.f; + return *this; +} + +inline Matrix& Matrix::operator+= (const Matrix& M) noexcept +{ + using namespace DirectX; + XMVECTOR x1 
= XMLoadFloat4(reinterpret_cast(&_11)); + XMVECTOR x2 = XMLoadFloat4(reinterpret_cast(&_21)); + XMVECTOR x3 = XMLoadFloat4(reinterpret_cast(&_31)); + XMVECTOR x4 = XMLoadFloat4(reinterpret_cast(&_41)); + + XMVECTOR y1 = XMLoadFloat4(reinterpret_cast(&M._11)); + XMVECTOR y2 = XMLoadFloat4(reinterpret_cast(&M._21)); + XMVECTOR y3 = XMLoadFloat4(reinterpret_cast(&M._31)); + XMVECTOR y4 = XMLoadFloat4(reinterpret_cast(&M._41)); + + x1 = XMVectorAdd(x1, y1); + x2 = XMVectorAdd(x2, y2); + x3 = XMVectorAdd(x3, y3); + x4 = XMVectorAdd(x4, y4); + + XMStoreFloat4(reinterpret_cast(&_11), x1); + XMStoreFloat4(reinterpret_cast(&_21), x2); + XMStoreFloat4(reinterpret_cast(&_31), x3); + XMStoreFloat4(reinterpret_cast(&_41), x4); + return *this; +} + +inline Matrix& Matrix::operator-= (const Matrix& M) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(reinterpret_cast(&_11)); + XMVECTOR x2 = XMLoadFloat4(reinterpret_cast(&_21)); + XMVECTOR x3 = XMLoadFloat4(reinterpret_cast(&_31)); + XMVECTOR x4 = XMLoadFloat4(reinterpret_cast(&_41)); + + XMVECTOR y1 = XMLoadFloat4(reinterpret_cast(&M._11)); + XMVECTOR y2 = XMLoadFloat4(reinterpret_cast(&M._21)); + XMVECTOR y3 = XMLoadFloat4(reinterpret_cast(&M._31)); + XMVECTOR y4 = XMLoadFloat4(reinterpret_cast(&M._41)); + + x1 = XMVectorSubtract(x1, y1); + x2 = XMVectorSubtract(x2, y2); + x3 = XMVectorSubtract(x3, y3); + x4 = XMVectorSubtract(x4, y4); + + XMStoreFloat4(reinterpret_cast(&_11), x1); + XMStoreFloat4(reinterpret_cast(&_21), x2); + XMStoreFloat4(reinterpret_cast(&_31), x3); + XMStoreFloat4(reinterpret_cast(&_41), x4); + return *this; +} + +inline Matrix& Matrix::operator*= (const Matrix& M) noexcept +{ + using namespace DirectX; + XMMATRIX M1 = XMLoadFloat4x4(this); + XMMATRIX M2 = XMLoadFloat4x4(&M); + XMMATRIX X = XMMatrixMultiply(M1, M2); + XMStoreFloat4x4(this, X); + return *this; +} + +inline Matrix& Matrix::operator*= (float S) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = 
XMLoadFloat4(reinterpret_cast(&_11)); + XMVECTOR x2 = XMLoadFloat4(reinterpret_cast(&_21)); + XMVECTOR x3 = XMLoadFloat4(reinterpret_cast(&_31)); + XMVECTOR x4 = XMLoadFloat4(reinterpret_cast(&_41)); + + x1 = XMVectorScale(x1, S); + x2 = XMVectorScale(x2, S); + x3 = XMVectorScale(x3, S); + x4 = XMVectorScale(x4, S); + + XMStoreFloat4(reinterpret_cast(&_11), x1); + XMStoreFloat4(reinterpret_cast(&_21), x2); + XMStoreFloat4(reinterpret_cast(&_31), x3); + XMStoreFloat4(reinterpret_cast(&_41), x4); + return *this; +} + +inline Matrix& Matrix::operator/= (float S) noexcept +{ + using namespace DirectX; + assert(S != 0.f); + XMVECTOR x1 = XMLoadFloat4(reinterpret_cast(&_11)); + XMVECTOR x2 = XMLoadFloat4(reinterpret_cast(&_21)); + XMVECTOR x3 = XMLoadFloat4(reinterpret_cast(&_31)); + XMVECTOR x4 = XMLoadFloat4(reinterpret_cast(&_41)); + + float rs = 1.f / S; + + x1 = XMVectorScale(x1, rs); + x2 = XMVectorScale(x2, rs); + x3 = XMVectorScale(x3, rs); + x4 = XMVectorScale(x4, rs); + + XMStoreFloat4(reinterpret_cast(&_11), x1); + XMStoreFloat4(reinterpret_cast(&_21), x2); + XMStoreFloat4(reinterpret_cast(&_31), x3); + XMStoreFloat4(reinterpret_cast(&_41), x4); + return *this; +} + +inline Matrix& Matrix::operator/= (const Matrix& M) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(reinterpret_cast(&_11)); + XMVECTOR x2 = XMLoadFloat4(reinterpret_cast(&_21)); + XMVECTOR x3 = XMLoadFloat4(reinterpret_cast(&_31)); + XMVECTOR x4 = XMLoadFloat4(reinterpret_cast(&_41)); + + XMVECTOR y1 = XMLoadFloat4(reinterpret_cast(&M._11)); + XMVECTOR y2 = XMLoadFloat4(reinterpret_cast(&M._21)); + XMVECTOR y3 = XMLoadFloat4(reinterpret_cast(&M._31)); + XMVECTOR y4 = XMLoadFloat4(reinterpret_cast(&M._41)); + + x1 = XMVectorDivide(x1, y1); + x2 = XMVectorDivide(x2, y2); + x3 = XMVectorDivide(x3, y3); + x4 = XMVectorDivide(x4, y4); + + XMStoreFloat4(reinterpret_cast(&_11), x1); + XMStoreFloat4(reinterpret_cast(&_21), x2); + XMStoreFloat4(reinterpret_cast(&_31), x3); + 
XMStoreFloat4(reinterpret_cast(&_41), x4); + return *this; +} + +//------------------------------------------------------------------------------ +// Urnary operators +//------------------------------------------------------------------------------ + +inline Matrix Matrix::operator- () const noexcept +{ + using namespace DirectX; + XMVECTOR v1 = XMLoadFloat4(reinterpret_cast(&_11)); + XMVECTOR v2 = XMLoadFloat4(reinterpret_cast(&_21)); + XMVECTOR v3 = XMLoadFloat4(reinterpret_cast(&_31)); + XMVECTOR v4 = XMLoadFloat4(reinterpret_cast(&_41)); + + v1 = XMVectorNegate(v1); + v2 = XMVectorNegate(v2); + v3 = XMVectorNegate(v3); + v4 = XMVectorNegate(v4); + + Matrix R; + XMStoreFloat4(reinterpret_cast(&R._11), v1); + XMStoreFloat4(reinterpret_cast(&R._21), v2); + XMStoreFloat4(reinterpret_cast(&R._31), v3); + XMStoreFloat4(reinterpret_cast(&R._41), v4); + return R; +} + +//------------------------------------------------------------------------------ +// Binary operators +//------------------------------------------------------------------------------ + +inline Matrix operator+ (const Matrix& M1, const Matrix& M2) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(reinterpret_cast(&M1._11)); + XMVECTOR x2 = XMLoadFloat4(reinterpret_cast(&M1._21)); + XMVECTOR x3 = XMLoadFloat4(reinterpret_cast(&M1._31)); + XMVECTOR x4 = XMLoadFloat4(reinterpret_cast(&M1._41)); + + XMVECTOR y1 = XMLoadFloat4(reinterpret_cast(&M2._11)); + XMVECTOR y2 = XMLoadFloat4(reinterpret_cast(&M2._21)); + XMVECTOR y3 = XMLoadFloat4(reinterpret_cast(&M2._31)); + XMVECTOR y4 = XMLoadFloat4(reinterpret_cast(&M2._41)); + + x1 = XMVectorAdd(x1, y1); + x2 = XMVectorAdd(x2, y2); + x3 = XMVectorAdd(x3, y3); + x4 = XMVectorAdd(x4, y4); + + Matrix R; + XMStoreFloat4(reinterpret_cast(&R._11), x1); + XMStoreFloat4(reinterpret_cast(&R._21), x2); + XMStoreFloat4(reinterpret_cast(&R._31), x3); + XMStoreFloat4(reinterpret_cast(&R._41), x4); + return R; +} + +inline Matrix operator- (const Matrix& M1, 
const Matrix& M2) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(reinterpret_cast(&M1._11)); + XMVECTOR x2 = XMLoadFloat4(reinterpret_cast(&M1._21)); + XMVECTOR x3 = XMLoadFloat4(reinterpret_cast(&M1._31)); + XMVECTOR x4 = XMLoadFloat4(reinterpret_cast(&M1._41)); + + XMVECTOR y1 = XMLoadFloat4(reinterpret_cast(&M2._11)); + XMVECTOR y2 = XMLoadFloat4(reinterpret_cast(&M2._21)); + XMVECTOR y3 = XMLoadFloat4(reinterpret_cast(&M2._31)); + XMVECTOR y4 = XMLoadFloat4(reinterpret_cast(&M2._41)); + + x1 = XMVectorSubtract(x1, y1); + x2 = XMVectorSubtract(x2, y2); + x3 = XMVectorSubtract(x3, y3); + x4 = XMVectorSubtract(x4, y4); + + Matrix R; + XMStoreFloat4(reinterpret_cast(&R._11), x1); + XMStoreFloat4(reinterpret_cast(&R._21), x2); + XMStoreFloat4(reinterpret_cast(&R._31), x3); + XMStoreFloat4(reinterpret_cast(&R._41), x4); + return R; +} + +inline Matrix operator* (const Matrix& M1, const Matrix& M2) noexcept +{ + using namespace DirectX; + XMMATRIX m1 = XMLoadFloat4x4(&M1); + XMMATRIX m2 = XMLoadFloat4x4(&M2); + XMMATRIX X = XMMatrixMultiply(m1, m2); + + Matrix R; + XMStoreFloat4x4(&R, X); + return R; +} + +inline Matrix operator* (const Matrix& M, float S) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(reinterpret_cast(&M._11)); + XMVECTOR x2 = XMLoadFloat4(reinterpret_cast(&M._21)); + XMVECTOR x3 = XMLoadFloat4(reinterpret_cast(&M._31)); + XMVECTOR x4 = XMLoadFloat4(reinterpret_cast(&M._41)); + + x1 = XMVectorScale(x1, S); + x2 = XMVectorScale(x2, S); + x3 = XMVectorScale(x3, S); + x4 = XMVectorScale(x4, S); + + Matrix R; + XMStoreFloat4(reinterpret_cast(&R._11), x1); + XMStoreFloat4(reinterpret_cast(&R._21), x2); + XMStoreFloat4(reinterpret_cast(&R._31), x3); + XMStoreFloat4(reinterpret_cast(&R._41), x4); + return R; +} + +inline Matrix operator/ (const Matrix& M, float S) noexcept +{ + using namespace DirectX; + assert(S != 0.f); + + XMVECTOR x1 = XMLoadFloat4(reinterpret_cast(&M._11)); + XMVECTOR x2 = 
XMLoadFloat4(reinterpret_cast(&M._21)); + XMVECTOR x3 = XMLoadFloat4(reinterpret_cast(&M._31)); + XMVECTOR x4 = XMLoadFloat4(reinterpret_cast(&M._41)); + + float rs = 1.f / S; + + x1 = XMVectorScale(x1, rs); + x2 = XMVectorScale(x2, rs); + x3 = XMVectorScale(x3, rs); + x4 = XMVectorScale(x4, rs); + + Matrix R; + XMStoreFloat4(reinterpret_cast(&R._11), x1); + XMStoreFloat4(reinterpret_cast(&R._21), x2); + XMStoreFloat4(reinterpret_cast(&R._31), x3); + XMStoreFloat4(reinterpret_cast(&R._41), x4); + return R; +} + +inline Matrix operator/ (const Matrix& M1, const Matrix& M2) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(reinterpret_cast(&M1._11)); + XMVECTOR x2 = XMLoadFloat4(reinterpret_cast(&M1._21)); + XMVECTOR x3 = XMLoadFloat4(reinterpret_cast(&M1._31)); + XMVECTOR x4 = XMLoadFloat4(reinterpret_cast(&M1._41)); + + XMVECTOR y1 = XMLoadFloat4(reinterpret_cast(&M2._11)); + XMVECTOR y2 = XMLoadFloat4(reinterpret_cast(&M2._21)); + XMVECTOR y3 = XMLoadFloat4(reinterpret_cast(&M2._31)); + XMVECTOR y4 = XMLoadFloat4(reinterpret_cast(&M2._41)); + + x1 = XMVectorDivide(x1, y1); + x2 = XMVectorDivide(x2, y2); + x3 = XMVectorDivide(x3, y3); + x4 = XMVectorDivide(x4, y4); + + Matrix R; + XMStoreFloat4(reinterpret_cast(&R._11), x1); + XMStoreFloat4(reinterpret_cast(&R._21), x2); + XMStoreFloat4(reinterpret_cast(&R._31), x3); + XMStoreFloat4(reinterpret_cast(&R._41), x4); + return R; +} + +inline Matrix operator* (float S, const Matrix& M) noexcept +{ + using namespace DirectX; + + XMVECTOR x1 = XMLoadFloat4(reinterpret_cast(&M._11)); + XMVECTOR x2 = XMLoadFloat4(reinterpret_cast(&M._21)); + XMVECTOR x3 = XMLoadFloat4(reinterpret_cast(&M._31)); + XMVECTOR x4 = XMLoadFloat4(reinterpret_cast(&M._41)); + + x1 = XMVectorScale(x1, S); + x2 = XMVectorScale(x2, S); + x3 = XMVectorScale(x3, S); + x4 = XMVectorScale(x4, S); + + Matrix R; + XMStoreFloat4(reinterpret_cast(&R._11), x1); + XMStoreFloat4(reinterpret_cast(&R._21), x2); + 
XMStoreFloat4(reinterpret_cast(&R._31), x3); + XMStoreFloat4(reinterpret_cast(&R._41), x4); + return R; +} + +//------------------------------------------------------------------------------ +// Matrix operations +//------------------------------------------------------------------------------ + +inline bool Matrix::Decompose(Vector3& scale, Quaternion& rotation, Vector3& translation) noexcept +{ + using namespace DirectX; + + XMVECTOR s, r, t; + + if (!XMMatrixDecompose(&s, &r, &t, *this)) + return false; + + XMStoreFloat3(&scale, s); + XMStoreFloat4(&rotation, r); + XMStoreFloat3(&translation, t); + + return true; +} + +inline Matrix Matrix::Transpose() const noexcept +{ + using namespace DirectX; + XMMATRIX M = XMLoadFloat4x4(this); + Matrix R; + XMStoreFloat4x4(&R, XMMatrixTranspose(M)); + return R; +} + +inline void Matrix::Transpose(Matrix& result) const noexcept +{ + using namespace DirectX; + XMMATRIX M = XMLoadFloat4x4(this); + XMStoreFloat4x4(&result, XMMatrixTranspose(M)); +} + +inline Matrix Matrix::Invert() const noexcept +{ + using namespace DirectX; + XMMATRIX M = XMLoadFloat4x4(this); + Matrix R; + XMVECTOR det; + XMStoreFloat4x4(&R, XMMatrixInverse(&det, M)); + return R; +} + +inline void Matrix::Invert(Matrix& result) const noexcept +{ + using namespace DirectX; + XMMATRIX M = XMLoadFloat4x4(this); + XMVECTOR det; + XMStoreFloat4x4(&result, XMMatrixInverse(&det, M)); +} + +inline float Matrix::Determinant() const noexcept +{ + using namespace DirectX; + XMMATRIX M = XMLoadFloat4x4(this); + return XMVectorGetX(XMMatrixDeterminant(M)); +} + +//------------------------------------------------------------------------------ +// Static functions +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline Matrix Matrix::CreateBillboard( + const Vector3& object, + const Vector3& cameraPosition, + const Vector3& cameraUp, + const Vector3* cameraForward) noexcept +{ + using namespace DirectX; + 
XMVECTOR O = XMLoadFloat3(&object); + XMVECTOR C = XMLoadFloat3(&cameraPosition); + XMVECTOR Z = XMVectorSubtract(O, C); + + XMVECTOR N = XMVector3LengthSq(Z); + if (XMVector3Less(N, g_XMEpsilon)) + { + if (cameraForward) + { + XMVECTOR F = XMLoadFloat3(cameraForward); + Z = XMVectorNegate(F); + } + else + Z = g_XMNegIdentityR2; + } + else + { + Z = XMVector3Normalize(Z); + } + + XMVECTOR up = XMLoadFloat3(&cameraUp); + XMVECTOR X = XMVector3Cross(up, Z); + X = XMVector3Normalize(X); + + XMVECTOR Y = XMVector3Cross(Z, X); + + XMMATRIX M; + M.r[0] = X; + M.r[1] = Y; + M.r[2] = Z; + M.r[3] = XMVectorSetW(O, 1.f); + + Matrix R; + XMStoreFloat4x4(&R, M); + return R; +} + +_Use_decl_annotations_ +inline Matrix Matrix::CreateConstrainedBillboard( + const Vector3& object, + const Vector3& cameraPosition, + const Vector3& rotateAxis, + const Vector3* cameraForward, + const Vector3* objectForward) noexcept +{ + using namespace DirectX; + + static const XMVECTORF32 s_minAngle = { { { 0.99825467075f, 0.99825467075f, 0.99825467075f, 0.99825467075f } } }; // 1.0 - XMConvertToRadians( 0.1f ); + + XMVECTOR O = XMLoadFloat3(&object); + XMVECTOR C = XMLoadFloat3(&cameraPosition); + XMVECTOR faceDir = XMVectorSubtract(O, C); + + XMVECTOR N = XMVector3LengthSq(faceDir); + if (XMVector3Less(N, g_XMEpsilon)) + { + if (cameraForward) + { + XMVECTOR F = XMLoadFloat3(cameraForward); + faceDir = XMVectorNegate(F); + } + else + faceDir = g_XMNegIdentityR2; + } + else + { + faceDir = XMVector3Normalize(faceDir); + } + + XMVECTOR Y = XMLoadFloat3(&rotateAxis); + XMVECTOR X, Z; + + XMVECTOR dot = XMVectorAbs(XMVector3Dot(Y, faceDir)); + if (XMVector3Greater(dot, s_minAngle)) + { + if (objectForward) + { + Z = XMLoadFloat3(objectForward); + dot = XMVectorAbs(XMVector3Dot(Y, Z)); + if (XMVector3Greater(dot, s_minAngle)) + { + dot = XMVectorAbs(XMVector3Dot(Y, g_XMNegIdentityR2)); + Z = (XMVector3Greater(dot, s_minAngle)) ? 
g_XMIdentityR0 : g_XMNegIdentityR2; + } + } + else + { + dot = XMVectorAbs(XMVector3Dot(Y, g_XMNegIdentityR2)); + Z = (XMVector3Greater(dot, s_minAngle)) ? g_XMIdentityR0 : g_XMNegIdentityR2; + } + + X = XMVector3Cross(Y, Z); + X = XMVector3Normalize(X); + + Z = XMVector3Cross(X, Y); + Z = XMVector3Normalize(Z); + } + else + { + X = XMVector3Cross(Y, faceDir); + X = XMVector3Normalize(X); + + Z = XMVector3Cross(X, Y); + Z = XMVector3Normalize(Z); + } + + XMMATRIX M; + M.r[0] = X; + M.r[1] = Y; + M.r[2] = Z; + M.r[3] = XMVectorSetW(O, 1.f); + + Matrix R; + XMStoreFloat4x4(&R, M); + return R; +} + +inline Matrix Matrix::CreateTranslation(const Vector3& position) noexcept +{ + using namespace DirectX; + Matrix R; + XMStoreFloat4x4(&R, XMMatrixTranslation(position.x, position.y, position.z)); + return R; +} + +inline Matrix Matrix::CreateTranslation(float x, float y, float z) noexcept +{ + using namespace DirectX; + Matrix R; + XMStoreFloat4x4(&R, XMMatrixTranslation(x, y, z)); + return R; +} + +inline Matrix Matrix::CreateScale(const Vector3& scales) noexcept +{ + using namespace DirectX; + Matrix R; + XMStoreFloat4x4(&R, XMMatrixScaling(scales.x, scales.y, scales.z)); + return R; +} + +inline Matrix Matrix::CreateScale(float xs, float ys, float zs) noexcept +{ + using namespace DirectX; + Matrix R; + XMStoreFloat4x4(&R, XMMatrixScaling(xs, ys, zs)); + return R; +} + +inline Matrix Matrix::CreateScale(float scale) noexcept +{ + using namespace DirectX; + Matrix R; + XMStoreFloat4x4(&R, XMMatrixScaling(scale, scale, scale)); + return R; +} + +inline Matrix Matrix::CreateRotationX(float radians) noexcept +{ + using namespace DirectX; + Matrix R; + XMStoreFloat4x4(&R, XMMatrixRotationX(radians)); + return R; +} + +inline Matrix Matrix::CreateRotationY(float radians) noexcept +{ + using namespace DirectX; + Matrix R; + XMStoreFloat4x4(&R, XMMatrixRotationY(radians)); + return R; +} + +inline Matrix Matrix::CreateRotationZ(float radians) noexcept +{ + using namespace 
DirectX; + Matrix R; + XMStoreFloat4x4(&R, XMMatrixRotationZ(radians)); + return R; +} + +inline Matrix Matrix::CreateFromAxisAngle(const Vector3& axis, float angle) noexcept +{ + using namespace DirectX; + Matrix R; + XMVECTOR a = XMLoadFloat3(&axis); + XMStoreFloat4x4(&R, XMMatrixRotationAxis(a, angle)); + return R; +} + +inline Matrix Matrix::CreatePerspectiveFieldOfView(float fov, float aspectRatio, float nearPlane, float farPlane) noexcept +{ + using namespace DirectX; + Matrix R; + XMStoreFloat4x4(&R, XMMatrixPerspectiveFovRH(fov, aspectRatio, nearPlane, farPlane)); + return R; +} + +inline Matrix Matrix::CreatePerspective(float width, float height, float nearPlane, float farPlane) noexcept +{ + using namespace DirectX; + Matrix R; + XMStoreFloat4x4(&R, XMMatrixPerspectiveRH(width, height, nearPlane, farPlane)); + return R; +} + +inline Matrix Matrix::CreatePerspectiveOffCenter(float left, float right, float bottom, float top, float nearPlane, float farPlane) noexcept +{ + using namespace DirectX; + Matrix R; + XMStoreFloat4x4(&R, XMMatrixPerspectiveOffCenterRH(left, right, bottom, top, nearPlane, farPlane)); + return R; +} + +inline Matrix Matrix::CreateOrthographic(float width, float height, float zNearPlane, float zFarPlane) noexcept +{ + using namespace DirectX; + Matrix R; + XMStoreFloat4x4(&R, XMMatrixOrthographicRH(width, height, zNearPlane, zFarPlane)); + return R; +} + +inline Matrix Matrix::CreateOrthographicOffCenter(float left, float right, float bottom, float top, float zNearPlane, float zFarPlane) noexcept +{ + using namespace DirectX; + Matrix R; + XMStoreFloat4x4(&R, XMMatrixOrthographicOffCenterRH(left, right, bottom, top, zNearPlane, zFarPlane)); + return R; +} + +inline Matrix Matrix::CreateLookAt(const Vector3& eye, const Vector3& target, const Vector3& up) noexcept +{ + using namespace DirectX; + Matrix R; + XMVECTOR eyev = XMLoadFloat3(&eye); + XMVECTOR targetv = XMLoadFloat3(&target); + XMVECTOR upv = XMLoadFloat3(&up); + 
XMStoreFloat4x4(&R, XMMatrixLookAtRH(eyev, targetv, upv)); + return R; +} + +inline Matrix Matrix::CreateWorld(const Vector3& position, const Vector3& forward, const Vector3& up) noexcept +{ + using namespace DirectX; + XMVECTOR zaxis = XMVector3Normalize(XMVectorNegate(XMLoadFloat3(&forward))); + XMVECTOR yaxis = XMLoadFloat3(&up); + XMVECTOR xaxis = XMVector3Normalize(XMVector3Cross(yaxis, zaxis)); + yaxis = XMVector3Cross(zaxis, xaxis); + + Matrix R; + XMStoreFloat3(reinterpret_cast(&R._11), xaxis); + XMStoreFloat3(reinterpret_cast(&R._21), yaxis); + XMStoreFloat3(reinterpret_cast(&R._31), zaxis); + R._14 = R._24 = R._34 = 0.f; + R._41 = position.x; R._42 = position.y; R._43 = position.z; + R._44 = 1.f; + return R; +} + +inline Matrix Matrix::CreateFromQuaternion(const Quaternion& rotation) noexcept +{ + using namespace DirectX; + Matrix R; + XMVECTOR quatv = XMLoadFloat4(&rotation); + XMStoreFloat4x4(&R, XMMatrixRotationQuaternion(quatv)); + return R; +} + +inline Matrix Matrix::CreateFromYawPitchRoll(float yaw, float pitch, float roll) noexcept +{ + using namespace DirectX; + Matrix R; + XMStoreFloat4x4(&R, XMMatrixRotationRollPitchYaw(pitch, yaw, roll)); + return R; +} + +inline Matrix Matrix::CreateShadow(const Vector3& lightDir, const Plane& plane) noexcept +{ + using namespace DirectX; + Matrix R; + XMVECTOR light = XMLoadFloat3(&lightDir); + XMVECTOR planev = XMLoadFloat4(&plane); + XMStoreFloat4x4(&R, XMMatrixShadow(planev, light)); + return R; +} + +inline Matrix Matrix::CreateReflection(const Plane& plane) noexcept +{ + using namespace DirectX; + Matrix R; + XMVECTOR planev = XMLoadFloat4(&plane); + XMStoreFloat4x4(&R, XMMatrixReflect(planev)); + return R; +} + +inline void Matrix::Lerp(const Matrix& M1, const Matrix& M2, float t, Matrix& result) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(reinterpret_cast(&M1._11)); + XMVECTOR x2 = XMLoadFloat4(reinterpret_cast(&M1._21)); + XMVECTOR x3 = XMLoadFloat4(reinterpret_cast(&M1._31)); 
+ XMVECTOR x4 = XMLoadFloat4(reinterpret_cast(&M1._41)); + + XMVECTOR y1 = XMLoadFloat4(reinterpret_cast(&M2._11)); + XMVECTOR y2 = XMLoadFloat4(reinterpret_cast(&M2._21)); + XMVECTOR y3 = XMLoadFloat4(reinterpret_cast(&M2._31)); + XMVECTOR y4 = XMLoadFloat4(reinterpret_cast(&M2._41)); + + x1 = XMVectorLerp(x1, y1, t); + x2 = XMVectorLerp(x2, y2, t); + x3 = XMVectorLerp(x3, y3, t); + x4 = XMVectorLerp(x4, y4, t); + + XMStoreFloat4(reinterpret_cast(&result._11), x1); + XMStoreFloat4(reinterpret_cast(&result._21), x2); + XMStoreFloat4(reinterpret_cast(&result._31), x3); + XMStoreFloat4(reinterpret_cast(&result._41), x4); +} + +inline Matrix Matrix::Lerp(const Matrix& M1, const Matrix& M2, float t) noexcept +{ + using namespace DirectX; + XMVECTOR x1 = XMLoadFloat4(reinterpret_cast(&M1._11)); + XMVECTOR x2 = XMLoadFloat4(reinterpret_cast(&M1._21)); + XMVECTOR x3 = XMLoadFloat4(reinterpret_cast(&M1._31)); + XMVECTOR x4 = XMLoadFloat4(reinterpret_cast(&M1._41)); + + XMVECTOR y1 = XMLoadFloat4(reinterpret_cast(&M2._11)); + XMVECTOR y2 = XMLoadFloat4(reinterpret_cast(&M2._21)); + XMVECTOR y3 = XMLoadFloat4(reinterpret_cast(&M2._31)); + XMVECTOR y4 = XMLoadFloat4(reinterpret_cast(&M2._41)); + + x1 = XMVectorLerp(x1, y1, t); + x2 = XMVectorLerp(x2, y2, t); + x3 = XMVectorLerp(x3, y3, t); + x4 = XMVectorLerp(x4, y4, t); + + Matrix result; + XMStoreFloat4(reinterpret_cast(&result._11), x1); + XMStoreFloat4(reinterpret_cast(&result._21), x2); + XMStoreFloat4(reinterpret_cast(&result._31), x3); + XMStoreFloat4(reinterpret_cast(&result._41), x4); + return result; +} + +inline void Matrix::Transform(const Matrix& M, const Quaternion& rotation, Matrix& result) noexcept +{ + using namespace DirectX; + XMVECTOR quatv = XMLoadFloat4(&rotation); + + XMMATRIX M0 = XMLoadFloat4x4(&M); + XMMATRIX M1 = XMMatrixRotationQuaternion(quatv); + + XMStoreFloat4x4(&result, XMMatrixMultiply(M0, M1)); +} + +inline Matrix Matrix::Transform(const Matrix& M, const Quaternion& rotation) noexcept +{ + 
using namespace DirectX; + XMVECTOR quatv = XMLoadFloat4(&rotation); + + XMMATRIX M0 = XMLoadFloat4x4(&M); + XMMATRIX M1 = XMMatrixRotationQuaternion(quatv); + + Matrix result; + XMStoreFloat4x4(&result, XMMatrixMultiply(M0, M1)); + return result; +} + + +/**************************************************************************** + * + * Plane + * + ****************************************************************************/ + +inline Plane::Plane(const Vector3& point1, const Vector3& point2, const Vector3& point3) noexcept +{ + using namespace DirectX; + XMVECTOR P0 = XMLoadFloat3(&point1); + XMVECTOR P1 = XMLoadFloat3(&point2); + XMVECTOR P2 = XMLoadFloat3(&point3); + XMStoreFloat4(this, XMPlaneFromPoints(P0, P1, P2)); +} + +inline Plane::Plane(const Vector3& point, const Vector3& normal) noexcept +{ + using namespace DirectX; + XMVECTOR P = XMLoadFloat3(&point); + XMVECTOR N = XMLoadFloat3(&normal); + XMStoreFloat4(this, XMPlaneFromPointNormal(P, N)); +} + +//------------------------------------------------------------------------------ +// Comparision operators +//------------------------------------------------------------------------------ + +inline bool Plane::operator == (const Plane& p) const noexcept +{ + using namespace DirectX; + XMVECTOR p1 = XMLoadFloat4(this); + XMVECTOR p2 = XMLoadFloat4(&p); + return XMPlaneEqual(p1, p2); +} + +inline bool Plane::operator != (const Plane& p) const noexcept +{ + using namespace DirectX; + XMVECTOR p1 = XMLoadFloat4(this); + XMVECTOR p2 = XMLoadFloat4(&p); + return XMPlaneNotEqual(p1, p2); +} + +//------------------------------------------------------------------------------ +// Plane operations +//------------------------------------------------------------------------------ + +inline void Plane::Normalize() noexcept +{ + using namespace DirectX; + XMVECTOR p = XMLoadFloat4(this); + XMStoreFloat4(this, XMPlaneNormalize(p)); +} + +inline void Plane::Normalize(Plane& result) const noexcept +{ + using namespace 
DirectX; + XMVECTOR p = XMLoadFloat4(this); + XMStoreFloat4(&result, XMPlaneNormalize(p)); +} + +inline float Plane::Dot(const Vector4& v) const noexcept +{ + using namespace DirectX; + XMVECTOR p = XMLoadFloat4(this); + XMVECTOR v0 = XMLoadFloat4(&v); + return XMVectorGetX(XMPlaneDot(p, v0)); +} + +inline float Plane::DotCoordinate(const Vector3& position) const noexcept +{ + using namespace DirectX; + XMVECTOR p = XMLoadFloat4(this); + XMVECTOR v0 = XMLoadFloat3(&position); + return XMVectorGetX(XMPlaneDotCoord(p, v0)); +} + +inline float Plane::DotNormal(const Vector3& normal) const noexcept +{ + using namespace DirectX; + XMVECTOR p = XMLoadFloat4(this); + XMVECTOR n0 = XMLoadFloat3(&normal); + return XMVectorGetX(XMPlaneDotNormal(p, n0)); +} + +//------------------------------------------------------------------------------ +// Static functions +//------------------------------------------------------------------------------ + +inline void Plane::Transform(const Plane& plane, const Matrix& M, Plane& result) noexcept +{ + using namespace DirectX; + XMVECTOR p = XMLoadFloat4(&plane); + XMMATRIX m0 = XMLoadFloat4x4(&M); + XMStoreFloat4(&result, XMPlaneTransform(p, m0)); +} + +inline Plane Plane::Transform(const Plane& plane, const Matrix& M) noexcept +{ + using namespace DirectX; + XMVECTOR p = XMLoadFloat4(&plane); + XMMATRIX m0 = XMLoadFloat4x4(&M); + + Plane result; + XMStoreFloat4(&result, XMPlaneTransform(p, m0)); + return result; +} + +inline void Plane::Transform(const Plane& plane, const Quaternion& rotation, Plane& result) noexcept +{ + using namespace DirectX; + XMVECTOR p = XMLoadFloat4(&plane); + XMVECTOR q = XMLoadFloat4(&rotation); + XMVECTOR X = XMVector3Rotate(p, q); + X = XMVectorSelect(p, X, g_XMSelect1110); // result.d = plane.d + XMStoreFloat4(&result, X); +} + +inline Plane Plane::Transform(const Plane& plane, const Quaternion& rotation) noexcept +{ + using namespace DirectX; + XMVECTOR p = XMLoadFloat4(&plane); + XMVECTOR q = 
XMLoadFloat4(&rotation); + XMVECTOR X = XMVector3Rotate(p, q); + X = XMVectorSelect(p, X, g_XMSelect1110); // result.d = plane.d + + Plane result; + XMStoreFloat4(&result, X); + return result; +} + + +/**************************************************************************** + * + * Quaternion + * + ****************************************************************************/ + +//------------------------------------------------------------------------------ +// Comparision operators +//------------------------------------------------------------------------------ + +inline bool Quaternion::operator == (const Quaternion& q) const noexcept +{ + using namespace DirectX; + XMVECTOR q1 = XMLoadFloat4(this); + XMVECTOR q2 = XMLoadFloat4(&q); + return XMQuaternionEqual(q1, q2); +} + +inline bool Quaternion::operator != (const Quaternion& q) const noexcept +{ + using namespace DirectX; + XMVECTOR q1 = XMLoadFloat4(this); + XMVECTOR q2 = XMLoadFloat4(&q); + return XMQuaternionNotEqual(q1, q2); +} + +//------------------------------------------------------------------------------ +// Assignment operators +//------------------------------------------------------------------------------ + +inline Quaternion& Quaternion::operator+= (const Quaternion& q) noexcept +{ + using namespace DirectX; + XMVECTOR q1 = XMLoadFloat4(this); + XMVECTOR q2 = XMLoadFloat4(&q); + XMStoreFloat4(this, XMVectorAdd(q1, q2)); + return *this; +} + +inline Quaternion& Quaternion::operator-= (const Quaternion& q) noexcept +{ + using namespace DirectX; + XMVECTOR q1 = XMLoadFloat4(this); + XMVECTOR q2 = XMLoadFloat4(&q); + XMStoreFloat4(this, XMVectorSubtract(q1, q2)); + return *this; +} + +inline Quaternion& Quaternion::operator*= (const Quaternion& q) noexcept +{ + using namespace DirectX; + XMVECTOR q1 = XMLoadFloat4(this); + XMVECTOR q2 = XMLoadFloat4(&q); + XMStoreFloat4(this, XMQuaternionMultiply(q1, q2)); + return *this; +} + +inline Quaternion& Quaternion::operator*= (float S) noexcept +{ + 
using namespace DirectX; + XMVECTOR q = XMLoadFloat4(this); + XMStoreFloat4(this, XMVectorScale(q, S)); + return *this; +} + +inline Quaternion& Quaternion::operator/= (const Quaternion& q) noexcept +{ + using namespace DirectX; + XMVECTOR q1 = XMLoadFloat4(this); + XMVECTOR q2 = XMLoadFloat4(&q); + q2 = XMQuaternionInverse(q2); + XMStoreFloat4(this, XMQuaternionMultiply(q1, q2)); + return *this; +} + +//------------------------------------------------------------------------------ +// Urnary operators +//------------------------------------------------------------------------------ + +inline Quaternion Quaternion::operator- () const noexcept +{ + using namespace DirectX; + XMVECTOR q = XMLoadFloat4(this); + + Quaternion R; + XMStoreFloat4(&R, XMVectorNegate(q)); + return R; +} + +//------------------------------------------------------------------------------ +// Binary operators +//------------------------------------------------------------------------------ + +inline Quaternion operator+ (const Quaternion& Q1, const Quaternion& Q2) noexcept +{ + using namespace DirectX; + XMVECTOR q1 = XMLoadFloat4(&Q1); + XMVECTOR q2 = XMLoadFloat4(&Q2); + + Quaternion R; + XMStoreFloat4(&R, XMVectorAdd(q1, q2)); + return R; +} + +inline Quaternion operator- (const Quaternion& Q1, const Quaternion& Q2) noexcept +{ + using namespace DirectX; + XMVECTOR q1 = XMLoadFloat4(&Q1); + XMVECTOR q2 = XMLoadFloat4(&Q2); + + Quaternion R; + XMStoreFloat4(&R, XMVectorSubtract(q1, q2)); + return R; +} + +inline Quaternion operator* (const Quaternion& Q1, const Quaternion& Q2) noexcept +{ + using namespace DirectX; + XMVECTOR q1 = XMLoadFloat4(&Q1); + XMVECTOR q2 = XMLoadFloat4(&Q2); + + Quaternion R; + XMStoreFloat4(&R, XMQuaternionMultiply(q1, q2)); + return R; +} + +inline Quaternion operator* (const Quaternion& Q, float S) noexcept +{ + using namespace DirectX; + XMVECTOR q = XMLoadFloat4(&Q); + + Quaternion R; + XMStoreFloat4(&R, XMVectorScale(q, S)); + return R; +} + +inline Quaternion 
operator/ (const Quaternion& Q1, const Quaternion& Q2) noexcept +{ + using namespace DirectX; + XMVECTOR q1 = XMLoadFloat4(&Q1); + XMVECTOR q2 = XMLoadFloat4(&Q2); + q2 = XMQuaternionInverse(q2); + + Quaternion R; + XMStoreFloat4(&R, XMQuaternionMultiply(q1, q2)); + return R; +} + +inline Quaternion operator* (float S, const Quaternion& Q) noexcept +{ + using namespace DirectX; + XMVECTOR q1 = XMLoadFloat4(&Q); + + Quaternion R; + XMStoreFloat4(&R, XMVectorScale(q1, S)); + return R; +} + +//------------------------------------------------------------------------------ +// Quaternion operations +//------------------------------------------------------------------------------ + +inline float Quaternion::Length() const noexcept +{ + using namespace DirectX; + XMVECTOR q = XMLoadFloat4(this); + return XMVectorGetX(XMQuaternionLength(q)); +} + +inline float Quaternion::LengthSquared() const noexcept +{ + using namespace DirectX; + XMVECTOR q = XMLoadFloat4(this); + return XMVectorGetX(XMQuaternionLengthSq(q)); +} + +inline void Quaternion::Normalize() noexcept +{ + using namespace DirectX; + XMVECTOR q = XMLoadFloat4(this); + XMStoreFloat4(this, XMQuaternionNormalize(q)); +} + +inline void Quaternion::Normalize(Quaternion& result) const noexcept +{ + using namespace DirectX; + XMVECTOR q = XMLoadFloat4(this); + XMStoreFloat4(&result, XMQuaternionNormalize(q)); +} + +inline void Quaternion::Conjugate() noexcept +{ + using namespace DirectX; + XMVECTOR q = XMLoadFloat4(this); + XMStoreFloat4(this, XMQuaternionConjugate(q)); +} + +inline void Quaternion::Conjugate(Quaternion& result) const noexcept +{ + using namespace DirectX; + XMVECTOR q = XMLoadFloat4(this); + XMStoreFloat4(&result, XMQuaternionConjugate(q)); +} + +inline void Quaternion::Inverse(Quaternion& result) const noexcept +{ + using namespace DirectX; + XMVECTOR q = XMLoadFloat4(this); + XMStoreFloat4(&result, XMQuaternionInverse(q)); +} + +inline float Quaternion::Dot(const Quaternion& q) const noexcept +{ + 
using namespace DirectX; + XMVECTOR q1 = XMLoadFloat4(this); + XMVECTOR q2 = XMLoadFloat4(&q); + return XMVectorGetX(XMQuaternionDot(q1, q2)); +} + +//------------------------------------------------------------------------------ +// Static functions +//------------------------------------------------------------------------------ + +inline Quaternion Quaternion::CreateFromAxisAngle(const Vector3& axis, float angle) noexcept +{ + using namespace DirectX; + XMVECTOR a = XMLoadFloat3(&axis); + + Quaternion R; + XMStoreFloat4(&R, XMQuaternionRotationAxis(a, angle)); + return R; +} + +inline Quaternion Quaternion::CreateFromYawPitchRoll(float yaw, float pitch, float roll) noexcept +{ + using namespace DirectX; + Quaternion R; + XMStoreFloat4(&R, XMQuaternionRotationRollPitchYaw(pitch, yaw, roll)); + return R; +} + +inline Quaternion Quaternion::CreateFromRotationMatrix(const Matrix& M) noexcept +{ + using namespace DirectX; + XMMATRIX M0 = XMLoadFloat4x4(&M); + + Quaternion R; + XMStoreFloat4(&R, XMQuaternionRotationMatrix(M0)); + return R; +} + +inline void Quaternion::Lerp(const Quaternion& q1, const Quaternion& q2, float t, Quaternion& result) noexcept +{ + using namespace DirectX; + XMVECTOR Q0 = XMLoadFloat4(&q1); + XMVECTOR Q1 = XMLoadFloat4(&q2); + + XMVECTOR dot = XMVector4Dot(Q0, Q1); + + XMVECTOR R; + if (XMVector4GreaterOrEqual(dot, XMVectorZero())) + { + R = XMVectorLerp(Q0, Q1, t); + } + else + { + XMVECTOR tv = XMVectorReplicate(t); + XMVECTOR t1v = XMVectorReplicate(1.f - t); + XMVECTOR X0 = XMVectorMultiply(Q0, t1v); + XMVECTOR X1 = XMVectorMultiply(Q1, tv); + R = XMVectorSubtract(X0, X1); + } + + XMStoreFloat4(&result, XMQuaternionNormalize(R)); +} + +inline Quaternion Quaternion::Lerp(const Quaternion& q1, const Quaternion& q2, float t) noexcept +{ + using namespace DirectX; + XMVECTOR Q0 = XMLoadFloat4(&q1); + XMVECTOR Q1 = XMLoadFloat4(&q2); + + XMVECTOR dot = XMVector4Dot(Q0, Q1); + + XMVECTOR R; + if (XMVector4GreaterOrEqual(dot, XMVectorZero())) 
+ { + R = XMVectorLerp(Q0, Q1, t); + } + else + { + XMVECTOR tv = XMVectorReplicate(t); + XMVECTOR t1v = XMVectorReplicate(1.f - t); + XMVECTOR X0 = XMVectorMultiply(Q0, t1v); + XMVECTOR X1 = XMVectorMultiply(Q1, tv); + R = XMVectorSubtract(X0, X1); + } + + Quaternion result; + XMStoreFloat4(&result, XMQuaternionNormalize(R)); + return result; +} + +inline void Quaternion::Slerp(const Quaternion& q1, const Quaternion& q2, float t, Quaternion& result) noexcept +{ + using namespace DirectX; + XMVECTOR Q0 = XMLoadFloat4(&q1); + XMVECTOR Q1 = XMLoadFloat4(&q2); + XMStoreFloat4(&result, XMQuaternionSlerp(Q0, Q1, t)); +} + +inline Quaternion Quaternion::Slerp(const Quaternion& q1, const Quaternion& q2, float t) noexcept +{ + using namespace DirectX; + XMVECTOR Q0 = XMLoadFloat4(&q1); + XMVECTOR Q1 = XMLoadFloat4(&q2); + + Quaternion result; + XMStoreFloat4(&result, XMQuaternionSlerp(Q0, Q1, t)); + return result; +} + +inline void Quaternion::Concatenate(const Quaternion& q1, const Quaternion& q2, Quaternion& result) noexcept +{ + using namespace DirectX; + XMVECTOR Q0 = XMLoadFloat4(&q1); + XMVECTOR Q1 = XMLoadFloat4(&q2); + XMStoreFloat4(&result, XMQuaternionMultiply(Q1, Q0)); +} + +inline Quaternion Quaternion::Concatenate(const Quaternion& q1, const Quaternion& q2) noexcept +{ + using namespace DirectX; + XMVECTOR Q0 = XMLoadFloat4(&q1); + XMVECTOR Q1 = XMLoadFloat4(&q2); + + Quaternion result; + XMStoreFloat4(&result, XMQuaternionMultiply(Q1, Q0)); + return result; +} + + +/**************************************************************************** + * + * Color + * + ****************************************************************************/ + +inline Color::Color(const DirectX::PackedVector::XMCOLOR& Packed) noexcept +{ + using namespace DirectX; + XMStoreFloat4(this, PackedVector::XMLoadColor(&Packed)); +} + +inline Color::Color(const DirectX::PackedVector::XMUBYTEN4& Packed) noexcept +{ + using namespace DirectX; + XMStoreFloat4(this, 
PackedVector::XMLoadUByteN4(&Packed)); +} + +//------------------------------------------------------------------------------ +// Comparision operators +//------------------------------------------------------------------------------ +inline bool Color::operator == (const Color& c) const noexcept +{ + using namespace DirectX; + XMVECTOR c1 = XMLoadFloat4(this); + XMVECTOR c2 = XMLoadFloat4(&c); + return XMColorEqual(c1, c2); +} + +inline bool Color::operator != (const Color& c) const noexcept +{ + using namespace DirectX; + XMVECTOR c1 = XMLoadFloat4(this); + XMVECTOR c2 = XMLoadFloat4(&c); + return XMColorNotEqual(c1, c2); +} + +//------------------------------------------------------------------------------ +// Assignment operators +//------------------------------------------------------------------------------ + +inline Color& Color::operator= (const DirectX::PackedVector::XMCOLOR& Packed) noexcept +{ + using namespace DirectX; + XMStoreFloat4(this, PackedVector::XMLoadColor(&Packed)); + return *this; +} + +inline Color& Color::operator= (const DirectX::PackedVector::XMUBYTEN4& Packed) noexcept +{ + using namespace DirectX; + XMStoreFloat4(this, PackedVector::XMLoadUByteN4(&Packed)); + return *this; +} + +inline Color& Color::operator+= (const Color& c) noexcept +{ + using namespace DirectX; + XMVECTOR c1 = XMLoadFloat4(this); + XMVECTOR c2 = XMLoadFloat4(&c); + XMStoreFloat4(this, XMVectorAdd(c1, c2)); + return *this; +} + +inline Color& Color::operator-= (const Color& c) noexcept +{ + using namespace DirectX; + XMVECTOR c1 = XMLoadFloat4(this); + XMVECTOR c2 = XMLoadFloat4(&c); + XMStoreFloat4(this, XMVectorSubtract(c1, c2)); + return *this; +} + +inline Color& Color::operator*= (const Color& c) noexcept +{ + using namespace DirectX; + XMVECTOR c1 = XMLoadFloat4(this); + XMVECTOR c2 = XMLoadFloat4(&c); + XMStoreFloat4(this, XMVectorMultiply(c1, c2)); + return *this; +} + +inline Color& Color::operator*= (float S) noexcept +{ + using namespace DirectX; + 
XMVECTOR c = XMLoadFloat4(this); + XMStoreFloat4(this, XMVectorScale(c, S)); + return *this; +} + +inline Color& Color::operator/= (const Color& c) noexcept +{ + using namespace DirectX; + XMVECTOR c1 = XMLoadFloat4(this); + XMVECTOR c2 = XMLoadFloat4(&c); + XMStoreFloat4(this, XMVectorDivide(c1, c2)); + return *this; +} + +//------------------------------------------------------------------------------ +// Urnary operators +//------------------------------------------------------------------------------ + +inline Color Color::operator- () const noexcept +{ + using namespace DirectX; + XMVECTOR c = XMLoadFloat4(this); + Color R; + XMStoreFloat4(&R, XMVectorNegate(c)); + return R; +} + +//------------------------------------------------------------------------------ +// Binary operators +//------------------------------------------------------------------------------ + +inline Color operator+ (const Color& C1, const Color& C2) noexcept +{ + using namespace DirectX; + XMVECTOR c1 = XMLoadFloat4(&C1); + XMVECTOR c2 = XMLoadFloat4(&C2); + Color R; + XMStoreFloat4(&R, XMVectorAdd(c1, c2)); + return R; +} + +inline Color operator- (const Color& C1, const Color& C2) noexcept +{ + using namespace DirectX; + XMVECTOR c1 = XMLoadFloat4(&C1); + XMVECTOR c2 = XMLoadFloat4(&C2); + Color R; + XMStoreFloat4(&R, XMVectorSubtract(c1, c2)); + return R; +} + +inline Color operator* (const Color& C1, const Color& C2) noexcept +{ + using namespace DirectX; + XMVECTOR c1 = XMLoadFloat4(&C1); + XMVECTOR c2 = XMLoadFloat4(&C2); + Color R; + XMStoreFloat4(&R, XMVectorMultiply(c1, c2)); + return R; +} + +inline Color operator* (const Color& C, float S) noexcept +{ + using namespace DirectX; + XMVECTOR c = XMLoadFloat4(&C); + Color R; + XMStoreFloat4(&R, XMVectorScale(c, S)); + return R; +} + +inline Color operator/ (const Color& C1, const Color& C2) noexcept +{ + using namespace DirectX; + XMVECTOR c1 = XMLoadFloat4(&C1); + XMVECTOR c2 = XMLoadFloat4(&C2); + Color R; + XMStoreFloat4(&R, 
XMVectorDivide(c1, c2)); + return R; +} + +inline Color operator* (float S, const Color& C) noexcept +{ + using namespace DirectX; + XMVECTOR c1 = XMLoadFloat4(&C); + Color R; + XMStoreFloat4(&R, XMVectorScale(c1, S)); + return R; +} + +//------------------------------------------------------------------------------ +// Color operations +//------------------------------------------------------------------------------ + +inline DirectX::PackedVector::XMCOLOR Color::BGRA() const noexcept +{ + using namespace DirectX; + XMVECTOR clr = XMLoadFloat4(this); + PackedVector::XMCOLOR Packed; + PackedVector::XMStoreColor(&Packed, clr); + return Packed; +} + +inline DirectX::PackedVector::XMUBYTEN4 Color::RGBA() const noexcept +{ + using namespace DirectX; + XMVECTOR clr = XMLoadFloat4(this); + PackedVector::XMUBYTEN4 Packed; + PackedVector::XMStoreUByteN4(&Packed, clr); + return Packed; +} + +inline Vector3 Color::ToVector3() const noexcept +{ + return Vector3(x, y, z); +} + +inline Vector4 Color::ToVector4() const noexcept +{ + return Vector4(x, y, z, w); +} + +inline void Color::Negate() noexcept +{ + using namespace DirectX; + XMVECTOR c = XMLoadFloat4(this); + XMStoreFloat4(this, XMColorNegative(c)); +} + +inline void Color::Negate(Color& result) const noexcept +{ + using namespace DirectX; + XMVECTOR c = XMLoadFloat4(this); + XMStoreFloat4(&result, XMColorNegative(c)); +} + +inline void Color::Saturate() noexcept +{ + using namespace DirectX; + XMVECTOR c = XMLoadFloat4(this); + XMStoreFloat4(this, XMVectorSaturate(c)); +} + +inline void Color::Saturate(Color& result) const noexcept +{ + using namespace DirectX; + XMVECTOR c = XMLoadFloat4(this); + XMStoreFloat4(&result, XMVectorSaturate(c)); +} + +inline void Color::Premultiply() noexcept +{ + using namespace DirectX; + XMVECTOR c = XMLoadFloat4(this); + XMVECTOR a = XMVectorSplatW(c); + a = XMVectorSelect(g_XMIdentityR3, a, g_XMSelect1110); + XMStoreFloat4(this, XMVectorMultiply(c, a)); +} + +inline void 
Color::Premultiply(Color& result) const noexcept +{ + using namespace DirectX; + XMVECTOR c = XMLoadFloat4(this); + XMVECTOR a = XMVectorSplatW(c); + a = XMVectorSelect(g_XMIdentityR3, a, g_XMSelect1110); + XMStoreFloat4(&result, XMVectorMultiply(c, a)); +} + +inline void Color::AdjustSaturation(float sat) noexcept +{ + using namespace DirectX; + XMVECTOR c = XMLoadFloat4(this); + XMStoreFloat4(this, XMColorAdjustSaturation(c, sat)); +} + +inline void Color::AdjustSaturation(float sat, Color& result) const noexcept +{ + using namespace DirectX; + XMVECTOR c = XMLoadFloat4(this); + XMStoreFloat4(&result, XMColorAdjustSaturation(c, sat)); +} + +inline void Color::AdjustContrast(float contrast) noexcept +{ + using namespace DirectX; + XMVECTOR c = XMLoadFloat4(this); + XMStoreFloat4(this, XMColorAdjustContrast(c, contrast)); +} + +inline void Color::AdjustContrast(float contrast, Color& result) const noexcept +{ + using namespace DirectX; + XMVECTOR c = XMLoadFloat4(this); + XMStoreFloat4(&result, XMColorAdjustContrast(c, contrast)); +} + +//------------------------------------------------------------------------------ +// Static functions +//------------------------------------------------------------------------------ + +inline void Color::Modulate(const Color& c1, const Color& c2, Color& result) noexcept +{ + using namespace DirectX; + XMVECTOR C0 = XMLoadFloat4(&c1); + XMVECTOR C1 = XMLoadFloat4(&c2); + XMStoreFloat4(&result, XMColorModulate(C0, C1)); +} + +inline Color Color::Modulate(const Color& c1, const Color& c2) noexcept +{ + using namespace DirectX; + XMVECTOR C0 = XMLoadFloat4(&c1); + XMVECTOR C1 = XMLoadFloat4(&c2); + + Color result; + XMStoreFloat4(&result, XMColorModulate(C0, C1)); + return result; +} + +inline void Color::Lerp(const Color& c1, const Color& c2, float t, Color& result) noexcept +{ + using namespace DirectX; + XMVECTOR C0 = XMLoadFloat4(&c1); + XMVECTOR C1 = XMLoadFloat4(&c2); + XMStoreFloat4(&result, XMVectorLerp(C0, C1, t)); +} + 
+inline Color Color::Lerp(const Color& c1, const Color& c2, float t) noexcept +{ + using namespace DirectX; + XMVECTOR C0 = XMLoadFloat4(&c1); + XMVECTOR C1 = XMLoadFloat4(&c2); + + Color result; + XMStoreFloat4(&result, XMVectorLerp(C0, C1, t)); + return result; +} + + +/**************************************************************************** + * + * Ray + * + ****************************************************************************/ + +//----------------------------------------------------------------------------- +// Comparision operators +//------------------------------------------------------------------------------ +inline bool Ray::operator == (const Ray& r) const noexcept +{ + using namespace DirectX; + XMVECTOR r1p = XMLoadFloat3(&position); + XMVECTOR r2p = XMLoadFloat3(&r.position); + XMVECTOR r1d = XMLoadFloat3(&direction); + XMVECTOR r2d = XMLoadFloat3(&r.direction); + return XMVector3Equal(r1p, r2p) && XMVector3Equal(r1d, r2d); +} + +inline bool Ray::operator != (const Ray& r) const noexcept +{ + using namespace DirectX; + XMVECTOR r1p = XMLoadFloat3(&position); + XMVECTOR r2p = XMLoadFloat3(&r.position); + XMVECTOR r1d = XMLoadFloat3(&direction); + XMVECTOR r2d = XMLoadFloat3(&r.direction); + return XMVector3NotEqual(r1p, r2p) && XMVector3NotEqual(r1d, r2d); +} + +//----------------------------------------------------------------------------- +// Ray operators +//------------------------------------------------------------------------------ + +inline bool Ray::Intersects(const BoundingSphere& sphere, _Out_ float& Dist) const noexcept +{ + return sphere.Intersects(position, direction, Dist); +} + +inline bool Ray::Intersects(const BoundingBox& box, _Out_ float& Dist) const noexcept +{ + return box.Intersects(position, direction, Dist); +} + +inline bool Ray::Intersects(const Vector3& tri0, const Vector3& tri1, const Vector3& tri2, _Out_ float& Dist) const noexcept +{ + return DirectX::TriangleTests::Intersects(position, direction, tri0, tri1, 
tri2, Dist); +} + +inline bool Ray::Intersects(const Plane& plane, _Out_ float& Dist) const noexcept +{ + using namespace DirectX; + + XMVECTOR p = XMLoadFloat4(&plane); + XMVECTOR dir = XMLoadFloat3(&direction); + + XMVECTOR nd = XMPlaneDotNormal(p, dir); + + if (XMVector3LessOrEqual(XMVectorAbs(nd), g_RayEpsilon)) + { + Dist = 0.f; + return false; + } + else + { + // t = -(dot(n,origin) + D) / dot(n,dir) + XMVECTOR pos = XMLoadFloat3(&position); + XMVECTOR v = XMPlaneDotNormal(p, pos); + v = XMVectorAdd(v, XMVectorSplatW(p)); + v = XMVectorDivide(v, nd); + float dist = -XMVectorGetX(v); + if (dist < 0) + { + Dist = 0.f; + return false; + } + else + { + Dist = dist; + return true; + } + } +} + + +/**************************************************************************** + * + * Viewport + * + ****************************************************************************/ + +//------------------------------------------------------------------------------ +// Comparision operators +//------------------------------------------------------------------------------ + +inline bool Viewport::operator == (const Viewport& vp) const noexcept +{ + return (x == vp.x && y == vp.y + && width == vp.width && height == vp.height + && minDepth == vp.minDepth && maxDepth == vp.maxDepth); +} + +inline bool Viewport::operator != (const Viewport& vp) const noexcept +{ + return (x != vp.x || y != vp.y + || width != vp.width || height != vp.height + || minDepth != vp.minDepth || maxDepth != vp.maxDepth); +} + +//------------------------------------------------------------------------------ +// Assignment operators +//------------------------------------------------------------------------------ + +inline Viewport& Viewport::operator= (const RECT& rct) noexcept +{ + x = float(rct.left); y = float(rct.top); + width = float(rct.right - rct.left); + height = float(rct.bottom - rct.top); + minDepth = 0.f; maxDepth = 1.f; + return *this; +} + +#if defined(__d3d11_h__) || defined(__d3d11_x_h__) 
+inline Viewport& Viewport::operator= (const D3D11_VIEWPORT& vp) noexcept +{ + x = vp.TopLeftX; y = vp.TopLeftY; + width = vp.Width; height = vp.Height; + minDepth = vp.MinDepth; maxDepth = vp.MaxDepth; + return *this; +} +#endif + +#if defined(__d3d12_h__) || defined(__d3d12_x_h__) || defined(__XBOX_D3D12_X__) +inline Viewport& Viewport::operator= (const D3D12_VIEWPORT& vp) noexcept +{ + x = vp.TopLeftX; y = vp.TopLeftY; + width = vp.Width; height = vp.Height; + minDepth = vp.MinDepth; maxDepth = vp.MaxDepth; + return *this; +} +#endif + +//------------------------------------------------------------------------------ +// Viewport operations +//------------------------------------------------------------------------------ + +inline float Viewport::AspectRatio() const noexcept +{ + if (width == 0.f || height == 0.f) + return 0.f; + + return (width / height); +} + +inline Vector3 Viewport::Project(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world) const noexcept +{ + using namespace DirectX; + XMVECTOR v = XMLoadFloat3(&p); + XMMATRIX projection = XMLoadFloat4x4(&proj); + v = XMVector3Project(v, x, y, width, height, minDepth, maxDepth, projection, view, world); + Vector3 result; + XMStoreFloat3(&result, v); + return result; +} + +inline void Viewport::Project(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world, Vector3& result) const noexcept +{ + using namespace DirectX; + XMVECTOR v = XMLoadFloat3(&p); + XMMATRIX projection = XMLoadFloat4x4(&proj); + v = XMVector3Project(v, x, y, width, height, minDepth, maxDepth, projection, view, world); + XMStoreFloat3(&result, v); +} + +inline Vector3 Viewport::Unproject(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world) const noexcept +{ + using namespace DirectX; + XMVECTOR v = XMLoadFloat3(&p); + XMMATRIX projection = XMLoadFloat4x4(&proj); + v = XMVector3Unproject(v, x, y, width, height, minDepth, maxDepth, projection, view, world); + Vector3 
result; + XMStoreFloat3(&result, v); + return result; +} + +inline void Viewport::Unproject(const Vector3& p, const Matrix& proj, const Matrix& view, const Matrix& world, Vector3& result) const noexcept +{ + using namespace DirectX; + XMVECTOR v = XMLoadFloat3(&p); + XMMATRIX projection = XMLoadFloat4x4(&proj); + v = XMVector3Unproject(v, x, y, width, height, minDepth, maxDepth, projection, view, world); + XMStoreFloat3(&result, v); +} diff --git a/Sdk/External/DirectXTK/Inc/SpriteBatch.h b/Sdk/External/DirectXTK/Inc/SpriteBatch.h new file mode 100644 index 0000000..f9bdcba --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/SpriteBatch.h @@ -0,0 +1,98 @@ +//-------------------------------------------------------------------------------------- +// File: SpriteBatch.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_XBOX_ONE) && defined(_TITLE) +#include +#else +#include +#include +#endif + +#include +#include +#include +#include + + +namespace DirectX +{ + enum SpriteSortMode + { + SpriteSortMode_Deferred, + SpriteSortMode_Immediate, + SpriteSortMode_Texture, + SpriteSortMode_BackToFront, + SpriteSortMode_FrontToBack, + }; + + + enum SpriteEffects : uint32_t + { + SpriteEffects_None = 0, + SpriteEffects_FlipHorizontally = 1, + SpriteEffects_FlipVertically = 2, + SpriteEffects_FlipBoth = SpriteEffects_FlipHorizontally | SpriteEffects_FlipVertically, + }; + + + class SpriteBatch + { + public: + explicit SpriteBatch(_In_ ID3D11DeviceContext* deviceContext); + SpriteBatch(SpriteBatch&& moveFrom) noexcept; + SpriteBatch& operator= (SpriteBatch&& moveFrom) noexcept; + + SpriteBatch(SpriteBatch const&) = delete; + SpriteBatch& operator= (SpriteBatch const&) = delete; + + virtual ~SpriteBatch(); + + // Begin/End a batch of sprite drawing operations. 
+ void XM_CALLCONV Begin(SpriteSortMode sortMode = SpriteSortMode_Deferred, + _In_opt_ ID3D11BlendState* blendState = nullptr, + _In_opt_ ID3D11SamplerState* samplerState = nullptr, + _In_opt_ ID3D11DepthStencilState* depthStencilState = nullptr, + _In_opt_ ID3D11RasterizerState* rasterizerState = nullptr, + _In_opt_ std::function setCustomShaders = nullptr, + FXMMATRIX transformMatrix = MatrixIdentity); + void __cdecl End(); + + // Draw overloads specifying position, origin and scale as XMFLOAT2. + void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, XMFLOAT2 const& position, FXMVECTOR color = Colors::White); + void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, XMFLOAT2 const& position, _In_opt_ RECT const* sourceRectangle, FXMVECTOR color = Colors::White, float rotation = 0, XMFLOAT2 const& origin = Float2Zero, float scale = 1, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0); + void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, XMFLOAT2 const& position, _In_opt_ RECT const* sourceRectangle, FXMVECTOR color, float rotation, XMFLOAT2 const& origin, XMFLOAT2 const& scale, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0); + + // Draw overloads specifying position, origin and scale via the first two components of an XMVECTOR. 
+ void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, FXMVECTOR position, FXMVECTOR color = Colors::White); + void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, FXMVECTOR position, _In_opt_ RECT const* sourceRectangle, FXMVECTOR color = Colors::White, float rotation = 0, FXMVECTOR origin = g_XMZero, float scale = 1, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0); + void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, FXMVECTOR position, _In_opt_ RECT const* sourceRectangle, FXMVECTOR color, float rotation, FXMVECTOR origin, GXMVECTOR scale, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0); + + // Draw overloads specifying position as a RECT. + void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, RECT const& destinationRectangle, FXMVECTOR color = Colors::White); + void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, RECT const& destinationRectangle, _In_opt_ RECT const* sourceRectangle, FXMVECTOR color = Colors::White, float rotation = 0, XMFLOAT2 const& origin = Float2Zero, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0); + + // Rotation mode to be applied to the sprite transformation + void __cdecl SetRotation(DXGI_MODE_ROTATION mode); + DXGI_MODE_ROTATION __cdecl GetRotation() const noexcept; + + // Set viewport for sprite transformation + void __cdecl SetViewport(const D3D11_VIEWPORT& viewPort); + + private: + // Private implementation. + class Impl; + + std::unique_ptr pImpl; + + static const XMMATRIX MatrixIdentity; + static const XMFLOAT2 Float2Zero; + }; +} diff --git a/Sdk/External/DirectXTK/Inc/SpriteFont.h b/Sdk/External/DirectXTK/Inc/SpriteFont.h new file mode 100644 index 0000000..a48b61e --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/SpriteFont.h @@ -0,0 +1,89 @@ +//-------------------------------------------------------------------------------------- +// File: SpriteFont.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +#include "SpriteBatch.h" + + +namespace DirectX +{ + class SpriteFont + { + public: + struct Glyph; + + SpriteFont(_In_ ID3D11Device* device, _In_z_ wchar_t const* fileName, bool forceSRGB = false); + SpriteFont(_In_ ID3D11Device* device, _In_reads_bytes_(dataSize) uint8_t const* dataBlob, _In_ size_t dataSize, bool forceSRGB = false); + SpriteFont(_In_ ID3D11ShaderResourceView* texture, _In_reads_(glyphCount) Glyph const* glyphs, _In_ size_t glyphCount, _In_ float lineSpacing); + + SpriteFont(SpriteFont&& moveFrom) noexcept; + SpriteFont& operator= (SpriteFont&& moveFrom) noexcept; + + SpriteFont(SpriteFont const&) = delete; + SpriteFont& operator= (SpriteFont const&) = delete; + + virtual ~SpriteFont(); + + // Wide-character / UTF-16LE + void XM_CALLCONV DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ wchar_t const* text, XMFLOAT2 const& position, FXMVECTOR color = Colors::White, float rotation = 0, XMFLOAT2 const& origin = Float2Zero, float scale = 1, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0) const; + void XM_CALLCONV DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ wchar_t const* text, XMFLOAT2 const& position, FXMVECTOR color, float rotation, XMFLOAT2 const& origin, XMFLOAT2 const& scale, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0) const; + void XM_CALLCONV DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ wchar_t const* text, FXMVECTOR position, FXMVECTOR color = Colors::White, float rotation = 0, FXMVECTOR origin = g_XMZero, float scale = 1, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0) const; + void XM_CALLCONV DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ wchar_t const* text, FXMVECTOR position, FXMVECTOR color, float rotation, FXMVECTOR origin, GXMVECTOR scale, SpriteEffects effects = 
SpriteEffects_None, float layerDepth = 0) const; + + XMVECTOR XM_CALLCONV MeasureString(_In_z_ wchar_t const* text, bool ignoreWhitespace = true) const; + + RECT __cdecl MeasureDrawBounds(_In_z_ wchar_t const* text, XMFLOAT2 const& position, bool ignoreWhitespace = true) const; + RECT XM_CALLCONV MeasureDrawBounds(_In_z_ wchar_t const* text, FXMVECTOR position, bool ignoreWhitespace = true) const; + + // UTF-8 + void XM_CALLCONV DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ char const* text, XMFLOAT2 const& position, FXMVECTOR color = Colors::White, float rotation = 0, XMFLOAT2 const& origin = Float2Zero, float scale = 1, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0) const; + void XM_CALLCONV DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ char const* text, XMFLOAT2 const& position, FXMVECTOR color, float rotation, XMFLOAT2 const& origin, XMFLOAT2 const& scale, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0) const; + void XM_CALLCONV DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ char const* text, FXMVECTOR position, FXMVECTOR color = Colors::White, float rotation = 0, FXMVECTOR origin = g_XMZero, float scale = 1, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0) const; + void XM_CALLCONV DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ char const* text, FXMVECTOR position, FXMVECTOR color, float rotation, FXMVECTOR origin, GXMVECTOR scale, SpriteEffects effects = SpriteEffects_None, float layerDepth = 0) const; + + XMVECTOR XM_CALLCONV MeasureString(_In_z_ char const* text, bool ignoreWhitespace = true) const; + + RECT __cdecl MeasureDrawBounds(_In_z_ char const* text, XMFLOAT2 const& position, bool ignoreWhitespace = true) const; + RECT XM_CALLCONV MeasureDrawBounds(_In_z_ char const* text, FXMVECTOR position, bool ignoreWhitespace = true) const; + + // Spacing properties + float __cdecl GetLineSpacing() const noexcept; + void __cdecl SetLineSpacing(float spacing); + + // Font properties + wchar_t 
__cdecl GetDefaultCharacter() const noexcept; + void __cdecl SetDefaultCharacter(wchar_t character); + + bool __cdecl ContainsCharacter(wchar_t character) const; + + // Custom layout/rendering + Glyph const* __cdecl FindGlyph(wchar_t character) const; + void __cdecl GetSpriteSheet(ID3D11ShaderResourceView** texture) const; + + // Describes a single character glyph. + struct Glyph + { + uint32_t Character; + RECT Subrect; + float XOffset; + float YOffset; + float XAdvance; + }; + + + private: + // Private implementation. + class Impl; + + std::unique_ptr pImpl; + + static const XMFLOAT2 Float2Zero; + }; +} diff --git a/Sdk/External/DirectXTK/Inc/VertexTypes.h b/Sdk/External/DirectXTK/Inc/VertexTypes.h new file mode 100644 index 0000000..ba3e403 --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/VertexTypes.h @@ -0,0 +1,490 @@ +//-------------------------------------------------------------------------------------- +// File: VertexTypes.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_XBOX_ONE) && defined(_TITLE) +#include +#else +#include +#endif + +#include + + +namespace DirectX +{ + // Vertex struct holding position information. 
+ struct VertexPosition + { + VertexPosition() = default; + + VertexPosition(const VertexPosition&) = default; + VertexPosition& operator=(const VertexPosition&) = default; + + VertexPosition(VertexPosition&&) = default; + VertexPosition& operator=(VertexPosition&&) = default; + + VertexPosition(XMFLOAT3 const& iposition) noexcept + : position(iposition) + { } + + VertexPosition(FXMVECTOR iposition) noexcept + { + XMStoreFloat3(&this->position, iposition); + } + + XMFLOAT3 position; + + static constexpr unsigned int InputElementCount = 1; + static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount]; + }; + + + // Vertex struct holding position and color information. + struct VertexPositionColor + { + VertexPositionColor() = default; + + VertexPositionColor(const VertexPositionColor&) = default; + VertexPositionColor& operator=(const VertexPositionColor&) = default; + + VertexPositionColor(VertexPositionColor&&) = default; + VertexPositionColor& operator=(VertexPositionColor&&) = default; + + VertexPositionColor(XMFLOAT3 const& iposition, XMFLOAT4 const& icolor) noexcept + : position(iposition), + color(icolor) + { } + + VertexPositionColor(FXMVECTOR iposition, FXMVECTOR icolor) noexcept + { + XMStoreFloat3(&this->position, iposition); + XMStoreFloat4(&this->color, icolor); + } + + XMFLOAT3 position; + XMFLOAT4 color; + + static constexpr unsigned int InputElementCount = 2; + static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount]; + }; + + + // Vertex struct holding position and texture mapping information. 
+ struct VertexPositionTexture + { + VertexPositionTexture() = default; + + VertexPositionTexture(const VertexPositionTexture&) = default; + VertexPositionTexture& operator=(const VertexPositionTexture&) = default; + + VertexPositionTexture(VertexPositionTexture&&) = default; + VertexPositionTexture& operator=(VertexPositionTexture&&) = default; + + VertexPositionTexture(XMFLOAT3 const& iposition, XMFLOAT2 const& itextureCoordinate) noexcept + : position(iposition), + textureCoordinate(itextureCoordinate) + { } + + VertexPositionTexture(FXMVECTOR iposition, FXMVECTOR itextureCoordinate) noexcept + { + XMStoreFloat3(&this->position, iposition); + XMStoreFloat2(&this->textureCoordinate, itextureCoordinate); + } + + XMFLOAT3 position; + XMFLOAT2 textureCoordinate; + + static constexpr unsigned int InputElementCount = 2; + static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount]; + }; + + + // Vertex struct holding position and dual texture mapping information. + struct VertexPositionDualTexture + { + VertexPositionDualTexture() = default; + + VertexPositionDualTexture(const VertexPositionDualTexture&) = default; + VertexPositionDualTexture& operator=(const VertexPositionDualTexture&) = default; + + VertexPositionDualTexture(VertexPositionDualTexture&&) = default; + VertexPositionDualTexture& operator=(VertexPositionDualTexture&&) = default; + + VertexPositionDualTexture( + XMFLOAT3 const& iposition, + XMFLOAT2 const& itextureCoordinate0, + XMFLOAT2 const& itextureCoordinate1) noexcept + : position(iposition), + textureCoordinate0(itextureCoordinate0), + textureCoordinate1(itextureCoordinate1) + { } + + VertexPositionDualTexture( + FXMVECTOR iposition, + FXMVECTOR itextureCoordinate0, + FXMVECTOR itextureCoordinate1) noexcept + { + XMStoreFloat3(&this->position, iposition); + XMStoreFloat2(&this->textureCoordinate0, itextureCoordinate0); + XMStoreFloat2(&this->textureCoordinate1, itextureCoordinate1); + } + + XMFLOAT3 position; + XMFLOAT2 
textureCoordinate0; + XMFLOAT2 textureCoordinate1; + + static constexpr unsigned int InputElementCount = 3; + static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount]; + }; + + + // Vertex struct holding position and normal vector. + struct VertexPositionNormal + { + VertexPositionNormal() = default; + + VertexPositionNormal(const VertexPositionNormal&) = default; + VertexPositionNormal& operator=(const VertexPositionNormal&) = default; + + VertexPositionNormal(VertexPositionNormal&&) = default; + VertexPositionNormal& operator=(VertexPositionNormal&&) = default; + + VertexPositionNormal(XMFLOAT3 const& iposition, XMFLOAT3 const& inormal) noexcept + : position(iposition), + normal(inormal) + { } + + VertexPositionNormal(FXMVECTOR iposition, FXMVECTOR inormal) noexcept + { + XMStoreFloat3(&this->position, iposition); + XMStoreFloat3(&this->normal, inormal); + } + + XMFLOAT3 position; + XMFLOAT3 normal; + + static constexpr unsigned int InputElementCount = 2; + static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount]; + }; + + + // Vertex struct holding position, color, and texture mapping information. 
+ struct VertexPositionColorTexture + { + VertexPositionColorTexture() = default; + + VertexPositionColorTexture(const VertexPositionColorTexture&) = default; + VertexPositionColorTexture& operator=(const VertexPositionColorTexture&) = default; + + VertexPositionColorTexture(VertexPositionColorTexture&&) = default; + VertexPositionColorTexture& operator=(VertexPositionColorTexture&&) = default; + + VertexPositionColorTexture(XMFLOAT3 const& iposition, XMFLOAT4 const& icolor, XMFLOAT2 const& itextureCoordinate) noexcept + : position(iposition), + color(icolor), + textureCoordinate(itextureCoordinate) + { } + + VertexPositionColorTexture(FXMVECTOR iposition, FXMVECTOR icolor, FXMVECTOR itextureCoordinate) noexcept + { + XMStoreFloat3(&this->position, iposition); + XMStoreFloat4(&this->color, icolor); + XMStoreFloat2(&this->textureCoordinate, itextureCoordinate); + } + + XMFLOAT3 position; + XMFLOAT4 color; + XMFLOAT2 textureCoordinate; + + static constexpr unsigned int InputElementCount = 3; + static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount]; + }; + + + // Vertex struct holding position, normal vector, and color information. 
+ struct VertexPositionNormalColor + { + VertexPositionNormalColor() = default; + + VertexPositionNormalColor(const VertexPositionNormalColor&) = default; + VertexPositionNormalColor& operator=(const VertexPositionNormalColor&) = default; + + VertexPositionNormalColor(VertexPositionNormalColor&&) = default; + VertexPositionNormalColor& operator=(VertexPositionNormalColor&&) = default; + + VertexPositionNormalColor(XMFLOAT3 const& iposition, XMFLOAT3 const& inormal, XMFLOAT4 const& icolor) noexcept + : position(iposition), + normal(inormal), + color(icolor) + { } + + VertexPositionNormalColor(FXMVECTOR iposition, FXMVECTOR inormal, FXMVECTOR icolor) noexcept + { + XMStoreFloat3(&this->position, iposition); + XMStoreFloat3(&this->normal, inormal); + XMStoreFloat4(&this->color, icolor); + } + + XMFLOAT3 position; + XMFLOAT3 normal; + XMFLOAT4 color; + + static constexpr unsigned int InputElementCount = 3; + static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount]; + }; + + + // Vertex struct holding position, normal vector, and texture mapping information. 
+ struct VertexPositionNormalTexture + { + VertexPositionNormalTexture() = default; + + VertexPositionNormalTexture(const VertexPositionNormalTexture&) = default; + VertexPositionNormalTexture& operator=(const VertexPositionNormalTexture&) = default; + + VertexPositionNormalTexture(VertexPositionNormalTexture&&) = default; + VertexPositionNormalTexture& operator=(VertexPositionNormalTexture&&) = default; + + VertexPositionNormalTexture(XMFLOAT3 const& iposition, XMFLOAT3 const& inormal, XMFLOAT2 const& itextureCoordinate) noexcept + : position(iposition), + normal(inormal), + textureCoordinate(itextureCoordinate) + { } + + VertexPositionNormalTexture(FXMVECTOR iposition, FXMVECTOR inormal, FXMVECTOR itextureCoordinate) noexcept + { + XMStoreFloat3(&this->position, iposition); + XMStoreFloat3(&this->normal, inormal); + XMStoreFloat2(&this->textureCoordinate, itextureCoordinate); + } + + XMFLOAT3 position; + XMFLOAT3 normal; + XMFLOAT2 textureCoordinate; + + static constexpr unsigned int InputElementCount = 3; + static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount]; + }; + + + // Vertex struct holding position, normal vector, color, and texture mapping information. 
+ struct VertexPositionNormalColorTexture + { + VertexPositionNormalColorTexture() = default; + + VertexPositionNormalColorTexture(const VertexPositionNormalColorTexture&) = default; + VertexPositionNormalColorTexture& operator=(const VertexPositionNormalColorTexture&) = default; + + VertexPositionNormalColorTexture(VertexPositionNormalColorTexture&&) = default; + VertexPositionNormalColorTexture& operator=(VertexPositionNormalColorTexture&&) = default; + + VertexPositionNormalColorTexture( + XMFLOAT3 const& iposition, + XMFLOAT3 const& inormal, + XMFLOAT4 const& icolor, + XMFLOAT2 const& itextureCoordinate) noexcept + : position(iposition), + normal(inormal), + color(icolor), + textureCoordinate(itextureCoordinate) + { } + + VertexPositionNormalColorTexture(FXMVECTOR iposition, FXMVECTOR inormal, FXMVECTOR icolor, CXMVECTOR itextureCoordinate) noexcept + { + XMStoreFloat3(&this->position, iposition); + XMStoreFloat3(&this->normal, inormal); + XMStoreFloat4(&this->color, icolor); + XMStoreFloat2(&this->textureCoordinate, itextureCoordinate); + } + + XMFLOAT3 position; + XMFLOAT3 normal; + XMFLOAT4 color; + XMFLOAT2 textureCoordinate; + + static constexpr unsigned int InputElementCount = 4; + static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount]; + }; + + + // Vertex struct for Visual Studio Shader Designer (DGSL) holding position, normal, + // tangent, color (RGBA), and texture mapping information + struct VertexPositionNormalTangentColorTexture + { + VertexPositionNormalTangentColorTexture() = default; + + VertexPositionNormalTangentColorTexture(const VertexPositionNormalTangentColorTexture&) = default; + VertexPositionNormalTangentColorTexture& operator=(const VertexPositionNormalTangentColorTexture&) = default; + + VertexPositionNormalTangentColorTexture(VertexPositionNormalTangentColorTexture&&) = default; + VertexPositionNormalTangentColorTexture& operator=(VertexPositionNormalTangentColorTexture&&) = default; + + XMFLOAT3 position; + XMFLOAT3 
normal; + XMFLOAT4 tangent; + uint32_t color; + XMFLOAT2 textureCoordinate; + + VertexPositionNormalTangentColorTexture( + XMFLOAT3 const& iposition, + XMFLOAT3 const& inormal, + XMFLOAT4 const& itangent, + uint32_t irgba, + XMFLOAT2 const& itextureCoordinate) noexcept + : position(iposition), + normal(inormal), + tangent(itangent), + color(irgba), + textureCoordinate(itextureCoordinate) + { + } + + VertexPositionNormalTangentColorTexture( + FXMVECTOR iposition, + FXMVECTOR inormal, + FXMVECTOR itangent, + uint32_t irgba, + CXMVECTOR itextureCoordinate) noexcept + : color(irgba) + { + XMStoreFloat3(&this->position, iposition); + XMStoreFloat3(&this->normal, inormal); + XMStoreFloat4(&this->tangent, itangent); + XMStoreFloat2(&this->textureCoordinate, itextureCoordinate); + } + + VertexPositionNormalTangentColorTexture( + XMFLOAT3 const& iposition, + XMFLOAT3 const& inormal, + XMFLOAT4 const& itangent, + XMFLOAT4 const& icolor, + XMFLOAT2 const& itextureCoordinate) noexcept + : position(iposition), + normal(inormal), + tangent(itangent), + color{}, + textureCoordinate(itextureCoordinate) + { + SetColor(icolor); + } + + VertexPositionNormalTangentColorTexture( + FXMVECTOR iposition, + FXMVECTOR inormal, + FXMVECTOR itangent, + CXMVECTOR icolor, + CXMVECTOR itextureCoordinate) noexcept + : color{} + { + XMStoreFloat3(&this->position, iposition); + XMStoreFloat3(&this->normal, inormal); + XMStoreFloat4(&this->tangent, itangent); + XMStoreFloat2(&this->textureCoordinate, itextureCoordinate); + + SetColor(icolor); + } + + void __cdecl SetColor(XMFLOAT4 const& icolor) noexcept { SetColor(XMLoadFloat4(&icolor)); } + void XM_CALLCONV SetColor(FXMVECTOR icolor) noexcept; + + static constexpr unsigned int InputElementCount = 5; + static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount]; + }; + + + // Vertex struct for Visual Studio Shader Designer (DGSL) holding position, normal, + // tangent, color (RGBA), texture mapping information, and skinning weights + 
struct VertexPositionNormalTangentColorTextureSkinning : public VertexPositionNormalTangentColorTexture + { + VertexPositionNormalTangentColorTextureSkinning() = default; + + VertexPositionNormalTangentColorTextureSkinning(const VertexPositionNormalTangentColorTextureSkinning&) = default; + VertexPositionNormalTangentColorTextureSkinning& operator=(const VertexPositionNormalTangentColorTextureSkinning&) = default; + + VertexPositionNormalTangentColorTextureSkinning(VertexPositionNormalTangentColorTextureSkinning&&) = default; + VertexPositionNormalTangentColorTextureSkinning& operator=(VertexPositionNormalTangentColorTextureSkinning&&) = default; + + uint32_t indices; + uint32_t weights; + + VertexPositionNormalTangentColorTextureSkinning( + XMFLOAT3 const& iposition, + XMFLOAT3 const& inormal, + XMFLOAT4 const& itangent, + uint32_t irgba, + XMFLOAT2 const& itextureCoordinate, + XMUINT4 const& iindices, + XMFLOAT4 const& iweights) noexcept + : VertexPositionNormalTangentColorTexture(iposition, inormal, itangent, irgba, itextureCoordinate), + indices{}, + weights{} + { + SetBlendIndices(iindices); + SetBlendWeights(iweights); + } + + VertexPositionNormalTangentColorTextureSkinning( + FXMVECTOR iposition, + FXMVECTOR inormal, + FXMVECTOR itangent, + uint32_t irgba, + CXMVECTOR itextureCoordinate, + XMUINT4 const& iindices, + CXMVECTOR iweights) noexcept + : VertexPositionNormalTangentColorTexture(iposition, inormal, itangent, irgba, itextureCoordinate), + indices{}, + weights{} + { + SetBlendIndices(iindices); + SetBlendWeights(iweights); + } + + VertexPositionNormalTangentColorTextureSkinning( + XMFLOAT3 const& iposition, + XMFLOAT3 const& inormal, + XMFLOAT4 const& itangent, + XMFLOAT4 const& icolor, + XMFLOAT2 const& itextureCoordinate, + XMUINT4 const& iindices, + XMFLOAT4 const& iweights) noexcept + : VertexPositionNormalTangentColorTexture(iposition, inormal, itangent, icolor, itextureCoordinate), + indices{}, + weights{} + { + SetBlendIndices(iindices); + 
SetBlendWeights(iweights); + } + + VertexPositionNormalTangentColorTextureSkinning( + FXMVECTOR iposition, + FXMVECTOR inormal, + FXMVECTOR itangent, + CXMVECTOR icolor, + CXMVECTOR itextureCoordinate, + XMUINT4 const& iindices, + CXMVECTOR iweights) noexcept + : VertexPositionNormalTangentColorTexture(iposition, inormal, itangent, icolor, itextureCoordinate), + indices{}, + weights{} + { + SetBlendIndices(iindices); + SetBlendWeights(iweights); + } + + void __cdecl SetBlendIndices(XMUINT4 const& iindices) noexcept; + + void __cdecl SetBlendWeights(XMFLOAT4 const& iweights) noexcept { SetBlendWeights(XMLoadFloat4(&iweights)); } + void XM_CALLCONV SetBlendWeights(FXMVECTOR iweights) noexcept; + + static constexpr unsigned int InputElementCount = 7; + static const D3D11_INPUT_ELEMENT_DESC InputElements[InputElementCount]; + }; +} diff --git a/Sdk/External/DirectXTK/Inc/WICTextureLoader.h b/Sdk/External/DirectXTK/Inc/WICTextureLoader.h new file mode 100644 index 0000000..3ba3004 --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/WICTextureLoader.h @@ -0,0 +1,168 @@ +//-------------------------------------------------------------------------------------- +// File: WICTextureLoader.h +// +// Function for loading a WIC image and creating a Direct3D runtime texture for it +// (auto-generating mipmaps if possible) +// +// Note: Assumes application has already called CoInitializeEx +// +// Warning: CreateWICTexture* functions are not thread-safe if given a d3dContext instance for +// auto-gen mipmap support. +// +// Note these functions are useful for images created as simple 2D textures. For +// more complex resources, DDSTextureLoader is an excellent light-weight runtime loader. +// For a full-featured DDS file reader, writer, and texture processing pipeline see +// the 'Texconv' sample and the 'DirectXTex' library. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248926 +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +#if defined(_XBOX_ONE) && defined(_TITLE) +#include +#else +#include +#endif + +#include + +#pragma comment(lib,"uuid.lib") + + +namespace DirectX +{ + enum WIC_LOADER_FLAGS : uint32_t + { + WIC_LOADER_DEFAULT = 0, + WIC_LOADER_FORCE_SRGB = 0x1, + WIC_LOADER_IGNORE_SRGB = 0x2, + WIC_LOADER_SRGB_DEFAULT = 0x4, + WIC_LOADER_FIT_POW2 = 0x20, + WIC_LOADER_MAKE_SQUARE = 0x40, + WIC_LOADER_FORCE_RGBA32 = 0x80, + }; + + // Standard version + HRESULT __cdecl CreateWICTextureFromMemory( + _In_ ID3D11Device* d3dDevice, + _In_reads_bytes_(wicDataSize) const uint8_t* wicData, + _In_ size_t wicDataSize, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView, + _In_ size_t maxsize = 0) noexcept; + + HRESULT __cdecl CreateWICTextureFromFile( + _In_ ID3D11Device* d3dDevice, + _In_z_ const wchar_t* szFileName, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView, + _In_ size_t maxsize = 0) noexcept; + + // Standard version with optional auto-gen mipmap support + HRESULT __cdecl CreateWICTextureFromMemory( + #if defined(_XBOX_ONE) && defined(_TITLE) + _In_ ID3D11DeviceX* d3dDevice, + _In_opt_ ID3D11DeviceContextX* d3dContext, + #else + _In_ ID3D11Device* d3dDevice, + _In_opt_ ID3D11DeviceContext* d3dContext, + #endif + _In_reads_bytes_(wicDataSize) const uint8_t* wicData, + _In_ size_t wicDataSize, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView, + _In_ size_t maxsize = 0) noexcept; + + HRESULT __cdecl CreateWICTextureFromFile( + #if defined(_XBOX_ONE) && defined(_TITLE) + _In_ ID3D11DeviceX* d3dDevice, + _In_opt_ ID3D11DeviceContextX* d3dContext, + #else + _In_ ID3D11Device* d3dDevice, + _In_opt_ ID3D11DeviceContext* d3dContext, + #endif + _In_z_ 
const wchar_t* szFileName, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView, + _In_ size_t maxsize = 0) noexcept; + + // Extended version + HRESULT __cdecl CreateWICTextureFromMemoryEx( + _In_ ID3D11Device* d3dDevice, + _In_reads_bytes_(wicDataSize) const uint8_t* wicData, + _In_ size_t wicDataSize, + _In_ size_t maxsize, + _In_ D3D11_USAGE usage, + _In_ unsigned int bindFlags, + _In_ unsigned int cpuAccessFlags, + _In_ unsigned int miscFlags, + _In_ WIC_LOADER_FLAGS loadFlags, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView) noexcept; + + HRESULT __cdecl CreateWICTextureFromFileEx( + _In_ ID3D11Device* d3dDevice, + _In_z_ const wchar_t* szFileName, + _In_ size_t maxsize, + _In_ D3D11_USAGE usage, + _In_ unsigned int bindFlags, + _In_ unsigned int cpuAccessFlags, + _In_ unsigned int miscFlags, + _In_ WIC_LOADER_FLAGS loadFlags, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView) noexcept; + + // Extended version with optional auto-gen mipmap support + HRESULT __cdecl CreateWICTextureFromMemoryEx( + #if defined(_XBOX_ONE) && defined(_TITLE) + _In_ ID3D11DeviceX* d3dDevice, + _In_opt_ ID3D11DeviceContextX* d3dContext, + #else + _In_ ID3D11Device* d3dDevice, + _In_opt_ ID3D11DeviceContext* d3dContext, + #endif + _In_reads_bytes_(wicDataSize) const uint8_t* wicData, + _In_ size_t wicDataSize, + _In_ size_t maxsize, + _In_ D3D11_USAGE usage, + _In_ unsigned int bindFlags, + _In_ unsigned int cpuAccessFlags, + _In_ unsigned int miscFlags, + _In_ WIC_LOADER_FLAGS loadFlags, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView) noexcept; + + HRESULT __cdecl CreateWICTextureFromFileEx( + #if defined(_XBOX_ONE) && defined(_TITLE) + _In_ ID3D11DeviceX* d3dDevice, + _In_opt_ ID3D11DeviceContextX* d3dContext, + #else + _In_ ID3D11Device* d3dDevice, + _In_opt_ ID3D11DeviceContext* d3dContext, + #endif + 
_In_z_ const wchar_t* szFileName, + _In_ size_t maxsize, + _In_ D3D11_USAGE usage, + _In_ unsigned int bindFlags, + _In_ unsigned int cpuAccessFlags, + _In_ unsigned int miscFlags, + _In_ WIC_LOADER_FLAGS loadFlags, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView) noexcept; + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec" +#endif + + DEFINE_ENUM_FLAG_OPERATORS(WIC_LOADER_FLAGS); + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +} diff --git a/Sdk/External/DirectXTK/Inc/XboxDDSTextureLoader.h b/Sdk/External/DirectXTK/Inc/XboxDDSTextureLoader.h new file mode 100644 index 0000000..a2d1105 --- /dev/null +++ b/Sdk/External/DirectXTK/Inc/XboxDDSTextureLoader.h @@ -0,0 +1,66 @@ +//-------------------------------------------------------------------------------------- +// File: XboxDDSTextureLoader.h +// +// Functions for loading a DDS texture using the XBOX extended header and creating a +// Direct3D11.X runtime resource for it via the CreatePlacement APIs +// +// Note these functions will not load standard DDS files. Use the DDSTextureLoader +// module in the DirectXTex package or as part of the DirectXTK library to load +// these files which use standard Direct3D resource creation APIs. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248926 +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +#if !defined(_XBOX_ONE) || !defined(_TITLE) +#error This module only supports Xbox One exclusive apps +#endif + +#include + +#include + +#ifndef DDS_ALPHA_MODE_DEFINED +#define DDS_ALPHA_MODE_DEFINED +namespace DirectX +{ + enum DDS_ALPHA_MODE : uint32_t + { + DDS_ALPHA_MODE_UNKNOWN = 0, + DDS_ALPHA_MODE_STRAIGHT = 1, + DDS_ALPHA_MODE_PREMULTIPLIED = 2, + DDS_ALPHA_MODE_OPAQUE = 3, + DDS_ALPHA_MODE_CUSTOM = 4, + }; +} +#endif + +namespace Xbox +{ + using DirectX::DDS_ALPHA_MODE; + + HRESULT __cdecl CreateDDSTextureFromMemory( + _In_ ID3D11DeviceX* d3dDevice, + _In_reads_bytes_(ddsDataSize) const uint8_t* ddsData, + _In_ size_t ddsDataSize, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView, + _Outptr_ void** grfxMemory, + _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr, + _In_ bool forceSRGB = false) noexcept; + + HRESULT __cdecl CreateDDSTextureFromFile( _In_ ID3D11DeviceX* d3dDevice, + _In_z_ const wchar_t* szFileName, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView, + _Outptr_ void** grfxMemory, + _Out_opt_ DDS_ALPHA_MODE* alphaMode = nullptr, + _In_ bool forceSRGB = false) noexcept; + + void FreeDDSTextureMemory( _In_opt_ void* grfxMemory ) noexcept; +} diff --git a/Sdk/External/DirectXTK/LICENSE b/Sdk/External/DirectXTK/LICENSE new file mode 100644 index 0000000..63c16dd --- /dev/null +++ b/Sdk/External/DirectXTK/LICENSE @@ -0,0 +1,21 @@ + The MIT License (MIT) + +Copyright (c) 2012-2020 Microsoft Corp + +Permission is hereby granted, free of charge, to any person obtaining a copy of this +software and associated documentation files (the "Software"), to deal in the Software +without restriction, including without limitation the rights to use, copy, modify, +merge, 
publish, distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be included in all copies +or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF +CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE +OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/Sdk/External/DirectXTK/MakeSpriteFont/BitmapImporter.cs b/Sdk/External/DirectXTK/MakeSpriteFont/BitmapImporter.cs new file mode 100644 index 0000000..7868137 --- /dev/null +++ b/Sdk/External/DirectXTK/MakeSpriteFont/BitmapImporter.cs @@ -0,0 +1,121 @@ +// DirectXTK MakeSpriteFont tool +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 + +using System; +using System.Linq; +using System.Collections.Generic; +using System.Drawing; +using System.Drawing.Imaging; + +namespace MakeSpriteFont +{ + // Extracts font glyphs from a specially marked 2D bitmap. Characters should be + // arranged in a grid ordered from top left to bottom right. Monochrome characters + // should use white for solid areas and black for transparent areas. To include + // multicolored characters, add an alpha channel to the bitmap and use that to + // control which parts of the character are solid. The spaces between characters + // and around the edges of the grid should be filled with bright pink (red=255, + // green=0, blue=255). 
It doesn't matter if your grid includes lots of wasted space, + // because the converter will rearrange characters, packing as tightly as possible. + public class BitmapImporter : IFontImporter + { + // Properties hold the imported font data. + public IEnumerable Glyphs { get; private set; } + + public float LineSpacing { get; private set; } + + + public void Import(CommandLineOptions options) + { + // Load the source bitmap. + Bitmap bitmap; + + try + { + bitmap = new Bitmap(options.SourceFont); + } + catch + { + throw new Exception(string.Format("Unable to load '{0}'.", options.SourceFont)); + } + + // Convert to our desired pixel format. + bitmap = BitmapUtils.ChangePixelFormat(bitmap, PixelFormat.Format32bppArgb); + + // What characters are included in this font? + var characters = CharacterRegion.Flatten(options.CharacterRegions).ToArray(); + int characterIndex = 0; + char currentCharacter = '\0'; + + // Split the source image into a list of individual glyphs. + var glyphList = new List(); + + Glyphs = glyphList; + LineSpacing = 0; + + foreach (Rectangle rectangle in FindGlyphs(bitmap)) + { + if (characterIndex < characters.Length) + currentCharacter = characters[characterIndex++]; + else + currentCharacter++; + + glyphList.Add(new Glyph(currentCharacter, bitmap, rectangle)); + + LineSpacing = Math.Max(LineSpacing, rectangle.Height); + } + + // If the bitmap doesn't already have an alpha channel, create one now. + if (BitmapUtils.IsAlphaEntirely(255, bitmap)) + { + BitmapUtils.ConvertGreyToAlpha(bitmap); + } + } + + + // Searches a 2D bitmap for characters that are surrounded by a marker pink color. 
+ static IEnumerable FindGlyphs(Bitmap bitmap) + { + using (var bitmapData = new BitmapUtils.PixelAccessor(bitmap, ImageLockMode.ReadOnly)) + { + for (int y = 1; y < bitmap.Height; y++) + { + for (int x = 1; x < bitmap.Width; x++) + { + // Look for the top left corner of a character (a pixel that is not pink, but was pink immediately to the left and above it) + if (!IsMarkerColor(bitmapData[x, y]) && + IsMarkerColor(bitmapData[x - 1, y]) && + IsMarkerColor(bitmapData[x, y - 1])) + { + // Measure the size of this character. + int w = 1, h = 1; + + while ((x + w < bitmap.Width) && !IsMarkerColor(bitmapData[x + w, y])) + { + w++; + } + + while ((y + h < bitmap.Height) && !IsMarkerColor(bitmapData[x, y + h])) + { + h++; + } + + yield return new Rectangle(x, y, w, h); + } + } + } + } + } + + + // Checks whether a color is the magic magenta marker value. + static bool IsMarkerColor(Color color) + { + return color.ToArgb() == Color.Magenta.ToArgb(); + } + } +} diff --git a/Sdk/External/DirectXTK/MakeSpriteFont/BitmapUtils.cs b/Sdk/External/DirectXTK/MakeSpriteFont/BitmapUtils.cs new file mode 100644 index 0000000..e9a9f5e --- /dev/null +++ b/Sdk/External/DirectXTK/MakeSpriteFont/BitmapUtils.cs @@ -0,0 +1,240 @@ +// DirectXTK MakeSpriteFont tool +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 + +using System; +using System.Drawing; +using System.Drawing.Imaging; +using System.Runtime.InteropServices; + +namespace MakeSpriteFont +{ + // Assorted helpers for doing useful things with bitmaps. + public static class BitmapUtils + { + // Copies a rectangular area from one bitmap to another. 
+ public static void CopyRect(Bitmap source, Rectangle sourceRegion, Bitmap output, Rectangle outputRegion) + { + if (sourceRegion.Width != outputRegion.Width || + sourceRegion.Height != outputRegion.Height) + { + throw new ArgumentException(); + } + + using (var sourceData = new PixelAccessor(source, ImageLockMode.ReadOnly, sourceRegion)) + using (var outputData = new PixelAccessor(output, ImageLockMode.WriteOnly, outputRegion)) + { + for (int y = 0; y < sourceRegion.Height; y++) + { + for (int x = 0; x < sourceRegion.Width; x++) + { + outputData[x, y] = sourceData[x, y]; + } + } + } + } + + + // Checks whether an area of a bitmap contains entirely the specified alpha value. + public static bool IsAlphaEntirely(byte expectedAlpha, Bitmap bitmap, Rectangle? region = null) + { + using (var bitmapData = new PixelAccessor(bitmap, ImageLockMode.ReadOnly, region)) + { + for (int y = 0; y < bitmapData.Region.Height; y++) + { + for (int x = 0; x < bitmapData.Region.Width; x++) + { + byte alpha = bitmapData[x, y].A; + + if (alpha != expectedAlpha) + return false; + } + } + } + + return true; + } + + + // Checks whether a bitmap contains entirely the specified RGB value. + public static bool IsRgbEntirely(Color expectedRgb, Bitmap bitmap) + { + using (var bitmapData = new PixelAccessor(bitmap, ImageLockMode.ReadOnly)) + { + for (int y = 0; y < bitmap.Height; y++) + { + for (int x = 0; x < bitmap.Width; x++) + { + Color color = bitmapData[x, y]; + + if (color.A == 0) + continue; + + if ((color.R != expectedRgb.R) || + (color.G != expectedRgb.G) || + (color.B != expectedRgb.B)) + { + return false; + } + } + } + } + + return true; + } + + + // Converts greyscale luminosity to alpha data. 
+ public static void ConvertGreyToAlpha(Bitmap bitmap) + { + using (var bitmapData = new PixelAccessor(bitmap, ImageLockMode.ReadWrite)) + { + for (int y = 0; y < bitmap.Height; y++) + { + for (int x = 0; x < bitmap.Width; x++) + { + Color color = bitmapData[x, y]; + + // Average the red, green and blue values to compute brightness. + int alpha = (color.R + color.G + color.B) / 3; + + bitmapData[x, y] = Color.FromArgb(alpha, 255, 255, 255); + } + } + } + } + + + // Converts a bitmap to premultiplied alpha format. + public static void PremultiplyAlpha(Bitmap bitmap) + { + using (var bitmapData = new PixelAccessor(bitmap, ImageLockMode.ReadWrite)) + { + for (int y = 0; y < bitmap.Height; y++) + { + for (int x = 0; x < bitmap.Width; x++) + { + Color color = bitmapData[x, y]; + + int a = color.A; + int r = color.R * a / 255; + int g = color.G * a / 255; + int b = color.B * a / 255; + + bitmapData[x, y] = Color.FromArgb(a, r, g, b); + } + } + } + } + + + // To avoid filtering artifacts when scaling or rotating fonts that do not use premultiplied alpha, + // make sure the one pixel border around each glyph contains the same RGB values as the edge of the + // glyph itself, but with zero alpha. This processing is an elaborate no-op when using premultiplied + // alpha, because the premultiply conversion will change the RGB of all such zero alpha pixels to black. + public static void PadBorderPixels(Bitmap bitmap, Rectangle region) + { + using (var bitmapData = new PixelAccessor(bitmap, ImageLockMode.ReadWrite)) + { + // Pad the top and bottom. + for (int x = region.Left; x < region.Right; x++) + { + CopyBorderPixel(bitmapData, x, region.Top, x, region.Top - 1); + CopyBorderPixel(bitmapData, x, region.Bottom - 1, x, region.Bottom); + } + + // Pad the left and right. 
+ for (int y = region.Top; y < region.Bottom; y++) + { + CopyBorderPixel(bitmapData, region.Left, y, region.Left - 1, y); + CopyBorderPixel(bitmapData, region.Right - 1, y, region.Right, y); + } + + // Pad the four corners. + CopyBorderPixel(bitmapData, region.Left, region.Top, region.Left - 1, region.Top - 1); + CopyBorderPixel(bitmapData, region.Right - 1, region.Top, region.Right, region.Top - 1); + CopyBorderPixel(bitmapData, region.Left, region.Bottom - 1, region.Left - 1, region.Bottom); + CopyBorderPixel(bitmapData, region.Right - 1, region.Bottom - 1, region.Right, region.Bottom); + } + } + + + // Copies a single pixel within a bitmap, preserving RGB but forcing alpha to zero. + static void CopyBorderPixel(PixelAccessor bitmapData, int sourceX, int sourceY, int destX, int destY) + { + Color color = bitmapData[sourceX, sourceY]; + + bitmapData[destX, destY] = Color.FromArgb(0, color); + } + + + // Converts a bitmap to the specified pixel format. + public static Bitmap ChangePixelFormat(Bitmap bitmap, PixelFormat format) + { + Rectangle bounds = new Rectangle(0, 0, bitmap.Width, bitmap.Height); + + return bitmap.Clone(bounds, format); + } + + + // Helper for locking a bitmap and efficiently reading or writing its pixels. + public sealed class PixelAccessor : IDisposable + { + // Constructor locks the bitmap. + public PixelAccessor(Bitmap bitmap, ImageLockMode mode, Rectangle? region = null) + { + this.bitmap = bitmap; + + this.Region = region.GetValueOrDefault(new Rectangle(0, 0, bitmap.Width, bitmap.Height)); + + this.data = bitmap.LockBits(Region, mode, PixelFormat.Format32bppArgb); + } + + + // Dispose unlocks the bitmap. + public void Dispose() + { + if (data != null) + { + bitmap.UnlockBits(data); + + data = null; + } + } + + + // Query what part of the bitmap is locked. + public Rectangle Region { get; private set; } + + + // Get or set a pixel value. 
+ public Color this[int x, int y] + { + get + { + return Color.FromArgb(Marshal.ReadInt32(PixelAddress(x, y))); + } + + set + { + Marshal.WriteInt32(PixelAddress(x, y), value.ToArgb()); + } + } + + + // Helper computes the address of the specified pixel. + IntPtr PixelAddress(int x, int y) + { + return data.Scan0 + (y * data.Stride) + (x * sizeof(int)); + } + + + // Fields. + Bitmap bitmap; + BitmapData data; + } + } +} diff --git a/Sdk/External/DirectXTK/MakeSpriteFont/CharacterRegion.cs b/Sdk/External/DirectXTK/MakeSpriteFont/CharacterRegion.cs new file mode 100644 index 0000000..e962605 --- /dev/null +++ b/Sdk/External/DirectXTK/MakeSpriteFont/CharacterRegion.cs @@ -0,0 +1,133 @@ +// DirectXTK MakeSpriteFont tool +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 + +using System; +using System.Linq; +using System.ComponentModel; +using System.Globalization; +using System.Collections.Generic; + +namespace MakeSpriteFont +{ + // Describes a range of consecutive characters that should be included in the font. + [TypeConverter(typeof(CharacterRegionTypeConverter))] + public class CharacterRegion + { + // Constructor. + public CharacterRegion(char start, char end) + { + if (start > end) + throw new ArgumentException(); + + this.Start = start; + this.End = end; + } + + + // Fields. + public char Start; + public char End; + + + // Enumerates all characters within the region. + public IEnumerable Characters + { + get + { + for (char c = Start; c <= End; c++) + { + yield return c; + } + } + } + + + // Flattens a list of character regions into a combined list of individual characters. + public static IEnumerable Flatten(IEnumerable regions) + { + if (regions.Any()) + { + // If we have any regions, flatten them and remove duplicates. + return regions.SelectMany(region => region.Characters).Distinct(); + } + else + { + // If no regions were specified, use the default. 
+ return defaultRegion.Characters; + } + } + + + // Default to just the base ASCII character set. + static CharacterRegion defaultRegion = new CharacterRegion(' ', '~'); + } + + + + // Custom type converter enables CommandLineParser to parse CharacterRegion command line options. + public class CharacterRegionTypeConverter : TypeConverter + { + public override bool CanConvertFrom(ITypeDescriptorContext context, Type sourceType) + { + return sourceType == typeof(string); + } + + + public override object ConvertFrom(ITypeDescriptorContext context, CultureInfo culture, object value) + { + // Input must be a string. + string source = value as string; + + if (string.IsNullOrEmpty(source)) + { + throw new ArgumentException(); + } + + // Supported input formats: + // A + // A-Z + // 32-127 + // 0x20-0x7F + + char[] split = source.Split('-') + .Select(ConvertCharacter) + .ToArray(); + + switch (split.Length) + { + case 1: + // Only a single character (eg. "a"). + return new CharacterRegion(split[0], split[0]); + + case 2: + // Range of characters (eg. "a-z"). + return new CharacterRegion(split[0], split[1]); + + default: + throw new ArgumentException(); + } + } + + + static char ConvertCharacter(string value) + { + if (value.Length == 1) + { + // Single character directly specifies a codepoint. + return value[0]; + } + else + { + // Otherwise it must be an integer (eg. "32" or "0x20"). + return (char)(int)intConverter.ConvertFromInvariantString(value); + } + } + + + static TypeConverter intConverter = TypeDescriptor.GetConverter(typeof(int)); + } +} diff --git a/Sdk/External/DirectXTK/MakeSpriteFont/CommandLineOptions.cs b/Sdk/External/DirectXTK/MakeSpriteFont/CommandLineOptions.cs new file mode 100644 index 0000000..a6d5f8b --- /dev/null +++ b/Sdk/External/DirectXTK/MakeSpriteFont/CommandLineOptions.cs @@ -0,0 +1,95 @@ +// DirectXTK MakeSpriteFont tool +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 + +using System.Collections.Generic; +using System.Drawing; + +namespace MakeSpriteFont +{ + // Available output texture formats. + public enum TextureFormat + { + Auto, + Rgba32, + Bgra4444, + CompressedMono, + } + + + // Feature levels + public enum FeatureLevel + { + FL9_1, + FL9_2, + FL9_3, + FL10_0, + FL10_1, + FL11_0, + FL11_1, + FL12_0, + FL12_1, + } + + + // Options telling the tool what to do. + public class CommandLineOptions + { + // Input can be either a system (TrueType) font or a specially marked bitmap file. + [CommandLineParser.Required] + public string SourceFont; + + + // Output spritefont binary. + [CommandLineParser.Required] + public string OutputFile; + + + // Which characters to include in the font (eg. "/CharacterRegion:0x20-0x7F /CharacterRegion:0x123") + [CommandLineParser.Name("CharacterRegion")] + public readonly List CharacterRegions = new List(); + + + // Fallback character used when asked to render a codepoint that is not + // included in the font. If zero, missing characters throw exceptions. + public readonly int DefaultCharacter = 0; + + + // Size and style for TrueType fonts (ignored when converting a bitmap font). + public float FontSize = 23; + + public FontStyle FontStyle = FontStyle.Regular; + + + // Spacing overrides. Zero is default spacing, negative closer together, positive further apart. + public float LineSpacing = 0; + public float CharacterSpacing = 0; + + + // Use smooth or sharp antialiasing mode for TrueType rasterization? + public bool Sharp = false; + + + // What format should the output texture be? + public TextureFormat TextureFormat = TextureFormat.Auto; + + + // By default, font textures use premultiplied alpha format. Set this if you want interpolative alpha instead. + public bool NoPremultiply = false; + + + // Dumps the generated sprite texture to a bitmap file (useful for debugging). 
+ public string DebugOutputSpriteSheet = null; + + + // Controls texture-size based warnings + public FeatureLevel FeatureLevel = FeatureLevel.FL9_1; + + + // For large fonts, the default tightest pack is too slow + public bool FastPack = false; + } +} diff --git a/Sdk/External/DirectXTK/MakeSpriteFont/CommandLineParser.cs b/Sdk/External/DirectXTK/MakeSpriteFont/CommandLineParser.cs new file mode 100644 index 0000000..a10e991 --- /dev/null +++ b/Sdk/External/DirectXTK/MakeSpriteFont/CommandLineParser.cs @@ -0,0 +1,249 @@ +// DirectXTK MakeSpriteFont tool +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 + +using System; +using System.IO; +using System.Linq; +using System.Collections; +using System.Collections.Generic; +using System.Diagnostics; +using System.Reflection; +using System.ComponentModel; + +namespace MakeSpriteFont +{ + // Reusable, reflection based helper for parsing commandline options. + public class CommandLineParser + { + object optionsObject; + + Queue requiredOptions = new Queue(); + Dictionary optionalOptions = new Dictionary(); + + List requiredUsageHelp = new List(); + List optionalUsageHelp = new List(); + + + // Constructor. + public CommandLineParser(object optionsObject) + { + this.optionsObject = optionsObject; + + // Reflect to find what commandline options are available. + foreach (FieldInfo field in optionsObject.GetType().GetFields()) + { + string fieldName = GetOptionName(field); + + if (GetAttribute(field) != null) + { + // Record a required option. + requiredOptions.Enqueue(field); + + requiredUsageHelp.Add(string.Format("<{0}>", fieldName)); + } + else + { + // Record an optional option. 
+ optionalOptions.Add(fieldName.ToLowerInvariant(), field); + + if (field.FieldType == typeof(bool)) + { + optionalUsageHelp.Add(string.Format("/{0}", fieldName)); + } + else + { + optionalUsageHelp.Add(string.Format("/{0}:value", fieldName)); + } + } + } + } + + + public bool ParseCommandLine(string[] args) + { + // Parse each argument in turn. + foreach (string arg in args) + { + if (!ParseArgument(arg.Trim())) + { + return false; + } + } + + // Make sure we got all the required options. + FieldInfo missingRequiredOption = requiredOptions.FirstOrDefault(field => !IsList(field) || GetList(field).Count == 0); + + if (missingRequiredOption != null) + { + ShowError("Missing argument '{0}'", GetOptionName(missingRequiredOption)); + return false; + } + + return true; + } + + + bool ParseArgument(string arg) + { + if (arg.StartsWith("/")) + { + // Parse an optional argument. + char[] separators = { ':' }; + + string[] split = arg.Substring(1).Split(separators, 2, StringSplitOptions.None); + + string name = split[0]; + string value = (split.Length > 1) ? split[1] : "true"; + + FieldInfo field; + + if (!optionalOptions.TryGetValue(name.ToLowerInvariant(), out field)) + { + ShowError("Unknown option '{0}'", name); + return false; + } + + return SetOption(field, value); + } + else + { + // Parse a required argument. + if (requiredOptions.Count == 0) + { + ShowError("Too many arguments"); + return false; + } + + FieldInfo field = requiredOptions.Peek(); + + if (!IsList(field)) + { + requiredOptions.Dequeue(); + } + + return SetOption(field, arg); + } + } + + + bool SetOption(FieldInfo field, string value) + { + try + { + if (IsList(field)) + { + // Append this value to a list of options. + GetList(field).Add(ChangeType(value, ListElementType(field))); + } + else + { + // Set the value of a single option. 
+ field.SetValue(optionsObject, ChangeType(value, field.FieldType)); + } + + return true; + } + catch + { + ShowError("Invalid value '{0}' for option '{1}'", value, GetOptionName(field)); + return false; + } + } + + + static object ChangeType(string value, Type type) + { + TypeConverter converter = TypeDescriptor.GetConverter(type); + + return converter.ConvertFromInvariantString(value); + } + + + static bool IsList(FieldInfo field) + { + return typeof(IList).IsAssignableFrom(field.FieldType); + } + + + IList GetList(FieldInfo field) + { + return (IList)field.GetValue(optionsObject); + } + + + static Type ListElementType(FieldInfo field) + { + var interfaces = from i in field.FieldType.GetInterfaces() + where i.IsGenericType && i.GetGenericTypeDefinition() == typeof(IEnumerable<>) + select i; + + return interfaces.First().GetGenericArguments()[0]; + } + + + static string GetOptionName(FieldInfo field) + { + var nameAttribute = GetAttribute(field); + + if (nameAttribute != null) + { + return nameAttribute.Name; + } + else + { + return field.Name; + } + } + + + void ShowError(string message, params object[] args) + { + string name = Path.GetFileNameWithoutExtension(Process.GetCurrentProcess().ProcessName); + + Console.Error.WriteLine(message, args); + Console.Error.WriteLine(); + Console.Error.WriteLine("Usage: {0} {1}", name, string.Join(" ", requiredUsageHelp)); + + if (optionalUsageHelp.Count > 0) + { + Console.Error.WriteLine(); + Console.Error.WriteLine("Options:"); + + foreach (string optional in optionalUsageHelp) + { + Console.Error.WriteLine(" {0}", optional); + } + } + } + + + static T GetAttribute(ICustomAttributeProvider provider) where T : Attribute + { + return provider.GetCustomAttributes(typeof(T), false).OfType().FirstOrDefault(); + } + + + // Used on optionsObject fields to indicate which options are required. 
+ [AttributeUsage(AttributeTargets.Field)] + public sealed class RequiredAttribute : Attribute + { + } + + + // Used on an optionsObject field to rename the corresponding commandline option. + [AttributeUsage(AttributeTargets.Field)] + public sealed class NameAttribute : Attribute + { + public NameAttribute(string name) + { + this.Name = name; + } + + public string Name { get; private set; } + } + } +} diff --git a/Sdk/External/DirectXTK/MakeSpriteFont/Glyph.cs b/Sdk/External/DirectXTK/MakeSpriteFont/Glyph.cs new file mode 100644 index 0000000..f738549 --- /dev/null +++ b/Sdk/External/DirectXTK/MakeSpriteFont/Glyph.cs @@ -0,0 +1,39 @@ +// DirectXTK MakeSpriteFont tool +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 + +using System.Drawing; + +namespace MakeSpriteFont +{ + // Represents a single character within a font. + public class Glyph + { + // Constructor. + public Glyph(char character, Bitmap bitmap, Rectangle? subrect = null) + { + this.Character = character; + this.Bitmap = bitmap; + this.Subrect = subrect.GetValueOrDefault(new Rectangle(0, 0, bitmap.Width, bitmap.Height)); + } + + + // Unicode codepoint. + public char Character; + + + // Glyph image data (may only use a portion of a larger bitmap). + public Bitmap Bitmap; + public Rectangle Subrect; + + + // Layout information. + public float XOffset; + public float YOffset; + + public float XAdvance; + } +} diff --git a/Sdk/External/DirectXTK/MakeSpriteFont/GlyphCropper.cs b/Sdk/External/DirectXTK/MakeSpriteFont/GlyphCropper.cs new file mode 100644 index 0000000..06efbdc --- /dev/null +++ b/Sdk/External/DirectXTK/MakeSpriteFont/GlyphCropper.cs @@ -0,0 +1,50 @@ +// DirectXTK MakeSpriteFont tool +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 + +using System.Drawing; + +namespace MakeSpriteFont +{ + // Crops unused space from around the edge of a glyph bitmap. + public static class GlyphCropper + { + public static void Crop(Glyph glyph) + { + // Crop the top. + while ((glyph.Subrect.Height > 1) && BitmapUtils.IsAlphaEntirely(0, glyph.Bitmap, new Rectangle(glyph.Subrect.X, glyph.Subrect.Y, glyph.Subrect.Width, 1))) + { + glyph.Subrect.Y++; + glyph.Subrect.Height--; + + glyph.YOffset++; + } + + // Crop the bottom. + while ((glyph.Subrect.Height > 1) && BitmapUtils.IsAlphaEntirely(0, glyph.Bitmap, new Rectangle(glyph.Subrect.X, glyph.Subrect.Bottom - 1, glyph.Subrect.Width, 1))) + { + glyph.Subrect.Height--; + } + + // Crop the left. + while ((glyph.Subrect.Width > 1) && BitmapUtils.IsAlphaEntirely(0, glyph.Bitmap, new Rectangle(glyph.Subrect.X, glyph.Subrect.Y, 1, glyph.Subrect.Height))) + { + glyph.Subrect.X++; + glyph.Subrect.Width--; + + glyph.XOffset++; + } + + // Crop the right. + while ((glyph.Subrect.Width > 1) && BitmapUtils.IsAlphaEntirely(0, glyph.Bitmap, new Rectangle(glyph.Subrect.Right - 1, glyph.Subrect.Y, 1, glyph.Subrect.Height))) + { + glyph.Subrect.Width--; + + glyph.XAdvance++; + } + } + } +} diff --git a/Sdk/External/DirectXTK/MakeSpriteFont/GlyphPacker.cs b/Sdk/External/DirectXTK/MakeSpriteFont/GlyphPacker.cs new file mode 100644 index 0000000..c30275b --- /dev/null +++ b/Sdk/External/DirectXTK/MakeSpriteFont/GlyphPacker.cs @@ -0,0 +1,281 @@ +// DirectXTK MakeSpriteFont tool +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 + +using System; +using System.Collections.Generic; +using System.Drawing; +using System.Drawing.Imaging; + +namespace MakeSpriteFont +{ + // Helper for arranging many small bitmaps onto a single larger surface. 
+ public static class GlyphPacker + { + public static Bitmap ArrangeGlyphsFast(Glyph[] sourceGlyphs) + { + // Build up a list of all the glyphs needing to be arranged. + List glyphs = new List(); + + int largestWidth = 1; + int largestHeight = 1; + + for (int i = 0; i < sourceGlyphs.Length; i++) + { + ArrangedGlyph glyph = new ArrangedGlyph(); + + glyph.Source = sourceGlyphs[i]; + + // Leave a one pixel border around every glyph in the output bitmap. + glyph.Width = sourceGlyphs[i].Subrect.Width + 2; + glyph.Height = sourceGlyphs[i].Subrect.Height + 2; + + if (glyph.Width > largestWidth) + largestWidth = glyph.Width; + + if (glyph.Height > largestHeight) + largestHeight = glyph.Height; + + glyphs.Add(glyph); + } + + // Work out how big the output bitmap should be. + int outputWidth = GuessOutputWidth(sourceGlyphs); + + // Place each glyph in a grid based on the largest glyph size + int curx = 0; + int cury = 0; + + for (int i = 0; i < glyphs.Count; i++) + { + glyphs[i].X = curx; + glyphs[i].Y = cury; + + curx += largestWidth; + + if (curx + largestWidth > outputWidth) + { + curx = 0; + cury += largestHeight; + } + } + + // Create the merged output bitmap. + int outputHeight = MakeValidTextureSize(cury + largestHeight, false); + + return CopyGlyphsToOutput(glyphs, outputWidth, outputHeight); + } + + public static Bitmap ArrangeGlyphs(Glyph[] sourceGlyphs) + { + // Build up a list of all the glyphs needing to be arranged. + List glyphs = new List(); + + for (int i = 0; i < sourceGlyphs.Length; i++) + { + ArrangedGlyph glyph = new ArrangedGlyph(); + + glyph.Source = sourceGlyphs[i]; + + // Leave a one pixel border around every glyph in the output bitmap. + glyph.Width = sourceGlyphs[i].Subrect.Width + 2; + glyph.Height = sourceGlyphs[i].Subrect.Height + 2; + + glyphs.Add(glyph); + } + + // Sort so the largest glyphs get arranged first. + glyphs.Sort(CompareGlyphSizes); + + // Work out how big the output bitmap should be. 
+ int outputWidth = GuessOutputWidth(sourceGlyphs); + int outputHeight = 0; + + // Choose positions for each glyph, one at a time. + for (int i = 0; i < glyphs.Count; i++) + { + if (i > 0 && (i % 500) == 0) + { + Console.Write("."); + } + + PositionGlyph(glyphs, i, outputWidth); + + outputHeight = Math.Max(outputHeight, glyphs[i].Y + glyphs[i].Height); + } + + if (glyphs.Count >= 500) + { + Console.WriteLine(); + } + + // Create the merged output bitmap. + outputHeight = MakeValidTextureSize(outputHeight, false); + + return CopyGlyphsToOutput(glyphs, outputWidth, outputHeight); + } + + + // Once arranging is complete, copies each glyph to its chosen position in the single larger output bitmap. + static Bitmap CopyGlyphsToOutput(List glyphs, int width, int height) + { + Bitmap output = new Bitmap(width, height, PixelFormat.Format32bppArgb); + + int usedPixels = 0; + + foreach (ArrangedGlyph glyph in glyphs) + { + Glyph sourceGlyph = glyph.Source; + Rectangle sourceRegion = sourceGlyph.Subrect; + Rectangle destinationRegion = new Rectangle(glyph.X + 1, glyph.Y + 1, sourceRegion.Width, sourceRegion.Height); + + BitmapUtils.CopyRect(sourceGlyph.Bitmap, sourceRegion, output, destinationRegion); + + BitmapUtils.PadBorderPixels(output, destinationRegion); + + sourceGlyph.Bitmap = output; + sourceGlyph.Subrect = destinationRegion; + + usedPixels += (glyph.Width * glyph.Height); + } + + float utilization = ( (float)usedPixels / (float)(width * height) ) * 100; + + Console.WriteLine("Packing efficiency {0}%", utilization ); + + return output; + } + + + // Internal helper class keeps track of a glyph while it is being arranged. + class ArrangedGlyph + { + public Glyph Source; + + public int X; + public int Y; + + public int Width; + public int Height; + } + + + // Works out where to position a single glyph. + static void PositionGlyph(List glyphs, int index, int outputWidth) + { + int x = 0; + int y = 0; + + while (true) + { + // Is this position free for us to use? 
+ int intersects = FindIntersectingGlyph(glyphs, index, x, y); + + if (intersects < 0) + { + glyphs[index].X = x; + glyphs[index].Y = y; + + return; + } + + // Skip past the existing glyph that we collided with. + x = glyphs[intersects].X + glyphs[intersects].Width; + + // If we ran out of room to move to the right, try the next line down instead. + if (x + glyphs[index].Width > outputWidth) + { + x = 0; + y++; + } + } + } + + + // Checks if a proposed glyph position collides with anything that we already arranged. + static int FindIntersectingGlyph(List glyphs, int index, int x, int y) + { + int w = glyphs[index].Width; + int h = glyphs[index].Height; + + for (int i = 0; i < index; i++) + { + if (glyphs[i].X >= x + w) + continue; + + if (glyphs[i].X + glyphs[i].Width <= x) + continue; + + if (glyphs[i].Y >= y + h) + continue; + + if (glyphs[i].Y + glyphs[i].Height <= y) + continue; + + return i; + } + + return -1; + } + + + // Comparison function for sorting glyphs by size. + static int CompareGlyphSizes(ArrangedGlyph a, ArrangedGlyph b) + { + const int heightWeight = 1024; + + int aSize = a.Height * heightWeight + a.Width; + int bSize = b.Height * heightWeight + b.Width; + + if (aSize != bSize) + return bSize.CompareTo(aSize); + else + return a.Source.Character.CompareTo(b.Source.Character); + } + + + // Heuristic guesses what might be a good output width for a list of glyphs. + static int GuessOutputWidth(Glyph[] sourceGlyphs) + { + int maxWidth = 0; + int totalSize = 0; + + foreach (Glyph glyph in sourceGlyphs) + { + maxWidth = Math.Max(maxWidth, glyph.Subrect.Width); + totalSize += glyph.Subrect.Width * glyph.Subrect.Height; + } + + int width = Math.Max((int)Math.Sqrt(totalSize), maxWidth); + + return MakeValidTextureSize(width, true); + } + + + // Rounds a value up to the next larger valid texture size. + static int MakeValidTextureSize(int value, bool requirePowerOfTwo) + { + // In case we want to DXT compress, make sure the size is a multiple of 4. 
+ const int blockSize = 4; + + if (requirePowerOfTwo) + { + // Round up to a power of two. + int powerOfTwo = blockSize; + + while (powerOfTwo < value) + powerOfTwo <<= 1; + + return powerOfTwo; + } + else + { + // Round up to the specified block size. + return (value + blockSize - 1) & ~(blockSize - 1); + } + } + } +} diff --git a/Sdk/External/DirectXTK/MakeSpriteFont/IFontImporter.cs b/Sdk/External/DirectXTK/MakeSpriteFont/IFontImporter.cs new file mode 100644 index 0000000..5af1d95 --- /dev/null +++ b/Sdk/External/DirectXTK/MakeSpriteFont/IFontImporter.cs @@ -0,0 +1,21 @@ +// DirectXTK MakeSpriteFont tool +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 + +using System.Collections.Generic; + +namespace MakeSpriteFont +{ + // Importer interface allows the conversion tool to support multiple source font formats. + public interface IFontImporter + { + void Import(CommandLineOptions options); + + IEnumerable Glyphs { get; } + + float LineSpacing { get; } + } +} diff --git a/Sdk/External/DirectXTK/MakeSpriteFont/MakeSpriteFont.csproj b/Sdk/External/DirectXTK/MakeSpriteFont/MakeSpriteFont.csproj new file mode 100644 index 0000000..c35145f --- /dev/null +++ b/Sdk/External/DirectXTK/MakeSpriteFont/MakeSpriteFont.csproj @@ -0,0 +1,60 @@ + + + + Debug + AnyCPU + 8.0.30703 + 2.0 + {7329B02D-C504-482A-A156-181D48CE493C} + Exe + Properties + MakeSpriteFont + MakeSpriteFont + v4.0 + Client + 512 + + + true + bin\Debug\ + DEBUG;TRACE + full + AnyCPU + prompt + + + bin\Release\ + TRACE + true + pdbonly + AnyCPU + prompt + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/MakeSpriteFont/Program.cs b/Sdk/External/DirectXTK/MakeSpriteFont/Program.cs new file mode 100644 index 0000000..98e39ed --- /dev/null +++ b/Sdk/External/DirectXTK/MakeSpriteFont/Program.cs @@ -0,0 +1,183 @@ +// DirectXTK MakeSpriteFont tool +// +// 
Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 + +using System; +using System.IO; +using System.Linq; +using System.Drawing; + +namespace MakeSpriteFont +{ + public class Program + { + public static int Main(string[] args) + { + // Parse the commandline options. + var options = new CommandLineOptions(); + var parser = new CommandLineParser(options); + + if (!parser.ParseCommandLine(args)) + return 1; + + try + { + // Convert the font. + MakeSpriteFont(options); + + return 0; + } + catch (Exception e) + { + // Print an error message if conversion failed. + Console.WriteLine(); + Console.Error.WriteLine("Error: {0}", e.Message); + + return 1; + } + } + + + static void MakeSpriteFont(CommandLineOptions options) + { + // Import. + Console.WriteLine("Importing {0}", options.SourceFont); + + float lineSpacing; + + Glyph[] glyphs = ImportFont(options, out lineSpacing); + + Console.WriteLine("Captured {0} glyphs", glyphs.Length); + + // Optimize. + Console.WriteLine("Cropping glyph borders"); + + foreach (Glyph glyph in glyphs) + { + GlyphCropper.Crop(glyph); + } + + Console.WriteLine("Packing glyphs into sprite sheet"); + + Bitmap bitmap; + + if (options.FastPack) + { + bitmap = GlyphPacker.ArrangeGlyphsFast(glyphs); + } + else + { + bitmap = GlyphPacker.ArrangeGlyphs(glyphs); + } + + // Emit texture size warning based on known Feature Level limits. 
+ if (bitmap.Width > 16384 || bitmap.Height > 16384) + { + Console.WriteLine("WARNING: Resulting texture is too large for all known Feature Levels (9.1 - 12.1)"); + } + else if (bitmap.Width > 8192 || bitmap.Height > 8192) + { + if (options.FeatureLevel < FeatureLevel.FL11_0) + { + Console.WriteLine("WARNING: Resulting texture requires a Feature Level 11.0 or later device."); + } + } + else if (bitmap.Width > 4096 || bitmap.Height > 4096) + { + if (options.FeatureLevel < FeatureLevel.FL10_0) + { + Console.WriteLine("WARNING: Resulting texture requires a Feature Level 10.0 or later device."); + } + } + else if (bitmap.Width > 2048 || bitmap.Height > 2048) + { + if (options.FeatureLevel < FeatureLevel.FL9_3) + { + Console.WriteLine("WARNING: Resulting texture requires a Feature Level 9.3 or later device."); + } + } + + // Adjust line and character spacing. + lineSpacing += options.LineSpacing; + + foreach (Glyph glyph in glyphs) + { + glyph.XAdvance += options.CharacterSpacing; + } + + // Automatically detect whether this is a monochromatic or color font? + if (options.TextureFormat == TextureFormat.Auto) + { + bool isMono = BitmapUtils.IsRgbEntirely(Color.White, bitmap); + + options.TextureFormat = isMono ? TextureFormat.CompressedMono : + TextureFormat.Rgba32; + } + + // Convert to premultiplied alpha format. + if (!options.NoPremultiply) + { + Console.WriteLine("Premultiplying alpha"); + + BitmapUtils.PremultiplyAlpha(bitmap); + } + + // Save output files. 
+ if (!string.IsNullOrEmpty(options.DebugOutputSpriteSheet)) + { + Console.WriteLine("Saving debug output spritesheet {0}", options.DebugOutputSpriteSheet); + + bitmap.Save(options.DebugOutputSpriteSheet); + } + + Console.WriteLine("Writing {0} ({1} format)", options.OutputFile, options.TextureFormat); + + SpriteFontWriter.WriteSpriteFont(options, glyphs, lineSpacing, bitmap); + } + + + static Glyph[] ImportFont(CommandLineOptions options, out float lineSpacing) + { + // Which importer knows how to read this source font? + IFontImporter importer; + + string fileExtension = Path.GetExtension(options.SourceFont).ToLowerInvariant(); + + string[] BitmapFileExtensions = { ".bmp", ".png", ".gif" }; + + if (BitmapFileExtensions.Contains(fileExtension)) + { + importer = new BitmapImporter(); + } + else + { + importer = new TrueTypeImporter(); + } + + // Import the source font data. + importer.Import(options); + + lineSpacing = importer.LineSpacing; + + var glyphs = importer.Glyphs + .OrderBy(glyph => glyph.Character) + .ToArray(); + + // Validate. + if (glyphs.Length == 0) + { + throw new Exception("Font does not contain any glyphs."); + } + + if ((options.DefaultCharacter != 0) && !glyphs.Any(glyph => glyph.Character == options.DefaultCharacter)) + { + throw new Exception("The specified DefaultCharacter is not part of this font."); + } + + return glyphs; + } + } +} diff --git a/Sdk/External/DirectXTK/MakeSpriteFont/Properties/AssemblyInfo.cs b/Sdk/External/DirectXTK/MakeSpriteFont/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..8ef5762 --- /dev/null +++ b/Sdk/External/DirectXTK/MakeSpriteFont/Properties/AssemblyInfo.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
// Assembly-level metadata for the MakeSpriteFont command-line tool.

// Descriptive attributes.
[assembly: AssemblyTitle("MakeSpriteFont")]
[assembly: AssemblyDescription(".spritefont creator command-line tool")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("Microsoft Corporation")]
[assembly: AssemblyProduct("DirectX Tool Kit")]
[assembly: AssemblyCopyright("Copyright (c) Microsoft Corporation. All rights reserved")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

// The types in this assembly are not exposed to COM; the Guid identifies the
// assembly's type library should that ever change.
[assembly: ComVisible(false)]
[assembly: Guid("12c0da00-f622-41f2-ab8f-1b4e19aa2a6f")]

// Assembly and file version numbers.
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
+ public static class SpriteFontWriter + { + const string spriteFontMagic = "DXTKfont"; + + const int DXGI_FORMAT_R8G8B8A8_UNORM = 28; + const int DXGI_FORMAT_B4G4R4A4_UNORM = 115; + const int DXGI_FORMAT_BC2_UNORM = 74; + + + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Usage", "CA2202:Do not dispose objects multiple times")] + public static void WriteSpriteFont(CommandLineOptions options, Glyph[] glyphs, float lineSpacing, Bitmap bitmap) + { + using (FileStream file = File.OpenWrite(options.OutputFile)) + using (BinaryWriter writer = new BinaryWriter(file)) + { + WriteMagic(writer); + WriteGlyphs(writer, glyphs); + + writer.Write(lineSpacing); + writer.Write(options.DefaultCharacter); + + WriteBitmap(writer, options, bitmap); + } + } + + + static void WriteMagic(BinaryWriter writer) + { + foreach (char magic in spriteFontMagic) + { + writer.Write((byte)magic); + } + } + + + static void WriteGlyphs(BinaryWriter writer, Glyph[] glyphs) + { + writer.Write(glyphs.Length); + + foreach (Glyph glyph in glyphs) + { + writer.Write((int)glyph.Character); + + writer.Write(glyph.Subrect.Left); + writer.Write(glyph.Subrect.Top); + writer.Write(glyph.Subrect.Right); + writer.Write(glyph.Subrect.Bottom); + + writer.Write(glyph.XOffset); + writer.Write(glyph.YOffset); + writer.Write(glyph.XAdvance); + } + } + + + static void WriteBitmap(BinaryWriter writer, CommandLineOptions options, Bitmap bitmap) + { + writer.Write(bitmap.Width); + writer.Write(bitmap.Height); + + switch (options.TextureFormat) + { + case TextureFormat.Rgba32: + WriteRgba32(writer, bitmap); + break; + + case TextureFormat.Bgra4444: + WriteBgra4444(writer, bitmap); + break; + + case TextureFormat.CompressedMono: + WriteCompressedMono(writer, bitmap, options); + break; + + default: + throw new NotSupportedException(); + } + } + + + // Writes an uncompressed 32 bit font texture. 
+ static void WriteRgba32(BinaryWriter writer, Bitmap bitmap) + { + writer.Write(DXGI_FORMAT_R8G8B8A8_UNORM); + + writer.Write(bitmap.Width * 4); + writer.Write(bitmap.Height); + + using (var bitmapData = new BitmapUtils.PixelAccessor(bitmap, ImageLockMode.ReadOnly)) + { + for (int y = 0; y < bitmap.Height; y++) + { + for (int x = 0; x < bitmap.Width; x++) + { + Color color = bitmapData[x, y]; + + writer.Write(color.R); + writer.Write(color.G); + writer.Write(color.B); + writer.Write(color.A); + } + } + } + } + + + // Writes a 16 bit font texture. + static void WriteBgra4444(BinaryWriter writer, Bitmap bitmap) + { + writer.Write(DXGI_FORMAT_B4G4R4A4_UNORM); + + writer.Write(bitmap.Width * sizeof(ushort)); + writer.Write(bitmap.Height); + + using (var bitmapData = new BitmapUtils.PixelAccessor(bitmap, ImageLockMode.ReadOnly)) + { + for (int y = 0; y < bitmap.Height; y++) + { + for (int x = 0; x < bitmap.Width; x++) + { + Color color = bitmapData[x, y]; + + int r = color.R >> 4; + int g = color.G >> 4; + int b = color.B >> 4; + int a = color.A >> 4; + + int packed = b | (g << 4) | (r << 8) | (a << 12); + + writer.Write((ushort)packed); + } + } + } + } + + + // Writes a block compressed monochromatic font texture. + static void WriteCompressedMono(BinaryWriter writer, Bitmap bitmap, CommandLineOptions options) + { + if ((bitmap.Width & 3) != 0 || + (bitmap.Height & 3) != 0) + { + throw new ArgumentException("Block compression requires texture size to be a multiple of 4."); + } + + writer.Write(DXGI_FORMAT_BC2_UNORM); + + writer.Write(bitmap.Width * 4); + writer.Write(bitmap.Height / 4); + + using (var bitmapData = new BitmapUtils.PixelAccessor(bitmap, ImageLockMode.ReadOnly)) + { + for (int y = 0; y < bitmap.Height; y += 4) + { + for (int x = 0; x < bitmap.Width; x += 4) + { + CompressBlock(writer, bitmapData, x, y, options); + } + } + } + } + + + // We want to compress our font textures, because, like, smaller is better, + // right? 
But a standard DXT compressor doesn't do a great job with fonts that + // are in premultiplied alpha format. Our font data is greyscale, so all of the + // RGBA channels have the same value. If one channel is compressed differently + // to another, this causes an ugly variation in brightness of the rendered text. + // Also, fonts are mostly either black or white, with grey values only used for + // antialiasing along their edges. It is very important that the black and white + // areas be accurately represented, while the precise value of grey is less + // important. + // + // Trouble is, your average DXT compressor knows nothing about these + // requirements. It will optimize to minimize a generic error metric such as + // RMS, but this will often sacrifice crisp black and white in exchange for + // needless accuracy of the antialiasing pixels, or encode RGB differently to + // alpha. UGLY! + // + // Fortunately, encoding monochrome fonts turns out to be trivial. Using DXT3, + // we can fix the end colors as black and white, which gives guaranteed exact + // encoding of the font inside and outside, plus two fractional values for edge + // antialiasing. Also, these RGB values (0, 1/3, 2/3, 1) map exactly to four of + // the possible 16 alpha values available in DXT3, so we can ensure the RGB and + // alpha channels always exactly match. + + static void CompressBlock(BinaryWriter writer, BitmapUtils.PixelAccessor bitmapData, int blockX, int blockY, CommandLineOptions options) + { + long alphaBits = 0; + int rgbBits = 0; + + int pixelCount = 0; + + for (int y = 0; y < 4; y++) + { + for (int x = 0; x < 4; x++) + { + long alpha; + int rgb; + + int value = bitmapData[blockX + x, blockY + y].A; + + if (options.NoPremultiply) + { + // If we are not premultiplied, RGB is always white and we have 4 bit alpha. + alpha = value >> 4; + rgb = 0; + } + else + { + // For premultiplied encoding, quantize the source value to 2 bit precision. 
+ if (value < 256 / 6) + { + alpha = 0; + rgb = 1; + } + else if (value < 256 / 2) + { + alpha = 5; + rgb = 3; + } + else if (value < 256 * 5 / 6) + { + alpha = 10; + rgb = 2; + } + else + { + alpha = 15; + rgb = 0; + } + } + + // Add this pixel to the alpha and RGB bit masks. + alphaBits |= alpha << (pixelCount * 4); + rgbBits |= rgb << (pixelCount * 2); + + pixelCount++; + } + } + + // Output the alpha bit mask. + writer.Write(alphaBits); + + // Output the two endpoint colors (black and white in 5.6.5 format). + writer.Write((ushort)0xFFFF); + writer.Write((ushort)0); + + // Output the RGB bit mask. + writer.Write(rgbBits); + } + } +} diff --git a/Sdk/External/DirectXTK/MakeSpriteFont/TrueTypeImporter.cs b/Sdk/External/DirectXTK/MakeSpriteFont/TrueTypeImporter.cs new file mode 100644 index 0000000..24fed59 --- /dev/null +++ b/Sdk/External/DirectXTK/MakeSpriteFont/TrueTypeImporter.cs @@ -0,0 +1,248 @@ +// DirectXTK MakeSpriteFont tool +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 + +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Drawing; +using System.Drawing.Drawing2D; +using System.Drawing.Imaging; +using System.Drawing.Text; +using System.Runtime.InteropServices; + +namespace MakeSpriteFont +{ + // Uses System.Drawing (aka GDI+) to rasterize TrueType fonts into a series of glyph bitmaps. + public class TrueTypeImporter : IFontImporter + { + // Properties hold the imported font data. + public IEnumerable Glyphs { get; private set; } + + public float LineSpacing { get; private set; } + + + // Size of the temp surface used for GDI+ rasterization. + const int MaxGlyphSize = 1024; + + + public void Import(CommandLineOptions options) + { + // Create a bunch of GDI+ objects. 
+ using (Font font = CreateFont(options)) + using (Brush brush = new SolidBrush(Color.White)) + using (StringFormat stringFormat = new StringFormat(StringFormatFlags.NoFontFallback)) + using (Bitmap bitmap = new Bitmap(MaxGlyphSize, MaxGlyphSize, PixelFormat.Format32bppArgb)) + using (Graphics graphics = Graphics.FromImage(bitmap)) + { + graphics.PixelOffsetMode = options.Sharp ? PixelOffsetMode.None : PixelOffsetMode.HighQuality; + graphics.InterpolationMode = InterpolationMode.HighQualityBicubic; + graphics.TextRenderingHint = TextRenderingHint.AntiAliasGridFit; + + // Which characters do we want to include? + var characters = CharacterRegion.Flatten(options.CharacterRegions); + + var glyphList = new List(); + + // Rasterize each character in turn. + int count = 0; + + foreach (char character in characters) + { + ++count; + + if (count == 500) + { + if (!options.FastPack) + { + Console.WriteLine("WARNING: capturing a large font. This may take a long time to complete and could result in too large a texture. Consider using /FastPack."); + } + Console.Write("."); + } + else if ((count % 500) == 0) + { + Console.Write("."); + } + + Glyph glyph = ImportGlyph(character, font, brush, stringFormat, bitmap, graphics); + + glyphList.Add(glyph); + } + + if (count > 500) + { + Console.WriteLine(); + } + + Glyphs = glyphList; + + // Store the font height. + LineSpacing = font.GetHeight(); + } + } + + + // Attempts to instantiate the requested GDI+ font object. + static Font CreateFont(CommandLineOptions options) + { + Font font = new Font(options.SourceFont, PointsToPixels(options.FontSize), options.FontStyle, GraphicsUnit.Pixel); + + try + { + // The font constructor automatically substitutes fonts if it can't find the one requested. + // But we prefer the caller to know if anything is wrong with their data. A simple string compare + // isn't sufficient because some fonts (eg. MS Mincho) change names depending on the locale. 
+ + // Early out: in most cases the name will match the current or invariant culture. + if (options.SourceFont.Equals(font.FontFamily.GetName(CultureInfo.CurrentCulture.LCID), StringComparison.OrdinalIgnoreCase) || + options.SourceFont.Equals(font.FontFamily.GetName(CultureInfo.InvariantCulture.LCID), StringComparison.OrdinalIgnoreCase)) + { + return font; + } + + // Check the font name in every culture. + foreach (CultureInfo culture in CultureInfo.GetCultures(CultureTypes.SpecificCultures)) + { + if (options.SourceFont.Equals(font.FontFamily.GetName(culture.LCID), StringComparison.OrdinalIgnoreCase)) + { + return font; + } + } + + // A font substitution must have occurred. + throw new Exception(string.Format("Can't find font '{0}'.", options.SourceFont)); + } + catch + { + font.Dispose(); + throw; + } + } + + + // Converts a font size from points to pixels. Can't just let GDI+ do this for us, + // because we want identical results on every machine regardless of system DPI settings. + static float PointsToPixels(float points) + { + return points * 96 / 72; + } + + + // Rasterizes a single character glyph. + static Glyph ImportGlyph(char character, Font font, Brush brush, StringFormat stringFormat, Bitmap bitmap, Graphics graphics) + { + string characterString = character.ToString(); + + // Measure the size of this character. + SizeF size = graphics.MeasureString(characterString, font, Point.Empty, stringFormat); + + int characterWidth = (int)Math.Ceiling(size.Width); + int characterHeight = (int)Math.Ceiling(size.Height); + + // Pad to make sure we capture any overhangs (negative ABC spacing, etc.) 
        // Queries ABC spacing for the specified character.
        //
        // Returns the sum of the A, B and C widths reported by the native
        // GetCharABCWidthsFloat call, or null when that call reports failure.
        // The device context, the native font handle and the previously selected
        // GDI object are all released/restored in finally blocks, innermost first.
        static float? GetCharacterWidth(char character, Font font, Graphics graphics)
        {
            // Look up the native device context and font handles.
            IntPtr hdc = graphics.GetHdc();

            try
            {
                IntPtr hFont = font.ToHfont();

                try
                {
                    // Select our font into the DC.
                    IntPtr oldFont = NativeMethods.SelectObject(hdc, hFont);

                    try
                    {
                        // Query the character spacing.
                        var result = new NativeMethods.ABCFloat[1];

                        // First and last character are the same, so exactly one entry is filled.
                        if (NativeMethods.GetCharABCWidthsFloat(hdc, character, character, result))
                        {
                            return result[0].A +
                                   result[0].B +
                                   result[0].C;
                        }
                        else
                        {
                            return null;
                        }
                    }
                    finally
                    {
                        // Put back whatever object was selected before ours.
                        NativeMethods.SelectObject(hdc, oldFont);
                    }
                }
                finally
                {
                    // Free the native font handle created by ToHfont.
                    NativeMethods.DeleteObject(hFont);
                }
            }
            finally
            {
                // Hand the device context back to the Graphics object.
                graphics.ReleaseHdc(hdc);
            }
        }
        // P/Invoke declarations for the gdi32.dll functions used by GetCharacterWidth.
        static class NativeMethods
        {
            // Selects hObject into the device context; the return value is passed
            // back to SelectObject later to restore the previous selection.
            [DllImport("gdi32.dll")]
            public static extern IntPtr SelectObject(IntPtr hdc, IntPtr hObject);

            // Deletes a GDI object handle.
            [DllImport("gdi32.dll")]
            public static extern bool DeleteObject(IntPtr hObject);

            // Fills lpABCF with the ABC widths of the characters iFirstChar..iLastChar
            // for the font currently selected into hdc. Returns false on failure.
            [DllImport("gdi32.dll", CharSet = CharSet.Unicode)]
            public static extern bool GetCharABCWidthsFloat(IntPtr hdc, uint iFirstChar, uint iLastChar, [Out] ABCFloat[] lpABCF);


            // Receives the A, B and C widths of one character as three sequential floats.
            [StructLayout(LayoutKind.Sequential)]
            public struct ABCFloat
            {
                public float A;
                public float B;
                public float C;
            }
        }
+ +## Directory Layout + +* ``Inc\`` + + + Public Header Files (in the DirectX C++ namespace): + + * Audio.h - low-level audio API using XAudio2 (DirectXTK for Audio public header) + * BufferHelpers.h - C++ helpers for creating D3D resources from CPU data + * CommonStates.h - factory providing commonly used D3D state objects + * DDSTextureLoader.h - light-weight DDS file texture loader + * DirectXHelpers.h - misc C++ helpers for D3D programming + * Effects.h - set of built-in shaders for common rendering tasks + * GamePad.h - gamepad controller helper using XInput + * GeometricPrimitive.h - draws basic shapes such as cubes and spheres + * GraphicsMemory.h - helper for managing dynamic graphics memory allocation + * Keyboard.h - keyboard state tracking helper + * Model.h - draws meshes loaded from .CMO, .SDKMESH, or .VBO files + * Mouse.h - mouse helper + * PostProcess.h - set of built-in shaders for common post-processing operations + * PrimitiveBatch.h - simple and efficient way to draw user primitives + * ScreenGrab.h - light-weight screen shot saver + * SimpleMath.h - simplified C++ wrapper for DirectXMath + * SpriteBatch.h - simple & efficient 2D sprite rendering + * SpriteFont.h - bitmap based text rendering + * VertexTypes.h - structures for commonly used vertex data formats + * WICTextureLoader.h - WIC-based image file texture loader + * XboxDDSTextureLoader.h - Xbox One exclusive apps variant of DDSTextureLoader + +* ``Src\`` + + + DirectXTK source files and internal implementation headers + +* ``Audio\`` + + + DirectXTK for Audio source files and internal implementation headers + +* ``MakeSpriteFont\`` + + + Command line tool used to generate binary resources for use with SpriteFont + +* ``XWBTool\`` + + + Command line tool for building XACT-style wave banks for use with DirectXTK for Audio's WaveBank class + +# Documentation + +Documentation is available on the [GitHub wiki](https://github.com/Microsoft/DirectXTK/wiki). 
+ +## Notices + +All content and source code for this package are subject to the terms of the [MIT License](http://opensource.org/licenses/MIT). + +For the latest version of DirectXTK, bug reports, etc. please visit the project site on [GitHub](https://github.com/microsoft/DirectXTK). + +## Release Notes + +* Starting with the June 2020 release, this library makes use of typed enum bitmask flags per the recommendation of the _C++ Standard_ section *17.5.2.1.3 Bitmask types*. This may have *breaking change* impacts to client code: + + * You cannot pass the ``0`` literal as your flags value. Instead you must make use of the appropriate default enum value: ``AudioEngine_Default``, ``SoundEffectInstance_Default``, ``ModelLoader_Clockwise``, or ``WIC_LOADER_DEFAULT``. + + * Use the enum type instead of ``DWORD`` if building up flags values locally with bitmask operations. For example, ```WIC_LOADER_FLAGS flags = WIC_LOADER_DEFAULT; if (...) flags |= WIC_LOADER_FORCE_SRGB;``` + +* The UWP projects and the VS 2019 Win10 classic desktop project include configurations for the ARM64 platform. These require VS 2017 (15.9 update) or VS 2019 to build, with the ARM64 toolset installed. + +* The ``CompileShaders.cmd`` script must have Windows-style (CRLF) line-endings. If it is changed to Linux-style (LF) line-endings, it can fail to build all the required shaders. + +## Contributing + +This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. + +When you submit a pull request, a CLA bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA. 
+ +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. + +## Trademarks + +This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party's policies. diff --git a/Sdk/External/DirectXTK/SECURITY.md b/Sdk/External/DirectXTK/SECURITY.md new file mode 100644 index 0000000..f7b8998 --- /dev/null +++ b/Sdk/External/DirectXTK/SECURITY.md @@ -0,0 +1,41 @@ + + +## Security + +Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). + +If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 
+ +## Reporting Security Issues + +**Please do not report security vulnerabilities through public GitHub issues.** + +Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). + +If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). + +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). + +Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: + + * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) + * Full paths of source file(s) related to the manifestation of the issue + * The location of the affected source code (tag/branch/commit or direct URL) + * Any special configuration required to reproduce the issue + * Step-by-step instructions to reproduce the issue + * Proof-of-concept or exploit code (if possible) + * Impact of the issue, including how an attacker might exploit the issue + +This information will help us triage your report more quickly. + +If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. + +## Preferred Languages + +We prefer all communications to be in English. + +## Policy + +Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 
+ + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/Src/AlignedNew.h b/Sdk/External/DirectXTK/Src/AlignedNew.h new file mode 100644 index 0000000..5a0e9c5 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/AlignedNew.h @@ -0,0 +1,64 @@ +//-------------------------------------------------------------------------------------- +// File: AlignedNew.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#pragma once + +#include +#include + + +namespace DirectX +{ + // Derive from this to customize operator new and delete for + // types that have special heap alignment requirements. + // + // Example usage: + // + // __declspec(align(16)) struct MyAlignedType : public AlignedNew + + template + struct AlignedNew + { + // Allocate aligned memory. + static void* operator new (size_t size) + { + const size_t alignment = __alignof(TDerived); + + static_assert(alignment > 8, "AlignedNew is only useful for types with > 8 byte alignment. Did you forget a __declspec(align) on TDerived?"); + + void* ptr = _aligned_malloc(size, alignment); + + if (!ptr) + throw std::bad_alloc(); + + return ptr; + } + + + // Free aligned memory. + static void operator delete (void* ptr) + { + _aligned_free(ptr); + } + + + // Array overloads. 
+ static void* operator new[](size_t size) + { + return operator new(size); + } + + + static void operator delete[](void* ptr) + { + operator delete(ptr); + } + }; +} diff --git a/Sdk/External/DirectXTK/Src/AlphaTestEffect.cpp b/Sdk/External/DirectXTK/Src/AlphaTestEffect.cpp new file mode 100644 index 0000000..8e65cdd --- /dev/null +++ b/Sdk/External/DirectXTK/Src/AlphaTestEffect.cpp @@ -0,0 +1,444 @@ +//-------------------------------------------------------------------------------------- +// File: AlphaTestEffect.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "EffectCommon.h" + +using namespace DirectX; + +namespace +{ + // Constant buffer layout. Must match the shader! + struct AlphaTestEffectConstants + { + XMVECTOR diffuseColor; + XMVECTOR alphaTest; + XMVECTOR fogColor; + XMVECTOR fogVector; + XMMATRIX worldViewProj; + }; + + static_assert((sizeof(AlphaTestEffectConstants) % 16) == 0, "CB size not padded correctly"); + + + // Traits type describes our characteristics to the EffectBase template. + struct AlphaTestEffectTraits + { + using ConstantBufferType = AlphaTestEffectConstants; + + static constexpr int VertexShaderCount = 4; + static constexpr int PixelShaderCount = 4; + static constexpr int ShaderPermutationCount = 8; + }; +} + +// Internal AlphaTestEffect implementation class. +class AlphaTestEffect::Impl : public EffectBase +{ +public: + Impl(_In_ ID3D11Device* device); + + D3D11_COMPARISON_FUNC alphaFunction; + int referenceAlpha; + + bool vertexColorEnabled; + + EffectColor color; + + int GetCurrentShaderPermutation() const noexcept; + + void Apply(_In_ ID3D11DeviceContext* deviceContext); +}; + + +// Include the precompiled shader code. 
+namespace +{ +#if defined(_XBOX_ONE) && defined(_TITLE) + #include "Shaders/Compiled/XboxOneAlphaTestEffect_VSAlphaTest.inc" + #include "Shaders/Compiled/XboxOneAlphaTestEffect_VSAlphaTestNoFog.inc" + #include "Shaders/Compiled/XboxOneAlphaTestEffect_VSAlphaTestVc.inc" + #include "Shaders/Compiled/XboxOneAlphaTestEffect_VSAlphaTestVcNoFog.inc" + + #include "Shaders/Compiled/XboxOneAlphaTestEffect_PSAlphaTestLtGt.inc" + #include "Shaders/Compiled/XboxOneAlphaTestEffect_PSAlphaTestLtGtNoFog.inc" + #include "Shaders/Compiled/XboxOneAlphaTestEffect_PSAlphaTestEqNe.inc" + #include "Shaders/Compiled/XboxOneAlphaTestEffect_PSAlphaTestEqNeNoFog.inc" +#else + #include "Shaders/Compiled/AlphaTestEffect_VSAlphaTest.inc" + #include "Shaders/Compiled/AlphaTestEffect_VSAlphaTestNoFog.inc" + #include "Shaders/Compiled/AlphaTestEffect_VSAlphaTestVc.inc" + #include "Shaders/Compiled/AlphaTestEffect_VSAlphaTestVcNoFog.inc" + + #include "Shaders/Compiled/AlphaTestEffect_PSAlphaTestLtGt.inc" + #include "Shaders/Compiled/AlphaTestEffect_PSAlphaTestLtGtNoFog.inc" + #include "Shaders/Compiled/AlphaTestEffect_PSAlphaTestEqNe.inc" + #include "Shaders/Compiled/AlphaTestEffect_PSAlphaTestEqNeNoFog.inc" +#endif +} + + +template<> +const ShaderBytecode EffectBase::VertexShaderBytecode[] = +{ + { AlphaTestEffect_VSAlphaTest, sizeof(AlphaTestEffect_VSAlphaTest) }, + { AlphaTestEffect_VSAlphaTestNoFog, sizeof(AlphaTestEffect_VSAlphaTestNoFog) }, + { AlphaTestEffect_VSAlphaTestVc, sizeof(AlphaTestEffect_VSAlphaTestVc) }, + { AlphaTestEffect_VSAlphaTestVcNoFog, sizeof(AlphaTestEffect_VSAlphaTestVcNoFog) }, +}; + + +template<> +const int EffectBase::VertexShaderIndices[] = +{ + 0, // lt/gt + 1, // lt/gt, no fog + 2, // lt/gt, vertex color + 3, // lt/gt, vertex color, no fog + + 0, // eq/ne + 1, // eq/ne, no fog + 2, // eq/ne, vertex color + 3, // eq/ne, vertex color, no fog +}; + + +template<> +const ShaderBytecode EffectBase::PixelShaderBytecode[] = +{ + { AlphaTestEffect_PSAlphaTestLtGt, 
sizeof(AlphaTestEffect_PSAlphaTestLtGt) }, + { AlphaTestEffect_PSAlphaTestLtGtNoFog, sizeof(AlphaTestEffect_PSAlphaTestLtGtNoFog) }, + { AlphaTestEffect_PSAlphaTestEqNe, sizeof(AlphaTestEffect_PSAlphaTestEqNe) }, + { AlphaTestEffect_PSAlphaTestEqNeNoFog, sizeof(AlphaTestEffect_PSAlphaTestEqNeNoFog) }, +}; + + +template<> +const int EffectBase::PixelShaderIndices[] = +{ + 0, // lt/gt + 1, // lt/gt, no fog + 0, // lt/gt, vertex color + 1, // lt/gt, vertex color, no fog + + 2, // eq/ne + 3, // eq/ne, no fog + 2, // eq/ne, vertex color + 3, // eq/ne, vertex color, no fog +}; + + +// Global pool of per-device AlphaTestEffect resources. +template<> +SharedResourcePool::DeviceResources> EffectBase::deviceResourcesPool = {}; + + +// Constructor. +AlphaTestEffect::Impl::Impl(_In_ ID3D11Device* device) + : EffectBase(device), + alphaFunction(D3D11_COMPARISON_GREATER), + referenceAlpha(0), + vertexColorEnabled(false) +{ + static_assert(_countof(EffectBase::VertexShaderIndices) == AlphaTestEffectTraits::ShaderPermutationCount, "array/max mismatch"); + static_assert(_countof(EffectBase::VertexShaderBytecode) == AlphaTestEffectTraits::VertexShaderCount, "array/max mismatch"); + static_assert(_countof(EffectBase::PixelShaderBytecode) == AlphaTestEffectTraits::PixelShaderCount, "array/max mismatch"); + static_assert(_countof(EffectBase::PixelShaderIndices) == AlphaTestEffectTraits::ShaderPermutationCount, "array/max mismatch"); +} + + +int AlphaTestEffect::Impl::GetCurrentShaderPermutation() const noexcept +{ + int permutation = 0; + + // Use optimized shaders if fog is disabled. + if (!fog.enabled) + { + permutation += 1; + } + + // Support vertex coloring? + if (vertexColorEnabled) + { + permutation += 2; + } + + // Which alpha compare mode? + if (alphaFunction == D3D11_COMPARISON_EQUAL || + alphaFunction == D3D11_COMPARISON_NOT_EQUAL) + { + permutation += 4; + } + + return permutation; +} + + +// Sets our state onto the D3D device. 
+void AlphaTestEffect::Impl::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + // Compute derived parameter values. + matrices.SetConstants(dirtyFlags, constants.worldViewProj); + + fog.SetConstants(dirtyFlags, matrices.worldView, constants.fogVector); + + color.SetConstants(dirtyFlags, constants.diffuseColor); + + // Recompute the alpha test settings? + if (dirtyFlags & EffectDirtyFlags::AlphaTest) + { + // Convert reference alpha from 8 bit integer to 0-1 float format. + auto reference = static_cast(referenceAlpha) / 255.0f; + + // Comparison tolerance of half the 8 bit integer precision. + const float threshold = 0.5f / 255.0f; + + // What to do if the alpha comparison passes or fails. Positive accepts the pixel, negative clips it. + static const XMVECTORF32 selectIfTrue = { { { 1, -1 } } }; + static const XMVECTORF32 selectIfFalse = { { { -1, 1 } } }; + static const XMVECTORF32 selectNever = { { { -1, -1 } } }; + static const XMVECTORF32 selectAlways = { { { 1, 1 } } }; + + float compareTo; + XMVECTOR resultSelector; + + switch (alphaFunction) + { + case D3D11_COMPARISON_LESS: + // Shader will evaluate: clip((a < x) ? z : w) + compareTo = reference - threshold; + resultSelector = selectIfTrue; + break; + + case D3D11_COMPARISON_LESS_EQUAL: + // Shader will evaluate: clip((a < x) ? z : w) + compareTo = reference + threshold; + resultSelector = selectIfTrue; + break; + + case D3D11_COMPARISON_GREATER_EQUAL: + // Shader will evaluate: clip((a < x) ? z : w) + compareTo = reference - threshold; + resultSelector = selectIfFalse; + break; + + case D3D11_COMPARISON_GREATER: + // Shader will evaluate: clip((a < x) ? z : w) + compareTo = reference + threshold; + resultSelector = selectIfFalse; + break; + + case D3D11_COMPARISON_EQUAL: + // Shader will evaluate: clip((abs(a - x) < y) ? z : w) + compareTo = reference; + resultSelector = selectIfTrue; + break; + + case D3D11_COMPARISON_NOT_EQUAL: + // Shader will evaluate: clip((abs(a - x) < y) ? 
z : w) + compareTo = reference; + resultSelector = selectIfFalse; + break; + + case D3D11_COMPARISON_NEVER: + // Shader will evaluate: clip((a < x) ? z : w) + compareTo = 0; + resultSelector = selectNever; + break; + + case D3D11_COMPARISON_ALWAYS: + // Shader will evaluate: clip((a < x) ? z : w) + compareTo = 0; + resultSelector = selectAlways; + break; + + default: + throw std::exception("Unknown alpha test function"); + } + + // x = compareTo, y = threshold, zw = resultSelector. + constants.alphaTest = XMVectorPermute<0, 1, 4, 5>(XMVectorSet(compareTo, threshold, 0, 0), resultSelector); + + dirtyFlags &= ~EffectDirtyFlags::AlphaTest; + dirtyFlags |= EffectDirtyFlags::ConstantBuffer; + } + + // Set the texture. + ID3D11ShaderResourceView* textures[1] = { texture.Get() }; + + deviceContext->PSSetShaderResources(0, 1, textures); + + // Set shaders and constant buffers. + ApplyShaders(deviceContext, GetCurrentShaderPermutation()); +} + + +// Public constructor. +AlphaTestEffect::AlphaTestEffect(_In_ ID3D11Device* device) + : pImpl(std::make_unique(device)) +{ +} + + +// Move constructor. +AlphaTestEffect::AlphaTestEffect(AlphaTestEffect&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +AlphaTestEffect& AlphaTestEffect::operator= (AlphaTestEffect&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +AlphaTestEffect::~AlphaTestEffect() +{ +} + + +// IEffect methods. +void AlphaTestEffect::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + pImpl->Apply(deviceContext); +} + + +void AlphaTestEffect::GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) +{ + pImpl->GetVertexShaderBytecode(pImpl->GetCurrentShaderPermutation(), pShaderByteCode, pByteCodeLength); +} + + +// Camera settings. 
+void XM_CALLCONV AlphaTestEffect::SetWorld(FXMMATRIX value) +{ + pImpl->matrices.world = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose | EffectDirtyFlags::FogVector; +} + + +void XM_CALLCONV AlphaTestEffect::SetView(FXMMATRIX value) +{ + pImpl->matrices.view = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::EyePosition | EffectDirtyFlags::FogVector; +} + + +void XM_CALLCONV AlphaTestEffect::SetProjection(FXMMATRIX value) +{ + pImpl->matrices.projection = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj; +} + + +void XM_CALLCONV AlphaTestEffect::SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) +{ + pImpl->matrices.world = world; + pImpl->matrices.view = view; + pImpl->matrices.projection = projection; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose | EffectDirtyFlags::EyePosition | EffectDirtyFlags::FogVector; +} + + +// Material settings +void XM_CALLCONV AlphaTestEffect::SetDiffuseColor(FXMVECTOR value) +{ + pImpl->color.diffuseColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void AlphaTestEffect::SetAlpha(float value) +{ + pImpl->color.alpha = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void XM_CALLCONV AlphaTestEffect::SetColorAndAlpha(FXMVECTOR value) +{ + pImpl->color.diffuseColor = value; + pImpl->color.alpha = XMVectorGetW(value); + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +// Fog settings. 
+void AlphaTestEffect::SetFogEnabled(bool value) +{ + pImpl->fog.enabled = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::FogEnable; +} + + +void AlphaTestEffect::SetFogStart(float value) +{ + pImpl->fog.start = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::FogVector; +} + + +void AlphaTestEffect::SetFogEnd(float value) +{ + pImpl->fog.end = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::FogVector; +} + + +void XM_CALLCONV AlphaTestEffect::SetFogColor(FXMVECTOR value) +{ + pImpl->constants.fogColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +// Vertex color setting. +void AlphaTestEffect::SetVertexColorEnabled(bool value) +{ + pImpl->vertexColorEnabled = value; +} + + +// Texture settings. +void AlphaTestEffect::SetTexture(_In_opt_ ID3D11ShaderResourceView* value) +{ + pImpl->texture = value; +} + + +void AlphaTestEffect::SetAlphaFunction(D3D11_COMPARISON_FUNC value) +{ + pImpl->alphaFunction = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::AlphaTest; +} + + +void AlphaTestEffect::SetReferenceAlpha(int value) +{ + pImpl->referenceAlpha = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::AlphaTest; +} diff --git a/Sdk/External/DirectXTK/Src/BasicEffect.cpp b/Sdk/External/DirectXTK/Src/BasicEffect.cpp new file mode 100644 index 0000000..5108f5b --- /dev/null +++ b/Sdk/External/DirectXTK/Src/BasicEffect.cpp @@ -0,0 +1,731 @@ +//-------------------------------------------------------------------------------------- +// File: BasicEffect.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "EffectCommon.h" + +using namespace DirectX; + +namespace +{ + // Constant buffer layout. Must match the shader! 
+ struct BasicEffectConstants + { + XMVECTOR diffuseColor; + XMVECTOR emissiveColor; + XMVECTOR specularColorAndPower; + + XMVECTOR lightDirection[IEffectLights::MaxDirectionalLights]; + XMVECTOR lightDiffuseColor[IEffectLights::MaxDirectionalLights]; + XMVECTOR lightSpecularColor[IEffectLights::MaxDirectionalLights]; + + XMVECTOR eyePosition; + + XMVECTOR fogColor; + XMVECTOR fogVector; + + XMMATRIX world; + XMVECTOR worldInverseTranspose[3]; + XMMATRIX worldViewProj; + }; + + static_assert((sizeof(BasicEffectConstants) % 16) == 0, "CB size not padded correctly"); + + + // Traits type describes our characteristics to the EffectBase template. + struct BasicEffectTraits + { + using ConstantBufferType = BasicEffectConstants; + + static constexpr int VertexShaderCount = 32; + static constexpr int PixelShaderCount = 10; + static constexpr int ShaderPermutationCount = 56; + }; +} + +// Internal BasicEffect implementation class. +class BasicEffect::Impl : public EffectBase +{ +public: + Impl(_In_ ID3D11Device* device); + + bool lightingEnabled; + bool preferPerPixelLighting; + bool vertexColorEnabled; + bool textureEnabled; + bool biasedVertexNormals; + + EffectLights lights; + + int GetCurrentShaderPermutation() const noexcept; + + void Apply(_In_ ID3D11DeviceContext* deviceContext); +}; + + +// Include the precompiled shader code. 
+namespace +{ +#if defined(_XBOX_ONE) && defined(_TITLE) + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasic.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicNoFog.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicVc.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicVcNoFog.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicTx.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicTxNoFog.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicTxVc.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicTxVcNoFog.inc" + + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicVertexLighting.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicVertexLightingVc.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicVertexLightingTx.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicVertexLightingTxVc.inc" + + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicOneLight.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicOneLightVc.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicOneLightTx.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicOneLightTxVc.inc" + + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicPixelLighting.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicPixelLightingVc.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicPixelLightingTx.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicPixelLightingTxVc.inc" + + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicVertexLightingBn.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicVertexLightingVcBn.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicVertexLightingTxBn.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicVertexLightingTxVcBn.inc" + + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicOneLightBn.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicOneLightVcBn.inc" + #include 
"Shaders/Compiled/XboxOneBasicEffect_VSBasicOneLightTxBn.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicOneLightTxVcBn.inc" + + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicPixelLightingBn.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicPixelLightingVcBn.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicPixelLightingTxBn.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_VSBasicPixelLightingTxVcBn.inc" + + #include "Shaders/Compiled/XboxOneBasicEffect_PSBasic.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_PSBasicNoFog.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_PSBasicTx.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_PSBasicTxNoFog.inc" + + #include "Shaders/Compiled/XboxOneBasicEffect_PSBasicVertexLighting.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_PSBasicVertexLightingNoFog.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_PSBasicVertexLightingTx.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_PSBasicVertexLightingTxNoFog.inc" + + #include "Shaders/Compiled/XboxOneBasicEffect_PSBasicPixelLighting.inc" + #include "Shaders/Compiled/XboxOneBasicEffect_PSBasicPixelLightingTx.inc" +#else + #include "Shaders/Compiled/BasicEffect_VSBasic.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicNoFog.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicVc.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicVcNoFog.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicTx.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicTxNoFog.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicTxVc.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicTxVcNoFog.inc" + + #include "Shaders/Compiled/BasicEffect_VSBasicVertexLighting.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicVertexLightingVc.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicVertexLightingTx.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicVertexLightingTxVc.inc" + + #include "Shaders/Compiled/BasicEffect_VSBasicOneLight.inc" 
+ #include "Shaders/Compiled/BasicEffect_VSBasicOneLightVc.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicOneLightTx.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicOneLightTxVc.inc" + + #include "Shaders/Compiled/BasicEffect_VSBasicPixelLighting.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicPixelLightingVc.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicPixelLightingTx.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicPixelLightingTxVc.inc" + + #include "Shaders/Compiled/BasicEffect_VSBasicVertexLightingBn.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicVertexLightingVcBn.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicVertexLightingTxBn.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicVertexLightingTxVcBn.inc" + + #include "Shaders/Compiled/BasicEffect_VSBasicOneLightBn.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicOneLightVcBn.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicOneLightTxBn.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicOneLightTxVcBn.inc" + + #include "Shaders/Compiled/BasicEffect_VSBasicPixelLightingBn.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicPixelLightingVcBn.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicPixelLightingTxBn.inc" + #include "Shaders/Compiled/BasicEffect_VSBasicPixelLightingTxVcBn.inc" + + #include "Shaders/Compiled/BasicEffect_PSBasic.inc" + #include "Shaders/Compiled/BasicEffect_PSBasicNoFog.inc" + #include "Shaders/Compiled/BasicEffect_PSBasicTx.inc" + #include "Shaders/Compiled/BasicEffect_PSBasicTxNoFog.inc" + + #include "Shaders/Compiled/BasicEffect_PSBasicVertexLighting.inc" + #include "Shaders/Compiled/BasicEffect_PSBasicVertexLightingNoFog.inc" + #include "Shaders/Compiled/BasicEffect_PSBasicVertexLightingTx.inc" + #include "Shaders/Compiled/BasicEffect_PSBasicVertexLightingTxNoFog.inc" + + #include "Shaders/Compiled/BasicEffect_PSBasicPixelLighting.inc" + #include "Shaders/Compiled/BasicEffect_PSBasicPixelLightingTx.inc" +#endif +} + + +template<> 
+const ShaderBytecode EffectBase::VertexShaderBytecode[] = +{ + { BasicEffect_VSBasic, sizeof(BasicEffect_VSBasic) }, + { BasicEffect_VSBasicNoFog, sizeof(BasicEffect_VSBasicNoFog) }, + { BasicEffect_VSBasicVc, sizeof(BasicEffect_VSBasicVc) }, + { BasicEffect_VSBasicVcNoFog, sizeof(BasicEffect_VSBasicVcNoFog) }, + { BasicEffect_VSBasicTx, sizeof(BasicEffect_VSBasicTx) }, + { BasicEffect_VSBasicTxNoFog, sizeof(BasicEffect_VSBasicTxNoFog) }, + { BasicEffect_VSBasicTxVc, sizeof(BasicEffect_VSBasicTxVc) }, + { BasicEffect_VSBasicTxVcNoFog, sizeof(BasicEffect_VSBasicTxVcNoFog) }, + + { BasicEffect_VSBasicVertexLighting, sizeof(BasicEffect_VSBasicVertexLighting) }, + { BasicEffect_VSBasicVertexLightingVc, sizeof(BasicEffect_VSBasicVertexLightingVc) }, + { BasicEffect_VSBasicVertexLightingTx, sizeof(BasicEffect_VSBasicVertexLightingTx) }, + { BasicEffect_VSBasicVertexLightingTxVc, sizeof(BasicEffect_VSBasicVertexLightingTxVc) }, + + { BasicEffect_VSBasicOneLight, sizeof(BasicEffect_VSBasicOneLight) }, + { BasicEffect_VSBasicOneLightVc, sizeof(BasicEffect_VSBasicOneLightVc) }, + { BasicEffect_VSBasicOneLightTx, sizeof(BasicEffect_VSBasicOneLightTx) }, + { BasicEffect_VSBasicOneLightTxVc, sizeof(BasicEffect_VSBasicOneLightTxVc) }, + + { BasicEffect_VSBasicPixelLighting, sizeof(BasicEffect_VSBasicPixelLighting) }, + { BasicEffect_VSBasicPixelLightingVc, sizeof(BasicEffect_VSBasicPixelLightingVc) }, + { BasicEffect_VSBasicPixelLightingTx, sizeof(BasicEffect_VSBasicPixelLightingTx) }, + { BasicEffect_VSBasicPixelLightingTxVc, sizeof(BasicEffect_VSBasicPixelLightingTxVc) }, + + { BasicEffect_VSBasicVertexLightingBn, sizeof(BasicEffect_VSBasicVertexLightingBn) }, + { BasicEffect_VSBasicVertexLightingVcBn, sizeof(BasicEffect_VSBasicVertexLightingVcBn) }, + { BasicEffect_VSBasicVertexLightingTxBn, sizeof(BasicEffect_VSBasicVertexLightingTxBn) }, + { BasicEffect_VSBasicVertexLightingTxVcBn, sizeof(BasicEffect_VSBasicVertexLightingTxVcBn) }, + + { BasicEffect_VSBasicOneLightBn, 
sizeof(BasicEffect_VSBasicOneLightBn) }, + { BasicEffect_VSBasicOneLightVcBn, sizeof(BasicEffect_VSBasicOneLightVcBn) }, + { BasicEffect_VSBasicOneLightTxBn, sizeof(BasicEffect_VSBasicOneLightTxBn) }, + { BasicEffect_VSBasicOneLightTxVcBn, sizeof(BasicEffect_VSBasicOneLightTxVcBn) }, + + { BasicEffect_VSBasicPixelLightingBn, sizeof(BasicEffect_VSBasicPixelLightingBn) }, + { BasicEffect_VSBasicPixelLightingVcBn, sizeof(BasicEffect_VSBasicPixelLightingVcBn) }, + { BasicEffect_VSBasicPixelLightingTxBn, sizeof(BasicEffect_VSBasicPixelLightingTxBn) }, + { BasicEffect_VSBasicPixelLightingTxVcBn, sizeof(BasicEffect_VSBasicPixelLightingTxVcBn) }, +}; + + +template<> +const int EffectBase::VertexShaderIndices[] = +{ + 0, // basic + 1, // no fog + 2, // vertex color + 3, // vertex color, no fog + 4, // texture + 5, // texture, no fog + 6, // texture + vertex color + 7, // texture + vertex color, no fog + + 8, // vertex lighting + 8, // vertex lighting, no fog + 9, // vertex lighting + vertex color + 9, // vertex lighting + vertex color, no fog + 10, // vertex lighting + texture + 10, // vertex lighting + texture, no fog + 11, // vertex lighting + texture + vertex color + 11, // vertex lighting + texture + vertex color, no fog + + 12, // one light + 12, // one light, no fog + 13, // one light + vertex color + 13, // one light + vertex color, no fog + 14, // one light + texture + 14, // one light + texture, no fog + 15, // one light + texture + vertex color + 15, // one light + texture + vertex color, no fog + + 16, // pixel lighting + 16, // pixel lighting, no fog + 17, // pixel lighting + vertex color + 17, // pixel lighting + vertex color, no fog + 18, // pixel lighting + texture + 18, // pixel lighting + texture, no fog + 19, // pixel lighting + texture + vertex color + 19, // pixel lighting + texture + vertex color, no fog + + 20, // vertex lighting (biased vertex normals) + 20, // vertex lighting (biased vertex normals), no fog + 21, // vertex lighting (biased vertex 
normals) + vertex color + 21, // vertex lighting (biased vertex normals) + vertex color, no fog + 22, // vertex lighting (biased vertex normals) + texture + 22, // vertex lighting (biased vertex normals) + texture, no fog + 23, // vertex lighting (biased vertex normals) + texture + vertex color + 23, // vertex lighting (biased vertex normals) + texture + vertex color, no fog + + 24, // one light (biased vertex normals) + 24, // one light (biased vertex normals), no fog + 25, // one light (biased vertex normals) + vertex color + 25, // one light (biased vertex normals) + vertex color, no fog + 26, // one light (biased vertex normals) + texture + 26, // one light (biased vertex normals) + texture, no fog + 27, // one light (biased vertex normals) + texture + vertex color + 27, // one light (biased vertex normals) + texture + vertex color, no fog + + 28, // pixel lighting (biased vertex normals) + 28, // pixel lighting (biased vertex normals), no fog + 29, // pixel lighting (biased vertex normals) + vertex color + 29, // pixel lighting (biased vertex normals) + vertex color, no fog + 30, // pixel lighting (biased vertex normals) + texture + 30, // pixel lighting (biased vertex normals) + texture, no fog + 31, // pixel lighting (biased vertex normals) + texture + vertex color + 31, // pixel lighting (biased vertex normals) + texture + vertex color, no fog +}; + + +template<> +const ShaderBytecode EffectBase::PixelShaderBytecode[] = +{ + { BasicEffect_PSBasic, sizeof(BasicEffect_PSBasic) }, + { BasicEffect_PSBasicNoFog, sizeof(BasicEffect_PSBasicNoFog) }, + { BasicEffect_PSBasicTx, sizeof(BasicEffect_PSBasicTx) }, + { BasicEffect_PSBasicTxNoFog, sizeof(BasicEffect_PSBasicTxNoFog) }, + + { BasicEffect_PSBasicVertexLighting, sizeof(BasicEffect_PSBasicVertexLighting) }, + { BasicEffect_PSBasicVertexLightingNoFog, sizeof(BasicEffect_PSBasicVertexLightingNoFog) }, + { BasicEffect_PSBasicVertexLightingTx, sizeof(BasicEffect_PSBasicVertexLightingTx) }, + { 
BasicEffect_PSBasicVertexLightingTxNoFog, sizeof(BasicEffect_PSBasicVertexLightingTxNoFog) }, + + { BasicEffect_PSBasicPixelLighting, sizeof(BasicEffect_PSBasicPixelLighting) }, + { BasicEffect_PSBasicPixelLightingTx, sizeof(BasicEffect_PSBasicPixelLightingTx) }, +}; + + +template<> +const int EffectBase::PixelShaderIndices[] = +{ + 0, // basic + 1, // no fog + 0, // vertex color + 1, // vertex color, no fog + 2, // texture + 3, // texture, no fog + 2, // texture + vertex color + 3, // texture + vertex color, no fog + + 4, // vertex lighting + 5, // vertex lighting, no fog + 4, // vertex lighting + vertex color + 5, // vertex lighting + vertex color, no fog + 6, // vertex lighting + texture + 7, // vertex lighting + texture, no fog + 6, // vertex lighting + texture + vertex color + 7, // vertex lighting + texture + vertex color, no fog + + 4, // one light + 5, // one light, no fog + 4, // one light + vertex color + 5, // one light + vertex color, no fog + 6, // one light + texture + 7, // one light + texture, no fog + 6, // one light + texture + vertex color + 7, // one light + texture + vertex color, no fog + + 8, // pixel lighting + 8, // pixel lighting, no fog + 8, // pixel lighting + vertex color + 8, // pixel lighting + vertex color, no fog + 9, // pixel lighting + texture + 9, // pixel lighting + texture, no fog + 9, // pixel lighting + texture + vertex color + 9, // pixel lighting + texture + vertex color, no fog + + 4, // vertex lighting (biased vertex normals) + 5, // vertex lighting (biased vertex normals), no fog + 4, // vertex lighting (biased vertex normals) + vertex color + 5, // vertex lighting (biased vertex normals) + vertex color, no fog + 6, // vertex lighting (biased vertex normals) + texture + 7, // vertex lighting (biased vertex normals) + texture, no fog + 6, // vertex lighting (biased vertex normals) + texture + vertex color + 7, // vertex lighting (biased vertex normals) + texture + vertex color, no fog + + 4, // one light (biased vertex 
normals) + 5, // one light (biased vertex normals), no fog + 4, // one light (biased vertex normals) + vertex color + 5, // one light (biased vertex normals) + vertex color, no fog + 6, // one light (biased vertex normals) + texture + 7, // one light (biased vertex normals) + texture, no fog + 6, // one light (biased vertex normals) + texture + vertex color + 7, // one light (biased vertex normals) + texture + vertex color, no fog + + 8, // pixel lighting (biased vertex normals) + 8, // pixel lighting (biased vertex normals), no fog + 8, // pixel lighting (biased vertex normals) + vertex color + 8, // pixel lighting (biased vertex normals) + vertex color, no fog + 9, // pixel lighting (biased vertex normals) + texture + 9, // pixel lighting (biased vertex normals) + texture, no fog + 9, // pixel lighting (biased vertex normals) + texture + vertex color + 9, // pixel lighting (biased vertex normals) + texture + vertex color, no fog +}; + + +// Global pool of per-device BasicEffect resources. +template<> +SharedResourcePool::DeviceResources> EffectBase::deviceResourcesPool = {}; + + +// Constructor. 
+BasicEffect::Impl::Impl(_In_ ID3D11Device* device) + : EffectBase(device), + lightingEnabled(false), + preferPerPixelLighting(false), + vertexColorEnabled(false), + textureEnabled(false), + biasedVertexNormals(false) +{ + static_assert(_countof(EffectBase::VertexShaderIndices) == BasicEffectTraits::ShaderPermutationCount, "array/max mismatch"); + static_assert(_countof(EffectBase::VertexShaderBytecode) == BasicEffectTraits::VertexShaderCount, "array/max mismatch"); + static_assert(_countof(EffectBase::PixelShaderBytecode) == BasicEffectTraits::PixelShaderCount, "array/max mismatch"); + static_assert(_countof(EffectBase::PixelShaderIndices) == BasicEffectTraits::ShaderPermutationCount, "array/max mismatch"); + + lights.InitializeConstants(constants.specularColorAndPower, constants.lightDirection, constants.lightDiffuseColor, constants.lightSpecularColor); +} + + +int BasicEffect::Impl::GetCurrentShaderPermutation() const noexcept +{ + int permutation = 0; + + // Use optimized shaders if fog is disabled. + if (!fog.enabled) + { + permutation += 1; + } + + // Support vertex coloring? + if (vertexColorEnabled) + { + permutation += 2; + } + + // Support texturing? + if (textureEnabled) + { + permutation += 4; + } + + if (lightingEnabled) + { + if (preferPerPixelLighting) + { + // Do lighting in the pixel shader. + permutation += 24; + } + else if (!lights.lightEnabled[1] && !lights.lightEnabled[2]) + { + // Use the only-bother-with-the-first-light shader optimization. + permutation += 16; + } + else + { + // Compute all three lights in the vertex shader. + permutation += 8; + } + + if (biasedVertexNormals) + { + // Compressed normals need to be scaled and biased in the vertex shader. + permutation += 24; + } + } + + return permutation; +} + + +// Sets our state onto the D3D device. +void BasicEffect::Impl::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + // Compute derived parameter values. 
+ matrices.SetConstants(dirtyFlags, constants.worldViewProj); + + fog.SetConstants(dirtyFlags, matrices.worldView, constants.fogVector); + + lights.SetConstants(dirtyFlags, matrices, constants.world, constants.worldInverseTranspose, constants.eyePosition, constants.diffuseColor, constants.emissiveColor, lightingEnabled); + + // Set the texture. + if (textureEnabled) + { + ID3D11ShaderResourceView* textures[1] = { texture.Get() }; + + deviceContext->PSSetShaderResources(0, 1, textures); + } + + // Set shaders and constant buffers. + ApplyShaders(deviceContext, GetCurrentShaderPermutation()); +} + + +// Public constructor. +BasicEffect::BasicEffect(_In_ ID3D11Device* device) + : pImpl(std::make_unique(device)) +{ +} + + +// Move constructor. +BasicEffect::BasicEffect(BasicEffect&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +BasicEffect& BasicEffect::operator= (BasicEffect&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +BasicEffect::~BasicEffect() +{ +} + + +// IEffect methods. +void BasicEffect::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + pImpl->Apply(deviceContext); +} + + +void BasicEffect::GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) +{ + pImpl->GetVertexShaderBytecode(pImpl->GetCurrentShaderPermutation(), pShaderByteCode, pByteCodeLength); +} + + +// Camera settings. 
+void XM_CALLCONV BasicEffect::SetWorld(FXMMATRIX value) +{ + pImpl->matrices.world = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose | EffectDirtyFlags::FogVector; +} + + +void XM_CALLCONV BasicEffect::SetView(FXMMATRIX value) +{ + pImpl->matrices.view = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::EyePosition | EffectDirtyFlags::FogVector; +} + + +void XM_CALLCONV BasicEffect::SetProjection(FXMMATRIX value) +{ + pImpl->matrices.projection = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj; +} + + +void XM_CALLCONV BasicEffect::SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) +{ + pImpl->matrices.world = world; + pImpl->matrices.view = view; + pImpl->matrices.projection = projection; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose | EffectDirtyFlags::EyePosition | EffectDirtyFlags::FogVector; +} + + +// Material settings. +void XM_CALLCONV BasicEffect::SetDiffuseColor(FXMVECTOR value) +{ + pImpl->lights.diffuseColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void XM_CALLCONV BasicEffect::SetEmissiveColor(FXMVECTOR value) +{ + pImpl->lights.emissiveColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void XM_CALLCONV BasicEffect::SetSpecularColor(FXMVECTOR value) +{ + // Set xyz to new value, but preserve existing w (specular power). + pImpl->constants.specularColorAndPower = XMVectorSelect(pImpl->constants.specularColorAndPower, value, g_XMSelect1110); + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void BasicEffect::SetSpecularPower(float value) +{ + // Set w to new value, but preserve existing xyz (specular color). 
+ pImpl->constants.specularColorAndPower = XMVectorSetW(pImpl->constants.specularColorAndPower, value); + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void BasicEffect::DisableSpecular() +{ + // Set specular color to black, power to 1 + // Note: Don't use a power of 0 or the shader will generate strange highlights on non-specular materials + + pImpl->constants.specularColorAndPower = g_XMIdentityR3; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void BasicEffect::SetAlpha(float value) +{ + pImpl->lights.alpha = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void XM_CALLCONV BasicEffect::SetColorAndAlpha(FXMVECTOR value) +{ + pImpl->lights.diffuseColor = value; + pImpl->lights.alpha = XMVectorGetW(value); + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +// Light settings. +void BasicEffect::SetLightingEnabled(bool value) +{ + pImpl->lightingEnabled = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void BasicEffect::SetPerPixelLighting(bool value) +{ + pImpl->preferPerPixelLighting = value; +} + + +void XM_CALLCONV BasicEffect::SetAmbientLightColor(FXMVECTOR value) +{ + pImpl->lights.ambientLightColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void BasicEffect::SetLightEnabled(int whichLight, bool value) +{ + pImpl->dirtyFlags |= pImpl->lights.SetLightEnabled(whichLight, value, pImpl->constants.lightDiffuseColor, pImpl->constants.lightSpecularColor); +} + + +void XM_CALLCONV BasicEffect::SetLightDirection(int whichLight, FXMVECTOR value) +{ + EffectLights::ValidateLightIndex(whichLight); + + pImpl->constants.lightDirection[whichLight] = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void XM_CALLCONV BasicEffect::SetLightDiffuseColor(int whichLight, FXMVECTOR value) +{ + pImpl->dirtyFlags |= pImpl->lights.SetLightDiffuseColor(whichLight, value, pImpl->constants.lightDiffuseColor); +} + + +void 
XM_CALLCONV BasicEffect::SetLightSpecularColor(int whichLight, FXMVECTOR value) +{ + pImpl->dirtyFlags |= pImpl->lights.SetLightSpecularColor(whichLight, value, pImpl->constants.lightSpecularColor); +} + + +void BasicEffect::EnableDefaultLighting() +{ + EffectLights::EnableDefaultLighting(this); +} + + +// Fog settings. +void BasicEffect::SetFogEnabled(bool value) +{ + pImpl->fog.enabled = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::FogEnable; +} + + +void BasicEffect::SetFogStart(float value) +{ + pImpl->fog.start = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::FogVector; +} + + +void BasicEffect::SetFogEnd(float value) +{ + pImpl->fog.end = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::FogVector; +} + + +void XM_CALLCONV BasicEffect::SetFogColor(FXMVECTOR value) +{ + pImpl->constants.fogColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +// Vertex color setting. +void BasicEffect::SetVertexColorEnabled(bool value) +{ + pImpl->vertexColorEnabled = value; +} + + +// Texture settings. +void BasicEffect::SetTextureEnabled(bool value) +{ + pImpl->textureEnabled = value; +} + + +void BasicEffect::SetTexture(_In_opt_ ID3D11ShaderResourceView* value) +{ + pImpl->texture = value; +} + + +// Normal compression settings. +void BasicEffect::SetBiasedVertexNormals(bool value) +{ + pImpl->biasedVertexNormals = value; +} diff --git a/Sdk/External/DirectXTK/Src/BasicPostProcess.cpp b/Sdk/External/DirectXTK/Src/BasicPostProcess.cpp new file mode 100644 index 0000000..4d27519 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/BasicPostProcess.cpp @@ -0,0 +1,606 @@ +//-------------------------------------------------------------------------------------- +// File: BasicPostProcess.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "PostProcess.h" + +#include "BufferHelpers.h" +#include "CommonStates.h" +#include "DirectXHelpers.h" +#include "AlignedNew.h" +#include "DemandCreate.h" +#include "SharedResourcePool.h" + +using namespace DirectX; + +using Microsoft::WRL::ComPtr; + +namespace +{ + constexpr int c_MaxSamples = 16; + + constexpr int Dirty_ConstantBuffer = 0x01; + constexpr int Dirty_Parameters = 0x02; + + // Constant buffer layout. Must match the shader! + __declspec(align(16)) struct PostProcessConstants + { + XMVECTOR sampleOffsets[c_MaxSamples]; + XMVECTOR sampleWeights[c_MaxSamples]; + }; + + static_assert((sizeof(PostProcessConstants) % 16) == 0, "CB size not padded correctly"); + + // 2-parameter Gaussian distribution given standard deviation (rho) + inline float GaussianDistribution(float x, float y, float rho) noexcept + { + return expf(-(x * x + y * y) / (2 * rho * rho)) / sqrtf(2 * XM_PI * rho * rho); + } +} + +// Include the precompiled shader code. 
+namespace +{ +#if defined(_XBOX_ONE) && defined(_TITLE) + #include "Shaders/Compiled/XboxOnePostProcess_VSQuad.inc" + + #include "Shaders/Compiled/XboxOnePostProcess_PSCopy.inc" + #include "Shaders/Compiled/XboxOnePostProcess_PSMonochrome.inc" + #include "Shaders/Compiled/XboxOnePostProcess_PSSepia.inc" + #include "Shaders/Compiled/XboxOnePostProcess_PSDownScale2x2.inc" + #include "Shaders/Compiled/XboxOnePostProcess_PSDownScale4x4.inc" + #include "Shaders/Compiled/XboxOnePostProcess_PSGaussianBlur5x5.inc" + #include "Shaders/Compiled/XboxOnePostProcess_PSBloomExtract.inc" + #include "Shaders/Compiled/XboxOnePostProcess_PSBloomBlur.inc" +#else + #include "Shaders/Compiled/PostProcess_VSQuad.inc" + + #include "Shaders/Compiled/PostProcess_PSCopy.inc" + #include "Shaders/Compiled/PostProcess_PSMonochrome.inc" + #include "Shaders/Compiled/PostProcess_PSSepia.inc" + #include "Shaders/Compiled/PostProcess_PSDownScale2x2.inc" + #include "Shaders/Compiled/PostProcess_PSDownScale4x4.inc" + #include "Shaders/Compiled/PostProcess_PSGaussianBlur5x5.inc" + #include "Shaders/Compiled/PostProcess_PSBloomExtract.inc" + #include "Shaders/Compiled/PostProcess_PSBloomBlur.inc" +#endif +} + +namespace +{ + struct ShaderBytecode + { + void const* code; + size_t length; + }; + + const ShaderBytecode pixelShaders[] = + { + { PostProcess_PSCopy, sizeof(PostProcess_PSCopy) }, + { PostProcess_PSMonochrome, sizeof(PostProcess_PSMonochrome) }, + { PostProcess_PSSepia, sizeof(PostProcess_PSSepia) }, + { PostProcess_PSDownScale2x2, sizeof(PostProcess_PSDownScale2x2) }, + { PostProcess_PSDownScale4x4, sizeof(PostProcess_PSDownScale4x4) }, + { PostProcess_PSGaussianBlur5x5, sizeof(PostProcess_PSGaussianBlur5x5) }, + { PostProcess_PSBloomExtract, sizeof(PostProcess_PSBloomExtract) }, + { PostProcess_PSBloomBlur, sizeof(PostProcess_PSBloomBlur) }, + }; + + static_assert(_countof(pixelShaders) == BasicPostProcess::Effect_Max, "array/max mismatch"); + + // Factory for lazily instantiating shaders. 
+ class DeviceResources + { + public: + DeviceResources(_In_ ID3D11Device* device) + : stateObjects(device), + mDevice(device), + mVertexShader{}, + mPixelShaders{}, + mMutex{} + { } + + // Gets or lazily creates the vertex shader. + ID3D11VertexShader* GetVertexShader() + { + return DemandCreate(mVertexShader, mMutex, [&](ID3D11VertexShader** pResult) -> HRESULT + { + HRESULT hr = mDevice->CreateVertexShader(PostProcess_VSQuad, sizeof(PostProcess_VSQuad), nullptr, pResult); + + if (SUCCEEDED(hr)) + SetDebugObjectName(*pResult, "BasicPostProcess"); + + return hr; + }); + } + + // Gets or lazily creates the specified pixel shader. + ID3D11PixelShader* GetPixelShader(unsigned int shaderIndex) + { + assert(shaderIndex < BasicPostProcess::Effect_Max); + _Analysis_assume_(shaderIndex < BasicPostProcess::Effect_Max); + + return DemandCreate(mPixelShaders[shaderIndex], mMutex, [&](ID3D11PixelShader** pResult) -> HRESULT + { + HRESULT hr = mDevice->CreatePixelShader(pixelShaders[shaderIndex].code, pixelShaders[shaderIndex].length, nullptr, pResult); + + if (SUCCEEDED(hr)) + SetDebugObjectName(*pResult, "BasicPostProcess"); + + return hr; + }); + } + + CommonStates stateObjects; + + protected: + ComPtr mDevice; + ComPtr mVertexShader; + ComPtr mPixelShaders[BasicPostProcess::Effect_Max]; + std::mutex mMutex; + }; +} + +class BasicPostProcess::Impl : public AlignedNew +{ +public: + Impl(_In_ ID3D11Device* device); + + void Process(_In_ ID3D11DeviceContext* deviceContext, std::function& setCustomState); + + void SetConstants(bool value = true) noexcept { mUseConstants = value; mDirtyFlags = INT_MAX; } + void SetDirtyFlag() noexcept { mDirtyFlags = INT_MAX; } + + // Fields. 
+ PostProcessConstants constants; + BasicPostProcess::Effect fx; + ComPtr texture; + unsigned texWidth; + unsigned texHeight; + float guassianMultiplier; + float bloomSize; + float bloomBrightness; + float bloomThreshold; + bool bloomHorizontal; + +private: + bool mUseConstants; + int mDirtyFlags; + + void DownScale2x2(); + void DownScale4x4(); + void GaussianBlur5x5(float multiplier); + void Bloom(bool horizontal, float size, float brightness); + + ConstantBuffer mConstantBuffer; + + // Per-device resources. + std::shared_ptr mDeviceResources; + + static SharedResourcePool deviceResourcesPool; +}; + + +// Global pool of per-device BasicPostProcess resources. +SharedResourcePool BasicPostProcess::Impl::deviceResourcesPool; + + +// Constructor. +BasicPostProcess::Impl::Impl(_In_ ID3D11Device* device) + : constants{}, + fx(BasicPostProcess::Copy), + texWidth(0), + texHeight(0), + guassianMultiplier(1.f), + bloomSize(1.f), + bloomBrightness(1.f), + bloomThreshold(0.25f), + bloomHorizontal(true), + mUseConstants(false), + mDirtyFlags(INT_MAX), + mConstantBuffer(device), + mDeviceResources(deviceResourcesPool.DemandCreate(device)) +{ + if (device->GetFeatureLevel() < D3D_FEATURE_LEVEL_10_0) + { + throw std::exception("BasicPostProcess requires Feature Level 10.0 or later"); + } + + SetDebugObjectName(mConstantBuffer.GetBuffer(), "BasicPostProcess"); +} + + +// Sets our state onto the D3D device. +void BasicPostProcess::Impl::Process( + _In_ ID3D11DeviceContext* deviceContext, + std::function& setCustomState) +{ + // Set the texture. + ID3D11ShaderResourceView* textures[1] = { texture.Get() }; + deviceContext->PSSetShaderResources(0, 1, textures); + + auto sampler = mDeviceResources->stateObjects.LinearClamp(); + deviceContext->PSSetSamplers(0, 1, &sampler); + + // Set state objects. 
+ deviceContext->OMSetBlendState(mDeviceResources->stateObjects.Opaque(), nullptr, 0xffffffff); + deviceContext->OMSetDepthStencilState(mDeviceResources->stateObjects.DepthNone(), 0); + deviceContext->RSSetState(mDeviceResources->stateObjects.CullNone()); + + // Set shaders. + auto vertexShader = mDeviceResources->GetVertexShader(); + auto pixelShader = mDeviceResources->GetPixelShader(fx); + + deviceContext->VSSetShader(vertexShader, nullptr, 0); + deviceContext->PSSetShader(pixelShader, nullptr, 0); + + // Set constants. + if (mUseConstants) + { + if (mDirtyFlags & Dirty_Parameters) + { + mDirtyFlags &= ~Dirty_Parameters; + mDirtyFlags |= Dirty_ConstantBuffer; + + switch (fx) + { + case DownScale_2x2: + DownScale2x2(); + break; + + case DownScale_4x4: + DownScale4x4(); + break; + + case GaussianBlur_5x5: + GaussianBlur5x5(guassianMultiplier); + break; + + case BloomExtract: + constants.sampleWeights[0] = XMVectorReplicate(bloomThreshold); + break; + + case BloomBlur: + Bloom(bloomHorizontal, bloomSize, bloomBrightness); + break; + + default: + break; + } + } + +#if defined(_XBOX_ONE) && defined(_TITLE) + void *grfxMemory; + mConstantBuffer.SetData(deviceContext, constants, &grfxMemory); + + Microsoft::WRL::ComPtr deviceContextX; + ThrowIfFailed(deviceContext->QueryInterface(IID_GRAPHICS_PPV_ARGS(deviceContextX.GetAddressOf()))); + + auto buffer = mConstantBuffer.GetBuffer(); + + deviceContextX->PSSetPlacementConstantBuffer(0, buffer, grfxMemory); +#else + if (mDirtyFlags & Dirty_ConstantBuffer) + { + mDirtyFlags &= ~Dirty_ConstantBuffer; + mConstantBuffer.SetData(deviceContext, constants); + } + + // Set the constant buffer. + auto buffer = mConstantBuffer.GetBuffer(); + + deviceContext->PSSetConstantBuffers(0, 1, &buffer); +#endif + } + + if (setCustomState) + { + setCustomState(); + } + + // Draw quad. 
+ deviceContext->IASetInputLayout(nullptr); + deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + deviceContext->Draw(3, 0); +} + + +void BasicPostProcess::Impl::DownScale2x2() +{ + mUseConstants = true; + + if (!texWidth || !texHeight) + { + throw std::exception("Call SetSourceTexture before setting post-process effect"); + } + + float tu = 1.0f / float(texWidth); + float tv = 1.0f / float(texHeight); + + // Sample from the 4 surrounding points. Since the center point will be in the exact + // center of 4 texels, a 0.5f offset is needed to specify a texel center. + auto ptr = reinterpret_cast(constants.sampleOffsets); + for (int y = 0; y < 2; ++y) + { + for (int x = 0; x < 2; ++x) + { + ptr->x = (float(x) - 0.5f) * tu; + ptr->y = (float(y) - 0.5f) * tv; + ++ptr; + } + } +} + + +void BasicPostProcess::Impl::DownScale4x4() +{ + mUseConstants = true; + + if (!texWidth || !texHeight) + { + throw std::exception("Call SetSourceTexture before setting post-process effect"); + } + + float tu = 1.0f / float(texWidth); + float tv = 1.0f / float(texHeight); + + // Sample from the 16 surrounding points. Since the center point will be in the + // exact center of 16 texels, a 1.5f offset is needed to specify a texel center. 
+ auto ptr = reinterpret_cast(constants.sampleOffsets); + for (int y = 0; y < 4; ++y) + { + for (int x = 0; x < 4; ++x) + { + ptr->x = (float(x) - 1.5f) * tu; + ptr->y = (float(y) - 1.5f) * tv; + ++ptr; + } + } + +} + + +void BasicPostProcess::Impl::GaussianBlur5x5(float multiplier) +{ + mUseConstants = true; + + if (!texWidth || !texHeight) + { + throw std::exception("Call SetSourceTexture before setting post-process effect"); + } + + float tu = 1.0f / float(texWidth); + float tv = 1.0f / float(texHeight); + + float totalWeight = 0.0f; + size_t index = 0; + auto offsets = reinterpret_cast(constants.sampleOffsets); + auto weights = constants.sampleWeights; + for (int x = -2; x <= 2; ++x) + { + for (int y = -2; y <= 2; ++y) + { + // Exclude pixels with a block distance greater than 2. This will + // create a kernel which approximates a 5x5 kernel using only 13 + // sample points instead of 25; this is necessary since 2.0 shaders + // only support 16 texture grabs. + if (fabsf(float(x)) + fabsf(float(y)) > 2.0f) + continue; + + // Get the unscaled Gaussian intensity for this offset + offsets[index].x = float(x) * tu; + offsets[index].y = float(y) * tv; + offsets[index].z = 0.0f; + offsets[index].w = 0.0f; + + float g = GaussianDistribution(float(x), float(y), 1.0f); + weights[index] = XMVectorReplicate(g); + + totalWeight += XMVectorGetX(weights[index]); + + ++index; + } + } + + // Divide the current weight by the total weight of all the samples; Gaussian + // blur kernels add to 1.0f to ensure that the intensity of the image isn't + // changed when the blur occurs. An optional multiplier variable is used to + // add or remove image intensity during the blur. 
+ XMVECTOR vtw = XMVectorReplicate(totalWeight); + XMVECTOR vm = XMVectorReplicate(multiplier); + for (size_t i = 0; i < index; ++i) + { + weights[i] = XMVectorDivide(weights[i], vtw); + weights[i] = XMVectorMultiply(weights[i], vm); + } +} + + +void BasicPostProcess::Impl::Bloom(bool horizontal, float size, float brightness) +{ + mUseConstants = true; + + if (!texWidth || !texHeight) + { + throw std::exception("Call SetSourceTexture before setting post-process effect"); + } + + float tu = 0.f; + float tv = 0.f; + if (horizontal) + { + tu = 1.f / float(texWidth); + } + else + { + tv = 1.f / float(texHeight); + } + + auto weights = reinterpret_cast(constants.sampleWeights); + auto offsets = reinterpret_cast(constants.sampleOffsets); + + // Fill the center texel + float weight = brightness * GaussianDistribution(0, 0, size); + weights[0] = XMFLOAT4(weight, weight, weight, 1.0f); + offsets[0].x = offsets[0].y = offsets[0].z = offsets[0].w = 0.f; + + // Fill the first half + for (int i = 1; i < 8; ++i) + { + // Get the Gaussian intensity for this offset + weight = brightness * GaussianDistribution(float(i), 0, size); + weights[i] = XMFLOAT4(weight, weight, weight, 1.0f); + offsets[i] = XMFLOAT4(float(i) * tu, float(i) * tv, 0.f, 0.f); + } + + // Mirror to the second half + for (int i = 8; i < 15; i++) + { + weights[i] = weights[i - 7]; + offsets[i] = XMFLOAT4(-offsets[i - 7].x, -offsets[i - 7].y, 0.f, 0.f); + } +} + + +// Public constructor. +BasicPostProcess::BasicPostProcess(_In_ ID3D11Device* device) + : pImpl(std::make_unique(device)) +{ +} + + +// Move constructor. +BasicPostProcess::BasicPostProcess(BasicPostProcess&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +BasicPostProcess& BasicPostProcess::operator= (BasicPostProcess&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +BasicPostProcess::~BasicPostProcess() +{ +} + + +// IPostProcess methods. 
+void BasicPostProcess::Process( + _In_ ID3D11DeviceContext* deviceContext, + _In_opt_ std::function setCustomState) +{ + pImpl->Process(deviceContext, setCustomState); +} + + +// Shader control. +void BasicPostProcess::SetEffect(Effect fx) +{ + if (fx >= Effect_Max) + throw std::out_of_range("Effect not defined"); + + pImpl->fx = fx; + + switch (fx) + { + case Copy: + case Monochrome: + case Sepia: + // These shaders don't use the constant buffer + pImpl->SetConstants(false); + break; + + default: + pImpl->SetConstants(true); + break; + } +} + + +// Properties +void BasicPostProcess::SetSourceTexture(_In_opt_ ID3D11ShaderResourceView* value) +{ + pImpl->texture = value; + + if (value) + { + ComPtr res; + value->GetResource(res.GetAddressOf()); + + D3D11_RESOURCE_DIMENSION resType = D3D11_RESOURCE_DIMENSION_UNKNOWN; + res->GetType(&resType); + + switch (resType) + { + case D3D11_RESOURCE_DIMENSION_TEXTURE1D: + { + ComPtr tex; + ThrowIfFailed(res.As(&tex)); + + D3D11_TEXTURE1D_DESC desc = {}; + tex->GetDesc(&desc); + pImpl->texWidth = desc.Width; + pImpl->texHeight = 1; + break; + } + + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: + { + ComPtr tex; + ThrowIfFailed(res.As(&tex)); + + D3D11_TEXTURE2D_DESC desc = {}; + tex->GetDesc(&desc); + pImpl->texWidth = desc.Width; + pImpl->texHeight = desc.Height; + break; + } + + case D3D11_RESOURCE_DIMENSION_UNKNOWN: + case D3D11_RESOURCE_DIMENSION_BUFFER: + case D3D11_RESOURCE_DIMENSION_TEXTURE3D: + default: + throw std::exception("Unsupported texture type"); + } + } + else + { + pImpl->texWidth = pImpl->texHeight = 0; + } +} + + +void BasicPostProcess::SetGaussianParameter(float multiplier) +{ + pImpl->guassianMultiplier = multiplier; + pImpl->SetDirtyFlag(); +} + + +void BasicPostProcess::SetBloomExtractParameter(float threshold) +{ + pImpl->bloomThreshold = threshold; + pImpl->SetDirtyFlag(); +} + + +void BasicPostProcess::SetBloomBlurParameters(bool horizontal, float size, float brightness) +{ + pImpl->bloomSize = size; + 
pImpl->bloomBrightness = brightness; + pImpl->bloomHorizontal = horizontal; + pImpl->SetDirtyFlag(); +} diff --git a/Sdk/External/DirectXTK/Src/Bezier.h b/Sdk/External/DirectXTK/Src/Bezier.h new file mode 100644 index 0000000..367de97 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Bezier.h @@ -0,0 +1,196 @@ +//-------------------------------------------------------------------------------------- +// File: Bezier.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#pragma once + +#include +#include +#include + + +namespace Bezier +{ + // Performs a cubic bezier interpolation between four control points, + // returning the value at the specified time (t ranges 0 to 1). + template + inline T CubicInterpolate(T const& p1, T const& p2, T const& p3, T const& p4, float t) noexcept + { + return p1 * (1 - t) * (1 - t) * (1 - t) + + p2 * 3 * t * (1 - t) * (1 - t) + + p3 * 3 * t * t * (1 - t) + + p4 * t * t * t; + } + + template<> + inline DirectX::XMVECTOR CubicInterpolate(DirectX::XMVECTOR const& p1, DirectX::XMVECTOR const& p2, DirectX::XMVECTOR const& p3, DirectX::XMVECTOR const& p4, float t) noexcept + { + using namespace DirectX; + + XMVECTOR T0 = XMVectorReplicate((1 - t) * (1 - t) * (1 - t)); + XMVECTOR T1 = XMVectorReplicate(3 * t * (1 - t) * (1 - t)); + XMVECTOR T2 = XMVectorReplicate(3 * t * t * (1 - t)); + XMVECTOR T3 = XMVectorReplicate(t * t * t); + + XMVECTOR Result = XMVectorMultiply(p1, T0); + Result = XMVectorMultiplyAdd(p2, T1, Result); + Result = XMVectorMultiplyAdd(p3, T2, Result); + Result = XMVectorMultiplyAdd(p4, T3, Result); + + return Result; + } + + + // Computes the tangent of a cubic bezier curve at the specified time. 
+ template + inline T CubicTangent(T const& p1, T const& p2, T const& p3, T const& p4, float t) noexcept + { + using DirectX::operator*; + using DirectX::operator+; + + return p1 * (-1 + 2 * t - t * t) + + p2 * (1 - 4 * t + 3 * t * t) + + p3 * (2 * t - 3 * t * t) + + p4 * (t * t); + } + + template<> + inline DirectX::XMVECTOR CubicTangent(DirectX::XMVECTOR const& p1, DirectX::XMVECTOR const& p2, DirectX::XMVECTOR const& p3, DirectX::XMVECTOR const& p4, float t) noexcept + { + using namespace DirectX; + + XMVECTOR T0 = XMVectorReplicate(-1 + 2 * t - t * t); + XMVECTOR T1 = XMVectorReplicate(1 - 4 * t + 3 * t * t); + XMVECTOR T2 = XMVectorReplicate(2 * t - 3 * t * t); + XMVECTOR T3 = XMVectorReplicate(t * t); + + XMVECTOR Result = XMVectorMultiply(p1, T0); + Result = XMVectorMultiplyAdd(p2, T1, Result); + Result = XMVectorMultiplyAdd(p3, T2, Result); + Result = XMVectorMultiplyAdd(p4, T3, Result); + + return Result; + } + + + // Creates vertices for a patch that is tessellated at the specified level. + // Calls the specified outputVertex function for each generated vertex, + // passing the position, normal, and texture coordinate as parameters. + template + void CreatePatchVertices(_In_reads_(16) DirectX::XMVECTOR patch[16], size_t tessellation, bool isMirrored, TOutputFunc outputVertex) + { + using namespace DirectX; + + for (size_t i = 0; i <= tessellation; i++) + { + float u = float(i) / float(tessellation); + + for (size_t j = 0; j <= tessellation; j++) + { + float v = float(j) / float(tessellation); + + // Perform four horizontal bezier interpolations + // between the control points of this patch. 
+ XMVECTOR p1 = CubicInterpolate(patch[0], patch[1], patch[2], patch[3], u); + XMVECTOR p2 = CubicInterpolate(patch[4], patch[5], patch[6], patch[7], u); + XMVECTOR p3 = CubicInterpolate(patch[8], patch[9], patch[10], patch[11], u); + XMVECTOR p4 = CubicInterpolate(patch[12], patch[13], patch[14], patch[15], u); + + // Perform a vertical interpolation between the results of the + // previous horizontal interpolations, to compute the position. + XMVECTOR position = CubicInterpolate(p1, p2, p3, p4, v); + + // Perform another four bezier interpolations between the control + // points, but this time vertically rather than horizontally. + XMVECTOR q1 = CubicInterpolate(patch[0], patch[4], patch[8], patch[12], v); + XMVECTOR q2 = CubicInterpolate(patch[1], patch[5], patch[9], patch[13], v); + XMVECTOR q3 = CubicInterpolate(patch[2], patch[6], patch[10], patch[14], v); + XMVECTOR q4 = CubicInterpolate(patch[3], patch[7], patch[11], patch[15], v); + + // Compute vertical and horizontal tangent vectors. + XMVECTOR tangent1 = CubicTangent(p1, p2, p3, p4, v); + XMVECTOR tangent2 = CubicTangent(q1, q2, q3, q4, u); + + // Cross the two tangent vectors to compute the normal. + XMVECTOR normal = XMVector3Cross(tangent1, tangent2); + + if (!XMVector3NearEqual(normal, XMVectorZero(), g_XMEpsilon)) + { + normal = XMVector3Normalize(normal); + + // If this patch is mirrored, we must invert the normal. + if (isMirrored) + { + normal = XMVectorNegate(normal); + } + } + else + { + // In a tidy and well constructed bezier patch, the preceding + // normal computation will always work. But the classic teapot + // model is not tidy or well constructed! At the top and bottom + // of the teapot, it contains degenerate geometry where a patch + // has several control points in the same place, which causes + // the tangent computation to fail and produce a zero normal. 
+ // We 'fix' these cases by just hard-coding a normal that points + // either straight up or straight down, depending on whether we + // are on the top or bottom of the teapot. This is not a robust + // solution for all possible degenerate bezier patches, but hey, + // it's good enough to make the teapot work correctly! + + normal = XMVectorSelect(g_XMIdentityR1, g_XMNegIdentityR1, XMVectorLess(position, XMVectorZero())); + } + + // Compute the texture coordinate. + float mirroredU = isMirrored ? 1 - u : u; + + XMVECTOR textureCoordinate = XMVectorSet(mirroredU, v, 0, 0); + + // Output this vertex. + outputVertex(position, normal, textureCoordinate); + } + } + } + + + // Creates indices for a patch that is tessellated at the specified level. + // Calls the specified outputIndex function for each generated index value. + template + void CreatePatchIndices(size_t tessellation, bool isMirrored, TOutputFunc outputIndex) + { + size_t stride = tessellation + 1; + + for (size_t i = 0; i < tessellation; i++) + { + for (size_t j = 0; j < tessellation; j++) + { + // Make a list of six index values (two triangles). + std::array indices = + { + i * stride + j, + (i + 1) * stride + j, + (i + 1) * stride + j + 1, + + i * stride + j, + (i + 1) * stride + j + 1, + i * stride + j + 1, + }; + + // If this patch is mirrored, reverse indices to fix the winding order. + if (isMirrored) + { + std::reverse(indices.begin(), indices.end()); + } + + // Output these index values. + std::for_each(indices.begin(), indices.end(), outputIndex); + } + } + } +} diff --git a/Sdk/External/DirectXTK/Src/BinaryReader.cpp b/Sdk/External/DirectXTK/Src/BinaryReader.cpp new file mode 100644 index 0000000..6c7c5a7 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/BinaryReader.cpp @@ -0,0 +1,90 @@ +//-------------------------------------------------------------------------------------- +// File: BinaryReader.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+// http://go.microsoft.com/fwlink/?LinkID=615561
+//--------------------------------------------------------------------------------------
+
+#include "pch.h"
+
+#include "BinaryReader.h"
+
+#include <new>
+
+using namespace DirectX;
+
+
+// Constructor reads from the filesystem.
+// Loads the whole file into mOwnedData and positions the reader at its start.
+// Throws std::runtime_error if the file cannot be read.
+BinaryReader::BinaryReader(_In_z_ wchar_t const* fileName) noexcept(false) :
+    mPos(nullptr),
+    mEnd(nullptr)
+{
+    size_t dataSize = 0;
+
+    HRESULT hr = ReadEntireFile(fileName, mOwnedData, &dataSize);
+    if (FAILED(hr))
+    {
+        DebugTrace("ERROR: BinaryReader failed (%08X) to load '%ls'\n",
+            static_cast<unsigned int>(hr), fileName);
+
+        // std::exception has no portable (const char*) constructor; std::runtime_error
+        // still derives from std::exception, so existing handlers keep working.
+        throw std::runtime_error("BinaryReader");
+    }
+
+    mPos = mOwnedData.get();
+    mEnd = mOwnedData.get() + dataSize;
+}
+
+
+// Constructor reads from an existing memory buffer.
+// The buffer is not copied; the reader only keeps pointers into it.
+BinaryReader::BinaryReader(_In_reads_bytes_(dataSize) uint8_t const* dataBlob, size_t dataSize) noexcept :
+    mPos(dataBlob),
+    mEnd(dataBlob + dataSize)
+{
+}
+
+
+// Reads from the filesystem into memory.
+//
+// On success returns S_OK, 'data' owns a newly allocated copy of the file contents
+// and '*dataSize' receives the number of bytes read. On failure an error HRESULT is
+// returned and '*dataSize' is not written ('data' may already have been replaced).
+HRESULT BinaryReader::ReadEntireFile(_In_z_ wchar_t const* fileName, _Inout_ std::unique_ptr<uint8_t[]>& data, _Out_ size_t* dataSize)
+{
+    if (!fileName || !dataSize)
+        return E_INVALIDARG;
+
+    // Open the file.
+#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8)
+    ScopedHandle hFile(safe_handle(CreateFile2(fileName, GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, nullptr)));
+#else
+    ScopedHandle hFile(safe_handle(CreateFileW(fileName, GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr)));
+#endif
+
+    if (!hFile)
+        return HRESULT_FROM_WIN32(GetLastError());
+
+    // Get the file size.
+    FILE_STANDARD_INFO fileInfo;
+    if (!GetFileInformationByHandleEx(hFile.get(), FileStandardInfo, &fileInfo, sizeof(fileInfo)))
+    {
+        return HRESULT_FROM_WIN32(GetLastError());
+    }
+
+    // File is too big for 32-bit allocation, so reject read.
+    if (fileInfo.EndOfFile.HighPart > 0)
+        return E_FAIL;
+
+    // Create enough space for the file data.
+    // Plain new[] throws std::bad_alloc instead of returning null, which would make
+    // the E_OUTOFMEMORY branch below unreachable; the nothrow form keeps this
+    // function's HRESULT-based error reporting intact.
+    data.reset(new (std::nothrow) uint8_t[fileInfo.EndOfFile.LowPart]);
+
+    if (!data)
+        return E_OUTOFMEMORY;
+
+    // Read the data in.
+    DWORD bytesRead = 0;
+
+    if (!ReadFile(hFile.get(), data.get(), fileInfo.EndOfFile.LowPart, &bytesRead, nullptr))
+    {
+        return HRESULT_FROM_WIN32(GetLastError());
+    }
+
+    // A short read is treated as a failure.
+    if (bytesRead < fileInfo.EndOfFile.LowPart)
+        return E_FAIL;
+
+    *dataSize = bytesRead;
+
+    return S_OK;
+}
diff --git a/Sdk/External/DirectXTK/Src/BinaryReader.h b/Sdk/External/DirectXTK/Src/BinaryReader.h
new file mode 100644
index 0000000..97ee5ab
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/BinaryReader.h
@@ -0,0 +1,72 @@
+//--------------------------------------------------------------------------------------
+// File: BinaryReader.h
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+// http://go.microsoft.com/fwlink/?LinkID=615561
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <exception>
+#include <memory>
+#include <stdexcept>
+#include <type_traits>
+
+#include "PlatformHelpers.h"
+
+
+namespace DirectX
+{
+    // Helper for reading binary data, either from the filesystem or a memory buffer.
+    class BinaryReader
+    {
+    public:
+        explicit BinaryReader(_In_z_ wchar_t const* fileName) noexcept(false);
+        BinaryReader(_In_reads_bytes_(dataSize) uint8_t const* dataBlob, size_t dataSize) noexcept;
+
+        BinaryReader(BinaryReader const&) = delete;
+        BinaryReader& operator= (BinaryReader const&) = delete;
+
+        // Reads a single value.
+        // Returns a reference into the underlying buffer and advances past it.
+        template<typename T> T const& Read()
+        {
+            return *ReadArray<T>(1);
+        }
+
+
+        // Reads an array of values.
+        // Returns a pointer into the underlying buffer (no copy is made) and advances
+        // the read position past 'elementCount' elements of T.
+        // Throws std::overflow_error if the byte count does not fit in size_t, and
+        // std::runtime_error ("End of file") if fewer bytes remain than requested.
+        template<typename T> T const* ReadArray(size_t elementCount)
+        {
+            static_assert(std::is_trivial<T>::value && std::is_standard_layout<T>::value,
+                "Can only read plain-old-data types");
+
+            // Validate against the remaining byte count rather than forming
+            // 'mPos + bytes' first: 'sizeof(T) * elementCount' can wrap around, and
+            // pointer arithmetic beyond the end of the buffer is undefined behavior.
+            size_t const remaining = static_cast<size_t>(mEnd - mPos);
+
+            if (elementCount > remaining / sizeof(T))
+            {
+                if (elementCount > SIZE_MAX / sizeof(T))
+                    throw std::overflow_error("ReadArray");
+
+                throw std::runtime_error("End of file");
+            }
+
+            auto result = reinterpret_cast<T const*>(mPos);
+
+            mPos += sizeof(T) * elementCount;
+
+            return result;
+        }
+
+
+        // Lower level helper reads directly from the filesystem into memory.
+        static HRESULT ReadEntireFile(_In_z_ wchar_t const* fileName, _Inout_ std::unique_ptr<uint8_t[]>& data, _Out_ size_t* dataSize);
+
+
+    private:
+        // The data currently being read.
+        uint8_t const* mPos;
+        uint8_t const* mEnd;
+
+        // Backing storage when the reader was constructed from a file; the
+        // memory-buffer constructor leaves this empty.
+        std::unique_ptr<uint8_t[]> mOwnedData;
+    };
+}
diff --git a/Sdk/External/DirectXTK/Src/BufferHelpers.cpp b/Sdk/External/DirectXTK/Src/BufferHelpers.cpp
new file mode 100644
index 0000000..f865bc1
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/BufferHelpers.cpp
@@ -0,0 +1,393 @@
+//--------------------------------------------------------------------------------------
+// File: BufferHelpers.cpp
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#include "pch.h"
+#include "BufferHelpers.h"
+#include "PlatformHelpers.h"
+
+
+using namespace DirectX;
+using Microsoft::WRL::ComPtr;
+
+//--------------------------------------------------------------------------------------
+// Creates a D3D11_USAGE_DEFAULT buffer of count * stride bytes initialized from 'ptr'.
+// Returns E_INVALIDARG for null/zero arguments and ERROR_NOT_SUPPORTED (as an HRESULT)
+// when the requested size exceeds the Direct3D 11 resource size limit.
+_Use_decl_annotations_
+HRESULT DirectX::CreateStaticBuffer(
+    ID3D11Device* device,
+    const void* ptr,
+    size_t count,
+    size_t stride,
+    unsigned int bindFlags,
+    ID3D11Buffer** pBuffer) noexcept
+{
+    if (!pBuffer)
+        return E_INVALIDARG;
+
+    *pBuffer = nullptr;
+
+    if (!device || !ptr || !count || !stride)
+        return E_INVALIDARG;
+
+    // Computed in 64 bits so the product cannot wrap before the limit check.
+    uint64_t sizeInbytes = uint64_t(count) * uint64_t(stride);
+
+    static constexpr uint64_t c_maxBytes = D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM * 1024u * 1024u;
+    static_assert(c_maxBytes <= UINT32_MAX, "Exceeded integer limits");
+
+    if (sizeInbytes > c_maxBytes)
+    {
+        DebugTrace("ERROR: Resource size too large for DirectX 11 (size %llu)\n", sizeInbytes);
+        return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);
+    }
+
+    D3D11_BUFFER_DESC bufferDesc = {};
+    bufferDesc.ByteWidth = static_cast<UINT>(sizeInbytes);
+    bufferDesc.BindFlags = bindFlags;
+    bufferDesc.Usage = D3D11_USAGE_DEFAULT;
+
+    D3D11_SUBRESOURCE_DATA initData = { ptr, 0, 0 };
+
+    return device->CreateBuffer(&bufferDesc, &initData, pBuffer);
+}
+
+
+//--------------------------------------------------------------------------------------
+// Creates a single-mip 1D texture from 'initData' and, if requested, a shader
+// resource view for it. At least one of 'texture' / 'textureView' must be non-null.
+_Use_decl_annotations_
+HRESULT DirectX::CreateTextureFromMemory(
+    ID3D11Device* device,
+    size_t width,
+    DXGI_FORMAT format,
+    const D3D11_SUBRESOURCE_DATA& initData,
+    ID3D11Texture1D** texture,
+    ID3D11ShaderResourceView** textureView,
+    unsigned int bindFlags) noexcept
+{
+    if (texture)
+    {
+        *texture = nullptr;
+    }
+    if (textureView)
+    {
+        *textureView = nullptr;
+    }
+
+    if (!device || !width || !initData.pSysMem)
+        return E_INVALIDARG;
+
+    if (!texture && !textureView)
+        return E_INVALIDARG;
+
+    static_assert(D3D11_REQ_TEXTURE1D_U_DIMENSION <= UINT32_MAX, "Exceeded integer limits");
+
+    if (width > D3D11_REQ_TEXTURE1D_U_DIMENSION)
+    {
+        DebugTrace("ERROR: Resource dimensions too large for DirectX 11 (1D: size %zu)\n", width);
+        return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);
+    }
+
+    D3D11_TEXTURE1D_DESC desc = {};
+    desc.Width = static_cast<UINT>(width);
+    desc.MipLevels = desc.ArraySize = 1;
+    desc.Format = format;
+    desc.Usage = D3D11_USAGE_DEFAULT;
+    desc.BindFlags = bindFlags;
+
+    ComPtr<ID3D11Texture1D> tex;
+    HRESULT hr = device->CreateTexture1D(&desc, &initData, tex.GetAddressOf());
+    if (SUCCEEDED(hr))
+    {
+        if (textureView)
+        {
+            hr = device->CreateShaderResourceView(tex.Get(), nullptr, textureView);
+            if (FAILED(hr))
+                return hr;
+        }
+
+        // Ownership is handed to the caller only after every step has succeeded.
+        if (texture)
+        {
+            *texture = tex.Detach();
+        }
+    }
+
+    return hr;
+}
+
+// Creates a single-mip 2D texture from 'initData' and, if requested, a shader
+// resource view for it. At least one of 'texture' / 'textureView' must be non-null.
+_Use_decl_annotations_
+HRESULT DirectX::CreateTextureFromMemory(
+    ID3D11Device* device,
+    size_t width,
+    size_t height,
+    DXGI_FORMAT format,
+    const D3D11_SUBRESOURCE_DATA& initData,
+    ID3D11Texture2D** texture,
+    ID3D11ShaderResourceView** textureView,
+    unsigned int bindFlags) noexcept
+{
+    if (texture)
+    {
+        *texture = nullptr;
+    }
+    if (textureView)
+    {
+        *textureView = nullptr;
+    }
+
+    if (!device || !width || !height
+        || !initData.pSysMem || !initData.SysMemPitch)
+        return E_INVALIDARG;
+
+    if (!texture && !textureView)
+        return E_INVALIDARG;
+
+    static_assert(D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION <= UINT32_MAX, "Exceeded integer limits");
+
+    if ((width > D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION)
+        || (height > D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION))
+    {
+        DebugTrace("ERROR: Resource dimensions too large for DirectX 11 (2D: size %zu by %zu)\n", width, height);
+        return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);
+    }
+
+    D3D11_TEXTURE2D_DESC desc = {};
+    desc.Width = static_cast<UINT>(width);
+    desc.Height = static_cast<UINT>(height);
+    desc.MipLevels = desc.ArraySize = 1;
+    desc.Format = format;
+    desc.SampleDesc.Count = 1;
+    desc.Usage = D3D11_USAGE_DEFAULT;
+    desc.BindFlags = bindFlags;
+
+    ComPtr<ID3D11Texture2D> tex;
+    HRESULT hr = device->CreateTexture2D(&desc, &initData, tex.GetAddressOf());
+    if (SUCCEEDED(hr))
+    {
+        if (textureView)
+        {
+            hr = device->CreateShaderResourceView(tex.Get(), nullptr, textureView);
+            if (FAILED(hr))
+                return hr;
+        }
+
+        if (texture)
+        {
+            *texture = tex.Detach();
+        }
+    }
+
+    return hr;
+}
+
+
+// Creates a 2D texture from 'initData' and asks the device context to generate its
+// mip chain when the format supports automatic mip generation. When it does not,
+// a single-level texture is created directly from 'initData'.
+_Use_decl_annotations_
+HRESULT DirectX::CreateTextureFromMemory(
+#if defined(_XBOX_ONE) && defined(_TITLE)
+    _In_ ID3D11DeviceX* device,
+    _In_ ID3D11DeviceContextX* d3dContext,
+#else
+    _In_ ID3D11Device* device,
+    _In_ ID3D11DeviceContext* d3dContext,
+#endif
+    size_t width,
+    size_t height,
+    DXGI_FORMAT format,
+    const D3D11_SUBRESOURCE_DATA& initData,
+    ID3D11Texture2D** texture,
+    ID3D11ShaderResourceView** textureView) noexcept
+{
+    if (texture)
+    {
+        *texture = nullptr;
+    }
+    if (textureView)
+    {
+        *textureView = nullptr;
+    }
+
+    if (!device || !d3dContext || !width || !height
+        || !initData.pSysMem || !initData.SysMemPitch)
+        return E_INVALIDARG;
+
+    if (!texture && !textureView)
+        return E_INVALIDARG;
+
+    static_assert(D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION <= UINT32_MAX, "Exceeded integer limits");
+
+    if ((width > D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION)
+        || (height > D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION))
+    {
+        DebugTrace("ERROR: Resource dimensions too large for DirectX 11 (2D: size %zu by %zu)\n", width, height);
+        return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);
+    }
+
+    // MipLevels is left at its zero-initialized value here; it is only forced to 1
+    // below when automatic mip generation is unavailable.
+    D3D11_TEXTURE2D_DESC desc = {};
+    desc.Width = static_cast<UINT>(width);
+    desc.Height = static_cast<UINT>(height);
+    desc.ArraySize = 1;
+    desc.Format = format;
+    desc.SampleDesc.Count = 1;
+    desc.Usage = D3D11_USAGE_DEFAULT;
+    desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
+
+    UINT fmtSupport = 0;
+    if (SUCCEEDED(device->CheckFormatSupport(format, &fmtSupport)) && (fmtSupport & D3D11_FORMAT_SUPPORT_MIP_AUTOGEN))
+    {
+        desc.BindFlags |= D3D11_BIND_RENDER_TARGET;
+        desc.MiscFlags |= D3D11_RESOURCE_MISC_GENERATE_MIPS;
+    }
+    else
+    {
+        // Autogen not supported.
+        desc.MipLevels = 1;
+    }
+
+    // The upload further down only runs when desc.MipLevels != 1. In the
+    // single-level fallback nothing else would ever write the pixels, so the
+    // texture has to receive initData at creation time.
+    ComPtr<ID3D11Texture2D> tex;
+    HRESULT hr = device->CreateTexture2D(&desc, (desc.MipLevels != 1) ? nullptr : &initData, tex.GetAddressOf());
+    if (SUCCEEDED(hr))
+    {
+        ComPtr<ID3D11ShaderResourceView> srv;
+        hr = device->CreateShaderResourceView(tex.Get(), nullptr, srv.GetAddressOf());
+        if (FAILED(hr))
+            return hr;
+
+        if (desc.MipLevels != 1)
+        {
+#if defined(_XBOX_ONE) && defined(_TITLE)
+            // Upload through a staging texture, then wait for the copy to finish.
+            ComPtr<ID3D11Texture2D> staging;
+            desc.MipLevels = 1;
+            desc.Usage = D3D11_USAGE_STAGING;
+            desc.BindFlags = desc.MiscFlags = 0;
+            desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
+            hr = device->CreateTexture2D(&desc, &initData, staging.GetAddressOf());
+            if (FAILED(hr))
+                return hr;
+
+            d3dContext->CopySubresourceRegion(tex.Get(), 0, 0, 0, 0, staging.Get(), 0, nullptr);
+            UINT64 copyFence = d3dContext->InsertFence(0);
+            while (device->IsFencePending(copyFence)) { SwitchToThread(); }
+#else
+            d3dContext->UpdateSubresource(tex.Get(), 0, nullptr, initData.pSysMem, initData.SysMemPitch, 0);
+#endif
+            d3dContext->GenerateMips(srv.Get());
+        }
+
+        if (texture)
+        {
+            *texture = tex.Detach();
+        }
+        if (textureView)
+        {
+            *textureView = srv.Detach();
+        }
+    }
+
+    return hr;
+}
+
+// Creates a single-mip 3D texture from 'initData' and, if requested, a shader
+// resource view for it. At least one of 'texture' / 'textureView' must be non-null.
+_Use_decl_annotations_
+HRESULT DirectX::CreateTextureFromMemory(
+    ID3D11Device* device,
+    size_t width, size_t height, size_t depth,
+    DXGI_FORMAT format,
+    const D3D11_SUBRESOURCE_DATA& initData,
+    ID3D11Texture3D** texture,
+    ID3D11ShaderResourceView** textureView,
+    unsigned int bindFlags) noexcept
+{
+    if (texture)
+    {
+        *texture = nullptr;
+    }
+    if (textureView)
+    {
+        *textureView = nullptr;
+    }
+
+    if (!device || !width || !height || !depth
+        || !initData.pSysMem || !initData.SysMemPitch || !initData.SysMemSlicePitch)
+        return E_INVALIDARG;
+
+    if (!texture && !textureView)
+        return E_INVALIDARG;
+
+    static_assert(D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION <= UINT32_MAX, "Exceeded integer limits");
+
+    if ((width > D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION)
+        || (height > D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION)
+        || (depth > D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION))
+    {
+        DebugTrace("ERROR: Resource dimensions too large for DirectX 11 (3D: size %zu by %zu by %zu)\n", width, height, depth);
+        return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED);
+    }
+
+    D3D11_TEXTURE3D_DESC desc = {};
+    desc.Width = static_cast<UINT>(width);
+    desc.Height = static_cast<UINT>(height);
+    desc.Depth = static_cast<UINT>(depth);
+    desc.MipLevels = 1;
+    desc.Format = format;
+    desc.Usage = D3D11_USAGE_DEFAULT;
+    desc.BindFlags = bindFlags;
+
+    ComPtr<ID3D11Texture3D> tex;
+    HRESULT hr = device->CreateTexture3D(&desc, &initData, tex.GetAddressOf());
+    if (SUCCEEDED(hr))
+    {
+        if (textureView)
+        {
+            hr = device->CreateShaderResourceView(tex.Get(), nullptr, textureView);
+            if (FAILED(hr))
+                return hr;
+        }
+
+        if (texture)
+        {
+            *texture = tex.Detach();
+        }
+    }
+
+    return hr;
+}
+
+
+//--------------------------------------------------------------------------------------
+// Creates the constant buffer of 'bytes' bytes that backs a ConstantBuffer helper.
+// Throws std::invalid_argument when 'pBuffer' is null; device failures are reported
+// through ThrowIfFailed.
+_Use_decl_annotations_
+void Internal::ConstantBufferBase::CreateBuffer(
+    ID3D11Device* device,
+    size_t bytes,
+    ID3D11Buffer** pBuffer)
+{
+    if (!pBuffer)
+        throw std::invalid_argument("ConstantBuffer");
+
+    *pBuffer = nullptr;
+
+    D3D11_BUFFER_DESC desc = {};
+    desc.ByteWidth = static_cast<UINT>(bytes);
+    desc.Usage = D3D11_USAGE_DEFAULT;
+    desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
+    desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+
+#if defined(_XBOX_ONE) && defined(_TITLE)
+
+    Microsoft::WRL::ComPtr<ID3D11DeviceX> deviceX;
+    ThrowIfFailed(device->QueryInterface(IID_GRAPHICS_PPV_ARGS(deviceX.GetAddressOf())));
+
+    ThrowIfFailed(deviceX->CreatePlacementBuffer(&desc, nullptr, pBuffer));
+
+#else
+
+    // Outside the Xbox One title path the buffer is created as a dynamic resource.
+    desc.Usage = D3D11_USAGE_DYNAMIC;
+
+    ThrowIfFailed(
+        device->CreateBuffer(&desc, nullptr, pBuffer)
+    );
+
+#endif
+
+    assert(pBuffer != nullptr && *pBuffer != nullptr);
+    _Analysis_assume_(pBuffer != nullptr && *pBuffer != nullptr);
+}
diff --git a/Sdk/External/DirectXTK/Src/CommonStates.cpp b/Sdk/External/DirectXTK/Src/CommonStates.cpp
new file mode
100644
index 0000000..2a50a31
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/CommonStates.cpp
@@ -0,0 +1,361 @@
+//--------------------------------------------------------------------------------------
+// File: CommonStates.cpp
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#include "pch.h"
+#include "CommonStates.h"
+#include "DemandCreate.h"
+#include "DirectXHelpers.h"
+#include "SharedResourcePool.h"
+
+using namespace DirectX;
+using Microsoft::WRL::ComPtr;
+
+
+// Internal state object implementation class. Only one of these helpers is allocated
+// per D3D device, even if there are multiple public facing CommonStates instances.
+class CommonStates::Impl
+{
+public:
+    Impl(_In_ ID3D11Device* device) noexcept
+        : mDevice(device)
+    { }
+
+    // Factory helpers: each fills in a descriptor and asks mDevice to create the state object.
+    HRESULT CreateBlendState(D3D11_BLEND srcBlend, D3D11_BLEND destBlend, _Out_ ID3D11BlendState** pResult);
+    HRESULT CreateDepthStencilState(bool enable, bool writeEnable, _Out_ ID3D11DepthStencilState** pResult);
+    HRESULT CreateRasterizerState(D3D11_CULL_MODE cullMode, D3D11_FILL_MODE fillMode, _Out_ ID3D11RasterizerState** pResult);
+    HRESULT CreateSamplerState(D3D11_FILTER filter, D3D11_TEXTURE_ADDRESS_MODE addressMode, _Out_ ID3D11SamplerState** pResult);
+
+    ComPtr<ID3D11Device> mDevice;
+
+    // Cached state objects, one slot per public CommonStates accessor.
+    ComPtr<ID3D11BlendState> opaque;
+    ComPtr<ID3D11BlendState> alphaBlend;
+    ComPtr<ID3D11BlendState> additive;
+    ComPtr<ID3D11BlendState> nonPremultiplied;
+
+    ComPtr<ID3D11DepthStencilState> depthNone;
+    ComPtr<ID3D11DepthStencilState> depthDefault;
+    ComPtr<ID3D11DepthStencilState> depthRead;
+
+    ComPtr<ID3D11RasterizerState> cullNone;
+    ComPtr<ID3D11RasterizerState> cullClockwise;
+    ComPtr<ID3D11RasterizerState> cullCounterClockwise;
+    ComPtr<ID3D11RasterizerState> wireframe;
+
+    ComPtr<ID3D11SamplerState> pointWrap;
+    ComPtr<ID3D11SamplerState> pointClamp;
+    ComPtr<ID3D11SamplerState> linearWrap;
+    ComPtr<ID3D11SamplerState> linearClamp;
+    ComPtr<ID3D11SamplerState> anisotropicWrap;
+    ComPtr<ID3D11SamplerState> anisotropicClamp;
+
+    // Passed to DemandCreate together with the slot being filled.
+    std::mutex mutex;
+
+    static SharedResourcePool<ID3D11Device*, Impl> instancePool;
+};
+
+
+// Global instance pool.
+SharedResourcePool<ID3D11Device*, CommonStates::Impl> CommonStates::Impl::instancePool;
+
+
+// Helper for creating blend state objects.
+// The same factors are used for the color and alpha channels. Blending is enabled
+// unless the pair is (ONE, ZERO), i.e. a plain overwrite.
+HRESULT CommonStates::Impl::CreateBlendState(D3D11_BLEND srcBlend, D3D11_BLEND destBlend, _Out_ ID3D11BlendState** pResult)
+{
+    D3D11_BLEND_DESC desc = {};
+
+    desc.RenderTarget[0].BlendEnable = (srcBlend != D3D11_BLEND_ONE) ||
+                                       (destBlend != D3D11_BLEND_ZERO);
+
+    desc.RenderTarget[0].SrcBlend = desc.RenderTarget[0].SrcBlendAlpha = srcBlend;
+    desc.RenderTarget[0].DestBlend = desc.RenderTarget[0].DestBlendAlpha = destBlend;
+    desc.RenderTarget[0].BlendOp = desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
+
+    desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;
+
+    HRESULT hr = mDevice->CreateBlendState(&desc, pResult);
+
+    if (SUCCEEDED(hr))
+        SetDebugObjectName(*pResult, "DirectXTK:CommonStates");
+
+    return hr;
+}
+
+
+// Helper for creating depth stencil state objects.
+// 'enable' toggles the depth test and 'writeEnable' toggles depth writes; stencil
+// testing is always disabled.
+HRESULT CommonStates::Impl::CreateDepthStencilState(bool enable, bool writeEnable, _Out_ ID3D11DepthStencilState** pResult)
+{
+    D3D11_DEPTH_STENCIL_DESC desc = {};
+
+    desc.DepthEnable = enable ? TRUE : FALSE;
+    desc.DepthWriteMask = writeEnable ? D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO;
+    desc.DepthFunc = D3D11_COMPARISON_LESS_EQUAL;
+
+    desc.StencilEnable = FALSE;
+    desc.StencilReadMask = D3D11_DEFAULT_STENCIL_READ_MASK;
+    desc.StencilWriteMask = D3D11_DEFAULT_STENCIL_WRITE_MASK;
+
+    desc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS;
+    desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP;
+    desc.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP;
+    desc.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP;
+
+    desc.BackFace = desc.FrontFace;
+
+    HRESULT hr = mDevice->CreateDepthStencilState(&desc, pResult);
+
+    if (SUCCEEDED(hr))
+        SetDebugObjectName(*pResult, "DirectXTK:CommonStates");
+
+    return hr;
+}
+
+
+// Helper for creating rasterizer state objects.
+HRESULT CommonStates::Impl::CreateRasterizerState(D3D11_CULL_MODE cullMode, D3D11_FILL_MODE fillMode, _Out_ ID3D11RasterizerState** pResult)
+{
+    // Every field not assigned below keeps its zero-initialized value.
+    D3D11_RASTERIZER_DESC rasterDesc = {};
+    rasterDesc.FillMode = fillMode;
+    rasterDesc.CullMode = cullMode;
+    rasterDesc.DepthClipEnable = TRUE;
+    rasterDesc.MultisampleEnable = TRUE;
+
+    HRESULT const result = mDevice->CreateRasterizerState(&rasterDesc, pResult);
+    if (FAILED(result))
+        return result;
+
+    SetDebugObjectName(*pResult, "DirectXTK:CommonStates");
+    return result;
+}
+
+
+// Builds a sampler state that applies one filter and the same addressing mode to
+// all three texture coordinates.
+HRESULT CommonStates::Impl::CreateSamplerState(D3D11_FILTER filter, D3D11_TEXTURE_ADDRESS_MODE addressMode, _Out_ ID3D11SamplerState** pResult)
+{
+    D3D11_SAMPLER_DESC samplerDesc = {};
+    samplerDesc.Filter = filter;
+    samplerDesc.AddressU = samplerDesc.AddressV = samplerDesc.AddressW = addressMode;
+
+    // Feature level 9.1 gets a maximum anisotropy of 2; anything above it gets
+    // D3D11_MAX_MAXANISOTROPY.
+    if (mDevice->GetFeatureLevel() > D3D_FEATURE_LEVEL_9_1)
+        samplerDesc.MaxAnisotropy = D3D11_MAX_MAXANISOTROPY;
+    else
+        samplerDesc.MaxAnisotropy = 2u;
+
+    samplerDesc.ComparisonFunc = D3D11_COMPARISON_NEVER;
+    samplerDesc.MaxLOD = FLT_MAX;
+
+    HRESULT const result = mDevice->CreateSamplerState(&samplerDesc, pResult);
+    if (FAILED(result))
+        return result;
+
+    SetDebugObjectName(*pResult, "DirectXTK:CommonStates");
+    return result;
+}
+
+
+//--------------------------------------------------------------------------------------
+// CommonStates
+//--------------------------------------------------------------------------------------
+
+// Public constructor: obtains the Impl for this device from the instance pool.
+CommonStates::CommonStates(_In_ ID3D11Device* device)
+    : pImpl(Impl::instancePool.DemandCreate(device))
+{
+}
+
+
+// Move constructor: takes over the other instance's Impl reference.
+CommonStates::CommonStates(CommonStates&& moveFrom) noexcept
+    : pImpl(std::move(moveFrom.pImpl))
+{
+}
+
+
+// Move assignment: takes over the other instance's Impl reference.
+CommonStates& CommonStates::operator= (CommonStates&& moveFrom) noexcept
+{
+    pImpl = std::move(moveFrom.pImpl);
+
+    return *this;
+}
+
+
+// Public destructor.
+CommonStates::~CommonStates() = default;
+
+
+//--------------------------------------------------------------------------------------
+// Blend states
+//
+// Every accessor below hands DemandCreate the cached slot in the Impl, the Impl's
+// mutex, and a factory callback that builds the state object.
+//--------------------------------------------------------------------------------------
+
+ID3D11BlendState* CommonStates::Opaque() const
+{
+    auto factory = [this](ID3D11BlendState** pResult)
+    {
+        return pImpl->CreateBlendState(D3D11_BLEND_ONE, D3D11_BLEND_ZERO, pResult);
+    };
+
+    return DemandCreate(pImpl->opaque, pImpl->mutex, factory);
+}
+
+
+ID3D11BlendState* CommonStates::AlphaBlend() const
+{
+    auto factory = [this](ID3D11BlendState** pResult)
+    {
+        return pImpl->CreateBlendState(D3D11_BLEND_ONE, D3D11_BLEND_INV_SRC_ALPHA, pResult);
+    };
+
+    return DemandCreate(pImpl->alphaBlend, pImpl->mutex, factory);
+}
+
+
+ID3D11BlendState* CommonStates::Additive() const
+{
+    auto factory = [this](ID3D11BlendState** pResult)
+    {
+        return pImpl->CreateBlendState(D3D11_BLEND_SRC_ALPHA, D3D11_BLEND_ONE, pResult);
+    };
+
+    return DemandCreate(pImpl->additive, pImpl->mutex, factory);
+}
+
+
+ID3D11BlendState* CommonStates::NonPremultiplied() const
+{
+    auto factory = [this](ID3D11BlendState** pResult)
+    {
+        return pImpl->CreateBlendState(D3D11_BLEND_SRC_ALPHA, D3D11_BLEND_INV_SRC_ALPHA, pResult);
+    };
+
+    return DemandCreate(pImpl->nonPremultiplied, pImpl->mutex, factory);
+}
+
+
+//--------------------------------------------------------------------------------------
+// Depth stencil states
+//--------------------------------------------------------------------------------------
+
+ID3D11DepthStencilState* CommonStates::DepthNone() const
+{
+    auto factory = [this](ID3D11DepthStencilState** pResult)
+    {
+        return pImpl->CreateDepthStencilState(false, false, pResult);
+    };
+
+    return DemandCreate(pImpl->depthNone, pImpl->mutex, factory);
+}
+
+
+ID3D11DepthStencilState* CommonStates::DepthDefault() const
+{
+    auto factory = [this](ID3D11DepthStencilState** pResult)
+    {
+        return pImpl->CreateDepthStencilState(true, true, pResult);
+    };
+
+    return DemandCreate(pImpl->depthDefault, pImpl->mutex, factory);
+}
+
+
+ID3D11DepthStencilState* CommonStates::DepthRead() const
+{
+    auto factory = [this](ID3D11DepthStencilState** pResult)
+    {
+        return pImpl->CreateDepthStencilState(true, false, pResult);
+    };
+
+    return DemandCreate(pImpl->depthRead, pImpl->mutex, factory);
+}
+
+
+//--------------------------------------------------------------------------------------
+// Rasterizer states
+//--------------------------------------------------------------------------------------
+
+ID3D11RasterizerState* CommonStates::CullNone() const
+{
+    auto factory = [this](ID3D11RasterizerState** pResult)
+    {
+        return pImpl->CreateRasterizerState(D3D11_CULL_NONE, D3D11_FILL_SOLID, pResult);
+    };
+
+    return DemandCreate(pImpl->cullNone, pImpl->mutex, factory);
+}
+
+
+ID3D11RasterizerState* CommonStates::CullClockwise() const
+{
+    auto factory = [this](ID3D11RasterizerState** pResult)
+    {
+        return pImpl->CreateRasterizerState(D3D11_CULL_FRONT, D3D11_FILL_SOLID, pResult);
+    };
+
+    return DemandCreate(pImpl->cullClockwise, pImpl->mutex, factory);
+}
+
+
+ID3D11RasterizerState* CommonStates::CullCounterClockwise() const
+{
+    auto factory = [this](ID3D11RasterizerState** pResult)
+    {
+        return pImpl->CreateRasterizerState(D3D11_CULL_BACK, D3D11_FILL_SOLID, pResult);
+    };
+
+    return DemandCreate(pImpl->cullCounterClockwise, pImpl->mutex, factory);
+}
+
+
+ID3D11RasterizerState* CommonStates::Wireframe() const
+{
+    auto factory = [this](ID3D11RasterizerState** pResult)
+    {
+        return pImpl->CreateRasterizerState(D3D11_CULL_NONE, D3D11_FILL_WIREFRAME, pResult);
+    };
+
+    return DemandCreate(pImpl->wireframe, pImpl->mutex, factory);
+}
+
+
+//--------------------------------------------------------------------------------------
+// Sampler states
+//--------------------------------------------------------------------------------------
+
+ID3D11SamplerState* CommonStates::PointWrap() const
+{
+    auto factory = [this](ID3D11SamplerState** pResult)
+    {
+        return pImpl->CreateSamplerState(D3D11_FILTER_MIN_MAG_MIP_POINT, D3D11_TEXTURE_ADDRESS_WRAP, pResult);
+    };
+
+    return DemandCreate(pImpl->pointWrap, pImpl->mutex, factory);
+}
+
+
+ID3D11SamplerState* CommonStates::PointClamp() const
+{
+    auto factory = [this](ID3D11SamplerState** pResult)
+    {
+        return pImpl->CreateSamplerState(D3D11_FILTER_MIN_MAG_MIP_POINT, D3D11_TEXTURE_ADDRESS_CLAMP, pResult);
+    };
+
+    return DemandCreate(pImpl->pointClamp, pImpl->mutex, factory);
+}
+
+
+ID3D11SamplerState* CommonStates::LinearWrap() const
+{
+    auto factory = [this](ID3D11SamplerState** pResult)
+    {
+        return pImpl->CreateSamplerState(D3D11_FILTER_MIN_MAG_MIP_LINEAR, D3D11_TEXTURE_ADDRESS_WRAP, pResult);
+    };
+
+    return DemandCreate(pImpl->linearWrap, pImpl->mutex, factory);
+}
+
+
+ID3D11SamplerState* CommonStates::LinearClamp() const
+{
+    auto factory = [this](ID3D11SamplerState** pResult)
+    {
+        return pImpl->CreateSamplerState(D3D11_FILTER_MIN_MAG_MIP_LINEAR, D3D11_TEXTURE_ADDRESS_CLAMP, pResult);
+    };
+
+    return DemandCreate(pImpl->linearClamp, pImpl->mutex, factory);
+}
+
+
+ID3D11SamplerState* CommonStates::AnisotropicWrap() const
+{
+    auto factory = [this](ID3D11SamplerState** pResult)
+    {
+        return pImpl->CreateSamplerState(D3D11_FILTER_ANISOTROPIC, D3D11_TEXTURE_ADDRESS_WRAP, pResult);
+    };
+
+    return DemandCreate(pImpl->anisotropicWrap, pImpl->mutex, factory);
+}
+
+
+ID3D11SamplerState* CommonStates::AnisotropicClamp() const
+{
+    auto factory = [this](ID3D11SamplerState** pResult)
+    {
+        return pImpl->CreateSamplerState(D3D11_FILTER_ANISOTROPIC, D3D11_TEXTURE_ADDRESS_CLAMP, pResult);
+    };
+
+    return DemandCreate(pImpl->anisotropicClamp, pImpl->mutex, factory);
+}
diff --git a/Sdk/External/DirectXTK/Src/DDS.h b/Sdk/External/DirectXTK/Src/DDS.h
new file mode 100644
index 0000000..6b02437
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/DDS.h
@@ -0,0 +1,273 @@
+//--------------------------------------------------------------------------------------
+// dds.h
+//
+// This header defines constants and structures that are useful when parsing
+// DDS files. DDS files were originally designed to use several structures
+// and constants that are native to DirectDraw and are defined in ddraw.h,
+// such as DDSURFACEDESC2 and DDSCAPS2. This file defines similar
+// (compatible) constants and structures so that one can use DDS files
+// without needing to include ddraw.h.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248926
+// http://go.microsoft.com/fwlink/?LinkId=248929
+// http://go.microsoft.com/fwlink/?LinkID=615561
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#include <cstdint>
+
+namespace DirectX
+{
+
+// The structures below are packed to 1 byte; their sizes are checked by the
+// static_asserts at the end of this header.
+#pragma pack(push,1)
+
+constexpr uint32_t DDS_MAGIC = 0x20534444; // "DDS "
+
+struct DDS_PIXELFORMAT
+{
+    uint32_t    size;
+    uint32_t    flags;
+    uint32_t    fourCC;
+    uint32_t    RGBBitCount;
+    uint32_t    RBitMask;
+    uint32_t    GBitMask;
+    uint32_t    BBitMask;
+    uint32_t    ABitMask;
+};
+
+// Values for DDS_PIXELFORMAT::flags
+#define DDS_FOURCC      0x00000004  // DDPF_FOURCC
+#define DDS_RGB         0x00000040  // DDPF_RGB
+#define DDS_RGBA        0x00000041  // DDPF_RGB | DDPF_ALPHAPIXELS
+#define DDS_LUMINANCE   0x00020000  // DDPF_LUMINANCE
+#define DDS_LUMINANCEA  0x00020001  // DDPF_LUMINANCE | DDPF_ALPHAPIXELS
+#define DDS_ALPHAPIXELS 0x00000001  // DDPF_ALPHAPIXELS
+#define DDS_ALPHA       0x00000002  // DDPF_ALPHA
+#define DDS_PAL8        0x00000020  // DDPF_PALETTEINDEXED8
+#define DDS_PAL8A       0x00000021  // DDPF_PALETTEINDEXED8 | DDPF_ALPHAPIXELS
+#define DDS_BUMPDUDV    0x00080000  // DDPF_BUMPDUDV
+// DDS_BUMPLUMINANCE 0x00040000
+
+// Packs four characters into a 32-bit code with ch0 in the lowest byte.
+#ifndef MAKEFOURCC
+    #define MAKEFOURCC(ch0, ch1, ch2, ch3) \
+                (static_cast<uint32_t>(static_cast<uint8_t>(ch0)) \
+                | (static_cast<uint32_t>(static_cast<uint8_t>(ch1)) << 8) \
+                | (static_cast<uint32_t>(static_cast<uint8_t>(ch2)) << 16) \
+                | (static_cast<uint32_t>(static_cast<uint8_t>(ch3)) << 24))
+#endif /* !defined(MAKEFOURCC) */
+
+// Predefined pixel formats. Initializer order follows DDS_PIXELFORMAT:
+// { size, flags, fourCC, RGBBitCount, RBitMask, GBitMask, BBitMask, ABitMask }
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_DXT1 =
+    { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('D','X','T','1'), 0, 0, 0, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_DXT2 =
+    { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('D','X','T','2'), 0, 0, 0, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_DXT3 =
+    { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('D','X','T','3'), 0, 0, 0, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_DXT4 =
+    { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('D','X','T','4'), 0, 0, 0, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_DXT5 =
+    { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('D','X','T','5'), 0, 0, 0, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_BC4_UNORM =
+    { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('B','C','4','U'), 0, 0, 0, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_BC4_SNORM =
+    { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('B','C','4','S'), 0, 0, 0, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_BC5_UNORM =
+    { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('B','C','5','U'), 0, 0, 0, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_BC5_SNORM =
+    { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('B','C','5','S'), 0, 0, 0, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_R8G8_B8G8 =
+    { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('R','G','B','G'), 0, 0, 0, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_G8R8_G8B8 =
+    { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('G','R','G','B'), 0, 0, 0, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_YUY2 =
+    { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('Y','U','Y','2'), 0, 0, 0, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_UYVY =
+    { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('U','Y','V','Y'), 0, 0, 0, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_A8R8G8B8 =
+    { sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_X8R8G8B8 =
+    { sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_A8B8G8R8 =
+    { sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_X8B8G8R8 =
+    { sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_G16R16 =
+    { sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 32, 0x0000ffff, 0xffff0000, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_R5G6B5 =
+    { sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 16, 0xf800, 0x07e0, 0x001f, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_A1R5G5B5 =
+    { sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 16, 0x7c00, 0x03e0, 0x001f, 0x8000 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_X1R5G5B5 =
+    { sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 16, 0x7c00, 0x03e0, 0x001f, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_A4R4G4B4 =
+    { sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 16, 0x0f00, 0x00f0, 0x000f, 0xf000 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_X4R4G4B4 =
+    { sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 16, 0x0f00, 0x00f0, 0x000f, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_R8G8B8 =
+    { sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 24, 0xff0000, 0x00ff00, 0x0000ff, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_A8R3G3B2 =
+    { sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 16, 0x00e0, 0x001c, 0x0003, 0xff00 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_R3G3B2 =
+    { sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 8, 0xe0, 0x1c, 0x03, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_A4L4 =
+    { sizeof(DDS_PIXELFORMAT), DDS_LUMINANCEA, 0, 8, 0x0f, 0, 0, 0xf0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_L8 =
+    { sizeof(DDS_PIXELFORMAT), DDS_LUMINANCE, 0, 8, 0xff, 0, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_L16 =
+    { sizeof(DDS_PIXELFORMAT), DDS_LUMINANCE, 0, 16, 0xffff, 0, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_A8L8 =
+    { sizeof(DDS_PIXELFORMAT), DDS_LUMINANCEA, 0, 16, 0x00ff, 0, 0, 0xff00 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_A8L8_ALT =
+    { sizeof(DDS_PIXELFORMAT), DDS_LUMINANCEA, 0, 8, 0x00ff, 0, 0, 0xff00 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_A8 =
+    { sizeof(DDS_PIXELFORMAT), DDS_ALPHA, 0, 8, 0, 0, 0, 0xff };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_V8U8 =
+    { sizeof(DDS_PIXELFORMAT), DDS_BUMPDUDV, 0, 16, 0x00ff, 0xff00, 0, 0 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_Q8W8V8U8 =
+    { sizeof(DDS_PIXELFORMAT), DDS_BUMPDUDV, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 };
+
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_V16U16 =
+    { sizeof(DDS_PIXELFORMAT), DDS_BUMPDUDV, 0, 32, 0x0000ffff, 0xffff0000, 0, 0 };
+
+// D3DFMT_A2R10G10B10/D3DFMT_A2B10G10R10 should be written using DX10 extension to avoid D3DX 10:10:10:2 reversal issue
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_A2R10G10B10 =
+    { sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x000003ff, 0x000ffc00, 0x3ff00000, 0xc0000000 };
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_A2B10G10R10 =
+    { sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x3ff00000, 0x000ffc00, 0x000003ff, 0xc0000000 };
+
+// We do not support the following legacy Direct3D 9 formats:
+// DDSPF_A2W10V10U10 = { sizeof(DDS_PIXELFORMAT), DDS_BUMPDUDV, 0, 32, 0x3ff00000, 0x000ffc00, 0x000003ff, 0xc0000000 };
+// DDSPF_L6V5U5 = { sizeof(DDS_PIXELFORMAT), DDS_BUMPLUMINANCE, 0, 16, 0x001f, 0x03e0, 0xfc00, 0 };
+// DDSPF_X8L8V8U8 = { sizeof(DDS_PIXELFORMAT), DDS_BUMPLUMINANCE, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0 };
+
+// This indicates the DDS_HEADER_DXT10 extension is present (the format is in dxgiFormat)
+extern __declspec(selectany) const DDS_PIXELFORMAT DDSPF_DX10 =
+    { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('D','X','1','0'), 0, 0, 0, 0, 0 };
+
+#define DDS_HEADER_FLAGS_TEXTURE        0x00001007  // DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT
+#define DDS_HEADER_FLAGS_MIPMAP         0x00020000  // DDSD_MIPMAPCOUNT
+#define DDS_HEADER_FLAGS_VOLUME         0x00800000  // DDSD_DEPTH
+#define DDS_HEADER_FLAGS_PITCH          0x00000008  // DDSD_PITCH
+#define DDS_HEADER_FLAGS_LINEARSIZE     0x00080000  // DDSD_LINEARSIZE
+
+#define DDS_HEIGHT 0x00000002 // DDSD_HEIGHT
+#define DDS_WIDTH  0x00000004 // DDSD_WIDTH
+
+#define DDS_SURFACE_FLAGS_TEXTURE 0x00001000 // DDSCAPS_TEXTURE
+#define DDS_SURFACE_FLAGS_MIPMAP  0x00400008 // DDSCAPS_COMPLEX | DDSCAPS_MIPMAP
+#define DDS_SURFACE_FLAGS_CUBEMAP 0x00000008 // DDSCAPS_COMPLEX
+
+#define DDS_CUBEMAP_POSITIVEX 0x00000600 // DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_POSITIVEX
+#define DDS_CUBEMAP_NEGATIVEX 0x00000a00 // DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_NEGATIVEX
+#define DDS_CUBEMAP_POSITIVEY 0x00001200 // DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_POSITIVEY
+#define DDS_CUBEMAP_NEGATIVEY 0x00002200 // DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_NEGATIVEY
+#define DDS_CUBEMAP_POSITIVEZ 0x00004200 // DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_POSITIVEZ
+#define DDS_CUBEMAP_NEGATIVEZ 0x00008200 // DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_NEGATIVEZ
+
+#define DDS_CUBEMAP_ALLFACES ( DDS_CUBEMAP_POSITIVEX | DDS_CUBEMAP_NEGATIVEX |\
+                               DDS_CUBEMAP_POSITIVEY | DDS_CUBEMAP_NEGATIVEY |\
+                               DDS_CUBEMAP_POSITIVEZ | DDS_CUBEMAP_NEGATIVEZ )
+
+#define DDS_CUBEMAP 0x00000200 // DDSCAPS2_CUBEMAP
+
+#define DDS_FLAGS_VOLUME 0x00200000 // DDSCAPS2_VOLUME
+
+// Subset here matches D3D10_RESOURCE_DIMENSION and D3D11_RESOURCE_DIMENSION
+enum DDS_RESOURCE_DIMENSION : uint32_t
+{
+    DDS_DIMENSION_TEXTURE1D = 2,
+    DDS_DIMENSION_TEXTURE2D = 3,
+    DDS_DIMENSION_TEXTURE3D = 4,
+};
+
+// Subset here matches D3D10_RESOURCE_MISC_FLAG and D3D11_RESOURCE_MISC_FLAG
+enum DDS_RESOURCE_MISC_FLAG : uint32_t
+{
+    DDS_RESOURCE_MISC_TEXTURECUBE = 0x4L,
+};
+
+enum DDS_MISC_FLAGS2 : uint32_t
+{
+    DDS_MISC_FLAGS2_ALPHA_MODE_MASK = 0x7L,
+};
+
+#ifndef DDS_ALPHA_MODE_DEFINED
+#define DDS_ALPHA_MODE_DEFINED
+enum DDS_ALPHA_MODE : uint32_t
+{
+    DDS_ALPHA_MODE_UNKNOWN = 0,
+    DDS_ALPHA_MODE_STRAIGHT = 1,
+    DDS_ALPHA_MODE_PREMULTIPLIED = 2,
+    DDS_ALPHA_MODE_OPAQUE = 3,
+    DDS_ALPHA_MODE_CUSTOM = 4,
+};
+#endif
+
+struct DDS_HEADER
+{
+    uint32_t        size;
+    uint32_t        flags;
+    uint32_t        height;
+    uint32_t        width;
+    uint32_t        pitchOrLinearSize;
+    uint32_t        depth; // only if DDS_HEADER_FLAGS_VOLUME is set in flags
+    uint32_t        mipMapCount;
+    uint32_t        reserved1[11];
+    DDS_PIXELFORMAT ddspf;
+    uint32_t        caps;
+    uint32_t        caps2;
+    uint32_t        caps3;
+    uint32_t        caps4;
+    uint32_t        reserved2;
+};
+
+struct DDS_HEADER_DXT10
+{
+    DXGI_FORMAT     dxgiFormat;
+    uint32_t        resourceDimension;
+    uint32_t        miscFlag; // see D3D11_RESOURCE_MISC_FLAG
+    uint32_t        arraySize;
+    uint32_t        miscFlags2; // see DDS_MISC_FLAGS2
+};
+
+#pragma pack(pop)
+
+static_assert( sizeof(DDS_HEADER) == 124, "DDS Header size mismatch" );
+static_assert( sizeof(DDS_HEADER_DXT10) == 20, "DDS DX10 Extended Header size mismatch");
+
+} // namespace
diff --git a/Sdk/External/DirectXTK/Src/DDSTextureLoader.cpp b/Sdk/External/DirectXTK/Src/DDSTextureLoader.cpp
new file mode 100644
index 0000000..f4bff58
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/DDSTextureLoader.cpp
@@ -0,0 +1,1335 @@
+//--------------------------------------------------------------------------------------
+// File: DDSTextureLoader.cpp
+//
+// Functions for loading a DDS texture and creating a Direct3D runtime resource for it
+//
+// Note these functions are useful as a light-weight runtime loader for DDS files. For
+// a full-featured DDS file reader, writer, and texture processing pipeline see
+// the 'Texconv' sample and the 'DirectXTex' library.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// +// http://go.microsoft.com/fwlink/?LinkId=248926 +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" + +#include "DDSTextureLoader.h" + +#include "PlatformHelpers.h" +#include "DDS.h" +#include "DirectXHelpers.h" +#include "LoaderHelpers.h" + +using namespace DirectX; +using namespace DirectX::LoaderHelpers; + +static_assert(static_cast(DDS_DIMENSION_TEXTURE1D) == static_cast(D3D11_RESOURCE_DIMENSION_TEXTURE1D), "dds mismatch"); +static_assert(static_cast(DDS_DIMENSION_TEXTURE2D) == static_cast(D3D11_RESOURCE_DIMENSION_TEXTURE2D), "dds mismatch"); +static_assert(static_cast(DDS_DIMENSION_TEXTURE3D) == static_cast(D3D11_RESOURCE_DIMENSION_TEXTURE3D), "dds mismatch"); +static_assert(static_cast(DDS_RESOURCE_MISC_TEXTURECUBE) == static_cast(D3D11_RESOURCE_MISC_TEXTURECUBE), "dds mismatch"); + +namespace +{ + //-------------------------------------------------------------------------------------- + HRESULT FillInitData( + _In_ size_t width, + _In_ size_t height, + _In_ size_t depth, + _In_ size_t mipCount, + _In_ size_t arraySize, + _In_ DXGI_FORMAT format, + _In_ size_t maxsize, + _In_ size_t bitSize, + _In_reads_bytes_(bitSize) const uint8_t* bitData, + _Out_ size_t& twidth, + _Out_ size_t& theight, + _Out_ size_t& tdepth, + _Out_ size_t& skipMip, + _Out_writes_(mipCount*arraySize) D3D11_SUBRESOURCE_DATA* initData) noexcept + { + if (!bitData || !initData) + { + return E_POINTER; + } + + skipMip = 0; + twidth = 0; + theight = 0; + tdepth = 0; + + size_t NumBytes = 0; + size_t RowBytes = 0; + const uint8_t* pSrcBits = bitData; + const uint8_t* pEndBits = bitData + bitSize; + + size_t index = 0; + for (size_t j = 0; j < arraySize; j++) + { + size_t w = width; + size_t h = height; + size_t d = depth; + for (size_t i = 0; i < mipCount; i++) + { + HRESULT hr = GetSurfaceInfo(w, h, format, &NumBytes, &RowBytes, nullptr); + if (FAILED(hr)) + return hr; + + if 
(NumBytes > UINT32_MAX || RowBytes > UINT32_MAX) + return HRESULT_FROM_WIN32(ERROR_ARITHMETIC_OVERFLOW); + + if ((mipCount <= 1) || !maxsize || (w <= maxsize && h <= maxsize && d <= maxsize)) + { + if (!twidth) + { + twidth = w; + theight = h; + tdepth = d; + } + + assert(index < mipCount * arraySize); + _Analysis_assume_(index < mipCount * arraySize); + initData[index].pSysMem = pSrcBits; + initData[index].SysMemPitch = static_cast(RowBytes); + initData[index].SysMemSlicePitch = static_cast(NumBytes); + ++index; + } + else if (!j) + { + // Count number of skipped mipmaps (first item only) + ++skipMip; + } + + if (pSrcBits + (NumBytes*d) > pEndBits) + { + return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + pSrcBits += NumBytes * d; + + w = w >> 1; + h = h >> 1; + d = d >> 1; + if (w == 0) + { + w = 1; + } + if (h == 0) + { + h = 1; + } + if (d == 0) + { + d = 1; + } + } + } + + return (index > 0) ? S_OK : E_FAIL; + } + + //-------------------------------------------------------------------------------------- + HRESULT CreateD3DResources( + _In_ ID3D11Device* d3dDevice, + _In_ uint32_t resDim, + _In_ size_t width, + _In_ size_t height, + _In_ size_t depth, + _In_ size_t mipCount, + _In_ size_t arraySize, + _In_ DXGI_FORMAT format, + _In_ D3D11_USAGE usage, + _In_ unsigned int bindFlags, + _In_ unsigned int cpuAccessFlags, + _In_ unsigned int miscFlags, + _In_ bool forceSRGB, + _In_ bool isCubeMap, + _In_reads_opt_(mipCount*arraySize) const D3D11_SUBRESOURCE_DATA* initData, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView) noexcept + { + if (!d3dDevice) + return E_POINTER; + + HRESULT hr = E_FAIL; + + if (forceSRGB) + { + format = MakeSRGB(format); + } + + switch (resDim) + { + case D3D11_RESOURCE_DIMENSION_TEXTURE1D: + { + D3D11_TEXTURE1D_DESC desc = {}; + desc.Width = static_cast(width); + desc.MipLevels = static_cast(mipCount); + desc.ArraySize = static_cast(arraySize); + desc.Format = format; + desc.Usage = usage; + 
desc.BindFlags = bindFlags; + desc.CPUAccessFlags = cpuAccessFlags; + desc.MiscFlags = miscFlags & ~static_cast(D3D11_RESOURCE_MISC_TEXTURECUBE); + + ID3D11Texture1D* tex = nullptr; + hr = d3dDevice->CreateTexture1D(&desc, + initData, + &tex + ); + if (SUCCEEDED(hr) && tex) + { + if (textureView) + { + D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; + SRVDesc.Format = format; + + if (arraySize > 1) + { + SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY; + SRVDesc.Texture1DArray.MipLevels = (!mipCount) ? UINT(-1) : desc.MipLevels; + SRVDesc.Texture1DArray.ArraySize = static_cast(arraySize); + } + else + { + SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; + SRVDesc.Texture1D.MipLevels = (!mipCount) ? UINT(-1) : desc.MipLevels; + } + + hr = d3dDevice->CreateShaderResourceView(tex, + &SRVDesc, + textureView + ); + if (FAILED(hr)) + { + tex->Release(); + return hr; + } + } + + if (texture) + { + *texture = tex; + } + else + { + SetDebugObjectName(tex, "DDSTextureLoader"); + tex->Release(); + } + } + } + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: + { + D3D11_TEXTURE2D_DESC desc = {}; + desc.Width = static_cast(width); + desc.Height = static_cast(height); + desc.MipLevels = static_cast(mipCount); + desc.ArraySize = static_cast(arraySize); + desc.Format = format; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Usage = usage; + desc.BindFlags = bindFlags; + desc.CPUAccessFlags = cpuAccessFlags; + if (isCubeMap) + { + desc.MiscFlags = miscFlags | D3D11_RESOURCE_MISC_TEXTURECUBE; + } + else + { + desc.MiscFlags = miscFlags & ~static_cast(D3D11_RESOURCE_MISC_TEXTURECUBE); + } + + ID3D11Texture2D* tex = nullptr; + hr = d3dDevice->CreateTexture2D(&desc, + initData, + &tex + ); + if (SUCCEEDED(hr) && tex) + { + if (textureView) + { + D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; + SRVDesc.Format = format; + + if (isCubeMap) + { + if (arraySize > 6) + { + SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBEARRAY; + 
SRVDesc.TextureCubeArray.MipLevels = (!mipCount) ? UINT(-1) : desc.MipLevels; + + // Earlier we set arraySize to (NumCubes * 6) + SRVDesc.TextureCubeArray.NumCubes = static_cast(arraySize / 6); + } + else + { + SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE; + SRVDesc.TextureCube.MipLevels = (!mipCount) ? UINT(-1) : desc.MipLevels; + } + } + else if (arraySize > 1) + { + SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; + SRVDesc.Texture2DArray.MipLevels = (!mipCount) ? UINT(-1) : desc.MipLevels; + SRVDesc.Texture2DArray.ArraySize = static_cast(arraySize); + } + else + { + SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + SRVDesc.Texture2D.MipLevels = (!mipCount) ? UINT(-1) : desc.MipLevels; + } + + hr = d3dDevice->CreateShaderResourceView(tex, + &SRVDesc, + textureView + ); + if (FAILED(hr)) + { + tex->Release(); + return hr; + } + } + + if (texture) + { + *texture = tex; + } + else + { + SetDebugObjectName(tex, "DDSTextureLoader"); + tex->Release(); + } + } + } + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE3D: + { + D3D11_TEXTURE3D_DESC desc = {}; + desc.Width = static_cast(width); + desc.Height = static_cast(height); + desc.Depth = static_cast(depth); + desc.MipLevels = static_cast(mipCount); + desc.Format = format; + desc.Usage = usage; + desc.BindFlags = bindFlags; + desc.CPUAccessFlags = cpuAccessFlags; + desc.MiscFlags = miscFlags & ~UINT(D3D11_RESOURCE_MISC_TEXTURECUBE); + + ID3D11Texture3D* tex = nullptr; + hr = d3dDevice->CreateTexture3D(&desc, + initData, + &tex + ); + if (SUCCEEDED(hr) && tex) + { + if (textureView) + { + D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; + SRVDesc.Format = format; + + SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; + SRVDesc.Texture3D.MipLevels = (!mipCount) ? 
UINT(-1) : desc.MipLevels; + + hr = d3dDevice->CreateShaderResourceView(tex, + &SRVDesc, + textureView + ); + if (FAILED(hr)) + { + tex->Release(); + return hr; + } + } + + if (texture) + { + *texture = tex; + } + else + { + SetDebugObjectName(tex, "DDSTextureLoader"); + tex->Release(); + } + } + } + break; + } + + return hr; + } + + //-------------------------------------------------------------------------------------- + HRESULT CreateTextureFromDDS( + _In_ ID3D11Device* d3dDevice, + _In_opt_ ID3D11DeviceContext* d3dContext, +#if defined(_XBOX_ONE) && defined(_TITLE) + _In_opt_ ID3D11DeviceX* d3dDeviceX, + _In_opt_ ID3D11DeviceContextX* d3dContextX, +#endif + _In_ const DDS_HEADER* header, + _In_reads_bytes_(bitSize) const uint8_t* bitData, + _In_ size_t bitSize, + _In_ size_t maxsize, + _In_ D3D11_USAGE usage, + _In_ unsigned int bindFlags, + _In_ unsigned int cpuAccessFlags, + _In_ unsigned int miscFlags, + _In_ bool forceSRGB, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView) noexcept + { + HRESULT hr = S_OK; + + UINT width = header->width; + UINT height = header->height; + UINT depth = header->depth; + + uint32_t resDim = D3D11_RESOURCE_DIMENSION_UNKNOWN; + UINT arraySize = 1; + DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; + bool isCubeMap = false; + + size_t mipCount = header->mipMapCount; + if (0 == mipCount) + { + mipCount = 1; + } + + if ((header->ddspf.flags & DDS_FOURCC) && + (MAKEFOURCC('D', 'X', '1', '0') == header->ddspf.fourCC)) + { + auto d3d10ext = reinterpret_cast(reinterpret_cast(header) + sizeof(DDS_HEADER)); + + arraySize = d3d10ext->arraySize; + if (arraySize == 0) + { + return HRESULT_FROM_WIN32(ERROR_INVALID_DATA); + } + + switch (d3d10ext->dxgiFormat) + { + case DXGI_FORMAT_AI44: + case DXGI_FORMAT_IA44: + case DXGI_FORMAT_P8: + case DXGI_FORMAT_A8P8: + DebugTrace("ERROR: DDSTextureLoader does not support video textures. 
Consider using DirectXTex instead.\n"); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + + default: + if (BitsPerPixel(d3d10ext->dxgiFormat) == 0) + { + DebugTrace("ERROR: Unknown DXGI format (%u)\n", static_cast(d3d10ext->dxgiFormat)); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + } + + format = d3d10ext->dxgiFormat; + + switch (d3d10ext->resourceDimension) + { + case D3D11_RESOURCE_DIMENSION_TEXTURE1D: + // D3DX writes 1D textures with a fixed Height of 1 + if ((header->flags & DDS_HEIGHT) && height != 1) + { + return HRESULT_FROM_WIN32(ERROR_INVALID_DATA); + } + height = depth = 1; + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: + if (d3d10ext->miscFlag & D3D11_RESOURCE_MISC_TEXTURECUBE) + { + arraySize *= 6; + isCubeMap = true; + } + depth = 1; + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE3D: + if (!(header->flags & DDS_HEADER_FLAGS_VOLUME)) + { + return HRESULT_FROM_WIN32(ERROR_INVALID_DATA); + } + + if (arraySize > 1) + { + DebugTrace("ERROR: Volume textures are not texture arrays\n"); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + break; + + case D3D11_RESOURCE_DIMENSION_BUFFER: + DebugTrace("ERROR: Resource dimension buffer type not supported for textures\n"); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + + case D3D11_RESOURCE_DIMENSION_UNKNOWN: + default: + DebugTrace("ERROR: Unknown resource dimension (%u)\n", static_cast(d3d10ext->resourceDimension)); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + resDim = d3d10ext->resourceDimension; + } + else + { + format = GetDXGIFormat(header->ddspf); + + if (format == DXGI_FORMAT_UNKNOWN) + { + DebugTrace("ERROR: DDSTextureLoader does not support all legacy DDS formats. 
Consider using DirectXTex.\n"); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + if (header->flags & DDS_HEADER_FLAGS_VOLUME) + { + resDim = D3D11_RESOURCE_DIMENSION_TEXTURE3D; + } + else + { + if (header->caps2 & DDS_CUBEMAP) + { + // We require all six faces to be defined + if ((header->caps2 & DDS_CUBEMAP_ALLFACES) != DDS_CUBEMAP_ALLFACES) + { + DebugTrace("ERROR: DirectX 11 does not support partial cubemaps\n"); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + arraySize = 6; + isCubeMap = true; + } + + depth = 1; + resDim = D3D11_RESOURCE_DIMENSION_TEXTURE2D; + + // Note there's no way for a legacy Direct3D 9 DDS to express a '1D' texture + } + + assert(BitsPerPixel(format) != 0); + } + + if ((miscFlags & D3D11_RESOURCE_MISC_TEXTURECUBE) + && (resDim == D3D11_RESOURCE_DIMENSION_TEXTURE2D) + && ((arraySize % 6) == 0)) + { + isCubeMap = true; + } + + // Bound sizes (for security purposes we don't trust DDS file metadata larger than the Direct3D hardware requirements) + if (mipCount > D3D11_REQ_MIP_LEVELS) + { + DebugTrace("ERROR: Too many mipmap levels defined for DirectX 11 (%zu).\n", mipCount); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + switch (resDim) + { + case D3D11_RESOURCE_DIMENSION_TEXTURE1D: + if ((arraySize > D3D11_REQ_TEXTURE1D_ARRAY_AXIS_DIMENSION) || + (width > D3D11_REQ_TEXTURE1D_U_DIMENSION)) + { + DebugTrace("ERROR: Resource dimensions too large for DirectX 11 (1D: array %u, size %u)\n", arraySize, width); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: + if (isCubeMap) + { + // This is the right bound because we set arraySize to (NumCubes*6) above + if ((arraySize > D3D11_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION) || + (width > D3D11_REQ_TEXTURECUBE_DIMENSION) || + (height > D3D11_REQ_TEXTURECUBE_DIMENSION)) + { + DebugTrace("ERROR: Resource dimensions too large for DirectX 11 (2D cubemap: array %u, size %u by %u)\n", arraySize, width, height); + return 
HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + } + else if ((arraySize > D3D11_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION) || + (width > D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION) || + (height > D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION)) + { + DebugTrace("ERROR: Resource dimensions too large for DirectX 11 (2D: array %u, size %u by %u)\n", arraySize, width, height); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE3D: + if ((arraySize > 1) || + (width > D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION) || + (height > D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION) || + (depth > D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION)) + { + DebugTrace("ERROR: Resource dimensions too large for DirectX 11 (3D: array %u, size %u by %u by %u)\n", arraySize, width, height, depth); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + break; + + case D3D11_RESOURCE_DIMENSION_BUFFER: + DebugTrace("ERROR: Resource dimension buffer type not supported for textures\n"); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + + case D3D11_RESOURCE_DIMENSION_UNKNOWN: + default: + DebugTrace("ERROR: Unknown resource dimension (%u)\n", static_cast(resDim)); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + bool autogen = false; + if (mipCount == 1 && d3dContext && textureView) // Must have context and shader-view to auto generate mipmaps + { + // See if format is supported for auto-gen mipmaps (varies by feature level) + UINT fmtSupport = 0; + hr = d3dDevice->CheckFormatSupport(format, &fmtSupport); + if (SUCCEEDED(hr) && (fmtSupport & D3D11_FORMAT_SUPPORT_MIP_AUTOGEN)) + { + // 10level9 feature levels do not support auto-gen mipgen for volume textures + if ((resDim != D3D11_RESOURCE_DIMENSION_TEXTURE3D) + || (d3dDevice->GetFeatureLevel() >= D3D_FEATURE_LEVEL_10_0)) + { + autogen = true; +#if defined(_XBOX_ONE) && defined(_TITLE) + if (!d3dDeviceX || !d3dContextX) + return E_INVALIDARG; +#endif + } + } + } + + if (autogen) + { + // Create texture with auto-generated 
mipmaps + ID3D11Resource* tex = nullptr; + hr = CreateD3DResources(d3dDevice, + resDim, width, height, depth, 0, arraySize, + format, + usage, + bindFlags | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET, + cpuAccessFlags, + miscFlags | D3D11_RESOURCE_MISC_GENERATE_MIPS, forceSRGB, + isCubeMap, + nullptr, + &tex, textureView); + if (SUCCEEDED(hr)) + { + size_t numBytes = 0; + size_t rowBytes = 0; + hr = GetSurfaceInfo(width, height, format, &numBytes, &rowBytes, nullptr); + if (FAILED(hr)) + return hr; + + if (numBytes > bitSize) + { + (*textureView)->Release(); + *textureView = nullptr; + tex->Release(); + return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + if (numBytes > UINT32_MAX || rowBytes > UINT32_MAX) + return HRESULT_FROM_WIN32(ERROR_ARITHMETIC_OVERFLOW); + + D3D11_SHADER_RESOURCE_VIEW_DESC desc = {}; + (*textureView)->GetDesc(&desc); + + UINT mipLevels = 1; + + switch (desc.ViewDimension) + { + case D3D_SRV_DIMENSION_TEXTURE1D: mipLevels = desc.Texture1D.MipLevels; break; + case D3D_SRV_DIMENSION_TEXTURE1DARRAY: mipLevels = desc.Texture1DArray.MipLevels; break; + case D3D_SRV_DIMENSION_TEXTURE2D: mipLevels = desc.Texture2D.MipLevels; break; + case D3D_SRV_DIMENSION_TEXTURE2DARRAY: mipLevels = desc.Texture2DArray.MipLevels; break; + case D3D_SRV_DIMENSION_TEXTURECUBE: mipLevels = desc.TextureCube.MipLevels; break; + case D3D_SRV_DIMENSION_TEXTURECUBEARRAY:mipLevels = desc.TextureCubeArray.MipLevels; break; + case D3D_SRV_DIMENSION_TEXTURE3D: mipLevels = desc.Texture3D.MipLevels; break; + default: + (*textureView)->Release(); + *textureView = nullptr; + tex->Release(); + return E_UNEXPECTED; + } + +#if defined(_XBOX_ONE) && defined(_TITLE) + + std::unique_ptr initData(new (std::nothrow) D3D11_SUBRESOURCE_DATA[arraySize]); + if (!initData) + { + return E_OUTOFMEMORY; + } + + const uint8_t* pSrcBits = bitData; + const uint8_t* pEndBits = bitData + bitSize; + for (UINT item = 0; item < arraySize; ++item) + { + if ((pSrcBits + numBytes) > pEndBits) + { 
+ (*textureView)->Release(); + *textureView = nullptr; + tex->Release(); + return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + initData[item].pSysMem = pSrcBits; + initData[item].SysMemPitch = static_cast(rowBytes); + initData[item].SysMemSlicePitch = static_cast(numBytes); + pSrcBits += numBytes; + } + + ID3D11Resource* pStaging = nullptr; + switch (resDim) + { + case D3D11_RESOURCE_DIMENSION_TEXTURE1D: + { + ID3D11Texture1D *temp = nullptr; + CD3D11_TEXTURE1D_DESC stagingDesc(format, width, arraySize, 1, 0, D3D11_USAGE_STAGING, D3D11_CPU_ACCESS_READ); + hr = d3dDevice->CreateTexture1D(&stagingDesc, initData.get(), &temp); + if (SUCCEEDED(hr)) + pStaging = temp; + } + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: + { + ID3D11Texture2D *temp = nullptr; + CD3D11_TEXTURE2D_DESC stagingDesc(format, width, height, arraySize, 1, 0, D3D11_USAGE_STAGING, D3D11_CPU_ACCESS_READ, 1, 0, isCubeMap ? D3D11_RESOURCE_MISC_TEXTURECUBE : 0); + hr = d3dDevice->CreateTexture2D(&stagingDesc, initData.get(), &temp); + if (SUCCEEDED(hr)) + pStaging = temp; + } + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE3D: + { + ID3D11Texture3D *temp = nullptr; + CD3D11_TEXTURE3D_DESC stagingDesc(format, width, height, depth, 1, 0, D3D11_USAGE_STAGING, D3D11_CPU_ACCESS_READ); + hr = d3dDevice->CreateTexture3D(&stagingDesc, initData.get(), &temp); + if (SUCCEEDED(hr)) + pStaging = temp; + } + break; + }; + + if (SUCCEEDED(hr)) + { + for (UINT item = 0; item < arraySize; ++item) + { + UINT res = D3D11CalcSubresource(0, item, mipLevels); + d3dContext->CopySubresourceRegion(tex, res, 0, 0, 0, pStaging, item, nullptr); + } + + UINT64 copyFence = d3dContextX->InsertFence(0); + while (d3dDeviceX->IsFencePending(copyFence)) { SwitchToThread(); } + pStaging->Release(); + } +#else + if (arraySize > 1) + { + const uint8_t* pSrcBits = bitData; + const uint8_t* pEndBits = bitData + bitSize; + for (UINT item = 0; item < arraySize; ++item) + { + if ((pSrcBits + numBytes) > pEndBits) + { + 
(*textureView)->Release(); + *textureView = nullptr; + tex->Release(); + return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + UINT res = D3D11CalcSubresource(0, item, mipLevels); + d3dContext->UpdateSubresource(tex, res, nullptr, pSrcBits, static_cast(rowBytes), static_cast(numBytes)); + pSrcBits += numBytes; + } + } + else + { + d3dContext->UpdateSubresource(tex, 0, nullptr, bitData, static_cast(rowBytes), static_cast(numBytes)); + } +#endif + + d3dContext->GenerateMips(*textureView); + + if (texture) + { + *texture = tex; + } + else + { + tex->Release(); + } + } + } + else + { + // Create the texture + std::unique_ptr initData(new (std::nothrow) D3D11_SUBRESOURCE_DATA[mipCount * arraySize]); + if (!initData) + { + return E_OUTOFMEMORY; + } + + size_t skipMip = 0; + size_t twidth = 0; + size_t theight = 0; + size_t tdepth = 0; + hr = FillInitData(width, height, depth, mipCount, arraySize, format, + maxsize, bitSize, bitData, + twidth, theight, tdepth, skipMip, initData.get()); + + if (SUCCEEDED(hr)) + { + hr = CreateD3DResources(d3dDevice, + resDim, twidth, theight, tdepth, mipCount - skipMip, arraySize, + format, + usage, bindFlags, cpuAccessFlags, miscFlags, + forceSRGB, + isCubeMap, + initData.get(), + texture, textureView); + + if (FAILED(hr) && !maxsize && (mipCount > 1)) + { + // Retry with a maxsize determined by feature level + switch (d3dDevice->GetFeatureLevel()) + { + case D3D_FEATURE_LEVEL_9_1: + case D3D_FEATURE_LEVEL_9_2: + if (isCubeMap) + { + maxsize = 512u /*D3D_FL9_1_REQ_TEXTURECUBE_DIMENSION*/; + } + else + { + maxsize = (resDim == D3D11_RESOURCE_DIMENSION_TEXTURE3D) + ? 256u /*D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION*/ + : 2048u /*D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION*/; + } + break; + + case D3D_FEATURE_LEVEL_9_3: + maxsize = (resDim == D3D11_RESOURCE_DIMENSION_TEXTURE3D) + ? 
256u /*D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION*/ + : 4096u /*D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION*/; + break; + + default: // D3D_FEATURE_LEVEL_10_0 & D3D_FEATURE_LEVEL_10_1 + maxsize = (resDim == D3D11_RESOURCE_DIMENSION_TEXTURE3D) + ? 2048u /*D3D10_REQ_TEXTURE3D_U_V_OR_W_DIMENSION*/ + : 8192u /*D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION*/; + break; + } + + hr = FillInitData(width, height, depth, mipCount, arraySize, format, + maxsize, bitSize, bitData, + twidth, theight, tdepth, skipMip, initData.get()); + if (SUCCEEDED(hr)) + { + hr = CreateD3DResources(d3dDevice, + resDim, twidth, theight, tdepth, mipCount - skipMip, arraySize, + format, + usage, bindFlags, cpuAccessFlags, miscFlags, + forceSRGB, + isCubeMap, + initData.get(), + texture, textureView); + } + } + } + } + + return hr; + } + + //-------------------------------------------------------------------------------------- + void SetDebugTextureInfo( + _In_z_ const wchar_t* fileName, + _In_opt_ ID3D11Resource** texture, + _In_opt_ ID3D11ShaderResourceView** textureView) noexcept + { +#if !defined(NO_D3D11_DEBUG_NAME) && ( defined(_DEBUG) || defined(PROFILE) ) + if (texture || textureView) + { +#if defined(_XBOX_ONE) && defined(_TITLE) + const wchar_t* pstrName = wcsrchr(fileName, '\\'); + if (!pstrName) + { + pstrName = fileName; + } + else + { + pstrName++; + } + if (texture && *texture) + { + (*texture)->SetName(pstrName); + } + if (textureView && *textureView) + { + (*textureView)->SetName(pstrName); + } +#else + CHAR strFileA[MAX_PATH]; + int result = WideCharToMultiByte(CP_UTF8, + WC_NO_BEST_FIT_CHARS, + fileName, + -1, + strFileA, + MAX_PATH, + nullptr, + nullptr + ); + if (result > 0) + { + const char* pstrName = strrchr(strFileA, '\\'); + if (!pstrName) + { + pstrName = strFileA; + } + else + { + pstrName++; + } + + if (texture && *texture) + { + (*texture)->SetPrivateData(WKPDID_D3DDebugObjectName, + static_cast(strnlen_s(pstrName, MAX_PATH)), + pstrName + ); + } + + if (textureView && 
*textureView) + { + (*textureView)->SetPrivateData(WKPDID_D3DDebugObjectName, + static_cast(strnlen_s(pstrName, MAX_PATH)), + pstrName + ); + } + } +#endif + } +#else + UNREFERENCED_PARAMETER(fileName); + UNREFERENCED_PARAMETER(texture); + UNREFERENCED_PARAMETER(textureView); +#endif + } +} // anonymous namespace + + +//-------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT DirectX::CreateDDSTextureFromMemory( + ID3D11Device* d3dDevice, + const uint8_t* ddsData, + size_t ddsDataSize, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView, + size_t maxsize, + DDS_ALPHA_MODE* alphaMode) noexcept +{ + return CreateDDSTextureFromMemoryEx(d3dDevice, + ddsData, ddsDataSize, + maxsize, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + false, + texture, textureView, alphaMode); +} + +_Use_decl_annotations_ +#if defined(_XBOX_ONE) && defined(_TITLE) +HRESULT DirectX::CreateDDSTextureFromMemory( + ID3D11DeviceX* d3dDevice, + ID3D11DeviceContextX* d3dContext, +#else +HRESULT DirectX::CreateDDSTextureFromMemory( + ID3D11Device* d3dDevice, + ID3D11DeviceContext* d3dContext, +#endif + const uint8_t* ddsData, + size_t ddsDataSize, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView, + size_t maxsize, + DDS_ALPHA_MODE* alphaMode) noexcept +{ + return CreateDDSTextureFromMemoryEx(d3dDevice, d3dContext, + ddsData, ddsDataSize, + maxsize, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + false, + texture, textureView, alphaMode); +} + +_Use_decl_annotations_ +HRESULT DirectX::CreateDDSTextureFromMemoryEx( + ID3D11Device* d3dDevice, + const uint8_t* ddsData, + size_t ddsDataSize, + size_t maxsize, + D3D11_USAGE usage, + unsigned int bindFlags, + unsigned int cpuAccessFlags, + unsigned int miscFlags, + bool forceSRGB, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView, + DDS_ALPHA_MODE* alphaMode) noexcept +{ + if (texture) + { + *texture = nullptr; + } + 
if (textureView) + { + *textureView = nullptr; + } + if (alphaMode) + { + *alphaMode = DDS_ALPHA_MODE_UNKNOWN; + } + + if (!d3dDevice || !ddsData || (!texture && !textureView)) + { + return E_INVALIDARG; + } + + if (textureView && !(bindFlags & D3D11_BIND_SHADER_RESOURCE)) + { + return E_INVALIDARG; + } + + // Validate DDS file in memory + const DDS_HEADER* header = nullptr; + const uint8_t* bitData = nullptr; + size_t bitSize = 0; + + HRESULT hr = LoadTextureDataFromMemory(ddsData, ddsDataSize, + &header, + &bitData, + &bitSize + ); + if (FAILED(hr)) + { + return hr; + } + + hr = CreateTextureFromDDS(d3dDevice, nullptr, + #if defined(_XBOX_ONE) && defined(_TITLE) + nullptr, nullptr, + #endif + header, bitData, bitSize, + maxsize, + usage, bindFlags, cpuAccessFlags, miscFlags, + forceSRGB, + texture, textureView); + if (SUCCEEDED(hr)) + { + if (texture && *texture) + { + SetDebugObjectName(*texture, "DDSTextureLoader"); + } + + if (textureView && *textureView) + { + SetDebugObjectName(*textureView, "DDSTextureLoader"); + } + + if (alphaMode) + *alphaMode = GetAlphaMode(header); + } + + return hr; +} + +_Use_decl_annotations_ +#if defined(_XBOX_ONE) && defined(_TITLE) +HRESULT DirectX::CreateDDSTextureFromMemoryEx( + ID3D11DeviceX* d3dDevice, + ID3D11DeviceContextX* d3dContext, +#else +HRESULT DirectX::CreateDDSTextureFromMemoryEx( + ID3D11Device* d3dDevice, + ID3D11DeviceContext* d3dContext, +#endif + const uint8_t* ddsData, + size_t ddsDataSize, + size_t maxsize, + D3D11_USAGE usage, + unsigned int bindFlags, + unsigned int cpuAccessFlags, + unsigned int miscFlags, + bool forceSRGB, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView, + DDS_ALPHA_MODE* alphaMode) noexcept +{ + if (texture) + { + *texture = nullptr; + } + if (textureView) + { + *textureView = nullptr; + } + if (alphaMode) + { + *alphaMode = DDS_ALPHA_MODE_UNKNOWN; + } + + if (!d3dDevice || !ddsData || (!texture && !textureView)) + { + return E_INVALIDARG; + } + + if (textureView && 
!(bindFlags & D3D11_BIND_SHADER_RESOURCE)) + { + return E_INVALIDARG; + } + + // Validate DDS file in memory + const DDS_HEADER* header = nullptr; + const uint8_t* bitData = nullptr; + size_t bitSize = 0; + + HRESULT hr = LoadTextureDataFromMemory(ddsData, ddsDataSize, + &header, + &bitData, + &bitSize + ); + if (FAILED(hr)) + { + return hr; + } + + hr = CreateTextureFromDDS(d3dDevice, d3dContext, + #if defined(_XBOX_ONE) && defined(_TITLE) + d3dDevice, d3dContext, + #endif + header, bitData, bitSize, + maxsize, + usage, bindFlags, cpuAccessFlags, miscFlags, + forceSRGB, + texture, textureView); + if (SUCCEEDED(hr)) + { + if (texture && *texture) + { + SetDebugObjectName(*texture, "DDSTextureLoader"); + } + + if (textureView && *textureView) + { + SetDebugObjectName(*textureView, "DDSTextureLoader"); + } + + if (alphaMode) + *alphaMode = GetAlphaMode(header); + } + + return hr; +} + +//-------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT DirectX::CreateDDSTextureFromFile( + ID3D11Device* d3dDevice, + const wchar_t* fileName, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView, + size_t maxsize, + DDS_ALPHA_MODE* alphaMode) noexcept +{ + return CreateDDSTextureFromFileEx(d3dDevice, + fileName, + maxsize, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + false, + texture, textureView, alphaMode); +} + +_Use_decl_annotations_ +#if defined(_XBOX_ONE) && defined(_TITLE) +HRESULT DirectX::CreateDDSTextureFromFile( + ID3D11DeviceX* d3dDevice, + ID3D11DeviceContextX* d3dContext, +#else +HRESULT DirectX::CreateDDSTextureFromFile( + ID3D11Device* d3dDevice, + ID3D11DeviceContext* d3dContext, +#endif + const wchar_t* fileName, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView, + size_t maxsize, + DDS_ALPHA_MODE* alphaMode) noexcept +{ + return CreateDDSTextureFromFileEx(d3dDevice, d3dContext, + fileName, + maxsize, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 
0, 0, + false, + texture, textureView, alphaMode); +} + +_Use_decl_annotations_ +HRESULT DirectX::CreateDDSTextureFromFileEx( + ID3D11Device* d3dDevice, + const wchar_t* fileName, + size_t maxsize, + D3D11_USAGE usage, + unsigned int bindFlags, + unsigned int cpuAccessFlags, + unsigned int miscFlags, + bool forceSRGB, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView, + DDS_ALPHA_MODE* alphaMode) noexcept +{ + if (texture) + { + *texture = nullptr; + } + if (textureView) + { + *textureView = nullptr; + } + if (alphaMode) + { + *alphaMode = DDS_ALPHA_MODE_UNKNOWN; + } + + if (!d3dDevice || !fileName || (!texture && !textureView)) + { + return E_INVALIDARG; + } + + if (textureView && !(bindFlags & D3D11_BIND_SHADER_RESOURCE)) + { + return E_INVALIDARG; + } + + const DDS_HEADER* header = nullptr; + const uint8_t* bitData = nullptr; + size_t bitSize = 0; + + std::unique_ptr ddsData; + HRESULT hr = LoadTextureDataFromFile(fileName, + ddsData, + &header, + &bitData, + &bitSize + ); + if (FAILED(hr)) + { + return hr; + } + + hr = CreateTextureFromDDS(d3dDevice, nullptr, + #if defined(_XBOX_ONE) && defined(_TITLE) + nullptr, nullptr, + #endif + header, bitData, bitSize, + maxsize, + usage, bindFlags, cpuAccessFlags, miscFlags, + forceSRGB, + texture, textureView); + + if (SUCCEEDED(hr)) + { + SetDebugTextureInfo(fileName, texture, textureView); + + if (alphaMode) + *alphaMode = GetAlphaMode(header); + } + + return hr; +} + +_Use_decl_annotations_ +#if defined(_XBOX_ONE) && defined(_TITLE) +HRESULT DirectX::CreateDDSTextureFromFileEx( + ID3D11DeviceX* d3dDevice, + ID3D11DeviceContextX* d3dContext, +#else +HRESULT DirectX::CreateDDSTextureFromFileEx( + ID3D11Device* d3dDevice, + ID3D11DeviceContext* d3dContext, +#endif + const wchar_t* fileName, + size_t maxsize, + D3D11_USAGE usage, + unsigned int bindFlags, + unsigned int cpuAccessFlags, + unsigned int miscFlags, + bool forceSRGB, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView, + 
DDS_ALPHA_MODE* alphaMode) noexcept +{ + if (texture) + { + *texture = nullptr; + } + if (textureView) + { + *textureView = nullptr; + } + if (alphaMode) + { + *alphaMode = DDS_ALPHA_MODE_UNKNOWN; + } + + if (!d3dDevice || !fileName || (!texture && !textureView)) + { + return E_INVALIDARG; + } + + if (textureView && !(bindFlags & D3D11_BIND_SHADER_RESOURCE)) + { + return E_INVALIDARG; + } + + const DDS_HEADER* header = nullptr; + const uint8_t* bitData = nullptr; + size_t bitSize = 0; + + std::unique_ptr ddsData; + HRESULT hr = LoadTextureDataFromFile(fileName, + ddsData, + &header, + &bitData, + &bitSize + ); + if (FAILED(hr)) + { + return hr; + } + + hr = CreateTextureFromDDS(d3dDevice, d3dContext, + #if defined(_XBOX_ONE) && defined(_TITLE) + d3dDevice, d3dContext, + #endif + header, bitData, bitSize, + maxsize, + usage, bindFlags, cpuAccessFlags, miscFlags, + forceSRGB, + texture, textureView); + + if (SUCCEEDED(hr)) + { + SetDebugTextureInfo(fileName, texture, textureView); + + if (alphaMode) + *alphaMode = GetAlphaMode(header); + } + + return hr; +} diff --git a/Sdk/External/DirectXTK/Src/DGSLEffect.cpp b/Sdk/External/DirectXTK/Src/DGSLEffect.cpp new file mode 100644 index 0000000..1bb9bce --- /dev/null +++ b/Sdk/External/DirectXTK/Src/DGSLEffect.cpp @@ -0,0 +1,940 @@ +//-------------------------------------------------------------------------------------- +// File: DGSLEffect.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "EffectCommon.h" +#include "DemandCreate.h" + +// +// Based on the Visual Studio 3D Starter Kit +// +// http://aka.ms/vs3dkit +// + +namespace DirectX +{ + namespace EffectDirtyFlags + { + constexpr int ConstantBufferMaterial = 0x10000; + constexpr int ConstantBufferLight = 0x20000; + constexpr int ConstantBufferObject = 0x40000; + constexpr int ConstantBufferMisc = 0x80000; + constexpr int ConstantBufferBones = 0x100000; + } +} + +using namespace DirectX; +using Microsoft::WRL::ComPtr; + +namespace +{ + // Constant buffer layout. Must match the shader! +#pragma pack(push,1) + +// Slot 0 + struct MaterialConstants + { + XMVECTOR Ambient; + XMVECTOR Diffuse; + XMVECTOR Specular; + XMVECTOR Emissive; + float SpecularPower; + float Padding0; + float Padding1; + float Padding2; + }; + + // Slot 1 + struct LightConstants + { + XMVECTOR Ambient; + XMVECTOR LightColor[DGSLEffect::MaxDirectionalLights]; + XMVECTOR LightAttenuation[DGSLEffect::MaxDirectionalLights]; + XMVECTOR LightDirection[DGSLEffect::MaxDirectionalLights]; + XMVECTOR LightSpecularIntensity[DGSLEffect::MaxDirectionalLights]; + UINT IsPointLight[DGSLEffect::MaxDirectionalLights]; + UINT ActiveLights; + float Padding0; + float Padding1; + float Padding2; + }; + + // Note - DGSL does not appear to make use of LightAttenuation or IsPointLight. Not sure if it uses ActiveLights either. 
+ + // Slot 2 + struct ObjectConstants + { + XMMATRIX LocalToWorld4x4; + XMMATRIX LocalToProjected4x4; + XMMATRIX WorldToLocal4x4; + XMMATRIX WorldToView4x4; + XMMATRIX UvTransform4x4; + XMVECTOR EyePosition; + }; + + // Slot 3 + struct MiscConstants + { + float ViewportWidth; + float ViewportHeight; + float Time; + float Padding1; + }; + + // Slot 4 + struct BoneConstants + { + XMVECTOR Bones[DGSLEffect::MaxBones][3]; + }; + +#pragma pack(pop) + + static_assert((sizeof(MaterialConstants) % 16) == 0, "CB size not padded correctly"); + static_assert((sizeof(LightConstants) % 16) == 0, "CB size not padded correctly"); + static_assert((sizeof(ObjectConstants) % 16) == 0, "CB size not padded correctly"); + static_assert((sizeof(MiscConstants) % 16) == 0, "CB size not padded correctly"); + static_assert((sizeof(BoneConstants) % 16) == 0, "CB size not padded correctly"); + + __declspec(align(16)) struct DGSLEffectConstants + { + MaterialConstants material; + LightConstants light; + ObjectConstants object; + MiscConstants misc; + BoneConstants bones; + }; + + struct DGSLEffectTraits + { + static constexpr int VertexShaderCount = 8; + static constexpr int PixelShaderCount = 12; + + static const ShaderBytecode VertexShaderBytecode[VertexShaderCount]; + static const ShaderBytecode PixelShaderBytecode[PixelShaderCount]; + }; +} + +// Include the precompiled shader code. 
+namespace +{ +#if defined(_XBOX_ONE) && defined(_TITLE) + // VS + #include "Shaders/Compiled/XboxOneDGSLEffect_main.inc" + #include "Shaders/Compiled/XboxOneDGSLEffect_mainVc.inc" + #include "Shaders/Compiled/XboxOneDGSLEffect_main1Bones.inc" + #include "Shaders/Compiled/XboxOneDGSLEffect_main1BonesVc.inc" + #include "Shaders/Compiled/XboxOneDGSLEffect_main2Bones.inc" + #include "Shaders/Compiled/XboxOneDGSLEffect_main2BonesVc.inc" + #include "Shaders/Compiled/XboxOneDGSLEffect_main4Bones.inc" + #include "Shaders/Compiled/XboxOneDGSLEffect_main4BonesVc.inc" + + // PS + #include "Shaders/Compiled/XboxOneDGSLUnlit_main.inc" + #include "Shaders/Compiled/XboxOneDGSLLambert_main.inc" + #include "Shaders/Compiled/XboxOneDGSLPhong_main.inc" + + #include "Shaders/Compiled/XboxOneDGSLUnlit_mainTk.inc" + #include "Shaders/Compiled/XboxOneDGSLLambert_mainTk.inc" + #include "Shaders/Compiled/XboxOneDGSLPhong_mainTk.inc" + + #include "Shaders/Compiled/XboxOneDGSLUnlit_mainTx.inc" + #include "Shaders/Compiled/XboxOneDGSLLambert_mainTx.inc" + #include "Shaders/Compiled/XboxOneDGSLPhong_mainTx.inc" + + #include "Shaders/Compiled/XboxOneDGSLUnlit_mainTxTk.inc" + #include "Shaders/Compiled/XboxOneDGSLLambert_mainTxTk.inc" + #include "Shaders/Compiled/XboxOneDGSLPhong_mainTxTk.inc" +#else + // VS + #include "Shaders/Compiled/DGSLEffect_main.inc" + #include "Shaders/Compiled/DGSLEffect_mainVc.inc" + #include "Shaders/Compiled/DGSLEffect_main1Bones.inc" + #include "Shaders/Compiled/DGSLEffect_main1BonesVc.inc" + #include "Shaders/Compiled/DGSLEffect_main2Bones.inc" + #include "Shaders/Compiled/DGSLEffect_main2BonesVc.inc" + #include "Shaders/Compiled/DGSLEffect_main4Bones.inc" + #include "Shaders/Compiled/DGSLEffect_main4BonesVc.inc" + + // PS + #include "Shaders/Compiled/DGSLUnlit_main.inc" + #include "Shaders/Compiled/DGSLLambert_main.inc" + #include "Shaders/Compiled/DGSLPhong_main.inc" + + #include "Shaders/Compiled/DGSLUnlit_mainTk.inc" + #include 
"Shaders/Compiled/DGSLLambert_mainTk.inc" + #include "Shaders/Compiled/DGSLPhong_mainTk.inc" + + #include "Shaders/Compiled/DGSLUnlit_mainTx.inc" + #include "Shaders/Compiled/DGSLLambert_mainTx.inc" + #include "Shaders/Compiled/DGSLPhong_mainTx.inc" + + #include "Shaders/Compiled/DGSLUnlit_mainTxTk.inc" + #include "Shaders/Compiled/DGSLLambert_mainTxTk.inc" + #include "Shaders/Compiled/DGSLPhong_mainTxTk.inc" +#endif +} + + +const ShaderBytecode DGSLEffectTraits::VertexShaderBytecode[] = +{ + { DGSLEffect_main, sizeof(DGSLEffect_main) }, + { DGSLEffect_mainVc, sizeof(DGSLEffect_mainVc) }, + { DGSLEffect_main1Bones, sizeof(DGSLEffect_main1Bones) }, + { DGSLEffect_main1BonesVc, sizeof(DGSLEffect_main1BonesVc) }, + { DGSLEffect_main2Bones, sizeof(DGSLEffect_main2Bones) }, + { DGSLEffect_main2BonesVc, sizeof(DGSLEffect_main2BonesVc) }, + { DGSLEffect_main4Bones, sizeof(DGSLEffect_main4Bones) }, + { DGSLEffect_main4BonesVc, sizeof(DGSLEffect_main4BonesVc) }, +}; + + +const ShaderBytecode DGSLEffectTraits::PixelShaderBytecode[] = +{ + { DGSLUnlit_main, sizeof(DGSLUnlit_main) }, // UNLIT (no texture) + { DGSLLambert_main, sizeof(DGSLLambert_main) }, // LAMBERT (no texture) + { DGSLPhong_main, sizeof(DGSLPhong_main) }, // PHONG (no texture) + + { DGSLUnlit_mainTx, sizeof(DGSLUnlit_mainTx) }, // UNLIT (textured) + { DGSLLambert_mainTx, sizeof(DGSLLambert_mainTx) }, // LAMBERT (textured) + { DGSLPhong_mainTx, sizeof(DGSLPhong_mainTx) }, // PHONG (textured) + + { DGSLUnlit_mainTk, sizeof(DGSLUnlit_mainTk) }, // UNLIT (no texture, discard) + { DGSLLambert_mainTk, sizeof(DGSLLambert_mainTk) }, // LAMBERT (no texture, discard) + { DGSLPhong_mainTk, sizeof(DGSLPhong_mainTk) }, // PHONG (no texture, discard) + + { DGSLUnlit_mainTxTk, sizeof(DGSLUnlit_mainTxTk) }, // UNLIT (textured, discard) + { DGSLLambert_mainTxTk, sizeof(DGSLLambert_mainTxTk) }, // LAMBERT (textured, discard) + { DGSLPhong_mainTxTk, sizeof(DGSLPhong_mainTxTk) }, // PHONG (textured, discard) +}; + + +class 
DGSLEffect::Impl : public AlignedNew +{ +public: + Impl(_In_ ID3D11Device* device, _In_opt_ ID3D11PixelShader* pixelShader, _In_ bool enableSkinning) : + constants{}, + dirtyFlags(INT_MAX), + vertexColorEnabled(false), + textureEnabled(false), + specularEnabled(false), + alphaDiscardEnabled(false), + weightsPerVertex(enableSkinning ? 4 : 0), + mCBMaterial(device), + mCBLight(device), + mCBObject(device), + mCBMisc(device), + mPixelShader(pixelShader), + mDeviceResources(deviceResourcesPool.DemandCreate(device)) + { + static_assert(_countof(DGSLEffectTraits::VertexShaderBytecode) == DGSLEffectTraits::VertexShaderCount, "array/max mismatch"); + static_assert(_countof(DGSLEffectTraits::PixelShaderBytecode) == DGSLEffectTraits::PixelShaderCount, "array/max mismatch"); + + XMMATRIX id = XMMatrixIdentity(); + world = id; + view = id; + projection = id; + constants.material.Diffuse = g_XMOne; + constants.material.Specular = g_XMOne; + constants.material.SpecularPower = 16; + constants.object.UvTransform4x4 = id; + + static_assert(MaxDirectionalLights == 4, "Mismatch with DGSL pipline"); + for (int i = 0; i < MaxDirectionalLights; ++i) + { + lightEnabled[i] = (i == 0); + lightDiffuseColor[i] = g_XMZero; + lightSpecularColor[i] = g_XMOne; + + constants.light.LightDirection[i] = g_XMNegIdentityR1; + constants.light.LightColor[i] = lightEnabled[i] ? lightDiffuseColor[i] : g_XMZero; + constants.light.LightSpecularIntensity[i] = lightEnabled[i] ? 
lightSpecularColor[i] : g_XMZero; + } + + if (enableSkinning) + { + mCBBone.Create(device); + + for (size_t j = 0; j < MaxBones; ++j) + { + constants.bones.Bones[j][0] = g_XMIdentityR0; + constants.bones.Bones[j][1] = g_XMIdentityR1; + constants.bones.Bones[j][2] = g_XMIdentityR2; + } + } + } + + // Methods + void Apply(_In_ ID3D11DeviceContext* deviceContext); + void GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) noexcept; + + // Fields + DGSLEffectConstants constants; + + XMMATRIX world; + XMMATRIX view; + XMMATRIX projection; + + bool lightEnabled[MaxDirectionalLights]; + XMVECTOR lightDiffuseColor[MaxDirectionalLights]; + XMVECTOR lightSpecularColor[MaxDirectionalLights]; + + ComPtr textures[MaxTextures]; + + int dirtyFlags; + + bool vertexColorEnabled; + bool textureEnabled; + bool specularEnabled; + bool alphaDiscardEnabled; + int weightsPerVertex; + +private: + ConstantBuffer mCBMaterial; + ConstantBuffer mCBLight; + ConstantBuffer mCBObject; + ConstantBuffer mCBMisc; + ConstantBuffer mCBBone; + ComPtr mPixelShader; + + int GetCurrentVSPermutation() const noexcept; + int GetCurrentPSPermutation() const noexcept; + + // Only one of these helpers is allocated per D3D device, even if there are multiple effect instances. + class DeviceResources : protected EffectDeviceResources + { + public: + DeviceResources(_In_ ID3D11Device* device) noexcept + : EffectDeviceResources(device), + mVertexShaders{}, + mPixelShaders{} + { } + + // Gets or lazily creates the vertex shader. + ID3D11VertexShader* GetVertexShader(int permutation) + { + assert(permutation >= 0 && permutation < DGSLEffectTraits::VertexShaderCount); + + return DemandCreateVertexShader(mVertexShaders[permutation], DGSLEffectTraits::VertexShaderBytecode[permutation]); + } + + // Gets or lazily creates the specified pixel shader permutation. 
+ ID3D11PixelShader* GetPixelShader(int permutation) + { + assert(permutation >= 0 && permutation < DGSLEffectTraits::PixelShaderCount); + + return DemandCreatePixelShader(mPixelShaders[permutation], DGSLEffectTraits::PixelShaderBytecode[permutation]); + } + + // Gets or lazily creates the default texture + ID3D11ShaderResourceView* GetDefaultTexture() { return EffectDeviceResources::GetDefaultTexture(); } + + + private: + ComPtr mVertexShaders[DGSLEffectTraits::VertexShaderCount]; + ComPtr mPixelShaders[DGSLEffectTraits::PixelShaderCount]; + }; + + // Per-device resources. + std::shared_ptr mDeviceResources; + + static SharedResourcePool deviceResourcesPool; +}; + + +// Global pool of per-device DGSLEffect resources. +SharedResourcePool DGSLEffect::Impl::deviceResourcesPool; + + +void DGSLEffect::Impl::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + auto vertexShader = mDeviceResources->GetVertexShader(GetCurrentVSPermutation()); + auto pixelShader = mPixelShader.Get(); + if (!pixelShader) + { + pixelShader = mDeviceResources->GetPixelShader(GetCurrentPSPermutation()); + } + + deviceContext->VSSetShader(vertexShader, nullptr, 0); + deviceContext->PSSetShader(pixelShader, nullptr, 0); + + // Check for any required matrices updates + if (dirtyFlags & EffectDirtyFlags::WorldViewProj) + { + constants.object.LocalToWorld4x4 = XMMatrixTranspose(world); + constants.object.WorldToView4x4 = XMMatrixTranspose(view); + + XMMATRIX worldView = XMMatrixMultiply(world, view); + + constants.object.LocalToProjected4x4 = XMMatrixTranspose(XMMatrixMultiply(worldView, projection)); + + dirtyFlags &= ~EffectDirtyFlags::WorldViewProj; + dirtyFlags |= EffectDirtyFlags::ConstantBufferObject; + } + + if (dirtyFlags & EffectDirtyFlags::WorldInverseTranspose) + { + XMMATRIX worldInverse = XMMatrixInverse(nullptr, world); + + constants.object.WorldToLocal4x4 = XMMatrixTranspose(worldInverse); + + dirtyFlags &= ~EffectDirtyFlags::WorldInverseTranspose; + dirtyFlags |= 
EffectDirtyFlags::ConstantBufferObject; + } + + if (dirtyFlags & EffectDirtyFlags::EyePosition) + { + XMMATRIX viewInverse = XMMatrixInverse(nullptr, view); + + constants.object.EyePosition = viewInverse.r[3]; + + dirtyFlags &= ~EffectDirtyFlags::EyePosition; + dirtyFlags |= EffectDirtyFlags::ConstantBufferObject; + } + +#if defined(_XBOX_ONE) && defined(_TITLE) + void* grfxMemoryMaterial; + mCBMaterial.SetData(deviceContext, constants.material, &grfxMemoryMaterial); + + void* grfxMemoryLight; + mCBLight.SetData(deviceContext, constants.light, &grfxMemoryLight); + + void* grfxMemoryObject; + mCBObject.SetData(deviceContext, constants.object, &grfxMemoryObject); + + void *grfxMemoryMisc; + mCBMisc.SetData(deviceContext, constants.misc, &grfxMemoryMisc); + + ComPtr deviceContextX; + ThrowIfFailed(deviceContext->QueryInterface(IID_GRAPHICS_PPV_ARGS(deviceContextX.GetAddressOf()))); + + auto buffer = mCBMaterial.GetBuffer(); + deviceContextX->VSSetPlacementConstantBuffer(0, buffer, grfxMemoryMaterial); + deviceContextX->PSSetPlacementConstantBuffer(0, buffer, grfxMemoryMaterial); + + buffer = mCBLight.GetBuffer(); + deviceContextX->VSSetPlacementConstantBuffer(1, buffer, grfxMemoryMaterial); + deviceContextX->PSSetPlacementConstantBuffer(1, buffer, grfxMemoryMaterial); + + buffer = mCBObject.GetBuffer(); + deviceContextX->VSSetPlacementConstantBuffer(2, buffer, grfxMemoryObject); + deviceContextX->PSSetPlacementConstantBuffer(2, buffer, grfxMemoryObject); + + buffer = mCBMisc.GetBuffer(); + deviceContextX->VSSetPlacementConstantBuffer(3, buffer, grfxMemoryMisc); + deviceContextX->PSSetPlacementConstantBuffer(3, buffer, grfxMemoryMisc); + + if (weightsPerVertex > 0) + { + void* grfxMemoryBone; + mCBBone.SetData(deviceContext, constants.bones, &grfxMemoryBone); + + deviceContextX->VSSetPlacementConstantBuffer(4, mCBBone.GetBuffer(), grfxMemoryBone); + } +#else + // Make sure the constant buffers are up to date. 
+ if (dirtyFlags & EffectDirtyFlags::ConstantBufferMaterial) + { + mCBMaterial.SetData(deviceContext, constants.material); + + dirtyFlags &= ~EffectDirtyFlags::ConstantBufferMaterial; + } + + if (dirtyFlags & EffectDirtyFlags::ConstantBufferLight) + { + mCBLight.SetData(deviceContext, constants.light); + + dirtyFlags &= ~EffectDirtyFlags::ConstantBufferLight; + } + + if (dirtyFlags & EffectDirtyFlags::ConstantBufferObject) + { + mCBObject.SetData(deviceContext, constants.object); + + dirtyFlags &= ~EffectDirtyFlags::ConstantBufferObject; + } + + if (dirtyFlags & EffectDirtyFlags::ConstantBufferMisc) + { + mCBMisc.SetData(deviceContext, constants.misc); + + dirtyFlags &= ~EffectDirtyFlags::ConstantBufferMisc; + } + + if (weightsPerVertex > 0) + { + if (dirtyFlags & EffectDirtyFlags::ConstantBufferBones) + { + mCBBone.SetData(deviceContext, constants.bones); + + dirtyFlags &= ~EffectDirtyFlags::ConstantBufferBones; + } + + ID3D11Buffer* buffers[5] = { mCBMaterial.GetBuffer(), mCBLight.GetBuffer(), mCBObject.GetBuffer(), + mCBMisc.GetBuffer(), mCBBone.GetBuffer() }; + + deviceContext->VSSetConstantBuffers(0, 5, buffers); + deviceContext->PSSetConstantBuffers(0, 4, buffers); + } + else + { + ID3D11Buffer* buffers[4] = { mCBMaterial.GetBuffer(), mCBLight.GetBuffer(), mCBObject.GetBuffer(), mCBMisc.GetBuffer() }; + + deviceContext->VSSetConstantBuffers(0, 4, buffers); + deviceContext->PSSetConstantBuffers(0, 4, buffers); + } +#endif + + // Set the textures + if (textureEnabled) + { + ID3D11ShaderResourceView* txt[MaxTextures] = { textures[0].Get(), textures[1].Get(), textures[2].Get(), textures[3].Get(), + textures[4].Get(), textures[5].Get(), textures[6].Get(), textures[7].Get() }; + deviceContext->PSSetShaderResources(0, MaxTextures, txt); + } + else + { + ID3D11ShaderResourceView* txt[MaxTextures] = { mDeviceResources->GetDefaultTexture(), nullptr }; + deviceContext->PSSetShaderResources(0, MaxTextures, txt); + } +} + + +void 
DGSLEffect::Impl::GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) noexcept +{ + int permutation = GetCurrentVSPermutation(); + + assert(permutation < DGSLEffectTraits::VertexShaderCount); + _Analysis_assume_(permutation < DGSLEffectTraits::VertexShaderCount); + + auto shader = DGSLEffectTraits::VertexShaderBytecode[permutation]; + *pShaderByteCode = shader.code; + *pByteCodeLength = shader.length; +} + + +int DGSLEffect::Impl::GetCurrentVSPermutation() const noexcept +{ + int permutation = (vertexColorEnabled) ? 1 : 0; + + if (weightsPerVertex > 0) + { + // Evaluate 1, 2, or 4 weights per vertex? + permutation += 2; + + if (weightsPerVertex == 2) + { + permutation += 2; + } + else if (weightsPerVertex == 4) + { + permutation += 4; + } + } + + return permutation; +} + + +int DGSLEffect::Impl::GetCurrentPSPermutation() const noexcept +{ + int permutation = 0; + + if (constants.light.ActiveLights > 0) + { + permutation = (specularEnabled) ? 2 : 1; + } + + if (textureEnabled) + permutation += 3; + + if (alphaDiscardEnabled) + permutation += 6; + + return permutation; +} + + + +//-------------------------------------------------------------------------------------- +// DGSLEffect +//-------------------------------------------------------------------------------------- + +DGSLEffect::DGSLEffect(_In_ ID3D11Device* device, _In_opt_ ID3D11PixelShader* pixelShader, _In_ bool enableSkinning) + : pImpl(std::make_unique(device, pixelShader, enableSkinning)) +{ +} + + +DGSLEffect::DGSLEffect(DGSLEffect&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +DGSLEffect& DGSLEffect::operator= (DGSLEffect&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +DGSLEffect::~DGSLEffect() +{ +} + + +// IEffect methods. 
+void DGSLEffect::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + pImpl->Apply(deviceContext); +} + + +void DGSLEffect::GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) +{ + pImpl->GetVertexShaderBytecode(pShaderByteCode, pByteCodeLength); +} + + +// Camera settings. +void XM_CALLCONV DGSLEffect::SetWorld(FXMMATRIX value) +{ + pImpl->world = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose; +} + + +void XM_CALLCONV DGSLEffect::SetView(FXMMATRIX value) +{ + pImpl->view = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::EyePosition; +} + + +void XM_CALLCONV DGSLEffect::SetProjection(FXMMATRIX value) +{ + pImpl->projection = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj; +} + + +void XM_CALLCONV DGSLEffect::SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) +{ + pImpl->world = world; + pImpl->view = view; + pImpl->projection = projection; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose | EffectDirtyFlags::EyePosition; +} + + +// Material settings. 
+void XM_CALLCONV DGSLEffect::SetAmbientColor(FXMVECTOR value) +{ + pImpl->constants.material.Ambient = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferMaterial; +} + + +void XM_CALLCONV DGSLEffect::SetDiffuseColor(FXMVECTOR value) +{ + pImpl->constants.material.Diffuse = XMVectorSelect(pImpl->constants.material.Diffuse, value, g_XMSelect1110); + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferMaterial; +} + + +void XM_CALLCONV DGSLEffect::SetEmissiveColor(FXMVECTOR value) +{ + pImpl->constants.material.Emissive = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferMaterial; +} + + +void XM_CALLCONV DGSLEffect::SetSpecularColor(FXMVECTOR value) +{ + pImpl->specularEnabled = true; + pImpl->constants.material.Specular = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferMaterial; +} + + +void DGSLEffect::SetSpecularPower(float value) +{ + pImpl->specularEnabled = true; + pImpl->constants.material.SpecularPower = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferMaterial; +} + + +void DGSLEffect::DisableSpecular() +{ + pImpl->specularEnabled = false; + pImpl->constants.material.Specular = g_XMZero; + pImpl->constants.material.SpecularPower = 1.f; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferMaterial; +} + + +void DGSLEffect::SetAlpha(float value) +{ + // Set w to new value, but preserve existing xyz (diffuse color). + pImpl->constants.material.Diffuse = XMVectorSetW(pImpl->constants.material.Diffuse, value); + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferMaterial; +} + + +void XM_CALLCONV DGSLEffect::SetColorAndAlpha(FXMVECTOR value) +{ + pImpl->constants.material.Diffuse = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferMaterial; +} + + +// Additional settings. 
+void XM_CALLCONV DGSLEffect::SetUVTransform(FXMMATRIX value) +{ + pImpl->constants.object.UvTransform4x4 = XMMatrixTranspose(value); + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferObject; +} + + +void DGSLEffect::SetViewport(float width, float height) +{ + pImpl->constants.misc.ViewportWidth = width; + pImpl->constants.misc.ViewportHeight = height; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferMisc; +} + + +void DGSLEffect::SetTime(float time) +{ + pImpl->constants.misc.Time = time; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferMisc; +} + + +void DGSLEffect::SetAlphaDiscardEnable(bool value) +{ + pImpl->alphaDiscardEnabled = value; +} + + +// Light settings. +void DGSLEffect::SetLightingEnabled(bool value) +{ + if (value) + { + if (!pImpl->constants.light.ActiveLights) + pImpl->constants.light.ActiveLights = 1; + } + else + { + pImpl->constants.light.ActiveLights = 0; + } + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferLight; +} + + +void DGSLEffect::SetPerPixelLighting(bool) +{ + // Unsupported interface method. 
+} + + +void XM_CALLCONV DGSLEffect::SetAmbientLightColor(FXMVECTOR value) +{ + pImpl->constants.light.Ambient = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferLight; +} + + +void DGSLEffect::SetLightEnabled(int whichLight, bool value) +{ + if (whichLight < 0 || whichLight >= MaxDirectionalLights) + throw std::out_of_range("whichLight parameter out of range"); + + if (pImpl->lightEnabled[whichLight] == value) + return; + + pImpl->lightEnabled[whichLight] = value; + + if (value) + { + if (whichLight >= static_cast(pImpl->constants.light.ActiveLights)) + pImpl->constants.light.ActiveLights = static_cast(whichLight + 1); + + pImpl->constants.light.LightColor[whichLight] = pImpl->lightDiffuseColor[whichLight]; + pImpl->constants.light.LightSpecularIntensity[whichLight] = pImpl->lightSpecularColor[whichLight]; + } + else + { + pImpl->constants.light.LightColor[whichLight] = + pImpl->constants.light.LightSpecularIntensity[whichLight] = g_XMZero; + } + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferLight; +} + + +void XM_CALLCONV DGSLEffect::SetLightDirection(int whichLight, FXMVECTOR value) +{ + if (whichLight < 0 || whichLight >= MaxDirectionalLights) + throw std::out_of_range("whichLight parameter out of range"); + + // Store the negated direction: per the original note, DGSL effect lights (unlike BasicEffect) do not negate it later - TODO confirm against the shaders + pImpl->constants.light.LightDirection[whichLight] = XMVectorNegate(value); + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferLight; +} + + +void XM_CALLCONV DGSLEffect::SetLightDiffuseColor(int whichLight, FXMVECTOR value) +{ + if (whichLight < 0 || whichLight >= MaxDirectionalLights) + throw std::out_of_range("whichLight parameter out of range"); + + pImpl->lightDiffuseColor[whichLight] = value; + + if (pImpl->lightEnabled[whichLight]) + { + pImpl->constants.light.LightColor[whichLight] = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferLight; + } +} + + +void XM_CALLCONV DGSLEffect::SetLightSpecularColor(int whichLight, FXMVECTOR value) +{
+ if (whichLight < 0 || whichLight >= MaxDirectionalLights) + throw std::out_of_range("whichLight parameter out of range"); + + pImpl->lightSpecularColor[whichLight] = value; + + if (pImpl->lightEnabled[whichLight]) + { + pImpl->constants.light.LightSpecularIntensity[whichLight] = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferLight; + } +} + + +void DGSLEffect::EnableDefaultLighting() +{ + EffectLights::EnableDefaultLighting(this); +} + + +// Vertex color setting. +void DGSLEffect::SetVertexColorEnabled(bool value) +{ + pImpl->vertexColorEnabled = value; +} + + +// Texture settings. +void DGSLEffect::SetTextureEnabled(bool value) +{ + pImpl->textureEnabled = value; +} + + +void DGSLEffect::SetTexture(_In_opt_ ID3D11ShaderResourceView* value) +{ + pImpl->textures[0] = value; +} + +void DGSLEffect::SetTexture(int whichTexture, _In_opt_ ID3D11ShaderResourceView* value) +{ + if (whichTexture < 0 || whichTexture >= MaxTextures) + throw std::out_of_range("whichTexture parameter out of range"); + + pImpl->textures[whichTexture] = value; +} + + +// Animation settings. 
+void DGSLEffect::SetWeightsPerVertex(int value) +{ + if (!pImpl->weightsPerVertex) + { + // Safe to ignore since it's only an optimization hint + return; + } + + if ((value != 1) && + (value != 2) && + (value != 4)) + { + throw std::out_of_range("WeightsPerVertex must be 1, 2, or 4"); + } + + pImpl->weightsPerVertex = value; +} + + +void DGSLEffect::SetBoneTransforms(_In_reads_(count) XMMATRIX const* value, size_t count) +{ + if (!pImpl->weightsPerVertex) + throw std::exception("Skinning not enabled for this effect"); + + if (count > MaxBones) + throw std::out_of_range("count parameter out of range"); + + auto boneConstant = pImpl->constants.bones.Bones; + + for (size_t i = 0; i < count; i++) + { + XMMATRIX boneMatrix = XMMatrixTranspose(value[i]); + + boneConstant[i][0] = boneMatrix.r[0]; + boneConstant[i][1] = boneMatrix.r[1]; + boneConstant[i][2] = boneMatrix.r[2]; + } + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferBones; +} + + +void DGSLEffect::ResetBoneTransforms() +{ + if (!pImpl->weightsPerVertex) + { + // Safe to ignore since it just returns things back to default settings + return; + } + + auto boneConstant = pImpl->constants.bones.Bones; + + for (size_t i = 0; i < MaxBones; ++i) + { + boneConstant[i][0] = g_XMIdentityR0; + boneConstant[i][1] = g_XMIdentityR1; + boneConstant[i][2] = g_XMIdentityR2; + } + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBufferBones; +} diff --git a/Sdk/External/DirectXTK/Src/DGSLEffectFactory.cpp b/Sdk/External/DirectXTK/Src/DGSLEffectFactory.cpp new file mode 100644 index 0000000..652d654 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/DGSLEffectFactory.cpp @@ -0,0 +1,609 @@ +//-------------------------------------------------------------------------------------- +// File: DGSLEffectFactory.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "Effects.h" +#include "DemandCreate.h" +#include "SharedResourcePool.h" + +#include "DDSTextureLoader.h" +#include "WICTextureLoader.h" + +#include // NOTE(review): the header name is missing on this line; confirm against the upstream file + +#include "BinaryReader.h" + +using namespace DirectX; +using Microsoft::WRL::ComPtr; + +static_assert(DGSLEffect::MaxTextures == DGSLEffectFactory::DGSLEffectInfo::BaseTextureOffset + _countof(DGSLEffectFactory::DGSLEffectInfo::textures), "DGSL supports 8 textures"); + +// Internal DGSLEffectFactory implementation class. Only one of these helpers is allocated +// per D3D device, even if there are multiple public facing DGSLEffectFactory instances. +class DGSLEffectFactory::Impl +{ +public: + Impl(_In_ ID3D11Device* device) + : mPath{}, + mDevice(device), + mSharing(true), + mForceSRGB(false) + {} + + std::shared_ptr CreateEffect(_In_ DGSLEffectFactory* factory, _In_ const IEffectFactory::EffectInfo& info, _In_opt_ ID3D11DeviceContext* deviceContext); + std::shared_ptr CreateDGSLEffect(_In_ DGSLEffectFactory* factory, _In_ const DGSLEffectInfo& info, _In_opt_ ID3D11DeviceContext* deviceContext); + void CreateTexture(_In_z_ const wchar_t* texture, _In_opt_ ID3D11DeviceContext* deviceContext, _Outptr_ ID3D11ShaderResourceView** textureView); + void CreatePixelShader(_In_z_ const wchar_t* shader, _Outptr_ ID3D11PixelShader** pixelShader); + + void ReleaseCache(); + void SetSharing(bool enabled) noexcept { mSharing = enabled; } + void EnableForceSRGB(bool forceSRGB) noexcept { mForceSRGB = forceSRGB; } + + static SharedResourcePool instancePool; + + wchar_t mPath[MAX_PATH]; // Directory prefix tried first by CreateTexture/CreatePixelShader; written by SetDirectory + + ComPtr mDevice; + +private: + using EffectCache = std::map< std::wstring, std::shared_ptr >; + using TextureCache = std::map< std::wstring, ComPtr >; + using ShaderCache = std::map< std::wstring, ComPtr >; + + EffectCache mEffectCache; // effects created with enableSkinning == false, keyed by info.name + EffectCache mEffectCacheSkinning; // effects created with enableSkinning == true, keyed by info.name + 
TextureCache mTextureCache; + ShaderCache mShaderCache; + + bool mSharing; + bool mForceSRGB; + + std::mutex mutex; +}; + + +// Global instance pool. +SharedResourcePool DGSLEffectFactory::Impl::instancePool; + +// Builds an effect from generic EffectInfo material data (colors, alpha, optional diffuse texture); throws if dual texturing is requested. +_Use_decl_annotations_ +std::shared_ptr DGSLEffectFactory::Impl::CreateEffect(DGSLEffectFactory* factory, const DGSLEffectFactory::EffectInfo& info, ID3D11DeviceContext* deviceContext) +{ + if (info.enableDualTexture) + { + throw std::exception("DGSLEffect does not support multiple texcoords"); + } + + if (mSharing && info.name && *info.name) + { // reuse a cached effect when sharing is on and the material has a name. NOTE(review): this lookup does not take `mutex`, unlike the insert below; confirm callers serialize access + if (info.enableSkinning) + { + auto it = mEffectCacheSkinning.find(info.name); + if (it != mEffectCacheSkinning.end()) + { + return it->second; + } + } + else + { + auto it = mEffectCache.find(info.name); + if (it != mEffectCache.end()) + { + return it->second; + } + } + } + + auto effect = std::make_shared(mDevice.Get(), nullptr, info.enableSkinning); + + effect->EnableDefaultLighting(); + effect->SetLightingEnabled(true); + + XMVECTOR color = XMLoadFloat3(&info.ambientColor); + effect->SetAmbientColor(color); + + color = XMLoadFloat3(&info.diffuseColor); + effect->SetDiffuseColor(color); + + effect->SetAlpha(info.alpha); + + if (info.perVertexColor) + { + effect->SetVertexColorEnabled(true); + } + + if (info.specularColor.x != 0 || info.specularColor.y != 0 || info.specularColor.z != 0) + { + color = XMLoadFloat3(&info.specularColor); + effect->SetSpecularColor(color); + effect->SetSpecularPower(info.specularPower); + } + + if (info.emissiveColor.x != 0 || info.emissiveColor.y != 0 || info.emissiveColor.z != 0) + { + color = XMLoadFloat3(&info.emissiveColor); + effect->SetEmissiveColor(color); + } + + if (info.diffuseTexture && *info.diffuseTexture) + { + ComPtr srv; + + factory->CreateTexture(info.diffuseTexture, deviceContext, srv.GetAddressOf()); + + effect->SetTexture(srv.Get()); + effect->SetTextureEnabled(true); + } + + if (mSharing && info.name && *info.name) + { + std::lock_guard lock(mutex); + 
EffectCache::value_type v(info.name, effect); + if (info.enableSkinning) + { + mEffectCacheSkinning.insert(v); + } + else + { + mEffectCache.insert(v); + } + } + + return std::move(effect); +} + + +_Use_decl_annotations_ +std::shared_ptr DGSLEffectFactory::Impl::CreateDGSLEffect(DGSLEffectFactory* factory, const DGSLEffectFactory::DGSLEffectInfo& info, ID3D11DeviceContext* deviceContext) +{ + if (mSharing && info.name && *info.name) + { + if (info.enableSkinning) + { + auto it = mEffectCacheSkinning.find(info.name); + if (it != mEffectCacheSkinning.end()) + { + return it->second; + } + } + else + { + auto it = mEffectCache.find(info.name); + if (it != mEffectCache.end()) + { + return it->second; + } + } + } + + std::shared_ptr effect; + + bool lighting = true; + bool allowSpecular = true; + + if (!info.pixelShader || !*info.pixelShader) + { + effect = std::make_shared(mDevice.Get(), nullptr, info.enableSkinning); + } + else + { + wchar_t root[MAX_PATH] = {}; + auto last = wcsrchr(info.pixelShader, '_'); + if (last) + { + wcscpy_s(root, last + 1); + } + else + { + wcscpy_s(root, info.pixelShader); + } + + auto first = wcschr(root, '.'); + if (first) + *first = 0; + + if (!_wcsicmp(root, L"lambert")) + { + allowSpecular = false; + effect = std::make_shared(mDevice.Get(), nullptr, info.enableSkinning); + } + else if (!_wcsicmp(root, L"phong")) + { + effect = std::make_shared(mDevice.Get(), nullptr, info.enableSkinning); + } + else if (!_wcsicmp(root, L"unlit")) + { + lighting = false; + effect = std::make_shared(mDevice.Get(), nullptr, info.enableSkinning); + } + else if (mDevice->GetFeatureLevel() < D3D_FEATURE_LEVEL_10_0) + { + // DGSL shaders are not compatible with Feature Level 9.x, use fallback shader + wcscat_s(root, L".cso"); // root holds the shader name after its last '_' with the extension cut off, so the fallback file is "<root>.cso" + + ComPtr ps; + factory->CreatePixelShader(root, ps.GetAddressOf()); + + effect = std::make_shared(mDevice.Get(), ps.Get(), info.enableSkinning); + } + else + { + // Create DGSL shader and use it for the effect + ComPtr ps; + 
factory->CreatePixelShader(info.pixelShader, ps.GetAddressOf()); + + effect = std::make_shared(mDevice.Get(), ps.Get(), info.enableSkinning); + } + } + + if (lighting) + { + effect->EnableDefaultLighting(); + effect->SetLightingEnabled(true); + } + + XMVECTOR color = XMLoadFloat3(&info.ambientColor); + effect->SetAmbientColor(color); + + color = XMLoadFloat3(&info.diffuseColor); + effect->SetDiffuseColor(color); + effect->SetAlpha(info.alpha); + + if (info.perVertexColor) + { + effect->SetVertexColorEnabled(true); + } + + effect->SetAlphaDiscardEnable(true); + + if (allowSpecular + && (info.specularColor.x != 0 || info.specularColor.y != 0 || info.specularColor.z != 0)) + { + color = XMLoadFloat3(&info.specularColor); + effect->SetSpecularColor(color); + effect->SetSpecularPower(info.specularPower); + } + else + { + effect->DisableSpecular(); + } + + if (info.emissiveColor.x != 0 || info.emissiveColor.y != 0 || info.emissiveColor.z != 0) + { + color = XMLoadFloat3(&info.emissiveColor); + effect->SetEmissiveColor(color); + } + + if (info.diffuseTexture && *info.diffuseTexture) + { + ComPtr srv; + + factory->CreateTexture(info.diffuseTexture, deviceContext, srv.GetAddressOf()); + + effect->SetTexture(srv.Get()); + effect->SetTextureEnabled(true); + } + + if (info.specularTexture && *info.specularTexture) + { + ComPtr srv; + + factory->CreateTexture(info.specularTexture, deviceContext, srv.GetAddressOf()); + + effect->SetTexture(1, srv.Get()); + effect->SetTextureEnabled(true); + } + + if (info.normalTexture && *info.normalTexture) + { + ComPtr srv; + + factory->CreateTexture(info.normalTexture, deviceContext, srv.GetAddressOf()); + + effect->SetTexture(2, srv.Get()); + effect->SetTextureEnabled(true); + } + + if (info.emissiveTexture && *info.emissiveTexture) + { + ComPtr srv; + + factory->CreateTexture(info.emissiveTexture, deviceContext, srv.GetAddressOf()); + + effect->SetTexture(3, srv.Get()); + effect->SetTextureEnabled(true); + } + + for (size_t j = 0; j < 
_countof(info.textures); ++j) + { + if (info.textures[j] && *info.textures[j]) + { + ComPtr srv; + + factory->CreateTexture(info.textures[j], deviceContext, srv.GetAddressOf()); + + effect->SetTexture(static_cast(j) + DGSLEffectInfo::BaseTextureOffset, srv.Get()); + effect->SetTextureEnabled(true); + } + } + + if (mSharing && info.name && *info.name) + { + std::lock_guard lock(mutex); + EffectCache::value_type v(info.name, effect); + if (info.enableSkinning) + { + mEffectCacheSkinning.insert(v); + } + else + { + mEffectCache.insert(v); + } + } + + return std::move(effect); +} + + +_Use_decl_annotations_ +void DGSLEffectFactory::Impl::CreateTexture(const wchar_t* name, ID3D11DeviceContext* deviceContext, ID3D11ShaderResourceView** textureView) +{ + if (!name || !textureView) + throw std::exception("invalid arguments"); + +#if defined(_XBOX_ONE) && defined(_TITLE) + UNREFERENCED_PARAMETER(deviceContext); +#endif + + auto it = mTextureCache.find(name); + + if (mSharing && it != mTextureCache.end()) + { + ID3D11ShaderResourceView* srv = it->second.Get(); + srv->AddRef(); + *textureView = srv; + } + else + { + wchar_t fullName[MAX_PATH] = {}; + wcscpy_s(fullName, mPath); + wcscat_s(fullName, name); + + WIN32_FILE_ATTRIBUTE_DATA fileAttr = {}; + if (!GetFileAttributesExW(fullName, GetFileExInfoStandard, &fileAttr)) + { + // Try Current Working Directory (CWD) + wcscpy_s(fullName, name); + if (!GetFileAttributesExW(fullName, GetFileExInfoStandard, &fileAttr)) + { + DebugTrace("ERROR: DGSLEffectFactory could not find texture file '%ls'\n", name); + throw std::exception("CreateTexture"); + } + } + + wchar_t ext[_MAX_EXT]; + _wsplitpath_s(name, nullptr, 0, nullptr, 0, nullptr, 0, ext, _MAX_EXT); // the extension picks the loader: ".dds" goes to the DDS loader, anything else to WIC + + if (_wcsicmp(ext, L".dds") == 0) + { + HRESULT hr = CreateDDSTextureFromFileEx( + mDevice.Get(), fullName, 0, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + mForceSRGB, nullptr, textureView); + if (FAILED(hr)) + { + DebugTrace("ERROR: CreateDDSTextureFromFile failed 
(%08X) for '%ls'\n", + static_cast(hr), fullName); + throw std::exception("CreateDDSTextureFromFile"); + } + } + #if !defined(_XBOX_ONE) || !defined(_TITLE) + else if (deviceContext) + { + std::lock_guard lock(mutex); + HRESULT hr = CreateWICTextureFromFileEx( + mDevice.Get(), deviceContext, fullName, 0, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + mForceSRGB ? WIC_LOADER_FORCE_SRGB : WIC_LOADER_DEFAULT, nullptr, textureView); + if (FAILED(hr)) + { + DebugTrace("ERROR: CreateWICTextureFromFile failed (%08X) for '%ls'\n", + static_cast(hr), fullName); + throw std::exception("CreateWICTextureFromFile"); + } + } + #endif + else + { + HRESULT hr = CreateWICTextureFromFileEx( + mDevice.Get(), fullName, 0, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + mForceSRGB ? WIC_LOADER_FORCE_SRGB : WIC_LOADER_DEFAULT, nullptr, textureView); + if (FAILED(hr)) + { + DebugTrace("ERROR: CreateWICTextureFromFile failed (%08X) for '%ls'\n", + static_cast(hr), fullName); + throw std::exception("CreateWICTextureFromFile"); + } + } + + if (mSharing && *name && it == mTextureCache.end()) + { + std::lock_guard lock(mutex); + TextureCache::value_type v(name, *textureView); + mTextureCache.insert(v); + } + } +} + + +_Use_decl_annotations_ +void DGSLEffectFactory::Impl::CreatePixelShader(const wchar_t* name, ID3D11PixelShader** pixelShader) +{ + if (!name || !pixelShader) + throw std::exception("invalid arguments"); + + auto it = mShaderCache.find(name); + + if (mSharing && it != mShaderCache.end()) + { + ID3D11PixelShader* ps = it->second.Get(); + ps->AddRef(); + *pixelShader = ps; + } + else + { + wchar_t fullName[MAX_PATH] = {}; + wcscpy_s(fullName, mPath); + wcscat_s(fullName, name); + + WIN32_FILE_ATTRIBUTE_DATA fileAttr = {}; + if (!GetFileAttributesExW(fullName, GetFileExInfoStandard, &fileAttr)) + { + // Try Current Working Directory (CWD) + wcscpy_s(fullName, name); + if (!GetFileAttributesExW(fullName, GetFileExInfoStandard, &fileAttr)) + { + 
DebugTrace("ERROR: DGSLEffectFactory could not find shader file '%ls'\n", name); + throw std::exception("CreatePixelShader"); + } + } + + size_t dataSize = 0; + std::unique_ptr data; + HRESULT hr = BinaryReader::ReadEntireFile(fullName, data, &dataSize); + if (FAILED(hr)) + { + DebugTrace("ERROR: CreatePixelShader failed (%08X) to load shader file '%ls'\n", + static_cast(hr), fullName); + throw std::exception("CreatePixelShader"); + } + + ThrowIfFailed( + mDevice->CreatePixelShader(data.get(), dataSize, nullptr, pixelShader)); + + assert(pixelShader != nullptr && *pixelShader != nullptr); + _Analysis_assume_(pixelShader != nullptr && *pixelShader != nullptr); + + if (mSharing && *name && it == mShaderCache.end()) + { + std::lock_guard lock(mutex); + ShaderCache::value_type v(name, *pixelShader); + mShaderCache.insert(v); + } + } +} + + +void DGSLEffectFactory::Impl::ReleaseCache() +{ + std::lock_guard lock(mutex); + mEffectCache.clear(); + mEffectCacheSkinning.clear(); + mTextureCache.clear(); + mShaderCache.clear(); +} + + + +//-------------------------------------------------------------------------------------- +// DGSLEffectFactory (public wrapper; every method below forwards to the pooled Impl) +//-------------------------------------------------------------------------------------- + +DGSLEffectFactory::DGSLEffectFactory(_In_ ID3D11Device* device) + : pImpl(Impl::instancePool.DemandCreate(device)) +{ +} + +DGSLEffectFactory::~DGSLEffectFactory() +{ +} + + +DGSLEffectFactory::DGSLEffectFactory(DGSLEffectFactory&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + +DGSLEffectFactory& DGSLEffectFactory::operator= (DGSLEffectFactory&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// IEffectFactory methods +_Use_decl_annotations_ +std::shared_ptr DGSLEffectFactory::CreateEffect(const EffectInfo& info, ID3D11DeviceContext* deviceContext) +{ + return pImpl->CreateEffect(this, info, deviceContext); +} + +_Use_decl_annotations_ +void DGSLEffectFactory::CreateTexture(const wchar_t* 
name, ID3D11DeviceContext* deviceContext, ID3D11ShaderResourceView** textureView) +{ + return pImpl->CreateTexture(name, deviceContext, textureView); +} + + +// DGSL methods. +_Use_decl_annotations_ +std::shared_ptr DGSLEffectFactory::CreateDGSLEffect(const DGSLEffectInfo& info, ID3D11DeviceContext* deviceContext) +{ + return pImpl->CreateDGSLEffect(this, info, deviceContext); +} + + +_Use_decl_annotations_ +void DGSLEffectFactory::CreatePixelShader(const wchar_t* shader, ID3D11PixelShader** pixelShader) +{ + pImpl->CreatePixelShader(shader, pixelShader); +} + + +// Settings +void DGSLEffectFactory::ReleaseCache() +{ + pImpl->ReleaseCache(); +} + +void DGSLEffectFactory::SetSharing(bool enabled) noexcept +{ + pImpl->SetSharing(enabled); +} + +void DGSLEffectFactory::EnableForceSRGB(bool forceSRGB) noexcept +{ + pImpl->EnableForceSRGB(forceSRGB); +} + +void DGSLEffectFactory::SetDirectory(_In_opt_z_ const wchar_t* path) noexcept +{ + if (path && *path != 0) + { // a null or empty path clears the stored directory (else-branch below) + wcscpy_s(pImpl->mPath, path); + size_t len = wcsnlen(pImpl->mPath, MAX_PATH); + if (len > 0 && len < (MAX_PATH - 1)) + { + // Ensure it has a trailing slash (skipped when there is no room left for the separator and terminator) + if (pImpl->mPath[len - 1] != L'\\') + { + pImpl->mPath[len] = L'\\'; + pImpl->mPath[len + 1] = 0; + } + } + } + else + *pImpl->mPath = 0; +} + +ID3D11Device* DGSLEffectFactory::GetDevice() const noexcept +{ + return pImpl->mDevice.Get(); +} diff --git a/Sdk/External/DirectXTK/Src/DebugEffect.cpp b/Sdk/External/DirectXTK/Src/DebugEffect.cpp new file mode 100644 index 0000000..6abad3a --- /dev/null +++ b/Sdk/External/DirectXTK/Src/DebugEffect.cpp @@ -0,0 +1,350 @@ +//-------------------------------------------------------------------------------------- +// File: DebugEffect.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "EffectCommon.h" + +using namespace DirectX; + +namespace +{ + // Constant buffer layout. Must match the shader! + struct DebugEffectConstants + { + XMVECTOR ambientDownAndAlpha; // xyz = lower hemisphere ambient color, w = alpha (see SetHemisphericalAmbientColor / SetAlpha) + XMVECTOR ambientRange; // upper minus lower ambient color + + XMMATRIX world; + XMVECTOR worldInverseTranspose[3]; // three rows, written in Impl::Apply + XMMATRIX worldViewProj; + }; + + static_assert((sizeof(DebugEffectConstants) % 16) == 0, "CB size not padded correctly"); + + + // Traits type describes our characteristics to the EffectBase template. + struct DebugEffectTraits + { + using ConstantBufferType = DebugEffectConstants; + + static constexpr int VertexShaderCount = 4; + static constexpr int PixelShaderCount = 4; + static constexpr int ShaderPermutationCount = 16; // 4 debug modes x vertex color on/off x biased normals on/off + }; +} + +// Internal DebugEffect implementation class. +class DebugEffect::Impl : public EffectBase +{ +public: + Impl(_In_ ID3D11Device* device); + + bool vertexColorEnabled; // adds 4 to the shader permutation index + bool biasedVertexNormals; // adds 8 to the shader permutation index + DebugEffect::Mode debugMode; // base value of the shader permutation index + + int GetCurrentShaderPermutation() const noexcept; + + void Apply(_In_ ID3D11DeviceContext* deviceContext); +}; + + +// Include the precompiled shader code. 
+namespace +{ +#if defined(_XBOX_ONE) && defined(_TITLE) + #include "Shaders/Compiled/XboxOneDebugEffect_VSDebug.inc" + #include "Shaders/Compiled/XboxOneDebugEffect_VSDebugVc.inc" + + #include "Shaders/Compiled/XboxOneDebugEffect_VSDebugBn.inc" + #include "Shaders/Compiled/XboxOneDebugEffect_VSDebugVcBn.inc" + + #include "Shaders/Compiled/XboxOneDebugEffect_PSHemiAmbient.inc" + #include "Shaders/Compiled/XboxOneDebugEffect_PSRGBNormals.inc" + #include "Shaders/Compiled/XboxOneDebugEffect_PSRGBTangents.inc" + #include "Shaders/Compiled/XboxOneDebugEffect_PSRGBBiTangents.inc" +#else + #include "Shaders/Compiled/DebugEffect_VSDebug.inc" + #include "Shaders/Compiled/DebugEffect_VSDebugVc.inc" + + #include "Shaders/Compiled/DebugEffect_VSDebugBn.inc" + #include "Shaders/Compiled/DebugEffect_VSDebugVcBn.inc" + + #include "Shaders/Compiled/DebugEffect_PSHemiAmbient.inc" + #include "Shaders/Compiled/DebugEffect_PSRGBNormals.inc" + #include "Shaders/Compiled/DebugEffect_PSRGBTangents.inc" + #include "Shaders/Compiled/DebugEffect_PSRGBBiTangents.inc" +#endif +} + + +template<> +const ShaderBytecode EffectBase::VertexShaderBytecode[] = +{ + { DebugEffect_VSDebug, sizeof(DebugEffect_VSDebug) }, + { DebugEffect_VSDebugVc, sizeof(DebugEffect_VSDebugVc) }, + + { DebugEffect_VSDebugBn, sizeof(DebugEffect_VSDebugBn) }, + { DebugEffect_VSDebugVcBn, sizeof(DebugEffect_VSDebugVcBn) }, +}; + + +template<> +const int EffectBase::VertexShaderIndices[] = +{ + 0, // default (rows are indexed by GetCurrentShaderPermutation: mode, +4 for vertex color, +8 for biased normals) + 0, // normals + 0, // tangents + 0, // bitangents + + 1, // vertex color + default + 1, // vertex color + normals + 1, // vertex color + tangents + 1, // vertex color + bitangents + + 2, // default (biased vertex normal) + 2, // normals (biased vertex normal) + 2, // tangents (biased vertex normal) + 2, // bitangents (biased vertex normal) + + 3, // vertex color (biased vertex normal) + 3, // vertex color (biased vertex normal) + normals + 3, // vertex color (biased vertex normal) + tangents + 3, // vertex color 
(biased vertex normal) + bitangents +}; + + +template<> +const ShaderBytecode EffectBase::PixelShaderBytecode[] = +{ + { DebugEffect_PSHemiAmbient, sizeof(DebugEffect_PSHemiAmbient) }, + { DebugEffect_PSRGBNormals, sizeof(DebugEffect_PSRGBNormals) }, + { DebugEffect_PSRGBTangents, sizeof(DebugEffect_PSRGBTangents) }, + { DebugEffect_PSRGBBiTangents, sizeof(DebugEffect_PSRGBBiTangents) }, +}; + + +template<> +const int EffectBase::PixelShaderIndices[] = +{ + 0, // default + 1, // normals + 2, // tangents + 3, // bitangents + + 0, // vertex color + default + 1, // vertex color + normals + 2, // vertex color + tangents + 3, // vertex color + bitangents + + 0, // default (biased vertex normal) + 1, // normals (biased vertex normal) + 2, // tangents (biased vertex normal) + 3, // bitangents (biased vertex normal) + + 0, // vertex color (biased vertex normal) + 1, // vertex color (biased vertex normal) + normals + 2, // vertex color (biased vertex normal) + tangents + 3, // vertex color (biased vertex normal) + bitangents +}; + + +// Global pool of per-device DebugEffect resources. +template<> +SharedResourcePool::DeviceResources> EffectBase::deviceResourcesPool = {}; + + +// Constructor. 
+DebugEffect::Impl::Impl(_In_ ID3D11Device* device) + : EffectBase(device), + vertexColorEnabled(false), + biasedVertexNormals(false), + debugMode(DebugEffect::Mode_Default) +{ + if (device->GetFeatureLevel() < D3D_FEATURE_LEVEL_10_0) + { + throw std::exception("DebugEffect requires Feature Level 10.0 or later"); + } + + static_assert(_countof(EffectBase::VertexShaderIndices) == DebugEffectTraits::ShaderPermutationCount, "array/max mismatch"); + static_assert(_countof(EffectBase::VertexShaderBytecode) == DebugEffectTraits::VertexShaderCount, "array/max mismatch"); + static_assert(_countof(EffectBase::PixelShaderBytecode) == DebugEffectTraits::PixelShaderCount, "array/max mismatch"); + static_assert(_countof(EffectBase::PixelShaderIndices) == DebugEffectTraits::ShaderPermutationCount, "array/max mismatch"); + + static const XMVECTORF32 s_lower = { { { 0.f, 0.f, 0.f, 1.f } } }; + + constants.ambientDownAndAlpha = s_lower; + constants.ambientRange = g_XMOne; +} + + +int DebugEffect::Impl::GetCurrentShaderPermutation() const noexcept +{ + int permutation = static_cast(debugMode); + + // Support vertex coloring? + if (vertexColorEnabled) + { + permutation += 4; // moves to the "vertex color" rows of the shader index tables + } + + if (biasedVertexNormals) + { + // Compressed normals need to be scaled and biased in the vertex shader. + permutation += 8; // moves to the "biased vertex normal" rows of the shader index tables + } + + return permutation; +} + + +// Sets our state onto the D3D device. +void DebugEffect::Impl::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + // Compute derived parameter values. + matrices.SetConstants(dirtyFlags, constants.worldViewProj); + + // World inverse transpose matrix. 
+ if (dirtyFlags & EffectDirtyFlags::WorldInverseTranspose) + { + constants.world = XMMatrixTranspose(matrices.world); + + XMMATRIX worldInverse = XMMatrixInverse(nullptr, matrices.world); + + constants.worldInverseTranspose[0] = worldInverse.r[0]; + constants.worldInverseTranspose[1] = worldInverse.r[1]; + constants.worldInverseTranspose[2] = worldInverse.r[2]; + + dirtyFlags &= ~EffectDirtyFlags::WorldInverseTranspose; + dirtyFlags |= EffectDirtyFlags::ConstantBuffer; + } + + // Set shaders and constant buffers. + ApplyShaders(deviceContext, GetCurrentShaderPermutation()); +} + + +// Public constructor. +DebugEffect::DebugEffect(_In_ ID3D11Device* device) + : pImpl(std::make_unique(device)) +{ +} + + +// Move constructor. +DebugEffect::DebugEffect(DebugEffect&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +DebugEffect& DebugEffect::operator= (DebugEffect&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +DebugEffect::~DebugEffect() +{ +} + + +// IEffect methods. +void DebugEffect::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + pImpl->Apply(deviceContext); +} + + +void DebugEffect::GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) +{ + pImpl->GetVertexShaderBytecode(pImpl->GetCurrentShaderPermutation(), pShaderByteCode, pByteCodeLength); // bytecode for the permutation selected by the current mode and flags +} + + +// Camera settings. 
+void XM_CALLCONV DebugEffect::SetWorld(FXMMATRIX value) +{ + pImpl->matrices.world = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose; +} + + +void XM_CALLCONV DebugEffect::SetView(FXMMATRIX value) +{ + pImpl->matrices.view = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj; +} + + +void XM_CALLCONV DebugEffect::SetProjection(FXMMATRIX value) +{ + pImpl->matrices.projection = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj; +} + + +void XM_CALLCONV DebugEffect::SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) +{ + pImpl->matrices.world = world; + pImpl->matrices.view = view; + pImpl->matrices.projection = projection; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose; +} + + +// Material settings. SetMode rejects values outside [0, PixelShaderCount) with std::invalid_argument. +void DebugEffect::SetMode(Mode debugMode) +{ + if (static_cast(debugMode) < 0 || static_cast(debugMode) >= DebugEffectTraits::PixelShaderCount) + { + throw std::invalid_argument("Unsupported mode"); + } + + pImpl->debugMode = debugMode; // no dirty flag is set here; the mode is read by GetCurrentShaderPermutation +} + +void XM_CALLCONV DebugEffect::SetHemisphericalAmbientColor(FXMVECTOR upper, FXMVECTOR lower) +{ + // Set xyz to new value, but preserve existing w (alpha). + pImpl->constants.ambientDownAndAlpha = XMVectorSelect(pImpl->constants.ambientDownAndAlpha, lower, g_XMSelect1110); + + pImpl->constants.ambientRange = XMVectorSubtract(upper, lower); // stored as a range (upper - lower), not as the upper color itself + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + +void DebugEffect::SetAlpha(float value) +{ + // Set w to new value, but preserve existing xyz (ambient down). + pImpl->constants.ambientDownAndAlpha = XMVectorSetW(pImpl->constants.ambientDownAndAlpha, value); + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +// Vertex color setting. +void DebugEffect::SetVertexColorEnabled(bool value) +{ + pImpl->vertexColorEnabled = value; +} + + +// Normal compression settings. 
+void DebugEffect::SetBiasedVertexNormals(bool value) +{ + pImpl->biasedVertexNormals = value; +} diff --git a/Sdk/External/DirectXTK/Src/DemandCreate.h b/Sdk/External/DirectXTK/Src/DemandCreate.h new file mode 100644 index 0000000..9403f5a --- /dev/null +++ b/Sdk/External/DirectXTK/Src/DemandCreate.h @@ -0,0 +1,48 @@ +//-------------------------------------------------------------------------------------- +// File: DemandCreate.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#pragma once + +#include "PlatformHelpers.h" + + +namespace DirectX +{ + // Helper for lazily creating a D3D resource. Returns the pointer already held by comPtr, or, under the mutex, calls createFunc(&result), throws through ThrowIfFailed on failure, and attaches the new object to comPtr. + template // NOTE(review): the template parameter list is missing on this line; confirm against the upstream header + inline T* DemandCreate(Microsoft::WRL::ComPtr& comPtr, std::mutex& mutex, TCreateFunc createFunc) + { + T* result = comPtr.Get(); + + // Double-checked lock pattern. + MemoryBarrier(); + + if (!result) + { + std::lock_guard lock(mutex); + + result = comPtr.Get(); // second check, now under the lock + + if (!result) + { + // Create the new object. + ThrowIfFailed( + createFunc(&result) + ); + + MemoryBarrier(); + + comPtr.Attach(result); // Attach stores the pointer createFunc produced + } + } + + return result; + } +} diff --git a/Sdk/External/DirectXTK/Src/DirectXHelpers.cpp b/Sdk/External/DirectXTK/Src/DirectXHelpers.cpp new file mode 100644 index 0000000..d1d3d0a --- /dev/null +++ b/Sdk/External/DirectXTK/Src/DirectXHelpers.cpp @@ -0,0 +1,54 @@ +//-------------------------------------------------------------------------------------- +// File: DirectXHelpers.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "DirectXHelpers.h" +#include "Effects.h" +#include "PlatformHelpers.h" + + +using namespace DirectX; +// Creates an input layout for `desc` against the effect's current vertex shader bytecode. Never throws: returns E_INVALIDARG for null/zero arguments, the HRESULT carried by a com_exception, or E_FAIL for any other exception. +_Use_decl_annotations_ +HRESULT DirectX::CreateInputLayoutFromEffect( + ID3D11Device* device, + IEffect* effect, + const D3D11_INPUT_ELEMENT_DESC* desc, + size_t count, + ID3D11InputLayout** pInputLayout) noexcept +{ + if (!pInputLayout) + return E_INVALIDARG; + + *pInputLayout = nullptr; // the out-parameter is cleared before any other early return + + if (!device || !effect || !desc || !count) + return E_INVALIDARG; + + void const* shaderByteCode = nullptr; + size_t byteCodeLength = 0; + + try + { + effect->GetVertexShaderBytecode(&shaderByteCode, &byteCodeLength); + } + catch (const com_exception& e) // caught by const reference: no copy and no slicing of the exception object + { + return e.get_result(); + } + catch (...) + { + return E_FAIL; + } + + return device->CreateInputLayout( + desc, static_cast<UINT>(count), + shaderByteCode, byteCodeLength, + pInputLayout); +} diff --git a/Sdk/External/DirectXTK/Src/DualPostProcess.cpp b/Sdk/External/DirectXTK/Src/DualPostProcess.cpp new file mode 100644 index 0000000..05c4587 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/DualPostProcess.cpp @@ -0,0 +1,346 @@ +//-------------------------------------------------------------------------------------- +// File: DualPostProcess.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "PostProcess.h" + +#include "AlignedNew.h" +#include "CommonStates.h" +#include "BufferHelpers.h" +#include "DemandCreate.h" +#include "DirectXHelpers.h" +#include "SharedResourcePool.h" + +using namespace DirectX; + +using Microsoft::WRL::ComPtr; + +namespace +{ + constexpr int c_MaxSamples = 16; + + constexpr int Dirty_ConstantBuffer = 0x01; // the constants must be uploaded again in Process + constexpr int Dirty_Parameters = 0x02; // sampleWeights must be rebuilt from the user parameters in Process + + // Constant buffer layout. Must match the shader! + __declspec(align(16)) struct PostProcessConstants + { + XMVECTOR sampleOffsets[c_MaxSamples]; + XMVECTOR sampleWeights[c_MaxSamples]; + }; + + static_assert((sizeof(PostProcessConstants) % 16) == 0, "CB size not padded correctly"); +} + +// Include the precompiled shader code. +namespace +{ +#if defined(_XBOX_ONE) && defined(_TITLE) + #include "Shaders/Compiled/XboxOnePostProcess_VSQuad.inc" + + #include "Shaders/Compiled/XboxOnePostProcess_PSMerge.inc" + #include "Shaders/Compiled/XboxOnePostProcess_PSBloomCombine.inc" +#else + #include "Shaders/Compiled/PostProcess_VSQuad.inc" + + #include "Shaders/Compiled/PostProcess_PSMerge.inc" + #include "Shaders/Compiled/PostProcess_PSBloomCombine.inc" +#endif +} + +namespace +{ + struct ShaderBytecode + { + void const* code; + size_t length; + }; + + const ShaderBytecode pixelShaders[] = + { + { PostProcess_PSMerge, sizeof(PostProcess_PSMerge) }, + { PostProcess_PSBloomCombine, sizeof(PostProcess_PSBloomCombine) }, + }; + + static_assert(_countof(pixelShaders) == DualPostProcess::Effect_Max, "array/max mismatch"); + + // Factory for lazily instantiating shaders. + class DeviceResources + { + public: + DeviceResources(_In_ ID3D11Device* device) + : stateObjects(device), + mDevice(device), + mVertexShader{}, + mPixelShaders{}, + mMutex{} + { } + + // Gets or lazily creates the vertex shader. 
+ ID3D11VertexShader* GetVertexShader() + { + return DemandCreate(mVertexShader, mMutex, [&](ID3D11VertexShader** pResult) -> HRESULT + { + HRESULT hr = mDevice->CreateVertexShader(PostProcess_VSQuad, sizeof(PostProcess_VSQuad), nullptr, pResult); + + if (SUCCEEDED(hr)) + SetDebugObjectName(*pResult, "DualPostProcess"); + + return hr; + }); + } + + // Gets or lazily creates the specified pixel shader. + ID3D11PixelShader* GetPixelShader(unsigned int shaderIndex) + { + assert(shaderIndex < DualPostProcess::Effect_Max); + _Analysis_assume_(shaderIndex < DualPostProcess::Effect_Max); + + return DemandCreate(mPixelShaders[shaderIndex], mMutex, [&](ID3D11PixelShader** pResult) -> HRESULT + { + HRESULT hr = mDevice->CreatePixelShader(pixelShaders[shaderIndex].code, pixelShaders[shaderIndex].length, nullptr, pResult); + + if (SUCCEEDED(hr)) + SetDebugObjectName(*pResult, "DualPostProcess"); + + return hr; + }); + } + + CommonStates stateObjects; + + protected: + ComPtr mDevice; + ComPtr mVertexShader; + ComPtr mPixelShaders[DualPostProcess::Effect_Max]; + std::mutex mMutex; // passed to DemandCreate for both the vertex and the pixel shaders + }; +} + +class DualPostProcess::Impl : public AlignedNew +{ +public: + Impl(_In_ ID3D11Device* device); + + void Process(_In_ ID3D11DeviceContext* deviceContext, std::function& setCustomState); + + void SetDirtyFlag() noexcept { mDirtyFlags = INT_MAX; } // INT_MAX sets every dirty bit, so Process recomputes and re-uploads + + // Fields. + PostProcessConstants constants; + DualPostProcess::Effect fx; + ComPtr texture; + ComPtr texture2; + float mergeWeight1; + float mergeWeight2; + float bloomIntensity; + float bloomBaseIntensity; + float bloomSaturation; + float bloomBaseSaturation; + +private: + int mDirtyFlags; + + ConstantBuffer mConstantBuffer; + + // Per-device resources. + std::shared_ptr mDeviceResources; + + static SharedResourcePool deviceResourcesPool; +}; + + +// Global pool of per-device DualPostProcess resources. +SharedResourcePool DualPostProcess::Impl::deviceResourcesPool; + + +// Constructor. 
+DualPostProcess::Impl::Impl(_In_ ID3D11Device* device) + : constants{}, + fx(DualPostProcess::Merge), + mergeWeight1(0.5f), + mergeWeight2(0.5f), + bloomIntensity(1.25f), + bloomBaseIntensity(1.f), + bloomSaturation(1.f), + bloomBaseSaturation(1.f), + mDirtyFlags(INT_MAX), + mConstantBuffer(device), + mDeviceResources(deviceResourcesPool.DemandCreate(device)) +{ + if (device->GetFeatureLevel() < D3D_FEATURE_LEVEL_10_0) + { + throw std::exception("DualPostProcess requires Feature Level 10.0 or later"); + } + + SetDebugObjectName(mConstantBuffer.GetBuffer(), "DualPostProcess"); +} + + +// Sets our state onto the D3D device. +void DualPostProcess::Impl::Process( + _In_ ID3D11DeviceContext* deviceContext, + std::function& setCustomState) +{ + // Set the texture. + ID3D11ShaderResourceView* textures[2] = { texture.Get(), texture2.Get() }; + deviceContext->PSSetShaderResources(0, 2, textures); + + auto sampler = mDeviceResources->stateObjects.LinearClamp(); + deviceContext->PSSetSamplers(0, 1, &sampler); + + // Set state objects. + deviceContext->OMSetBlendState(mDeviceResources->stateObjects.Opaque(), nullptr, 0xffffffff); + deviceContext->OMSetDepthStencilState(mDeviceResources->stateObjects.DepthNone(), 0); + deviceContext->RSSetState(mDeviceResources->stateObjects.CullNone()); + + // Set shaders. + auto vertexShader = mDeviceResources->GetVertexShader(); + auto pixelShader = mDeviceResources->GetPixelShader(fx); + + deviceContext->VSSetShader(vertexShader, nullptr, 0); + deviceContext->PSSetShader(pixelShader, nullptr, 0); + + // Set constants. 
+ if (mDirtyFlags & Dirty_Parameters) + { + mDirtyFlags &= ~Dirty_Parameters; + mDirtyFlags |= Dirty_ConstantBuffer; + + switch (fx) + { + case Merge: + constants.sampleWeights[0] = XMVectorReplicate(mergeWeight1); + constants.sampleWeights[1] = XMVectorReplicate(mergeWeight2); + break; + + case BloomCombine: + constants.sampleWeights[0] = XMVectorSet(bloomBaseSaturation, bloomSaturation, 0.f, 0.f); + constants.sampleWeights[1] = XMVectorReplicate(bloomBaseIntensity); + constants.sampleWeights[2] = XMVectorReplicate(bloomIntensity); + break; + + default: + break; + } + } + +#if defined(_XBOX_ONE) && defined(_TITLE) + void *grfxMemory; + mConstantBuffer.SetData(deviceContext, constants, &grfxMemory); + + Microsoft::WRL::ComPtr deviceContextX; + ThrowIfFailed(deviceContext->QueryInterface(IID_GRAPHICS_PPV_ARGS(deviceContextX.GetAddressOf()))); + + auto buffer = mConstantBuffer.GetBuffer(); + + deviceContextX->PSSetPlacementConstantBuffer(0, buffer, grfxMemory); +#else + if (mDirtyFlags & Dirty_ConstantBuffer) + { + mDirtyFlags &= ~Dirty_ConstantBuffer; + mConstantBuffer.SetData(deviceContext, constants); + } + + // Set the constant buffer. + auto buffer = mConstantBuffer.GetBuffer(); + + deviceContext->PSSetConstantBuffers(0, 1, &buffer); +#endif + + if (setCustomState) + { + setCustomState(); // optional caller hook, run after all state above is set and before the draw + } + + // Draw quad. + deviceContext->IASetInputLayout(nullptr); + deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + deviceContext->Draw(3, 0); // three vertices, no input layout bound +} + + +// Public constructor. +DualPostProcess::DualPostProcess(_In_ ID3D11Device* device) + : pImpl(std::make_unique(device)) +{ +} + + +// Move constructor. +DualPostProcess::DualPostProcess(DualPostProcess&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +DualPostProcess& DualPostProcess::operator= (DualPostProcess&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. 
+DualPostProcess::~DualPostProcess()
+{
+}
+
+
+// IPostProcess methods. Forwards to the implementation object; the callback may be empty.
+void DualPostProcess::Process(
+    _In_ ID3D11DeviceContext* deviceContext,
+    _In_opt_ std::function<void __cdecl()> setCustomState)
+{
+    pImpl->Process(deviceContext, setCustomState);
+}
+
+
+// Shader control. Rejects values at or beyond Effect_Max with std::out_of_range.
+void DualPostProcess::SetEffect(Effect fx)
+{
+    if (fx >= Effect_Max)
+        throw std::out_of_range("Effect not defined");
+
+    pImpl->fx = fx;
+    pImpl->SetDirtyFlag();
+}
+
+
+// Properties
+void DualPostProcess::SetSourceTexture(_In_opt_ ID3D11ShaderResourceView* value)
+{
+    pImpl->texture = value;
+}
+
+// Second source texture; stored only, no dirty flag is raised.
+void DualPostProcess::SetSourceTexture2(_In_opt_ ID3D11ShaderResourceView* value)
+{
+    pImpl->texture2 = value;
+}
+
+// Stores the two merge weights and calls SetDirtyFlag().
+void DualPostProcess::SetMergeParameters(float weight1, float weight2)
+{
+    pImpl->mergeWeight1 = weight1;
+    pImpl->mergeWeight2 = weight2;
+    pImpl->SetDirtyFlag();
+}
+
+// Stores the bloom/base intensity and saturation values and calls SetDirtyFlag().
+void DualPostProcess::SetBloomCombineParameters(float bloom, float base, float bloomSaturation, float baseSaturation)
+{
+    pImpl->bloomIntensity = bloom;
+    pImpl->bloomBaseIntensity = base;
+    pImpl->bloomSaturation = bloomSaturation;
+    pImpl->bloomBaseSaturation = baseSaturation;
+    pImpl->SetDirtyFlag();
+}
diff --git a/Sdk/External/DirectXTK/Src/DualTextureEffect.cpp b/Sdk/External/DirectXTK/Src/DualTextureEffect.cpp
new file mode 100644
index 0000000..1b0d9a8
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/DualTextureEffect.cpp
@@ -0,0 +1,334 @@
+//--------------------------------------------------------------------------------------
+// File: DualTextureEffect.cpp
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#include "pch.h"
+#include "EffectCommon.h"
+
+using namespace DirectX;
+using Microsoft::WRL::ComPtr;
+
+
+// Constant buffer layout. Must match the shader!
+struct DualTextureEffectConstants
+{
+    XMVECTOR diffuseColor;
+    XMVECTOR fogColor;
+    XMVECTOR fogVector;
+    XMMATRIX worldViewProj;
+};
+
+static_assert((sizeof(DualTextureEffectConstants) % 16) == 0, "CB size not padded correctly");
+
+
+// Traits type describes our characteristics to the EffectBase template.
+struct DualTextureEffectTraits
+{
+    using ConstantBufferType = DualTextureEffectConstants;
+
+    static constexpr int VertexShaderCount = 4;
+    static constexpr int PixelShaderCount = 2;
+    static constexpr int ShaderPermutationCount = 4;
+};
+
+
+// Internal DualTextureEffect implementation class.
+class DualTextureEffect::Impl : public EffectBase<DualTextureEffectTraits>
+{
+public:
+    Impl(_In_ ID3D11Device* device);
+
+    bool vertexColorEnabled;
+
+    EffectColor color;
+
+    ComPtr<ID3D11ShaderResourceView> texture2;
+
+    int GetCurrentShaderPermutation() const noexcept;
+
+    void Apply(_In_ ID3D11DeviceContext* deviceContext);
+};
+
+
+// Include the precompiled shader code.
+namespace
+{
+#if defined(_XBOX_ONE) && defined(_TITLE)
+    #include "Shaders/Compiled/XboxOneDualTextureEffect_VSDualTexture.inc"
+    #include "Shaders/Compiled/XboxOneDualTextureEffect_VSDualTextureNoFog.inc"
+    #include "Shaders/Compiled/XboxOneDualTextureEffect_VSDualTextureVc.inc"
+    #include "Shaders/Compiled/XboxOneDualTextureEffect_VSDualTextureVcNoFog.inc"
+
+    #include "Shaders/Compiled/XboxOneDualTextureEffect_PSDualTexture.inc"
+    #include "Shaders/Compiled/XboxOneDualTextureEffect_PSDualTextureNoFog.inc"
+#else
+    #include "Shaders/Compiled/DualTextureEffect_VSDualTexture.inc"
+    #include "Shaders/Compiled/DualTextureEffect_VSDualTextureNoFog.inc"
+    #include "Shaders/Compiled/DualTextureEffect_VSDualTextureVc.inc"
+    #include "Shaders/Compiled/DualTextureEffect_VSDualTextureVcNoFog.inc"
+
+    #include "Shaders/Compiled/DualTextureEffect_PSDualTexture.inc"
+    #include "Shaders/Compiled/DualTextureEffect_PSDualTextureNoFog.inc"
+#endif
+}
+
+
+template<>
+const ShaderBytecode EffectBase<DualTextureEffectTraits>::VertexShaderBytecode[] =
+{
+    { DualTextureEffect_VSDualTexture, sizeof(DualTextureEffect_VSDualTexture) },
+    { DualTextureEffect_VSDualTextureNoFog, sizeof(DualTextureEffect_VSDualTextureNoFog) },
+    { DualTextureEffect_VSDualTextureVc, sizeof(DualTextureEffect_VSDualTextureVc) },
+    { DualTextureEffect_VSDualTextureVcNoFog, sizeof(DualTextureEffect_VSDualTextureVcNoFog) },
+
+};
+
+
+template<>
+const int EffectBase<DualTextureEffectTraits>::VertexShaderIndices[] =
+{
+    0, // basic
+    1, // no fog
+    2, // vertex color
+    3, // vertex color, no fog
+};
+
+
+template<>
+const ShaderBytecode EffectBase<DualTextureEffectTraits>::PixelShaderBytecode[] =
+{
+    { DualTextureEffect_PSDualTexture, sizeof(DualTextureEffect_PSDualTexture) },
+    { DualTextureEffect_PSDualTextureNoFog, sizeof(DualTextureEffect_PSDualTextureNoFog) },
+
+};
+
+
+template<>
+const int EffectBase<DualTextureEffectTraits>::PixelShaderIndices[] =
+{
+    0, // basic
+    1, // no fog
+    0, // vertex color
+    1, // vertex color, no fog
+};
+
+
+// Global pool of per-device DualTextureEffect resources.
+template<>
+SharedResourcePool<ID3D11Device*, EffectBase<DualTextureEffectTraits>::DeviceResources> EffectBase<DualTextureEffectTraits>::deviceResourcesPool = {};
+
+
+// Constructor. The static_asserts tie each table's length to the counts declared in DualTextureEffectTraits.
+DualTextureEffect::Impl::Impl(_In_ ID3D11Device* device)
+    : EffectBase(device),
+    vertexColorEnabled(false)
+{
+    static_assert(_countof(EffectBase<DualTextureEffectTraits>::VertexShaderIndices) == DualTextureEffectTraits::ShaderPermutationCount, "array/max mismatch");
+    static_assert(_countof(EffectBase<DualTextureEffectTraits>::VertexShaderBytecode) == DualTextureEffectTraits::VertexShaderCount, "array/max mismatch");
+    static_assert(_countof(EffectBase<DualTextureEffectTraits>::PixelShaderBytecode) == DualTextureEffectTraits::PixelShaderCount, "array/max mismatch");
+    static_assert(_countof(EffectBase<DualTextureEffectTraits>::PixelShaderIndices) == DualTextureEffectTraits::ShaderPermutationCount, "array/max mismatch");
+}
+
+// Returns the permutation index 0..3: +1 when fog is disabled, +2 when vertex color is enabled (matches the index tables above).
+int DualTextureEffect::Impl::GetCurrentShaderPermutation() const noexcept
+{
+    int permutation = 0;
+
+    // Use optimized shaders if fog is disabled.
+    if (!fog.enabled)
+    {
+        permutation += 1;
+    }
+
+    // Support vertex coloring?
+    if (vertexColorEnabled)
+    {
+        permutation += 2;
+    }
+
+    return permutation;
+}
+
+
+// Sets our state onto the D3D device.
+void DualTextureEffect::Impl::Apply(_In_ ID3D11DeviceContext* deviceContext)
+{
+    // Compute derived parameter values.
+    matrices.SetConstants(dirtyFlags, constants.worldViewProj);
+
+    fog.SetConstants(dirtyFlags, matrices.worldView, constants.fogVector);
+
+    color.SetConstants(dirtyFlags, constants.diffuseColor);
+
+    // Set the textures.
+    ID3D11ShaderResourceView* textures[2] =
+    {
+        texture.Get(),
+        texture2.Get(),
+    };
+
+    deviceContext->PSSetShaderResources(0, 2, textures);
+
+    // Set shaders and constant buffers.
+    ApplyShaders(deviceContext, GetCurrentShaderPermutation());
+}
+
+
+// Public constructor.
+DualTextureEffect::DualTextureEffect(_In_ ID3D11Device* device)
+    : pImpl(std::make_unique<Impl>(device))
+{
+}
+
+
+// Move constructor.
+DualTextureEffect::DualTextureEffect(DualTextureEffect&& moveFrom) noexcept
+    : pImpl(std::move(moveFrom.pImpl))
+{
+}
+
+
+// Move assignment.
+DualTextureEffect& DualTextureEffect::operator= (DualTextureEffect&& moveFrom) noexcept
+{
+    pImpl = std::move(moveFrom.pImpl);
+    return *this;
+}
+
+
+// Public destructor.
+DualTextureEffect::~DualTextureEffect()
+{
+}
+
+
+// IEffect methods.
+void DualTextureEffect::Apply(_In_ ID3D11DeviceContext* deviceContext)
+{
+    pImpl->Apply(deviceContext);
+}
+
+// Reports the bytecode of the vertex shader selected by the current fog / vertex-color settings.
+void DualTextureEffect::GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength)
+{
+    pImpl->GetVertexShaderBytecode(pImpl->GetCurrentShaderPermutation(), pShaderByteCode, pByteCodeLength);
+}
+
+
+// Camera settings.
+void XM_CALLCONV DualTextureEffect::SetWorld(FXMMATRIX value)
+{
+    pImpl->matrices.world = value;
+
+    pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose | EffectDirtyFlags::FogVector;
+}
+
+// Stores the view matrix and flags the values derived from it (world-view-projection, eye position, fog vector) as dirty.
+void XM_CALLCONV DualTextureEffect::SetView(FXMMATRIX value)
+{
+    pImpl->matrices.view = value;
+
+    pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::EyePosition | EffectDirtyFlags::FogVector;
+}
+
+// Stores the projection matrix; only the combined world-view-projection flag is raised.
+void XM_CALLCONV DualTextureEffect::SetProjection(FXMMATRIX value)
+{
+    pImpl->matrices.projection = value;
+
+    pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj;
+}
+
+// Sets all three camera matrices at once and raises the union of the flags raised by the three individual setters.
+void XM_CALLCONV DualTextureEffect::SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection)
+{
+    pImpl->matrices.world = world;
+    pImpl->matrices.view = view;
+    pImpl->matrices.projection = projection;
+
+    pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose | EffectDirtyFlags::EyePosition | EffectDirtyFlags::FogVector;
+}
+
+
+// Material settings.
+void XM_CALLCONV DualTextureEffect::SetDiffuseColor(FXMVECTOR value)
+{
+    pImpl->color.diffuseColor = value;
+
+    pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor;
+}
+
+// Stores the material alpha separately from the diffuse color and marks the material color dirty.
+void DualTextureEffect::SetAlpha(float value)
+{
+    pImpl->color.alpha = value;
+
+    pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor;
+}
+
+// Takes the diffuse color from the whole vector and the alpha from its W component.
+void XM_CALLCONV DualTextureEffect::SetColorAndAlpha(FXMVECTOR value)
+{
+    pImpl->color.diffuseColor = value;
+    pImpl->color.alpha = XMVectorGetW(value);
+
+    pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor;
+}
+
+
+// Fog settings.
+void DualTextureEffect::SetFogEnabled(bool value)
+{
+    pImpl->fog.enabled = value;
+
+    pImpl->dirtyFlags |= EffectDirtyFlags::FogEnable;
+}
+
+// Stores the fog start distance and marks the derived fog vector dirty.
+void DualTextureEffect::SetFogStart(float value)
+{
+    pImpl->fog.start = value;
+
+    pImpl->dirtyFlags |= EffectDirtyFlags::FogVector;
+}
+
+// Stores the fog end distance and marks the derived fog vector dirty.
+void DualTextureEffect::SetFogEnd(float value)
+{
+    pImpl->fog.end = value;
+
+    pImpl->dirtyFlags |= EffectDirtyFlags::FogVector;
+}
+
+// Writes the fog color straight into the constants struct, so only the ConstantBuffer flag is raised.
+void XM_CALLCONV DualTextureEffect::SetFogColor(FXMVECTOR value)
+{
+    pImpl->constants.fogColor = value;
+
+    pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer;
+}
+
+
+// Vertex color setting. No dirty flag is raised here.
+void DualTextureEffect::SetVertexColorEnabled(bool value)
+{
+    pImpl->vertexColorEnabled = value;
+}
+
+
+// Texture settings.
+void DualTextureEffect::SetTexture(_In_opt_ ID3D11ShaderResourceView* value)
+{
+    pImpl->texture = value;
+}
+
+
+void DualTextureEffect::SetTexture2(_In_opt_ ID3D11ShaderResourceView* value)
+{
+    pImpl->texture2 = value;
+}
diff --git a/Sdk/External/DirectXTK/Src/EffectCommon.cpp b/Sdk/External/DirectXTK/Src/EffectCommon.cpp
new file mode 100644
index 0000000..6dd2395
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/EffectCommon.cpp
@@ -0,0 +1,458 @@
+//--------------------------------------------------------------------------------------
+// File: EffectCommon.cpp
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#include "pch.h"
+#include "EffectCommon.h"
+#include "DemandCreate.h"
+
+using namespace DirectX;
+using Microsoft::WRL::ComPtr;
+
+
+// IEffectMatrices default method: forwards to SetWorld, SetView and SetProjection in that order.
+void XM_CALLCONV IEffectMatrices::SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection)
+{
+    SetWorld(world);
+    SetView(view);
+    SetProjection(projection);
+}
+
+
+// Constructor initializes default matrix values.
+EffectMatrices::EffectMatrices() noexcept
+{
+    XMMATRIX id = XMMatrixIdentity();
+    world = id;
+    view = id;
+    projection = id;
+    worldView = id;
+}
+
+// Does nothing unless the WorldViewProj flag is set; on update it also refreshes the cached worldView member.
+// Lazily recomputes the combined world+view+projection matrix.
+_Use_decl_annotations_
+void EffectMatrices::SetConstants(int& dirtyFlags, XMMATRIX& worldViewProjConstant)
+{
+    if (dirtyFlags & EffectDirtyFlags::WorldViewProj)
+    {
+        worldView = XMMatrixMultiply(world, view);
+
+        worldViewProjConstant = XMMatrixTranspose(XMMatrixMultiply(worldView, projection));
+
+        dirtyFlags &= ~EffectDirtyFlags::WorldViewProj;
+        dirtyFlags |= EffectDirtyFlags::ConstantBuffer;
+    }
+}
+
+
+// Constructor initializes default fog settings.
+EffectFog::EffectFog() noexcept :
+    enabled(false),
+    start(0),
+    end(1.f)
+{
+}
+
+// While fog is disabled only the FogEnable flag is handled; a pending FogVector flag is left set.
+// Lazily recomputes the derived vector used by shader fog calculations.
+_Use_decl_annotations_
+void XM_CALLCONV EffectFog::SetConstants(int& dirtyFlags, FXMMATRIX worldView, XMVECTOR& fogVectorConstant)
+{
+    if (enabled)
+    {
+        if (dirtyFlags & (EffectDirtyFlags::FogVector | EffectDirtyFlags::FogEnable))
+        {
+            if (start == end)
+            {
+                // Degenerate case: force everything to 100% fogged if start and end are the same.
+                static const XMVECTORF32 fullyFogged = { { { 0, 0, 0, 1 } } };
+
+                fogVectorConstant = fullyFogged;
+            }
+            else
+            {
+                // We want to transform vertex positions into view space, take the resulting
+                // Z value, then scale and offset according to the fog start/end distances.
+                // Because we only care about the Z component, the shader can do all this
+                // with a single dot product, using only the Z row of the world+view matrix.
+
+                // _13, _23, _33, _43
+                XMVECTOR worldViewZ = XMVectorMergeXY(XMVectorMergeZW(worldView.r[0], worldView.r[2]),
+                                                      XMVectorMergeZW(worldView.r[1], worldView.r[3]));
+
+                // 0, 0, 0, fogStart
+                XMVECTOR wOffset = XMVectorSwizzle<1, 2, 3, 0>(XMLoadFloat(&start));
+
+                // (worldViewZ + wOffset) / (start - end);
+                fogVectorConstant = XMVectorDivide(XMVectorAdd(worldViewZ, wOffset), XMVectorReplicate(start - end));
+            }
+
+            dirtyFlags &= ~(EffectDirtyFlags::FogVector | EffectDirtyFlags::FogEnable);
+            dirtyFlags |= EffectDirtyFlags::ConstantBuffer;
+        }
+    }
+    else
+    {
+        // When fog is disabled, make sure the fog vector is reset to zero.
+        if (dirtyFlags & EffectDirtyFlags::FogEnable)
+        {
+            fogVectorConstant = g_XMZero;
+
+            dirtyFlags &= ~EffectDirtyFlags::FogEnable;
+            dirtyFlags |= EffectDirtyFlags::ConstantBuffer;
+        }
+    }
+}
+
+
+// Constructor initializes default material color settings.
+EffectColor::EffectColor() noexcept :
+    diffuseColor(g_XMOne),
+    alpha(1.f)
+{
+}
+
+// Output is premultiplied: xyz = diffuseColor * alpha, w = alpha. Does nothing unless MaterialColor is dirty.
+// Lazily recomputes the material color parameter for shaders that do not support realtime lighting.
+void EffectColor::SetConstants(_Inout_ int& dirtyFlags, _Inout_ XMVECTOR& diffuseColorConstant)
+{
+    if (dirtyFlags & EffectDirtyFlags::MaterialColor)
+    {
+        XMVECTOR alphaVector = XMVectorReplicate(alpha);
+
+        // xyz = diffuse * alpha, w = alpha.
+        diffuseColorConstant = XMVectorSelect(alphaVector, XMVectorMultiply(diffuseColor, alphaVector), g_XMSelect1110);
+
+        dirtyFlags &= ~EffectDirtyFlags::MaterialColor;
+        dirtyFlags |= EffectDirtyFlags::ConstantBuffer;
+    }
+}
+
+
+// Constructor initializes default light settings.
+EffectLights::EffectLights() noexcept :
+    emissiveColor{},
+    ambientLightColor{},
+    lightEnabled{},
+    lightDiffuseColor{},
+    lightSpecularColor{}
+{
+    for (int i = 0; i < MaxDirectionalLights; i++)
+    {
+        lightEnabled[i] = (i == 0); // only light 0 starts enabled
+        lightDiffuseColor[i] = g_XMOne;
+    }
+}
+
+
+#ifdef _PREFAST_
+#pragma prefast(push)
+#pragma prefast(disable:22103, "PREFAST doesn't understand buffer is bounded by a static const value even with SAL" )
+#endif
+
+// Initializes constant buffer fields to match the current lighting state. Disabled lights get zero diffuse/specular.
+_Use_decl_annotations_ void EffectLights::InitializeConstants(XMVECTOR& specularColorAndPowerConstant, XMVECTOR* lightDirectionConstant, XMVECTOR* lightDiffuseConstant, XMVECTOR* lightSpecularConstant) const
+{
+    static const XMVECTORF32 defaultSpecular = { { { 1, 1, 1, 16 } } };
+    static const XMVECTORF32 defaultLightDirection = { { { 0, -1, 0, 0 } } };
+
+    specularColorAndPowerConstant = defaultSpecular;
+
+    for (int i = 0; i < MaxDirectionalLights; i++)
+    {
+        lightDirectionConstant[i] = defaultLightDirection;
+
+        lightDiffuseConstant[i] = lightEnabled[i] ? lightDiffuseColor[i] : g_XMZero;
+        lightSpecularConstant[i] = lightEnabled[i] ? lightSpecularColor[i] : g_XMZero;
+    }
+}
+
+#ifdef _PREFAST_
+#pragma prefast(pop)
+#endif
+
+
+// Lazily recomputes derived parameter values used by shader lighting calculations.
+_Use_decl_annotations_ void EffectLights::SetConstants(int& dirtyFlags, EffectMatrices const& matrices, XMMATRIX& worldConstant, XMVECTOR worldInverseTransposeConstant[3], XMVECTOR& eyePositionConstant, XMVECTOR& diffuseColorConstant, XMVECTOR& emissiveColorConstant, bool lightingEnabled)
+{
+    if (lightingEnabled)
+    {
+        // World inverse transpose matrix.
+        if (dirtyFlags & EffectDirtyFlags::WorldInverseTranspose)
+        {
+            worldConstant = XMMatrixTranspose(matrices.world);
+
+            XMMATRIX worldInverse = XMMatrixInverse(nullptr, matrices.world);
+
+            worldInverseTransposeConstant[0] = worldInverse.r[0];
+            worldInverseTransposeConstant[1] = worldInverse.r[1];
+            worldInverseTransposeConstant[2] = worldInverse.r[2];
+
+            dirtyFlags &= ~EffectDirtyFlags::WorldInverseTranspose;
+            dirtyFlags |= EffectDirtyFlags::ConstantBuffer;
+        }
+
+        // Eye position vector.
+        if (dirtyFlags & EffectDirtyFlags::EyePosition)
+        {
+            XMMATRIX viewInverse = XMMatrixInverse(nullptr, matrices.view);
+
+            eyePositionConstant = viewInverse.r[3];
+
+            dirtyFlags &= ~EffectDirtyFlags::EyePosition;
+            dirtyFlags |= EffectDirtyFlags::ConstantBuffer;
+        }
+    }
+
+    // Material color parameters. The desired lighting model is:
+    //
+    //     ((ambientLightColor + sum(diffuse directional light)) * diffuseColor) + emissiveColor
+    //
+    // When lighting is disabled, ambient and directional lights are ignored, leaving:
+    //
+    //     diffuseColor + emissiveColor
+    //
+    // For the lighting disabled case, we can save one shader instruction by precomputing
+    // diffuse+emissive on the CPU, after which the shader can use diffuseColor directly,
+    // ignoring its emissive parameter.
+    //
+    // When lighting is enabled, we can merge the ambient and emissive settings. If we
+    // set our emissive parameter to emissive+(ambient*diffuse), the shader no longer
+    // needs to bother adding the ambient contribution, simplifying its computation to:
+    //
+    //     (sum(diffuse directional light) * diffuseColor) + emissiveColor
+    //
+    // For further optimization goodness, we merge material alpha with the diffuse
+    // color parameter, and premultiply all color values by this alpha.
+
+    if (dirtyFlags & EffectDirtyFlags::MaterialColor)
+    {
+        XMVECTOR diffuse = diffuseColor;
+        XMVECTOR alphaVector = XMVectorReplicate(alpha);
+
+        if (lightingEnabled)
+        {
+            // Merge emissive and ambient light contributions.
+            // (emissiveColor + ambientLightColor * diffuse) * alphaVector;
+            emissiveColorConstant = XMVectorMultiply(XMVectorMultiplyAdd(ambientLightColor, diffuse, emissiveColor), alphaVector);
+        }
+        else
+        {
+            // Merge diffuse and emissive light contributions.
+            diffuse = XMVectorAdd(diffuse, emissiveColor);
+        }
+
+        // xyz = diffuse * alpha, w = alpha.
+        diffuseColorConstant = XMVectorSelect(alphaVector, XMVectorMultiply(diffuse, alphaVector), g_XMSelect1110);
+
+        dirtyFlags &= ~EffectDirtyFlags::MaterialColor;
+        dirtyFlags |= EffectDirtyFlags::ConstantBuffer;
+    }
+}
+
+
+#ifdef _PREFAST_
+#pragma prefast(push)
+#pragma prefast(disable:26015, "PREFAST doesn't understand that ValidateLightIndex bounds whichLight" )
+#endif
+
+// Helper for turning one of the directional lights on or off. Returns the dirty flags to raise (0 when the state did not change).
+_Use_decl_annotations_ int EffectLights::SetLightEnabled(int whichLight, bool value, XMVECTOR* lightDiffuseConstant, XMVECTOR* lightSpecularConstant)
+{
+    ValidateLightIndex(whichLight);
+
+    if (lightEnabled[whichLight] == value)
+        return 0;
+
+    lightEnabled[whichLight] = value;
+
+    if (value)
+    {
+        // If this light is now on, store its color in the constant buffer.
+        lightDiffuseConstant[whichLight] = lightDiffuseColor[whichLight];
+        lightSpecularConstant[whichLight] = lightSpecularColor[whichLight];
+    }
+    else
+    {
+        // If the light is off, reset constant buffer colors to zero.
+        lightDiffuseConstant[whichLight] = g_XMZero;
+        lightSpecularConstant[whichLight] = g_XMZero;
+    }
+
+    return EffectDirtyFlags::ConstantBuffer;
+}
+
+
+// Helper for setting diffuse color of one of the directional lights. Returns the dirty flags to raise (0 while the light is off).
+_Use_decl_annotations_
+int XM_CALLCONV EffectLights::SetLightDiffuseColor(int whichLight, FXMVECTOR value, XMVECTOR* lightDiffuseConstant)
+{
+    ValidateLightIndex(whichLight);
+
+    // Locally store the new color.
+    lightDiffuseColor[whichLight] = value;
+
+    // If this light is currently on, also update the constant buffer.
+    if (lightEnabled[whichLight])
+    {
+        lightDiffuseConstant[whichLight] = value;
+
+        return EffectDirtyFlags::ConstantBuffer;
+    }
+
+    return 0;
+}
+
+
+// Helper for setting specular color of one of the directional lights. Returns the dirty flags to raise (0 while the light is off).
+_Use_decl_annotations_
+int XM_CALLCONV EffectLights::SetLightSpecularColor(int whichLight, FXMVECTOR value, XMVECTOR* lightSpecularConstant)
+{
+    ValidateLightIndex(whichLight);
+
+    // Locally store the new color.
+    lightSpecularColor[whichLight] = value;
+
+    // If this light is currently on, also update the constant buffer.
+    if (lightEnabled[whichLight])
+    {
+        lightSpecularConstant[whichLight] = value;
+
+        return EffectDirtyFlags::ConstantBuffer;
+    }
+
+    return 0;
+}
+
+#ifdef _PREFAST_
+#pragma prefast(pop)
+#endif
+
+
+// Parameter validation helper. Throws std::out_of_range unless 0 <= whichLight < MaxDirectionalLights.
+void EffectLights::ValidateLightIndex(int whichLight)
+{
+    if (whichLight < 0 || whichLight >= MaxDirectionalLights)
+    {
+        throw std::out_of_range("whichLight parameter out of range");
+    }
+}
+
+
+// Activates the default lighting rig (key, fill, and back lights).
+void EffectLights::EnableDefaultLighting(_In_ IEffectLights* effect)
+{
+    static const XMVECTORF32 defaultDirections[MaxDirectionalLights] =
+    {
+        { { { -0.5265408f, -0.5735765f, -0.6275069f, 0 } } },
+        { { { 0.7198464f, 0.3420201f, 0.6040227f, 0 } } },
+        { { { 0.4545195f, -0.7660444f, 0.4545195f, 0 } } },
+    };
+
+    static const XMVECTORF32 defaultDiffuse[MaxDirectionalLights] =
+    {
+        { { { 1.0000000f, 0.9607844f, 0.8078432f, 0 } } },
+        { { { 0.9647059f, 0.7607844f, 0.4078432f, 0 } } },
+        { { { 0.3231373f, 0.3607844f, 0.3937255f, 0 } } },
+    };
+
+    static const XMVECTORF32 defaultSpecular[MaxDirectionalLights] =
+    {
+        { { { 1.0000000f, 0.9607844f, 0.8078432f, 0 } } },
+        { { { 0.0000000f, 0.0000000f, 0.0000000f, 0 } } },
+        { { { 0.3231373f, 0.3607844f, 0.3937255f, 0 } } },
+    };
+
+    static const XMVECTORF32 defaultAmbient = { { { 0.05333332f, 0.09882354f, 0.1819608f, 0 } } };
+
+    effect->SetLightingEnabled(true);
+    effect->SetAmbientLightColor(defaultAmbient);
+
+    for (int i = 0; i < MaxDirectionalLights; i++)
+    {
+        effect->SetLightEnabled(i, true);
+        effect->SetLightDirection(i, defaultDirections[i]);
+        effect->SetLightDiffuseColor(i, defaultDiffuse[i]);
+        effect->SetLightSpecularColor(i, defaultSpecular[i]);
+    }
+}
+
+
+// Gets or lazily creates the specified vertex shader permutation. Creation goes through DemandCreate with mMutex; the new shader is given a debug name.
+ID3D11VertexShader* EffectDeviceResources::DemandCreateVertexShader(_Inout_ ComPtr<ID3D11VertexShader>& vertexShader, ShaderBytecode const& bytecode)
+{
+    return DemandCreate(vertexShader, mMutex, [&](ID3D11VertexShader** pResult) -> HRESULT
+    {
+        HRESULT hr = mDevice->CreateVertexShader(bytecode.code, bytecode.length, nullptr, pResult);
+
+        if (SUCCEEDED(hr))
+            SetDebugObjectName(*pResult, "DirectXTK:Effect");
+
+        return hr;
+    });
+}
+
+
+// Gets or lazily creates the specified pixel shader permutation.
+ID3D11PixelShader* EffectDeviceResources::DemandCreatePixelShader(_Inout_ ComPtr<ID3D11PixelShader>& pixelShader, ShaderBytecode const& bytecode)
+{
+    return DemandCreate(pixelShader, mMutex, [&](ID3D11PixelShader** pResult) -> HRESULT
+    {
+        HRESULT hr = mDevice->CreatePixelShader(bytecode.code, bytecode.length, nullptr, pResult);
+
+        if (SUCCEEDED(hr))
+            SetDebugObjectName(*pResult, "DirectXTK:Effect");
+
+        return hr;
+    });
+}
+
+
+// Gets or lazily creates the default texture: an immutable 1x1 R8G8B8A8_UNORM texture whose single pixel is 0xffffffff.
+ID3D11ShaderResourceView* EffectDeviceResources::GetDefaultTexture()
+{
+    return DemandCreate(mDefaultTexture, mMutex, [&](ID3D11ShaderResourceView** pResult) -> HRESULT
+    {
+        static const uint32_t s_pixel = 0xffffffff;
+
+        D3D11_SUBRESOURCE_DATA initData = { &s_pixel, sizeof(uint32_t), 0 };
+
+        D3D11_TEXTURE2D_DESC desc = {};
+        desc.Width = desc.Height = desc.MipLevels = desc.ArraySize = 1;
+        desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+        desc.SampleDesc.Count = 1;
+        desc.Usage = D3D11_USAGE_IMMUTABLE;
+        desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
+
+        ComPtr<ID3D11Texture2D> tex;
+        HRESULT hr = mDevice->CreateTexture2D(&desc, &initData, tex.GetAddressOf());
+
+        if (SUCCEEDED(hr))
+        {
+            SetDebugObjectName(tex.Get(), "DirectXTK:Effect");
+
+            D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc = {};
+            SRVDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+            SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
+            SRVDesc.Texture2D.MipLevels = 1;
+
+            hr = mDevice->CreateShaderResourceView(tex.Get(), &SRVDesc, pResult);
+            if (SUCCEEDED(hr))
+                SetDebugObjectName(*pResult, "DirectXTK:Effect");
+        }
+
+        return hr;
+    });
+}
+
+// Gets device feature level
+D3D_FEATURE_LEVEL EffectDeviceResources::GetDeviceFeatureLevel() const
+{
+    return mDevice->GetFeatureLevel();
+}
diff --git a/Sdk/External/DirectXTK/Src/EffectCommon.h b/Sdk/External/DirectXTK/Src/EffectCommon.h
new file mode 100644
index 0000000..f8f5572
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/EffectCommon.h
@@ -0,0 +1,295 @@
+//--------------------------------------------------------------------------------------
+// File: EffectCommon.h
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#include <memory> // NOTE(review): header name was lost from this text; <memory> matches the std::shared_ptr use below - confirm against upstream
+
+#include "Effects.h"
+#include "AlignedNew.h"
+#include "BufferHelpers.h"
+#include "DirectXHelpers.h"
+#include "PlatformHelpers.h"
+#include "SharedResourcePool.h"
+
+
+// BasicEffect, SkinnedEffect, et al, have many things in common, but also significant
+// differences (for instance, not all the effects support lighting). This header breaks
+// out common functionality into a set of helpers which can be assembled in different
+// combinations to build up whatever subset is needed by each effect.
+
+
+namespace DirectX
+{
+    // Bitfield tracks which derived parameter values need to be recomputed.
+    namespace EffectDirtyFlags
+    {
+        constexpr int ConstantBuffer = 0x01;
+        constexpr int WorldViewProj = 0x02;
+        constexpr int WorldInverseTranspose = 0x04;
+        constexpr int EyePosition = 0x08;
+        constexpr int MaterialColor = 0x10;
+        constexpr int FogVector = 0x20;
+        constexpr int FogEnable = 0x40;
+        constexpr int AlphaTest = 0x80;
+    }
+
+
+    // Helper stores matrix parameter values, and computes derived matrices.
+    struct EffectMatrices
+    {
+        EffectMatrices() noexcept;
+
+        XMMATRIX world;
+        XMMATRIX view;
+        XMMATRIX projection;
+        XMMATRIX worldView;
+
+        void SetConstants(_Inout_ int& dirtyFlags, _Inout_ XMMATRIX& worldViewProjConstant);
+    };
+
+
+    // Helper stores the current fog settings, and computes derived shader parameters.
+    struct EffectFog
+    {
+        EffectFog() noexcept;
+
+        bool enabled;
+        float start;
+        float end;
+
+        void XM_CALLCONV SetConstants(_Inout_ int& dirtyFlags, _In_ FXMMATRIX worldView, _Inout_ XMVECTOR& fogVectorConstant);
+    };
+
+
+    // Helper stores material color settings, and computes derived parameters for shaders that do not support realtime lighting.
+    struct EffectColor
+    {
+        EffectColor() noexcept;
+
+        XMVECTOR diffuseColor;
+        float alpha;
+
+        void SetConstants(_Inout_ int& dirtyFlags, _Inout_ XMVECTOR& diffuseColorConstant);
+    };
+
+
+    // Helper stores the current light settings, and computes derived shader parameters.
+    struct EffectLights : public EffectColor
+    {
+        EffectLights() noexcept;
+
+        static constexpr int MaxDirectionalLights = IEffectLights::MaxDirectionalLights;
+
+
+        // Fields.
+        XMVECTOR emissiveColor;
+        XMVECTOR ambientLightColor;
+
+        bool lightEnabled[MaxDirectionalLights];
+        XMVECTOR lightDiffuseColor[MaxDirectionalLights];
+        XMVECTOR lightSpecularColor[MaxDirectionalLights];
+
+
+        // Methods.
+        void InitializeConstants(_Out_ XMVECTOR& specularColorAndPowerConstant, _Out_writes_all_(MaxDirectionalLights) XMVECTOR* lightDirectionConstant, _Out_writes_all_(MaxDirectionalLights) XMVECTOR* lightDiffuseConstant, _Out_writes_all_(MaxDirectionalLights) XMVECTOR* lightSpecularConstant) const;
+        void SetConstants(_Inout_ int& dirtyFlags, _In_ EffectMatrices const& matrices, _Inout_ XMMATRIX& worldConstant, _Inout_updates_(3) XMVECTOR worldInverseTransposeConstant[3], _Inout_ XMVECTOR& eyePositionConstant, _Inout_ XMVECTOR& diffuseColorConstant, _Inout_ XMVECTOR& emissiveColorConstant, bool lightingEnabled);
+
+        int SetLightEnabled(int whichLight, bool value, _Inout_updates_(MaxDirectionalLights) XMVECTOR* lightDiffuseConstant, _Inout_updates_(MaxDirectionalLights) XMVECTOR* lightSpecularConstant);
+        int XM_CALLCONV SetLightDiffuseColor(int whichLight, FXMVECTOR value, _Inout_updates_(MaxDirectionalLights) XMVECTOR* lightDiffuseConstant);
+        int XM_CALLCONV SetLightSpecularColor(int whichLight, FXMVECTOR value, _Inout_updates_(MaxDirectionalLights) XMVECTOR* lightSpecularConstant);
+
+        static void ValidateLightIndex(int whichLight);
+        static void EnableDefaultLighting(_In_ IEffectLights* effect);
+    };
+
+
+    // Points to a precompiled vertex or pixel shader program.
+    struct ShaderBytecode
+    {
+        void const* code;
+        size_t length;
+    };
+
+
+    // Factory for lazily instantiating shaders. BasicEffect supports many different
+    // shader permutations, so we only bother creating the ones that are actually used.
+    class EffectDeviceResources
+    {
+    public:
+        EffectDeviceResources(_In_ ID3D11Device* device) noexcept
+            : mDevice(device)
+        { }
+
+        ID3D11VertexShader* DemandCreateVertexShader(_Inout_ Microsoft::WRL::ComPtr<ID3D11VertexShader>& vertexShader, ShaderBytecode const& bytecode);
+        ID3D11PixelShader * DemandCreatePixelShader (_Inout_ Microsoft::WRL::ComPtr<ID3D11PixelShader> & pixelShader, ShaderBytecode const& bytecode);
+        ID3D11ShaderResourceView* GetDefaultTexture();
+        D3D_FEATURE_LEVEL GetDeviceFeatureLevel() const;
+
+    protected:
+        Microsoft::WRL::ComPtr<ID3D11Device> mDevice;
+        Microsoft::WRL::ComPtr<ID3D11ShaderResourceView> mDefaultTexture;
+
+        std::mutex mMutex;
+    };
+
+
+    // Templated base class provides functionality common to all the built-in effects.
+    template<typename Traits>
+    class EffectBase : public AlignedNew<typename Traits::ConstantBufferType>
+    {
+    public:
+        // Constructor. dirtyFlags starts as INT_MAX, i.e. every flag bit set.
+        EffectBase(_In_ ID3D11Device* device)
+            : constants{},
+            dirtyFlags(INT_MAX),
+            mConstantBuffer(device),
+            mDeviceResources(deviceResourcesPool.DemandCreate(device))
+        {
+            SetDebugObjectName(mConstantBuffer.GetBuffer(), "Effect");
+        }
+
+
+        // Fields.
+        typename Traits::ConstantBufferType constants;
+
+        EffectMatrices matrices;
+        EffectFog fog;
+
+        Microsoft::WRL::ComPtr<ID3D11ShaderResourceView> texture;
+
+        int dirtyFlags;
+
+
+        // Helper looks up the bytecode for the specified vertex shader permutation.
+        // Client code needs this in order to create matching input layouts.
+        void GetVertexShaderBytecode(int permutation, _Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) noexcept
+        {
+            assert(permutation >= 0 && permutation < Traits::ShaderPermutationCount);
+            _Analysis_assume_(permutation >= 0 && permutation < Traits::ShaderPermutationCount);
+            int shaderIndex = VertexShaderIndices[permutation];
+            assert(shaderIndex >= 0 && shaderIndex < Traits::VertexShaderCount);
+            _Analysis_assume_(shaderIndex >= 0 && shaderIndex < Traits::VertexShaderCount);
+
+            ShaderBytecode const& bytecode = VertexShaderBytecode[shaderIndex];
+
+            *pShaderByteCode = bytecode.code;
+            *pByteCodeLength = bytecode.length;
+        }
+
+
+        // Helper sets our shaders and constant buffers onto the D3D device.
+        void ApplyShaders(_In_ ID3D11DeviceContext* deviceContext, int permutation)
+        {
+            // Set shaders.
+            auto vertexShader = mDeviceResources->GetVertexShader(permutation);
+            auto pixelShader = mDeviceResources->GetPixelShader(permutation);
+
+            deviceContext->VSSetShader(vertexShader, nullptr, 0);
+            deviceContext->PSSetShader(pixelShader, nullptr, 0);
+
+#if defined(_XBOX_ONE) && defined(_TITLE)
+            void *grfxMemory;
+            mConstantBuffer.SetData(deviceContext, constants, &grfxMemory);
+
+            Microsoft::WRL::ComPtr<ID3D11DeviceContextX> deviceContextX;
+            ThrowIfFailed(deviceContext->QueryInterface(IID_GRAPHICS_PPV_ARGS(deviceContextX.GetAddressOf())));
+
+            auto buffer = mConstantBuffer.GetBuffer();
+
+            deviceContextX->VSSetPlacementConstantBuffer(0, buffer, grfxMemory);
+            deviceContextX->PSSetPlacementConstantBuffer(0, buffer, grfxMemory);
+#else
+            // Make sure the constant buffer is up to date.
+            if (dirtyFlags & EffectDirtyFlags::ConstantBuffer)
+            {
+                mConstantBuffer.SetData(deviceContext, constants);
+
+                dirtyFlags &= ~EffectDirtyFlags::ConstantBuffer;
+            }
+
+            // Set the constant buffer.
+            ID3D11Buffer* buffer = mConstantBuffer.GetBuffer();
+
+            deviceContext->VSSetConstantBuffers(0, 1, &buffer);
+            deviceContext->PSSetConstantBuffers(0, 1, &buffer);
+#endif
+        }
+
+
+        // Helpers
+        ID3D11ShaderResourceView* GetDefaultTexture() { return mDeviceResources->GetDefaultTexture(); }
+        D3D_FEATURE_LEVEL GetDeviceFeatureLevel() const { return mDeviceResources->GetDeviceFeatureLevel(); }
+
+
+    protected:
+        // Static arrays hold all the precompiled shader permutations.
+        static const ShaderBytecode VertexShaderBytecode[Traits::VertexShaderCount];
+        static const ShaderBytecode PixelShaderBytecode[Traits::PixelShaderCount];
+
+        static const int VertexShaderIndices[Traits::ShaderPermutationCount];
+        static const int PixelShaderIndices[Traits::ShaderPermutationCount];
+
+    private:
+        // D3D constant buffer holds a copy of the same data as the public 'constants' field.
+        ConstantBuffer<typename Traits::ConstantBufferType> mConstantBuffer;
+
+        // Only one of these helpers is allocated per D3D device, even if there are multiple effect instances.
+        class DeviceResources : protected EffectDeviceResources
+        {
+        public:
+            DeviceResources(_In_ ID3D11Device* device) noexcept
+                : EffectDeviceResources(device),
+                mVertexShaders{},
+                mPixelShaders{}
+            { }
+
+
+            // Gets or lazily creates the specified vertex shader permutation.
+            ID3D11VertexShader* GetVertexShader(int permutation)
+            {
+                assert(permutation >= 0 && permutation < Traits::ShaderPermutationCount);
+                _Analysis_assume_(permutation >= 0 && permutation < Traits::ShaderPermutationCount);
+                int shaderIndex = VertexShaderIndices[permutation];
+                assert(shaderIndex >= 0 && shaderIndex < Traits::VertexShaderCount);
+                _Analysis_assume_(shaderIndex >= 0 && shaderIndex < Traits::VertexShaderCount);
+
+                return DemandCreateVertexShader(mVertexShaders[shaderIndex], VertexShaderBytecode[shaderIndex]);
+            }
+
+
+            // Gets or lazily creates the specified pixel shader permutation.
+            ID3D11PixelShader* GetPixelShader(int permutation)
+            {
+                assert(permutation >= 0 && permutation < Traits::ShaderPermutationCount);
+                _Analysis_assume_(permutation >= 0 && permutation < Traits::ShaderPermutationCount);
+                int shaderIndex = PixelShaderIndices[permutation];
+                assert(shaderIndex >= 0 && shaderIndex < Traits::PixelShaderCount);
+                _Analysis_assume_(shaderIndex >= 0 && shaderIndex < Traits::PixelShaderCount);
+
+                return DemandCreatePixelShader(mPixelShaders[shaderIndex], PixelShaderBytecode[shaderIndex]);
+            }
+
+
+            // Helpers
+            ID3D11ShaderResourceView* GetDefaultTexture() { return EffectDeviceResources::GetDefaultTexture(); }
+            D3D_FEATURE_LEVEL GetDeviceFeatureLevel() const { return EffectDeviceResources::GetDeviceFeatureLevel(); }
+
+        private:
+            Microsoft::WRL::ComPtr<ID3D11VertexShader> mVertexShaders[Traits::VertexShaderCount];
+            Microsoft::WRL::ComPtr<ID3D11PixelShader> mPixelShaders[Traits::PixelShaderCount];
+        };
+
+
+        // Per-device resources.
+        std::shared_ptr<DeviceResources> mDeviceResources;
+
+        static SharedResourcePool<ID3D11Device*, DeviceResources> deviceResourcesPool;
+    };
+}
diff --git a/Sdk/External/DirectXTK/Src/EffectFactory.cpp b/Sdk/External/DirectXTK/Src/EffectFactory.cpp
new file mode 100644
index 0000000..a45ef26
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/EffectFactory.cpp
@@ -0,0 +1,539 @@
+//--------------------------------------------------------------------------------------
+// File: EffectFactory.cpp
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#include "pch.h"
+#include "Effects.h"
+#include "DemandCreate.h"
+#include "SharedResourcePool.h"
+
+#include "DDSTextureLoader.h"
+#include "WICTextureLoader.h"
+
+using namespace DirectX;
+using Microsoft::WRL::ComPtr;
+
+// Internal EffectFactory implementation class.
Only one of these helpers is allocated +// per D3D device, even if there are multiple public facing EffectFactory instances. +class EffectFactory::Impl +{ +public: + Impl(_In_ ID3D11Device* device) + : mPath{}, + mDevice(device), + mSharing(true), + mUseNormalMapEffect(true), + mForceSRGB(false) + {} + + std::shared_ptr CreateEffect(_In_ IEffectFactory* factory, _In_ const IEffectFactory::EffectInfo& info, _In_opt_ ID3D11DeviceContext* deviceContext); // NOTE(review): template arguments (here and on ComPtr/SharedResourcePool below) are absent from this patch text - presumably lost on export; confirm against upstream + void CreateTexture(_In_z_ const wchar_t* texture, _In_opt_ ID3D11DeviceContext* deviceContext, _Outptr_ ID3D11ShaderResourceView** textureView); + + void ReleaseCache(); + void SetSharing(bool enabled) noexcept { mSharing = enabled; } + void EnableNormalMapEffect(bool enabled) noexcept { mUseNormalMapEffect = enabled; } + void EnableForceSRGB(bool forceSRGB) noexcept { mForceSRGB = forceSRGB; } + + static SharedResourcePool instancePool; + + wchar_t mPath[MAX_PATH]; // directory prefix prepended to texture names in CreateTexture; empty by default + + ComPtr mDevice; + +private: + using EffectCache = std::map< std::wstring, std::shared_ptr >; + using TextureCache = std::map< std::wstring, ComPtr >; + + EffectCache mEffectCache; // BasicEffect instances, keyed by EffectInfo::name + EffectCache mEffectCacheSkinning; + EffectCache mEffectCacheDualTexture; + EffectCache mEffectNormalMap; + TextureCache mTextureCache; // keyed by the texture name passed to CreateTexture + + bool mSharing; // when true, named effects and textures are looked up in / added to the caches above + bool mUseNormalMapEffect; // when false, CreateEffect does not take its NormalMapEffect branch + bool mForceSRGB; // forwarded to the DDS/WIC texture loaders + + std::mutex mutex; // taken when inserting into or clearing the caches +}; + + +// Global instance pool. 
+SharedResourcePool EffectFactory::Impl::instancePool; + + +_Use_decl_annotations_ +std::shared_ptr EffectFactory::Impl::CreateEffect(IEffectFactory* factory, const IEffectFactory::EffectInfo& info, ID3D11DeviceContext* deviceContext) +{ /* Picks one effect type in priority order: skinning, then dual texture, then normal map (when enabled), otherwise BasicEffect. */ + if (info.enableSkinning) + { + // SkinnedEffect. NOTE(review): this cache lookup is done without taking 'mutex', although the insert further down does take it - confirm whether concurrent CreateEffect calls are expected. + if (mSharing && info.name && *info.name) + { + auto it = mEffectCacheSkinning.find(info.name); + if (mSharing && it != mEffectCacheSkinning.end()) + { + return it->second; + } + } + + auto effect = std::make_shared(mDevice.Get()); + + effect->EnableDefaultLighting(); + + effect->SetAlpha(info.alpha); + + // Skinned Effect does not have an ambient material color, or per-vertex color support + + XMVECTOR color = XMLoadFloat3(&info.diffuseColor); + effect->SetDiffuseColor(color); + + if (info.specularColor.x != 0 || info.specularColor.y != 0 || info.specularColor.z != 0) + { + color = XMLoadFloat3(&info.specularColor); + effect->SetSpecularColor(color); + effect->SetSpecularPower(info.specularPower); + } + else + { + effect->DisableSpecular(); + } + + if (info.emissiveColor.x != 0 || info.emissiveColor.y != 0 || info.emissiveColor.z != 0) + { + color = XMLoadFloat3(&info.emissiveColor); + effect->SetEmissiveColor(color); + } + + if (info.diffuseTexture && *info.diffuseTexture) + { + ComPtr srv; + + factory->CreateTexture(info.diffuseTexture, deviceContext, srv.GetAddressOf()); + + effect->SetTexture(srv.Get()); + } + + if (info.biasedVertexNormals) + { + effect->SetBiasedVertexNormals(true); + } + + if (mSharing && info.name && *info.name) + { + std::lock_guard lock(mutex); + EffectCache::value_type v(info.name, effect); + mEffectCacheSkinning.insert(v); // map::insert leaves an existing entry with the same name untouched + } + + return std::move(effect); // NOTE(review): std::move on a returned local - presumably kept for the converting return on older compilers; confirm before removing + } + else if (info.enableDualTexture) + { + // DualTextureEffect + if (mSharing && info.name && *info.name) + { + auto it = mEffectCacheDualTexture.find(info.name); + if (mSharing && it != mEffectCacheDualTexture.end()) + { + return it->second; + } + } + + auto effect = 
std::make_shared(mDevice.Get()); + + // Dual texture effect doesn't support lighting (usually it's lightmaps) + + effect->SetAlpha(info.alpha); + + if (info.perVertexColor) + { + effect->SetVertexColorEnabled(true); + } + + XMVECTOR color = XMLoadFloat3(&info.diffuseColor); + effect->SetDiffuseColor(color); + + if (info.diffuseTexture && *info.diffuseTexture) + { + ComPtr srv; + + factory->CreateTexture(info.diffuseTexture, deviceContext, srv.GetAddressOf()); + + effect->SetTexture(srv.Get()); + } + + if (info.emissiveTexture && *info.emissiveTexture) + { + ComPtr srv; + + factory->CreateTexture(info.emissiveTexture, deviceContext, srv.GetAddressOf()); + + effect->SetTexture2(srv.Get()); + } + else if (info.specularTexture && *info.specularTexture) + { + // If there's no emissive texture specified, use the specular texture as the second texture + ComPtr srv; + + factory->CreateTexture(info.specularTexture, deviceContext, srv.GetAddressOf()); + + effect->SetTexture2(srv.Get()); + } + + if (mSharing && info.name && *info.name) + { + std::lock_guard lock(mutex); + EffectCache::value_type v(info.name, effect); + mEffectCacheDualTexture.insert(v); + } + + return std::move(effect); + } + else if (info.enableNormalMaps && mUseNormalMapEffect) + { + // NormalMapEffect + if (mSharing && info.name && *info.name) + { + auto it = mEffectNormalMap.find(info.name); + if (mSharing && it != mEffectNormalMap.end()) + { + return it->second; + } + } + + auto effect = std::make_shared(mDevice.Get()); + + effect->EnableDefaultLighting(); + + effect->SetAlpha(info.alpha); + + if (info.perVertexColor) + { + effect->SetVertexColorEnabled(true); + } + + // NormalMap Effect does not have an ambient material color + + XMVECTOR color = XMLoadFloat3(&info.diffuseColor); + effect->SetDiffuseColor(color); + + if (info.specularColor.x != 0 || info.specularColor.y != 0 || info.specularColor.z != 0) + { + color = XMLoadFloat3(&info.specularColor); + effect->SetSpecularColor(color); + 
effect->SetSpecularPower(info.specularPower); + } + else + { + effect->DisableSpecular(); + } + + if (info.emissiveColor.x != 0 || info.emissiveColor.y != 0 || info.emissiveColor.z != 0) + { + color = XMLoadFloat3(&info.emissiveColor); + effect->SetEmissiveColor(color); + } + + if (info.diffuseTexture && *info.diffuseTexture) + { + ComPtr srv; + + factory->CreateTexture(info.diffuseTexture, deviceContext, srv.GetAddressOf()); + + effect->SetTexture(srv.Get()); + } + + if (info.specularTexture && *info.specularTexture) + { + ComPtr srv; + + factory->CreateTexture(info.specularTexture, deviceContext, srv.GetAddressOf()); + + effect->SetSpecularTexture(srv.Get()); + } + + if (info.normalTexture && *info.normalTexture) + { + ComPtr srv; + + factory->CreateTexture(info.normalTexture, deviceContext, srv.GetAddressOf()); + + effect->SetNormalTexture(srv.Get()); + } + + if (info.biasedVertexNormals) + { + effect->SetBiasedVertexNormals(true); + } + + if (mSharing && info.name && *info.name) + { + std::lock_guard lock(mutex); + EffectCache::value_type v(info.name, effect); + mEffectNormalMap.insert(v); + } + + return std::move(effect); + } + else + { + // BasicEffect + if (mSharing && info.name && *info.name) + { + auto it = mEffectCache.find(info.name); + if (mSharing && it != mEffectCache.end()) + { + return it->second; + } + } + + auto effect = std::make_shared(mDevice.Get()); + + effect->EnableDefaultLighting(); + effect->SetLightingEnabled(true); + + effect->SetAlpha(info.alpha); + + if (info.perVertexColor) + { + effect->SetVertexColorEnabled(true); + } + + // Basic Effect does not have an ambient material color + + XMVECTOR color = XMLoadFloat3(&info.diffuseColor); + effect->SetDiffuseColor(color); + + if (info.specularColor.x != 0 || info.specularColor.y != 0 || info.specularColor.z != 0) + { + color = XMLoadFloat3(&info.specularColor); + effect->SetSpecularColor(color); + effect->SetSpecularPower(info.specularPower); + } + else + { + effect->DisableSpecular(); + } 
+ + if (info.emissiveColor.x != 0 || info.emissiveColor.y != 0 || info.emissiveColor.z != 0) + { + color = XMLoadFloat3(&info.emissiveColor); + effect->SetEmissiveColor(color); + } + + if (info.diffuseTexture && *info.diffuseTexture) + { + ComPtr srv; + + factory->CreateTexture(info.diffuseTexture, deviceContext, srv.GetAddressOf()); + + effect->SetTexture(srv.Get()); + effect->SetTextureEnabled(true); + } + + if (info.biasedVertexNormals) + { + effect->SetBiasedVertexNormals(true); + } + + if (mSharing && info.name && *info.name) + { + std::lock_guard lock(mutex); + EffectCache::value_type v(info.name, effect); + mEffectCache.insert(v); + } + + return std::move(effect); + } +} + +_Use_decl_annotations_ +void EffectFactory::Impl::CreateTexture(const wchar_t* name, ID3D11DeviceContext* deviceContext, ID3D11ShaderResourceView** textureView) +{ /* Returns a cached SRV for 'name' when sharing is on, otherwise loads the file (DDS by extension, anything else through WIC) and throws std::exception on failure. */ + if (!name || !textureView) + throw std::exception("invalid arguments"); + +#if defined(_XBOX_ONE) && defined(_TITLE) + UNREFERENCED_PARAMETER(deviceContext); +#endif + + auto it = mTextureCache.find(name); /* NOTE(review): lookup is performed without taking 'mutex'; the insert at the end of this function does take it - confirm threading expectations */ + + if (mSharing && it != mTextureCache.end()) + { + ID3D11ShaderResourceView* srv = it->second.Get(); + srv->AddRef(); /* the caller receives its own reference; the cache keeps its one */ + *textureView = srv; + } + else + { + wchar_t fullName[MAX_PATH] = {}; + wcscpy_s(fullName, mPath); + wcscat_s(fullName, name); + + WIN32_FILE_ATTRIBUTE_DATA fileAttr = {}; + if (!GetFileAttributesExW(fullName, GetFileExInfoStandard, &fileAttr)) + { + // Try Current Working Directory (CWD), i.e. the name without the mPath prefix + wcscpy_s(fullName, name); + if (!GetFileAttributesExW(fullName, GetFileExInfoStandard, &fileAttr)) + { + DebugTrace("ERROR: EffectFactory could not find texture file '%ls'\n", name); + throw std::exception("CreateTexture"); + } + } + + wchar_t ext[_MAX_EXT]; + _wsplitpath_s(name, nullptr, 0, nullptr, 0, nullptr, 0, ext, _MAX_EXT); + + if (_wcsicmp(ext, L".dds") == 0) + { + HRESULT hr = CreateDDSTextureFromFileEx( + mDevice.Get(), fullName, 0, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + mForceSRGB, 
nullptr, textureView); + if (FAILED(hr)) + { + DebugTrace("ERROR: CreateDDSTextureFromFile failed (%08X) for '%ls'\n", + static_cast(hr), fullName); + throw std::exception("CreateDDSTextureFromFile"); + } + } + #if !defined(_XBOX_ONE) || !defined(_TITLE) + else if (deviceContext) + { + std::lock_guard lock(mutex); /* held across the WIC load that is handed deviceContext - presumably because that context must not be used concurrently; confirm */ + HRESULT hr = CreateWICTextureFromFileEx( + mDevice.Get(), deviceContext, fullName, 0, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + mForceSRGB ? WIC_LOADER_FORCE_SRGB : WIC_LOADER_DEFAULT, nullptr, textureView); + if (FAILED(hr)) + { + DebugTrace("ERROR: CreateWICTextureFromFile failed (%08X) for '%ls'\n", + static_cast(hr), fullName); + throw std::exception("CreateWICTextureFromFile"); + } + } + #endif + else + { + HRESULT hr = CreateWICTextureFromFileEx( + mDevice.Get(), fullName, 0, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + mForceSRGB ? WIC_LOADER_FORCE_SRGB : WIC_LOADER_DEFAULT, nullptr, textureView); + if (FAILED(hr)) + { + DebugTrace("ERROR: CreateWICTextureFromFile failed (%08X) for '%ls'\n", + static_cast(hr), fullName); + throw std::exception("CreateWICTextureFromFile"); + } + } + + if (mSharing && *name && it == mTextureCache.end()) + { + std::lock_guard lock(mutex); + TextureCache::value_type v(name, *textureView); + mTextureCache.insert(v); + } + } +} + +void EffectFactory::Impl::ReleaseCache() +{ + std::lock_guard lock(mutex); + mEffectCache.clear(); + mEffectCacheSkinning.clear(); + mEffectCacheDualTexture.clear(); + mEffectNormalMap.clear(); + mTextureCache.clear(); +} + + + +//-------------------------------------------------------------------------------------- +// EffectFactory (public class; every method forwards to the shared Impl) +//-------------------------------------------------------------------------------------- + +EffectFactory::EffectFactory(_In_ ID3D11Device* device) + : pImpl(Impl::instancePool.DemandCreate(device)) +{ +} + +EffectFactory::~EffectFactory() +{ +} + + +EffectFactory::EffectFactory(EffectFactory&& moveFrom) noexcept + : 
pImpl(std::move(moveFrom.pImpl)) +{ +} + +EffectFactory& EffectFactory::operator= (EffectFactory&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + +_Use_decl_annotations_ +std::shared_ptr EffectFactory::CreateEffect(const EffectInfo& info, ID3D11DeviceContext* deviceContext) +{ + return pImpl->CreateEffect(this, info, deviceContext); +} + +_Use_decl_annotations_ +void EffectFactory::CreateTexture(const wchar_t* name, ID3D11DeviceContext* deviceContext, ID3D11ShaderResourceView** textureView) +{ + return pImpl->CreateTexture(name, deviceContext, textureView); +} + +void EffectFactory::ReleaseCache() +{ + pImpl->ReleaseCache(); +} + +void EffectFactory::SetSharing(bool enabled) noexcept +{ + pImpl->SetSharing(enabled); +} + +void EffectFactory::EnableNormalMapEffect(bool enabled) noexcept +{ + pImpl->EnableNormalMapEffect(enabled); +} + +void EffectFactory::EnableForceSRGB(bool forceSRGB) noexcept +{ + pImpl->EnableForceSRGB(forceSRGB); +} + +void EffectFactory::SetDirectory(_In_opt_z_ const wchar_t* path) noexcept +{ + if (path && *path != 0) + { + wcscpy_s(pImpl->mPath, path); + size_t len = wcsnlen(pImpl->mPath, MAX_PATH); + if (len > 0 && len < (MAX_PATH - 1)) + { + // Ensure it has a trailing slash (skipped when there is no room left in mPath for one) + if (pImpl->mPath[len - 1] != L'\\') + { + pImpl->mPath[len] = L'\\'; + pImpl->mPath[len + 1] = 0; + } + } + } + else + *pImpl->mPath = 0; +} + +ID3D11Device* EffectFactory::GetDevice() const noexcept +{ + return pImpl->mDevice.Get(); +} diff --git a/Sdk/External/DirectXTK/Src/EnvironmentMapEffect.cpp b/Sdk/External/DirectXTK/Src/EnvironmentMapEffect.cpp new file mode 100644 index 0000000..a77d801 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/EnvironmentMapEffect.cpp @@ -0,0 +1,727 @@ +//-------------------------------------------------------------------------------------- +// File: EnvironmentMapEffect.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "EffectCommon.h" + +using namespace DirectX; +using Microsoft::WRL::ComPtr; + + +// Constant buffer layout. Must match the shader! +struct EnvironmentMapEffectConstants +{ + XMVECTOR environmentMapSpecular; + float environmentMapAmount; + float fresnelFactor; + float pad[2]; // fills the rest of the 16-byte row started by the two floats above + + XMVECTOR diffuseColor; + XMVECTOR emissiveColor; + + XMVECTOR lightDirection[IEffectLights::MaxDirectionalLights]; + XMVECTOR lightDiffuseColor[IEffectLights::MaxDirectionalLights]; + + XMVECTOR eyePosition; + + XMVECTOR fogColor; + XMVECTOR fogVector; + + XMMATRIX world; + XMVECTOR worldInverseTranspose[3]; + XMMATRIX worldViewProj; +}; + +static_assert((sizeof(EnvironmentMapEffectConstants) % 16) == 0, "CB size not padded correctly"); + + +// Traits type describes our characteristics to the EffectBase template. +struct EnvironmentMapEffectTraits +{ + using ConstantBufferType = EnvironmentMapEffectConstants; + + static constexpr int VertexShaderCount = 12; // sizes the VertexShaderBytecode table + static constexpr int PixelShaderCount = 16; // sizes the PixelShaderBytecode table + static constexpr int ShaderPermutationCount = 56; // sizes both shader index tables; the largest value GetCurrentShaderPermutation can return is 55 + + static constexpr int MappingCount = 3; +}; + + +// Internal EnvironmentMapEffect implementation class. NOTE(review): the EffectBase base and the ComPtr member below appear here without template arguments - presumably lost when this patch text was exported; confirm against upstream. +class EnvironmentMapEffect::Impl : public EffectBase +{ +public: + Impl(_In_ ID3D11Device* device); + + bool preferPerPixelLighting; // only consulted for cube mapping + bool fresnelEnabled; + bool specularEnabled; // only consulted for cube mapping without per-pixel lighting + bool biasedVertexNormals; + EnvironmentMapEffect::Mapping mapping; + + EffectLights lights; + + ComPtr environmentMap; // bound to pixel-shader resource slot 1 in Apply + + int GetCurrentShaderPermutation() const noexcept; + + void Apply(_In_ ID3D11DeviceContext* deviceContext); +}; + + +// Include the precompiled shader code. 
+namespace +{ +#if defined(_XBOX_ONE) && defined(_TITLE) + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_VSEnvMap.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_VSEnvMapFresnel.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_VSEnvMapOneLight.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_VSEnvMapOneLightFresnel.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_VSEnvMapPixelLighting.inc" + + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_VSEnvMapBn.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_VSEnvMapFresnelBn.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_VSEnvMapOneLightBn.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_VSEnvMapOneLightFresnelBn.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_VSEnvMapPixelLightingBn.inc" + + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMap.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMapNoFog.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMapSpecular.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMapSpecularNoFog.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMapPixelLighting.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMapPixelLightingNoFog.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMapPixelLightingFresnel.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMapPixelLightingFresnelNoFog.inc" + + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMapSpherePixelLighting.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMapSpherePixelLightingNoFog.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMapSpherePixelLightingFresnel.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMapSpherePixelLightingFresnelNoFog.inc" + + #include 
"Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMapDualParabolaPixelLighting.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMapDualParabolaPixelLightingNoFog.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMapDualParabolaPixelLightingFresnel.inc" + #include "Shaders/Compiled/XboxOneEnvironmentMapEffect_PSEnvMapDualParabolaPixelLightingFresnelNoFog.inc" +#else + #include "Shaders/Compiled/EnvironmentMapEffect_VSEnvMap.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_VSEnvMapFresnel.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_VSEnvMapOneLight.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_VSEnvMapOneLightFresnel.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_VSEnvMapPixelLighting.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_VSEnvMapPixelLightingSM4.inc" + + #include "Shaders/Compiled/EnvironmentMapEffect_VSEnvMapBn.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_VSEnvMapFresnelBn.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_VSEnvMapOneLightBn.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_VSEnvMapOneLightFresnelBn.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_VSEnvMapPixelLightingBn.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_VSEnvMapPixelLightingBnSM4.inc" + + #include "Shaders/Compiled/EnvironmentMapEffect_PSEnvMap.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_PSEnvMapNoFog.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_PSEnvMapSpecular.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_PSEnvMapSpecularNoFog.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_PSEnvMapPixelLighting.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_PSEnvMapPixelLightingNoFog.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_PSEnvMapPixelLightingFresnel.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_PSEnvMapPixelLightingFresnelNoFog.inc" + + #include 
"Shaders/Compiled/EnvironmentMapEffect_PSEnvMapSpherePixelLighting.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_PSEnvMapSpherePixelLightingNoFog.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_PSEnvMapSpherePixelLightingFresnel.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_PSEnvMapSpherePixelLightingFresnelNoFog.inc" + + #include "Shaders/Compiled/EnvironmentMapEffect_PSEnvMapDualParabolaPixelLighting.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_PSEnvMapDualParabolaPixelLightingNoFog.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_PSEnvMapDualParabolaPixelLightingFresnel.inc" + #include "Shaders/Compiled/EnvironmentMapEffect_PSEnvMapDualParabolaPixelLightingFresnelNoFog.inc" +#endif +} + + +template<> +const ShaderBytecode EffectBase::VertexShaderBytecode[] = +{ + { EnvironmentMapEffect_VSEnvMap, sizeof(EnvironmentMapEffect_VSEnvMap) }, + { EnvironmentMapEffect_VSEnvMapFresnel, sizeof(EnvironmentMapEffect_VSEnvMapFresnel) }, + { EnvironmentMapEffect_VSEnvMapOneLight, sizeof(EnvironmentMapEffect_VSEnvMapOneLight) }, + { EnvironmentMapEffect_VSEnvMapOneLightFresnel, sizeof(EnvironmentMapEffect_VSEnvMapOneLightFresnel) }, + { EnvironmentMapEffect_VSEnvMapPixelLighting, sizeof(EnvironmentMapEffect_VSEnvMapPixelLighting) }, + + { EnvironmentMapEffect_VSEnvMapBn, sizeof(EnvironmentMapEffect_VSEnvMapBn) }, + { EnvironmentMapEffect_VSEnvMapFresnelBn, sizeof(EnvironmentMapEffect_VSEnvMapFresnelBn) }, + { EnvironmentMapEffect_VSEnvMapOneLightBn, sizeof(EnvironmentMapEffect_VSEnvMapOneLightBn) }, + { EnvironmentMapEffect_VSEnvMapOneLightFresnelBn, sizeof(EnvironmentMapEffect_VSEnvMapOneLightFresnelBn) }, + { EnvironmentMapEffect_VSEnvMapPixelLightingBn, sizeof(EnvironmentMapEffect_VSEnvMapPixelLightingBn) }, + +#if defined(_XBOX_ONE) && defined(_TITLE) + { EnvironmentMapEffect_VSEnvMapPixelLighting, sizeof(EnvironmentMapEffect_VSEnvMapPixelLighting) }, + { EnvironmentMapEffect_VSEnvMapPixelLightingBn, 
sizeof(EnvironmentMapEffect_VSEnvMapPixelLightingBn) }, +#else + { EnvironmentMapEffect_VSEnvMapPixelLightingSM4, sizeof(EnvironmentMapEffect_VSEnvMapPixelLightingSM4) }, + { EnvironmentMapEffect_VSEnvMapPixelLightingBnSM4, sizeof(EnvironmentMapEffect_VSEnvMapPixelLightingBnSM4) }, +#endif +}; + + +template<> +const int EffectBase::VertexShaderIndices[] = +{ + 0, // basic + 0, // basic, no fog + 1, // fresnel + 1, // fresnel, no fog + 0, // specular + 0, // specular, no fog + 1, // fresnel + specular + 1, // fresnel + specular, no fog + + 2, // one light + 2, // one light, no fog + 3, // one light, fresnel + 3, // one light, fresnel, no fog + 2, // one light, specular + 2, // one light, specular, no fog + 3, // one light, fresnel + specular + 3, // one light, fresnel + specular, no fog + + 4, // pixel lighting + 4, // pixel lighting, no fog + 4, // pixel lighting, fresnel + 4, // pixel lighting, fresnel, no fog + + 5, // basic (biased vertex normals) + 5, // basic (biased vertex normals), no fog + 6, // fresnel (biased vertex normals) + 6, // fresnel (biased vertex normals), no fog + 5, // specular (biased vertex normals) + 5, // specular (biased vertex normals), no fog + 6, // fresnel + specular (biased vertex normals) + 6, // fresnel + specular (biased vertex normals), no fog + + 7, // one light (biased vertex normals) + 7, // one light (biased vertex normals), no fog + 8, // one light (biased vertex normals), fresnel + 8, // one light (biased vertex normals), fresnel, no fog + 7, // one light (biased vertex normals), specular + 7, // one light (biased vertex normals), specular, no fog + 8, // one light (biased vertex normals), fresnel + specular + 8, // one light (biased vertex normals), fresnel + specular, no fog + + 9, // pixel lighting (biased vertex normals) + 9, // pixel lighting (biased vertex normals), no fog + 9, // pixel lighting (biased vertex normals), fresnel + 9, // pixel lighting (biased vertex normals), fresnel, no fog + + 4, // spheremap pixel 
lighting + 4, // spheremap pixel lighting, no fog + 4, // spheremap pixel lighting, fresnel + 4, // spheremap pixel lighting, fresnel, no fog + + 9, // spheremap pixel lighting (biased vertex normals) + 9, // spheremap pixel lighting (biased vertex normals), no fog + 9, // spheremap pixel lighting (biased vertex normals), fresnel + 9, // spheremap pixel lighting (biased vertex normals), fresnel, no fog + + 10, // dual-parabola pixel lighting + 10, // dual-parabola pixel lighting, no fog + 10, // dual-parabola pixel lighting, fresnel + 10, // dual-parabola pixel lighting, fresnel, no fog + + 11, // dual-parabola pixel lighting (biased vertex normals) + 11, // dual-parabola pixel lighting (biased vertex normals), no fog + 11, // dual-parabola pixel lighting (biased vertex normals), fresnel + 11, // dual-parabola pixel lighting (biased vertex normals), fresnel, no fog +}; + + +template<> +const ShaderBytecode EffectBase::PixelShaderBytecode[] = +{ + { EnvironmentMapEffect_PSEnvMap, sizeof(EnvironmentMapEffect_PSEnvMap) }, + { EnvironmentMapEffect_PSEnvMapNoFog, sizeof(EnvironmentMapEffect_PSEnvMapNoFog) }, + { EnvironmentMapEffect_PSEnvMapSpecular, sizeof(EnvironmentMapEffect_PSEnvMapSpecular) }, + { EnvironmentMapEffect_PSEnvMapSpecularNoFog, sizeof(EnvironmentMapEffect_PSEnvMapSpecularNoFog) }, + { EnvironmentMapEffect_PSEnvMapPixelLighting, sizeof(EnvironmentMapEffect_PSEnvMapPixelLighting) }, + { EnvironmentMapEffect_PSEnvMapPixelLightingNoFog, sizeof(EnvironmentMapEffect_PSEnvMapPixelLightingNoFog) }, + { EnvironmentMapEffect_PSEnvMapPixelLightingFresnel, sizeof(EnvironmentMapEffect_PSEnvMapPixelLightingFresnel) }, + { EnvironmentMapEffect_PSEnvMapPixelLightingFresnelNoFog, sizeof(EnvironmentMapEffect_PSEnvMapPixelLightingFresnelNoFog) }, + + { EnvironmentMapEffect_PSEnvMapSpherePixelLighting, sizeof(EnvironmentMapEffect_PSEnvMapSpherePixelLighting) }, + { EnvironmentMapEffect_PSEnvMapSpherePixelLightingNoFog, 
sizeof(EnvironmentMapEffect_PSEnvMapSpherePixelLightingNoFog) }, + { EnvironmentMapEffect_PSEnvMapSpherePixelLightingFresnel, sizeof(EnvironmentMapEffect_PSEnvMapSpherePixelLightingFresnel) }, + { EnvironmentMapEffect_PSEnvMapSpherePixelLightingFresnelNoFog, sizeof(EnvironmentMapEffect_PSEnvMapSpherePixelLightingFresnelNoFog) }, + + { EnvironmentMapEffect_PSEnvMapDualParabolaPixelLighting, sizeof(EnvironmentMapEffect_PSEnvMapDualParabolaPixelLighting) }, + { EnvironmentMapEffect_PSEnvMapDualParabolaPixelLightingNoFog, sizeof(EnvironmentMapEffect_PSEnvMapDualParabolaPixelLightingNoFog) }, + { EnvironmentMapEffect_PSEnvMapDualParabolaPixelLightingFresnel, sizeof(EnvironmentMapEffect_PSEnvMapDualParabolaPixelLightingFresnel) }, + { EnvironmentMapEffect_PSEnvMapDualParabolaPixelLightingFresnelNoFog, sizeof(EnvironmentMapEffect_PSEnvMapDualParabolaPixelLightingFresnelNoFog) }, +}; + + +template<> +const int EffectBase::PixelShaderIndices[] = +{ + 0, // basic + 1, // basic, no fog + 0, // fresnel + 1, // fresnel, no fog + 2, // specular + 3, // specular, no fog + 2, // fresnel + specular + 3, // fresnel + specular, no fog + + 0, // one light + 1, // one light, no fog + 0, // one light, fresnel + 1, // one light, fresnel, no fog + 2, // one light, specular + 3, // one light, specular, no fog + 2, // one light, fresnel + specular + 3, // one light, fresnel + specular, no fog + + 4, // per pixel lighting + 5, // per pixel lighting, no fog + 6, // per pixel lighting, fresnel + 7, // per pixel lighting, fresnel, no fog + + 0, // basic (biased vertex normals) + 1, // basic (biased vertex normals), no fog + 0, // fresnel (biased vertex normals) + 1, // fresnel (biased vertex normals), no fog + 2, // specular (biased vertex normals) + 3, // specular (biased vertex normals), no fog + 2, // fresnel + specular (biased vertex normals) + 3, // fresnel + specular (biased vertex normals), no fog + + 0, // one light (biased vertex normals) + 1, // one light (biased vertex normals), no 
fog + 0, // one light (biased vertex normals), fresnel + 1, // one light (biased vertex normals), fresnel, no fog + 2, // one light (biased vertex normals), specular + 3, // one light (biased vertex normals), specular, no fog + 2, // one light (biased vertex normals), fresnel + specular + 3, // one light (biased vertex normals), fresnel + specular, no fog + + 4, // per pixel lighting (biased vertex normals) + 5, // per pixel lighting (biased vertex normals), no fog + 6, // per pixel lighting (biased vertex normals), fresnel + 7, // per pixel lighting (biased vertex normals), fresnel, no fog + + 8, // spheremap pixel lighting + 9, // spheremap pixel lighting, no fog + 10, // spheremap pixel lighting, fresnel + 11, // spheremap pixel lighting, fresnel, no fog + + 8, // spheremap pixel lighting (biased vertex normals) + 9, // spheremap pixel lighting (biased vertex normals), no fog + 10, // spheremap pixel lighting (biased vertex normals), fresnel + 11, // spheremap pixel lighting (biased vertex normals), fresnel, no fog + + 12, // dual-parabola pixel lighting + 13, // dual-parabola pixel lighting, no fog + 14, // dual-parabola pixel lighting, fresnel + 15, // dual-parabola pixel lighting, fresnel, no fog + + 12, // dual-parabola pixel lighting (biased vertex normals) + 13, // dual-parabola pixel lighting (biased vertex normals), no fog + 14, // dual-parabola pixel lighting (biased vertex normals), fresnel + 15, // dual-parabola pixel lighting (biased vertex normals), fresnel, no fog +}; + + +// Global pool of per-device EnvironmentMapEffect resources. +template<> +SharedResourcePool::DeviceResources> EffectBase::deviceResourcesPool = {}; + + +// Constructor. 
+EnvironmentMapEffect::Impl::Impl(_In_ ID3D11Device* device) + : EffectBase(device), + preferPerPixelLighting(false), + fresnelEnabled(true), + specularEnabled(false), + biasedVertexNormals(false), + mapping(Mapping_Cube) +{ + static_assert(_countof(EffectBase::VertexShaderIndices) == EnvironmentMapEffectTraits::ShaderPermutationCount, "array/max mismatch"); + static_assert(_countof(EffectBase::VertexShaderBytecode) == EnvironmentMapEffectTraits::VertexShaderCount, "array/max mismatch"); + static_assert(_countof(EffectBase::PixelShaderBytecode) == EnvironmentMapEffectTraits::PixelShaderCount, "array/max mismatch"); + static_assert(_countof(EffectBase::PixelShaderIndices) == EnvironmentMapEffectTraits::ShaderPermutationCount, "array/max mismatch"); + + constants.environmentMapAmount = 1; + constants.fresnelFactor = 1; + + XMVECTOR unwantedOutput[MaxDirectionalLights]; + + lights.InitializeConstants(unwantedOutput[0], constants.lightDirection, constants.lightDiffuseColor, unwantedOutput); +} + + +int EnvironmentMapEffect::Impl::GetCurrentShaderPermutation() const noexcept +{ + int permutation = 0; + + // Use optimized shaders if fog is disabled. + if (!fog.enabled) + { + permutation += 1; + } + + // Support fresnel? + if (fresnelEnabled) + { + permutation += 2; + } + + if (mapping == Mapping_Sphere) + { + permutation += 40; + + if (biasedVertexNormals) + { + permutation += 4; + } + } + else if (mapping == Mapping_DualParabola) + { + permutation += 48; + + if (biasedVertexNormals) + { + permutation += 4; + } + } + else // Mapping_Cube + { + if (preferPerPixelLighting) + { + permutation += 16; + } + else + { + // Support specular? + if (specularEnabled) + { + permutation += 4; + } + + // Use the only-bother-with-the-first-light shader optimization? + if (!lights.lightEnabled[1] && !lights.lightEnabled[2]) + { + permutation += 8; + } + } + + if (biasedVertexNormals) + { + // Compressed normals need to be scaled and biased in the vertex shader. 
+ permutation += 20; + } + } + + return permutation; +} + + +// Sets our state onto the D3D device. +void EnvironmentMapEffect::Impl::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + // Compute derived parameter values. + matrices.SetConstants(dirtyFlags, constants.worldViewProj); + + fog.SetConstants(dirtyFlags, matrices.worldView, constants.fogVector); + + lights.SetConstants(dirtyFlags, matrices, constants.world, constants.worldInverseTranspose, constants.eyePosition, constants.diffuseColor, constants.emissiveColor, true); + + // Set the textures. + ID3D11ShaderResourceView* textures[2] = + { + texture.Get(), + environmentMap.Get(), + }; + + deviceContext->PSSetShaderResources(0, 2, textures); + + // Set shaders and constant buffers. + ApplyShaders(deviceContext, GetCurrentShaderPermutation()); +} + + +// Public constructor. +EnvironmentMapEffect::EnvironmentMapEffect(_In_ ID3D11Device* device) + : pImpl(std::make_unique(device)) +{ +} + + +// Move constructor. +EnvironmentMapEffect::EnvironmentMapEffect(EnvironmentMapEffect&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +EnvironmentMapEffect& EnvironmentMapEffect::operator= (EnvironmentMapEffect&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +EnvironmentMapEffect::~EnvironmentMapEffect() +{ +} + + +// IEffect methods. +void EnvironmentMapEffect::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + pImpl->Apply(deviceContext); +} + + +void EnvironmentMapEffect::GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) +{ + pImpl->GetVertexShaderBytecode(pImpl->GetCurrentShaderPermutation(), pShaderByteCode, pByteCodeLength); +} + + +// Camera settings. 
+void XM_CALLCONV EnvironmentMapEffect::SetWorld(FXMMATRIX value) +{ + pImpl->matrices.world = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose | EffectDirtyFlags::FogVector; +} + + +void XM_CALLCONV EnvironmentMapEffect::SetView(FXMMATRIX value) +{ + pImpl->matrices.view = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::EyePosition | EffectDirtyFlags::FogVector; +} + + +void XM_CALLCONV EnvironmentMapEffect::SetProjection(FXMMATRIX value) +{ + pImpl->matrices.projection = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj; +} + + +void XM_CALLCONV EnvironmentMapEffect::SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) +{ + pImpl->matrices.world = world; + pImpl->matrices.view = view; + pImpl->matrices.projection = projection; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose | EffectDirtyFlags::EyePosition | EffectDirtyFlags::FogVector; +} + + +// Material settings. +void XM_CALLCONV EnvironmentMapEffect::SetDiffuseColor(FXMVECTOR value) +{ + pImpl->lights.diffuseColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void XM_CALLCONV EnvironmentMapEffect::SetEmissiveColor(FXMVECTOR value) +{ + pImpl->lights.emissiveColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void EnvironmentMapEffect::SetAlpha(float value) +{ + pImpl->lights.alpha = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void XM_CALLCONV EnvironmentMapEffect::SetColorAndAlpha(FXMVECTOR value) +{ + pImpl->lights.diffuseColor = value; + pImpl->lights.alpha = XMVectorGetW(value); + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +// Light settings. 
+void EnvironmentMapEffect::SetLightingEnabled(bool value) +{ + if (!value) + { + throw std::exception("EnvironmentMapEffect does not support turning off lighting"); + } +} + + +void EnvironmentMapEffect::SetPerPixelLighting(bool value) +{ + pImpl->preferPerPixelLighting = value; +} + + +void XM_CALLCONV EnvironmentMapEffect::SetAmbientLightColor(FXMVECTOR value) +{ + pImpl->lights.ambientLightColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void EnvironmentMapEffect::SetLightEnabled(int whichLight, bool value) +{ + XMVECTOR unwantedOutput[MaxDirectionalLights] = {}; + + pImpl->dirtyFlags |= pImpl->lights.SetLightEnabled(whichLight, value, pImpl->constants.lightDiffuseColor, unwantedOutput); +} + + +void XM_CALLCONV EnvironmentMapEffect::SetLightDirection(int whichLight, FXMVECTOR value) +{ + EffectLights::ValidateLightIndex(whichLight); + + pImpl->constants.lightDirection[whichLight] = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void XM_CALLCONV EnvironmentMapEffect::SetLightDiffuseColor(int whichLight, FXMVECTOR value) +{ + pImpl->dirtyFlags |= pImpl->lights.SetLightDiffuseColor(whichLight, value, pImpl->constants.lightDiffuseColor); +} + + +void XM_CALLCONV EnvironmentMapEffect::SetLightSpecularColor(int, FXMVECTOR) +{ + // Unsupported interface method. +} + + +void EnvironmentMapEffect::EnableDefaultLighting() +{ + EffectLights::EnableDefaultLighting(this); +} + + +// Fog settings. 
+void EnvironmentMapEffect::SetFogEnabled(bool value) +{ + pImpl->fog.enabled = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::FogEnable; +} + + +void EnvironmentMapEffect::SetFogStart(float value) +{ + pImpl->fog.start = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::FogVector; +} + + +void EnvironmentMapEffect::SetFogEnd(float value) +{ + pImpl->fog.end = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::FogVector; +} + + +void XM_CALLCONV EnvironmentMapEffect::SetFogColor(FXMVECTOR value) +{ + pImpl->constants.fogColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +// Texture settings. +void EnvironmentMapEffect::SetTexture(_In_opt_ ID3D11ShaderResourceView* value) +{ + pImpl->texture = value; +} + + +void EnvironmentMapEffect::SetEnvironmentMap(_In_opt_ ID3D11ShaderResourceView* value) +{ + pImpl->environmentMap = value; +} + + +// Additional settings. +void EnvironmentMapEffect::SetMode(EnvironmentMapEffect::Mapping mapping) +{ + if (static_cast(mapping) < 0 || static_cast(mapping) >= EnvironmentMapEffectTraits::MappingCount) + { + throw std::invalid_argument("Unsupported mapping"); + } + + if (mapping == Mapping_DualParabola) + { + if (pImpl->GetDeviceFeatureLevel() < D3D_FEATURE_LEVEL_10_0) + { + throw std::exception("Dual Parabola requires Feature Level 10.0 or later"); + } + } + + pImpl->mapping = mapping; +} + + +void EnvironmentMapEffect::SetEnvironmentMapAmount(float value) +{ + pImpl->constants.environmentMapAmount = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void XM_CALLCONV EnvironmentMapEffect::SetEnvironmentMapSpecular(FXMVECTOR value) +{ + pImpl->constants.environmentMapSpecular = value; + + pImpl->specularEnabled = !XMVector3Equal(value, XMVectorZero()); + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void EnvironmentMapEffect::SetFresnelFactor(float value) +{ + pImpl->constants.fresnelFactor = value; + + pImpl->fresnelEnabled = (value != 0); + + 
pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +// Normal compression settings. +void EnvironmentMapEffect::SetBiasedVertexNormals(bool value) +{ + pImpl->biasedVertexNormals = value; +} diff --git a/Sdk/External/DirectXTK/Src/GamePad.cpp b/Sdk/External/DirectXTK/Src/GamePad.cpp new file mode 100644 index 0000000..dd089df --- /dev/null +++ b/Sdk/External/DirectXTK/Src/GamePad.cpp @@ -0,0 +1,1753 @@ +//-------------------------------------------------------------------------------------- +// File: GamePad.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#include "pch.h" + +#include "GamePad.h" +#include "PlatformHelpers.h" + +using namespace DirectX; +using Microsoft::WRL::ComPtr; + + +namespace +{ + constexpr float c_XboxOneThumbDeadZone = .24f; // Recommended Xbox One controller deadzone + + float ApplyLinearDeadZone(float value, float maxValue, float deadZoneSize) noexcept + { + if (value < -deadZoneSize) + { + // Increase negative values to remove the deadzone discontinuity. + value += deadZoneSize; + } + else if (value > deadZoneSize) + { + // Decrease positive values to remove the deadzone discontinuity. + value -= deadZoneSize; + } + else + { + // Values inside the deadzone come out zero. + return 0; + } + + // Scale into 0-1 range. 
+ float scaledValue = value / (maxValue - deadZoneSize); + return std::max(-1.f, std::min(scaledValue, 1.f)); + } + + void ApplyStickDeadZone( + float x, + float y, + GamePad::DeadZone deadZoneMode, + float maxValue, + float deadZoneSize, + _Out_ float& resultX, + _Out_ float& resultY) noexcept + { + switch (deadZoneMode) + { + case GamePad::DEAD_ZONE_INDEPENDENT_AXES: + resultX = ApplyLinearDeadZone(x, maxValue, deadZoneSize); + resultY = ApplyLinearDeadZone(y, maxValue, deadZoneSize); + break; + + case GamePad::DEAD_ZONE_CIRCULAR: + { + float dist = sqrtf(x*x + y * y); + float wanted = ApplyLinearDeadZone(dist, maxValue, deadZoneSize); + + float scale = (wanted > 0.f) ? (wanted / dist) : 0.f; + + resultX = std::max(-1.f, std::min(x * scale, 1.f)); + resultY = std::max(-1.f, std::min(y * scale, 1.f)); + } + break; + + default: // GamePad::DEAD_ZONE_NONE + resultX = ApplyLinearDeadZone(x, maxValue, 0); + resultY = ApplyLinearDeadZone(y, maxValue, 0); + break; + } + } +} + + +#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_GAMES) + +#include + +//====================================================================================== +// GameInput +//====================================================================================== + +class GamePad::Impl +{ +public: + Impl(GamePad* owner) : + mOwner(owner), + mCtrlChanged(INVALID_HANDLE_VALUE), + mDeviceToken(0), + mMostRecentGamepad(0) + { + if (s_gamePad) + { + throw std::exception("GamePad is a singleton"); + } + + s_gamePad = this; + + ThrowIfFailed(GameInputCreate(mGameInput.GetAddressOf())); + + ThrowIfFailed(mGameInput->RegisterDeviceCallback( + nullptr, + GameInputKindGamepad, + GameInputDeviceConnected, + GameInputBlockingEnumeration, + this, + OnGameInputDevice, + &mDeviceToken)); + } + + Impl(Impl&&) = default; + Impl& operator= (Impl&&) = default; + + Impl(Impl const&) = delete; + Impl& operator= (Impl const&) = delete; + + ~Impl() + { + if (mDeviceToken) + { + if (mGameInput) + { + 
HRESULT hr = mGameInput->UnregisterCallback(mDeviceToken, UINT64_MAX); + if (FAILED(hr)) + { + DebugTrace("ERROR: GameInput::UnregisterCallback [gamepad] failed (%08X)", static_cast(hr)); + } + } + + mDeviceToken = 0; + } + + s_gamePad = nullptr; + } + + void GetState(int player, _Out_ State& state, DeadZone deadZoneMode) + { + memset(&state, 0, sizeof(State)); + + IGameInputDevice* device = nullptr; + + if (player >= 0 && player < MAX_PLAYER_COUNT) + { + device = mInputDevices[player].Get(); + if (!device) + return; + } + else if (player == c_MostRecent) + { + player = mMostRecentGamepad; + assert(player >= 0 && player < MAX_PLAYER_COUNT); + device = mInputDevices[player].Get(); + if (!device) + return; + } + else if (player != c_MergedInput) + { + return; + } + + ComPtr reading; + if (SUCCEEDED(mGameInput->GetCurrentReading(GameInputKindGamepad, device, reading.GetAddressOf()))) + { + GameInputGamepadState pad; + if (reading->GetGamepadState(&pad)) + { + state.connected = true; + state.packet = reading->GetSequenceNumber(GameInputKindGamepad); + + state.buttons.a = (pad.buttons & GameInputGamepadA) != 0; + state.buttons.b = (pad.buttons & GameInputGamepadB) != 0; + state.buttons.x = (pad.buttons & GameInputGamepadX) != 0; + state.buttons.y = (pad.buttons & GameInputGamepadY) != 0; + state.buttons.leftStick = (pad.buttons & GameInputGamepadLeftThumbstick) != 0; + state.buttons.rightStick = (pad.buttons & GameInputGamepadRightThumbstick) != 0; + state.buttons.leftShoulder = (pad.buttons & GameInputGamepadLeftShoulder) != 0; + state.buttons.rightShoulder = (pad.buttons & GameInputGamepadRightShoulder) != 0; + state.buttons.view = (pad.buttons & GameInputGamepadView) != 0; + state.buttons.menu = (pad.buttons & GameInputGamepadMenu) != 0; + + state.dpad.up = (pad.buttons & GameInputGamepadDPadUp) != 0; + state.dpad.down = (pad.buttons & GameInputGamepadDPadDown) != 0; + state.dpad.right = (pad.buttons & GameInputGamepadDPadRight) != 0; + state.dpad.left = (pad.buttons 
& GameInputGamepadDPadLeft) != 0; + + ApplyStickDeadZone(pad.leftThumbstickX, pad.leftThumbstickY, + deadZoneMode, 1.f, c_XboxOneThumbDeadZone, + state.thumbSticks.leftX, state.thumbSticks.leftY); + + ApplyStickDeadZone(pad.rightThumbstickX, pad.rightThumbstickY, + deadZoneMode, 1.f, c_XboxOneThumbDeadZone, + state.thumbSticks.rightX, state.thumbSticks.rightY); + + state.triggers.left = pad.leftTrigger; + state.triggers.right = pad.rightTrigger; + } + } + } + + void GetCapabilities(int player, _Out_ Capabilities& caps) + { + if (player == c_MostRecent) + player = mMostRecentGamepad; + + if (player >= 0 && player < MAX_PLAYER_COUNT) + { + IGameInputDevice* device = mInputDevices[player].Get(); + if (device) + { + if (device->GetDeviceStatus() & GameInputDeviceConnected) + { + auto deviceInfo = device->GetDeviceInfo(); + caps.connected = true; + caps.gamepadType = Capabilities::GAMEPAD; + caps.id = deviceInfo->deviceId; + caps.vid = deviceInfo->vendorId; + caps.pid = deviceInfo->productId; + return; + } + else + { + mInputDevices[player].Reset(); + } + } + } + + memset(&caps, 0, sizeof(Capabilities)); + } + + bool SetVibration(int player, float leftMotor, float rightMotor, float leftTrigger, float rightTrigger) noexcept + { + if (player == c_MostRecent) + player = mMostRecentGamepad; + + if (player >= 0 && player < MAX_PLAYER_COUNT) + { + IGameInputDevice* device = mInputDevices[player].Get(); + if (device) + { + GameInputRumbleParams const params = + { + leftMotor, + rightMotor, + leftTrigger, + rightTrigger + }; + + device->SetRumbleState(¶ms); + return true; + } + } + + return false; + } + + void Suspend() noexcept + { + for (int player = 0; player < MAX_PLAYER_COUNT; ++player) + { + IGameInputDevice* device = mInputDevices[player].Get(); + if (device) + { + device->SetRumbleState(nullptr); + } + } + } + + void Resume() noexcept + { + for (int player = 0; player < MAX_PLAYER_COUNT; ++player) + { + IGameInputDevice* device = mInputDevices[player].Get(); + if 
(device) + { + if (!(device->GetDeviceStatus() & GameInputDeviceConnected)) + { + mInputDevices[player].Reset(); + } + } + } + } + + void GetDevice(int player, _Outptr_ IGameInputDevice** device) noexcept + { + if (!device) + return; + + if (player == c_MostRecent) + player = mMostRecentGamepad; + + *device = nullptr; + + if (player >= 0 && player < MAX_PLAYER_COUNT) + { + IGameInputDevice* dev = mInputDevices[player].Get(); + if (dev) + { + dev->AddRef(); + *device = dev; + } + } + } + + GamePad* mOwner; + + static GamePad::Impl* s_gamePad; + + HANDLE mCtrlChanged; + +private: + ComPtr mGameInput; + ComPtr mInputDevices[MAX_PLAYER_COUNT]; + + GameInputCallbackToken mDeviceToken; + + int mMostRecentGamepad; + + static void CALLBACK OnGameInputDevice( + _In_ GameInputCallbackToken, + _In_ void * context, + _In_ IGameInputDevice * device, + _In_ uint64_t, + _In_ GameInputDeviceStatus currentStatus, + _In_ GameInputDeviceStatus) noexcept + { + auto impl = reinterpret_cast(context); + + if (currentStatus & GameInputDeviceConnected) + { + size_t empty = MAX_PLAYER_COUNT; + size_t k = 0; + for (; k < MAX_PLAYER_COUNT; ++k) + { + if (impl->mInputDevices[k].Get() == device) + { + impl->mMostRecentGamepad = static_cast(k); + break; + } + else if (!impl->mInputDevices[k]) + { + if (empty >= MAX_PLAYER_COUNT) + empty = k; + } + } + + if (k >= MAX_PLAYER_COUNT) + { + // Silently ignore "extra" gamepads as there's no hard limit + if (empty < MAX_PLAYER_COUNT) + { + impl->mInputDevices[empty] = device; + impl->mMostRecentGamepad = static_cast(empty); + } + } + } + else + { + for (size_t k = 0; k < MAX_PLAYER_COUNT; ++k) + { + if (impl->mInputDevices[k].Get() == device) + { + impl->mInputDevices[k].Reset(); + break; + } + } + } + + if (impl->mCtrlChanged != INVALID_HANDLE_VALUE) + { + SetEvent(impl->mCtrlChanged); + } + } +}; + +GamePad::Impl* GamePad::Impl::s_gamePad = nullptr; + + +#elif (_WIN32_WINNT >= _WIN32_WINNT_WIN10) && !defined(_GAMING_DESKTOP) + 
+//====================================================================================== +// Windows::Gaming::Input (Windows 10) +//====================================================================================== + +#pragma warning(push) +#pragma warning(disable : 4471 5204) +#include +#pragma warning(pop) + +class GamePad::Impl +{ +public: + Impl(GamePad* owner) : + mOwner(owner), + mCtrlChanged(INVALID_HANDLE_VALUE), + mUserChanged(INVALID_HANDLE_VALUE), + mMostRecentGamepad(0), + mStatics{}, + mGamePad{}, + mUserChangeToken{}, + mAddedToken{}, + mRemovedToken{}, + mChanged{} + { + using namespace Microsoft::WRL; + using namespace Microsoft::WRL::Wrappers; + using namespace ABI::Windows::Foundation; + + if (s_gamePad) + { + throw std::exception("GamePad is a singleton"); + } + + s_gamePad = this; + + mChanged.reset(CreateEventEx(nullptr, nullptr, 0, EVENT_MODIFY_STATE | SYNCHRONIZE)); + if (!mChanged) + { + throw std::exception("CreateEventEx"); + } + + ThrowIfFailed(GetActivationFactory(HStringReference(RuntimeClass_Windows_Gaming_Input_Gamepad).Get(), mStatics.GetAddressOf())); + + typedef __FIEventHandler_1_Windows__CGaming__CInput__CGamepad AddedHandler; + ThrowIfFailed(mStatics->add_GamepadAdded(Callback(GamepadAdded).Get(), &mAddedToken)); + + typedef __FIEventHandler_1_Windows__CGaming__CInput__CGamepad RemovedHandler; + ThrowIfFailed(mStatics->add_GamepadRemoved(Callback(GamepadRemoved).Get(), &mRemovedToken)); + + ScanGamePads(); + } + + Impl(Impl&&) = default; + Impl& operator= (Impl&&) = default; + + Impl(Impl const&) = delete; + Impl& operator= (Impl const&) = delete; + + ~Impl() + { + using namespace ABI::Windows::Gaming::Input; + + for (size_t j = 0; j < MAX_PLAYER_COUNT; ++j) + { + if (mGamePad[j]) + { + ComPtr ctrl; + HRESULT hr = mGamePad[j].As(&ctrl); + if (SUCCEEDED(hr) && ctrl) + { + (void)ctrl->remove_UserChanged(mUserChangeToken[j]); + mUserChangeToken[j].value = 0; + } + + mGamePad[j].Reset(); + } + } + + if (mStatics) + { + 
(void)mStatics->remove_GamepadAdded(mAddedToken); + mAddedToken.value = 0; + + (void)mStatics->remove_GamepadRemoved(mRemovedToken); + mRemovedToken.value = 0; + + mStatics.Reset(); + } + + s_gamePad = nullptr; + } + + void GetState(int player, _Out_ State& state, DeadZone deadZoneMode) + { + using namespace Microsoft::WRL; + using namespace ABI::Windows::Gaming::Input; + + if (WaitForSingleObjectEx(mChanged.get(), 0, FALSE) == WAIT_OBJECT_0) + { + ScanGamePads(); + } + + if (player == c_MostRecent) + player = mMostRecentGamepad; + + if ((player >= 0) && (player < MAX_PLAYER_COUNT)) + { + if (mGamePad[player]) + { + GamepadReading reading; + HRESULT hr = mGamePad[player]->GetCurrentReading(&reading); + if (SUCCEEDED(hr)) + { + state.connected = true; + state.packet = reading.Timestamp; + + state.buttons.a = (reading.Buttons & GamepadButtons::GamepadButtons_A) != 0; + state.buttons.b = (reading.Buttons & GamepadButtons::GamepadButtons_B) != 0; + state.buttons.x = (reading.Buttons & GamepadButtons::GamepadButtons_X) != 0; + state.buttons.y = (reading.Buttons & GamepadButtons::GamepadButtons_Y) != 0; + + state.buttons.leftStick = (reading.Buttons & GamepadButtons::GamepadButtons_LeftThumbstick) != 0; + state.buttons.rightStick = (reading.Buttons & GamepadButtons::GamepadButtons_RightThumbstick) != 0; + + state.buttons.leftShoulder = (reading.Buttons & GamepadButtons::GamepadButtons_LeftShoulder) != 0; + state.buttons.rightShoulder = (reading.Buttons & GamepadButtons::GamepadButtons_RightShoulder) != 0; + + state.buttons.back = (reading.Buttons & GamepadButtons::GamepadButtons_View) != 0; + state.buttons.start = (reading.Buttons & GamepadButtons::GamepadButtons_Menu) != 0; + + state.dpad.up = (reading.Buttons & GamepadButtons::GamepadButtons_DPadUp) != 0; + state.dpad.down = (reading.Buttons & GamepadButtons::GamepadButtons_DPadDown) != 0; + state.dpad.right = (reading.Buttons & GamepadButtons::GamepadButtons_DPadRight) != 0; + state.dpad.left = (reading.Buttons & 
GamepadButtons::GamepadButtons_DPadLeft) != 0; + + ApplyStickDeadZone(static_cast(reading.LeftThumbstickX), static_cast(reading.LeftThumbstickY), + deadZoneMode, 1.f, c_XboxOneThumbDeadZone, + state.thumbSticks.leftX, state.thumbSticks.leftY); + + ApplyStickDeadZone(static_cast(reading.RightThumbstickX), static_cast(reading.RightThumbstickY), + deadZoneMode, 1.f, c_XboxOneThumbDeadZone, + state.thumbSticks.rightX, state.thumbSticks.rightY); + + state.triggers.left = static_cast(reading.LeftTrigger); + state.triggers.right = static_cast(reading.RightTrigger); + + return; + } + } + } + + memset(&state, 0, sizeof(State)); + } + + void GetCapabilities(int player, Capabilities& caps) + { + using namespace Microsoft::WRL; + using namespace Microsoft::WRL::Wrappers; + using namespace ABI::Windows::Foundation; + using namespace ABI::Windows::System; + using namespace ABI::Windows::Gaming::Input; + + if (WaitForSingleObjectEx(mChanged.get(), 0, FALSE) == WAIT_OBJECT_0) + { + ScanGamePads(); + } + + if (player == c_MostRecent) + player = mMostRecentGamepad; + + if ((player >= 0) && (player < MAX_PLAYER_COUNT)) + { + if (mGamePad[player]) + { + caps.connected = true; + caps.gamepadType = Capabilities::GAMEPAD; + caps.id.clear(); + caps.vid = caps.pid = 0; + + ComPtr ctrl; + HRESULT hr = mGamePad[player].As(&ctrl); + if (SUCCEEDED(hr) && ctrl) + { + ComPtr user; + hr = ctrl->get_User(user.GetAddressOf()); + if (SUCCEEDED(hr) && user != nullptr) + { + HString str; + hr = user->get_NonRoamableId(str.GetAddressOf()); + if (SUCCEEDED(hr)) + { + caps.id = str.GetRawBuffer(nullptr); + } + } + + // Requires the Windows 10 Creators Update SDK (15063) + #if defined(NTDDI_WIN10_RS2) && (NTDDI_VERSION >= NTDDI_WIN10_RS2) + ComPtr rawStatics; + hr = GetActivationFactory(HStringReference(RuntimeClass_Windows_Gaming_Input_RawGameController).Get(), rawStatics.GetAddressOf()); + if (SUCCEEDED(hr)) + { + ComPtr raw; + hr = rawStatics->FromGameController(ctrl.Get(), raw.GetAddressOf()); + if 
(SUCCEEDED(hr) && raw) + { + if (FAILED(raw->get_HardwareVendorId(&caps.vid))) + caps.vid = 0; + + if (FAILED(raw->get_HardwareProductId(&caps.pid))) + caps.pid = 0; + } + } + #endif // NTDDI_WIN10_RS2 + } + return; + } + } + + caps.id.clear(); + caps = {}; + } + + bool SetVibration(int player, float leftMotor, float rightMotor, float leftTrigger, float rightTrigger) noexcept + { + using namespace ABI::Windows::Gaming::Input; + + if (player == c_MostRecent) + player = mMostRecentGamepad; + + if ((player >= 0) && (player < MAX_PLAYER_COUNT)) + { + if (mGamePad[player]) + { + GamepadVibration vib; + vib.LeftMotor = double(leftMotor); + vib.RightMotor = double(rightMotor); + vib.LeftTrigger = double(leftTrigger); + vib.RightTrigger = double(rightTrigger); + HRESULT hr = mGamePad[player]->put_Vibration(vib); + + if (SUCCEEDED(hr)) + return true; + } + } + + return false; + } + + void Suspend() noexcept + { + for (size_t j = 0; j < MAX_PLAYER_COUNT; ++j) + { + mGamePad[j].Reset(); + } + } + + void Resume() noexcept + { + // Make sure we rescan gamepads + SetEvent(mChanged.get()); + } + + GamePad* mOwner; + + static GamePad::Impl* s_gamePad; + + HANDLE mCtrlChanged; + HANDLE mUserChanged; + +private: + int mMostRecentGamepad; + + void ScanGamePads() + { + using namespace Microsoft::WRL; + using namespace ABI::Windows::Foundation::Collections; + using namespace ABI::Windows::Gaming::Input; + + ComPtr> pads; + ThrowIfFailed(mStatics->get_Gamepads(pads.GetAddressOf())); + + unsigned int count = 0; + ThrowIfFailed(pads->get_Size(&count)); + + // Check for removed gamepads + for (size_t j = 0; j < MAX_PLAYER_COUNT; ++j) + { + if (mGamePad[j]) + { + unsigned int k = 0; + for (; k < count; ++k) + { + ComPtr pad; + HRESULT hr = pads->GetAt(k, pad.GetAddressOf()); + if (SUCCEEDED(hr) && (pad == mGamePad[j])) + { + break; + } + } + + if (k >= count) + { + ComPtr ctrl; + HRESULT hr = mGamePad[j].As(&ctrl); + if (SUCCEEDED(hr) && ctrl) + { + 
(void)ctrl->remove_UserChanged(mUserChangeToken[j]); + mUserChangeToken[j].value = 0; + } + + mGamePad[j].Reset(); + } + } + } + + // Check for added gamepads + for (unsigned int j = 0; j < count; ++j) + { + ComPtr pad; + HRESULT hr = pads->GetAt(j, pad.GetAddressOf()); + if (SUCCEEDED(hr)) + { + size_t empty = MAX_PLAYER_COUNT; + size_t k = 0; + for (; k < MAX_PLAYER_COUNT; ++k) + { + if (mGamePad[k] == pad) + { + if (j == (count - 1)) + mMostRecentGamepad = static_cast(k); + break; + } + else if (!mGamePad[k]) + { + if (empty >= MAX_PLAYER_COUNT) + empty = k; + } + } + + if (k >= MAX_PLAYER_COUNT) + { + // Silently ignore "extra" gamepads as there's no hard limit + if (empty < MAX_PLAYER_COUNT) + { + mGamePad[empty] = pad; + if (j == (count - 1)) + mMostRecentGamepad = static_cast(empty); + + ComPtr ctrl; + hr = pad.As(&ctrl); + if (SUCCEEDED(hr) && ctrl) + { + typedef __FITypedEventHandler_2_Windows__CGaming__CInput__CIGameController_Windows__CSystem__CUserChangedEventArgs UserHandler; + ThrowIfFailed(ctrl->add_UserChanged(Callback(UserChanged).Get(), &mUserChangeToken[empty])); + } + } + } + } + } + } + + ComPtr mStatics; + ComPtr mGamePad[MAX_PLAYER_COUNT]; + EventRegistrationToken mUserChangeToken[MAX_PLAYER_COUNT]; + + EventRegistrationToken mAddedToken; + EventRegistrationToken mRemovedToken; + + ScopedHandle mChanged; + + static HRESULT GamepadAdded(IInspectable *, ABI::Windows::Gaming::Input::IGamepad*) + { + if (s_gamePad) + { + SetEvent(s_gamePad->mChanged.get()); + + if (s_gamePad->mCtrlChanged != INVALID_HANDLE_VALUE) + { + SetEvent(s_gamePad->mCtrlChanged); + } + } + return S_OK; + } + + static HRESULT GamepadRemoved(IInspectable *, ABI::Windows::Gaming::Input::IGamepad*) + { + if (s_gamePad) + { + SetEvent(s_gamePad->mChanged.get()); + + if (s_gamePad->mCtrlChanged != INVALID_HANDLE_VALUE) + { + SetEvent(s_gamePad->mCtrlChanged); + } + } + return S_OK; + } + + static HRESULT UserChanged(ABI::Windows::Gaming::Input::IGameController*, 
ABI::Windows::System::IUserChangedEventArgs*) + { + if (s_gamePad) + { + if (s_gamePad->mUserChanged != INVALID_HANDLE_VALUE) + { + SetEvent(s_gamePad->mUserChanged); + } + } + return S_OK; + } +}; + +GamePad::Impl* GamePad::Impl::s_gamePad = nullptr; + + +#elif defined(_XBOX_ONE) + +//====================================================================================== +// Windows::Xbox::Input (Xbox One) +//====================================================================================== + +#include + +#include + +class GamePad::Impl +{ +public: + class GamepadAddedListener : public Microsoft::WRL::RuntimeClass, + ABI::Windows::Foundation::IEventHandler, + Microsoft::WRL::FtmBase> + { + public: + GamepadAddedListener(HANDLE event) : mEvent(event) {} + + STDMETHOD(Invoke)(_In_ IInspectable *, _In_ ABI::Windows::Xbox::Input::IGamepadAddedEventArgs *) override + { + SetEvent(mEvent); + + auto pad = GamePad::Impl::s_gamePad; + + if (pad && pad->mCtrlChanged != INVALID_HANDLE_VALUE) + { + SetEvent(pad->mCtrlChanged); + } + return S_OK; + } + + private: + HANDLE mEvent; + }; + + class GamepadRemovedListener : public Microsoft::WRL::RuntimeClass, + ABI::Windows::Foundation::IEventHandler, + Microsoft::WRL::FtmBase> + { + public: + GamepadRemovedListener(HANDLE event) : mEvent(event) {} + + STDMETHOD(Invoke)(_In_ IInspectable *, _In_ ABI::Windows::Xbox::Input::IGamepadRemovedEventArgs *) override + { + SetEvent(mEvent); + + auto pad = GamePad::Impl::s_gamePad; + + if (pad && pad->mCtrlChanged != INVALID_HANDLE_VALUE) + { + SetEvent(pad->mCtrlChanged); + } + return S_OK; + } + + private: + HANDLE mEvent; + }; + + class UserPairingListener : public Microsoft::WRL::RuntimeClass, + ABI::Windows::Foundation::IEventHandler, + Microsoft::WRL::FtmBase> + { + public: + UserPairingListener() noexcept {} + + STDMETHOD(Invoke)(_In_ IInspectable *, _In_ ABI::Windows::Xbox::Input::IControllerPairingChangedEventArgs *) override + { + auto pad = GamePad::Impl::s_gamePad; + + if 
(pad && pad->mUserChanged != INVALID_HANDLE_VALUE) + { + SetEvent(pad->mUserChanged); + } + return S_OK; + } + }; + + Impl(GamePad *owner) : + mOwner(owner), + mCtrlChanged(INVALID_HANDLE_VALUE), + mUserChanged(INVALID_HANDLE_VALUE), + mMostRecentGamepad(0), + mStatics{}, + mStaticsCtrl{}, + mGamePad{}, + mAddedToken{}, + mRemovedToken{}, + mUserParingToken{}, + mChanged{} + { + using namespace Microsoft::WRL; + using namespace Microsoft::WRL::Wrappers; + using namespace ABI::Windows::Foundation; + + if (s_gamePad) + { + throw std::exception("GamePad is a singleton"); + } + + s_gamePad = this; + + mChanged.reset(CreateEventEx(nullptr, nullptr, 0, EVENT_MODIFY_STATE | SYNCHRONIZE)); + if (!mChanged) + { + throw std::exception("CreateEventEx"); + } + + ThrowIfFailed(GetActivationFactory(HStringReference(RuntimeClass_Windows_Xbox_Input_Gamepad).Get(), mStatics.GetAddressOf())); + + ThrowIfFailed(GetActivationFactory(HStringReference(RuntimeClass_Windows_Xbox_Input_Controller).Get(), mStaticsCtrl.GetAddressOf())); + + ThrowIfFailed(mStatics->add_GamepadAdded(Make(mChanged.get()).Get(), &mAddedToken)); + + ThrowIfFailed(mStatics->add_GamepadRemoved(Make(mChanged.get()).Get(), &mRemovedToken)); + + ThrowIfFailed(mStaticsCtrl->add_ControllerPairingChanged(Make().Get(), &mUserParingToken)); + + ScanGamePads(); + } + + ~Impl() + { + if (mStatics) + { + (void)mStatics->remove_GamepadAdded(mAddedToken); + mAddedToken.value = 0; + + (void)mStatics->remove_GamepadRemoved(mRemovedToken); + mRemovedToken.value = 0; + + mStatics.Reset(); + } + + if (mStaticsCtrl) + { + (void)mStaticsCtrl->remove_ControllerPairingChanged(mUserParingToken); + mUserParingToken.value = 0; + + mStaticsCtrl.Reset(); + } + + s_gamePad = nullptr; + } + + void GetState(int player, _Out_ State& state, DeadZone deadZoneMode) + { + using namespace Microsoft::WRL; + using namespace ABI::Windows::Xbox::Input; + + if (WaitForSingleObjectEx(mChanged.get(), 0, FALSE) == WAIT_OBJECT_0) + { + ScanGamePads(); + } + + 
if (player == c_MostRecent) + player = mMostRecentGamepad; + + if ((player >= 0) && (player < MAX_PLAYER_COUNT)) + { + if (mGamePad[player]) + { + RawGamepadReading reading; + HRESULT hr = mGamePad[player]->GetRawCurrentReading(&reading); + if (SUCCEEDED(hr)) + { + state.connected = true; + state.packet = reading.Timestamp; + + state.buttons.a = (reading.Buttons & GamepadButtons::GamepadButtons_A) != 0; + state.buttons.b = (reading.Buttons & GamepadButtons::GamepadButtons_B) != 0; + state.buttons.x = (reading.Buttons & GamepadButtons::GamepadButtons_X) != 0; + state.buttons.y = (reading.Buttons & GamepadButtons::GamepadButtons_Y) != 0; + + state.buttons.leftStick = (reading.Buttons & GamepadButtons::GamepadButtons_LeftThumbstick) != 0; + state.buttons.rightStick = (reading.Buttons & GamepadButtons::GamepadButtons_RightThumbstick) != 0; + + state.buttons.leftShoulder = (reading.Buttons & GamepadButtons::GamepadButtons_LeftShoulder) != 0; + state.buttons.rightShoulder = (reading.Buttons & GamepadButtons::GamepadButtons_RightShoulder) != 0; + + state.buttons.back = (reading.Buttons & GamepadButtons::GamepadButtons_View) != 0; + state.buttons.start = (reading.Buttons & GamepadButtons::GamepadButtons_Menu) != 0; + + state.dpad.up = (reading.Buttons & GamepadButtons::GamepadButtons_DPadUp) != 0; + state.dpad.down = (reading.Buttons & GamepadButtons::GamepadButtons_DPadDown) != 0; + state.dpad.right = (reading.Buttons & GamepadButtons::GamepadButtons_DPadRight) != 0; + state.dpad.left = (reading.Buttons & GamepadButtons::GamepadButtons_DPadLeft) != 0; + + ApplyStickDeadZone(reading.LeftThumbstickX, reading.LeftThumbstickY, + deadZoneMode, 1.f, c_XboxOneThumbDeadZone, + state.thumbSticks.leftX, state.thumbSticks.leftY); + + ApplyStickDeadZone(reading.RightThumbstickX, reading.RightThumbstickY, + deadZoneMode, 1.f, c_XboxOneThumbDeadZone, + state.thumbSticks.rightX, state.thumbSticks.rightY); + + state.triggers.left = reading.LeftTrigger; + state.triggers.right = 
reading.RightTrigger; + + return; + } + } + } + + memset(&state, 0, sizeof(State)); + } + + void GetCapabilities(int player, _Out_ Capabilities& caps) + { + using namespace Microsoft::WRL; + using namespace ABI::Windows::Xbox::Input; + + if (WaitForSingleObjectEx(mChanged.get(), 0, FALSE) == WAIT_OBJECT_0) + { + ScanGamePads(); + } + + if (player == c_MostRecent) + player = mMostRecentGamepad; + + if ((player >= 0) && (player < MAX_PLAYER_COUNT)) + { + if (mGamePad[player]) + { + caps.connected = true; + caps.gamepadType = Capabilities::UNKNOWN; + caps.id = 0; + caps.vid = caps.pid = 0; + + ComPtr ctrl; + HRESULT hr = mGamePad[player].As(&ctrl); + if (SUCCEEDED(hr) && ctrl) + { + hr = ctrl->get_Id(&caps.id); + if (FAILED(hr)) + caps.id = 0; + + Wrappers::HString str; + hr = ctrl->get_Type(str.GetAddressOf()); + if (SUCCEEDED(hr)) + { + const wchar_t* typeStr = str.GetRawBuffer(nullptr); + if (_wcsicmp(typeStr, L"Windows.Xbox.Input.Gamepad") == 0) + { + caps.gamepadType = Capabilities::GAMEPAD; + } + else if (_wcsicmp(typeStr, L"Microsoft.Xbox.Input.ArcadeStick") == 0) + { + caps.gamepadType = Capabilities::ARCADE_STICK; + } + else if (_wcsicmp(typeStr, L"Microsoft.Xbox.Input.Wheel") == 0) + { + caps.gamepadType = Capabilities::WHEEL; + } + } + } + + #if _XDK_VER >= 0x42ED07E4 /* XDK Edition 180400 */ + ComPtr ctrl3; + hr = mGamePad[player].As(&ctrl3); + if (SUCCEEDED(hr) && ctrl3) + { + if (FAILED(ctrl3->get_HardwareVendorId(&caps.vid))) + caps.vid = 0; + + if (FAILED(ctrl3->get_HardwareProductId(&caps.pid))) + caps.pid = 0; + } + #endif + + return; + } + } + + memset(&caps, 0, sizeof(Capabilities)); + } + + bool SetVibration(int player, float leftMotor, float rightMotor, float leftTrigger, float rightTrigger) noexcept + { + using namespace ABI::Windows::Xbox::Input; + + if (player == c_MostRecent) + player = mMostRecentGamepad; + + if ((player >= 0) && (player < MAX_PLAYER_COUNT)) + { + if (mGamePad[player]) + { + HRESULT hr; + try + { + GamepadVibration vib; + 
vib.LeftMotorLevel = leftMotor; + vib.RightMotorLevel = rightMotor; + vib.LeftTriggerLevel = leftTrigger; + vib.RightTriggerLevel = rightTrigger; + hr = mGamePad[player]->SetVibration(vib); + } + catch (...) + { + // Handle case where gamepad might be invalid + hr = E_FAIL; + } + + if (SUCCEEDED(hr)) + return true; + } + } + + return false; + } + + void Suspend() noexcept + { + for (size_t j = 0; j < MAX_PLAYER_COUNT; ++j) + { + mGamePad[j].Reset(); + } + } + + void Resume() noexcept + { + // Make sure we rescan gamepads + SetEvent(mChanged.get()); + } + + GamePad* mOwner; + + static GamePad::Impl* s_gamePad; + + HANDLE mCtrlChanged; + HANDLE mUserChanged; + +private: + int mMostRecentGamepad; + + void ScanGamePads() + { + using namespace ABI::Windows::Foundation::Collections; + using namespace ABI::Windows::Xbox::Input; + + ComPtr> pads; + ThrowIfFailed(mStatics->get_Gamepads(pads.GetAddressOf())); + + unsigned int count = 0; + ThrowIfFailed(pads->get_Size(&count)); + + // Check for removed gamepads + for (size_t j = 0; j < MAX_PLAYER_COUNT; ++j) + { + if (mGamePad[j]) + { + unsigned int k = 0; + for (; k < count; ++k) + { + ComPtr pad; + HRESULT hr = pads->GetAt(k, pad.GetAddressOf()); + if (SUCCEEDED(hr) && (pad == mGamePad[j])) + { + break; + } + } + + if (k >= count) + { + mGamePad[j].Reset(); + } + } + } + + // Check for added gamepads + for (unsigned int j = 0; j < count; ++j) + { + ComPtr pad; + HRESULT hr = pads->GetAt(j, pad.GetAddressOf()); + if (SUCCEEDED(hr)) + { + size_t empty = MAX_PLAYER_COUNT; + size_t k = 0; + for (; k < MAX_PLAYER_COUNT; ++k) + { + if (mGamePad[k] == pad) + { + if (!j) + mMostRecentGamepad = static_cast(k); + break; + } + else if (!mGamePad[k]) + { + if (empty >= MAX_PLAYER_COUNT) + empty = k; + } + } + + if (k >= MAX_PLAYER_COUNT) + { + if (empty >= MAX_PLAYER_COUNT) + { + throw std::exception("Too many gamepads found"); + } + + mGamePad[empty] = pad; + if (!j) + mMostRecentGamepad = static_cast(empty); + } + } + } + } + + 
ComPtr mStatics;
+    ComPtr mStaticsCtrl;
+    ComPtr mGamePad[MAX_PLAYER_COUNT];
+    // NOTE(review): the template arguments of the three ComPtr members above appear to have
+    // been lost when this patch was extracted; restore them from the original GamePad.cpp.
+
+    EventRegistrationToken mAddedToken;
+    EventRegistrationToken mRemovedToken;
+    EventRegistrationToken mUserParingToken;
+
+    ScopedHandle mChanged;
+};
+
+GamePad::Impl* GamePad::Impl::s_gamePad = nullptr;
+
+#else
+
+//======================================================================================
+// XInput
+//======================================================================================
+
+#include <Xinput.h>
+
+static_assert(GamePad::MAX_PLAYER_COUNT == XUSER_MAX_COUNT, "xinput.h mismatch");
+
+// XInput implementation of GamePad. Keeps a connected flag and a last-read tick count for
+// each of the XUSER_MAX_COUNT slots so that polling of empty slots can be throttled.
+class GamePad::Impl
+{
+public:
+    // Registers this instance as the process-wide singleton.
+    // Throws std::exception if another instance is already registered.
+    Impl(GamePad* owner) :
+        mOwner(owner),
+        mConnected{},
+        mLastReadTime{}
+    #if (_WIN32_WINNT < _WIN32_WINNT_WIN8)
+        , mLeftMotor{}
+        , mRightMotor{}
+        , mSuspended(false)
+    #endif
+    {
+        for (int j = 0; j < XUSER_MAX_COUNT; ++j)
+        {
+            ClearSlot(j, 0);
+        }
+
+        if (s_gamePad)
+        {
+            throw std::exception("GamePad is a singleton");
+        }
+
+        s_gamePad = this;
+    }
+
+    ~Impl()
+    {
+        s_gamePad = nullptr;
+    }
+
+    // Fills 'state' from XInputGetState for 'player' (or the most recently read pad when
+    // player == c_MostRecent). 'state' is zeroed when the slot is invalid, throttled, or
+    // reported as not connected.
+    void GetState(int player, _Out_ State& state, DeadZone deadZoneMode)
+    {
+        if (player == c_MostRecent)
+            player = GetMostRecent();
+
+        ULONGLONG time = GetTickCount64();
+
+        if (!ThrottleRetry(player, time))
+        {
+        #if (_WIN32_WINNT < _WIN32_WINNT_WIN8)
+            if (mSuspended)
+            {
+                // While suspended only the connection flag is reported; no input is read.
+                memset(&state, 0, sizeof(State));
+                state.connected = mConnected[player];
+                return;
+            }
+        #endif
+
+            XINPUT_STATE xstate;
+            DWORD result = XInputGetState(DWORD(player), &xstate);
+            if (result == ERROR_DEVICE_NOT_CONNECTED)
+            {
+                ClearSlot(player, time);
+            }
+            else
+            {
+                // Stamp the slot only on the disconnected -> connected transition.
+                if (!mConnected[player])
+                    mLastReadTime[player] = time;
+
+                mConnected[player] = true;
+
+                state.connected = true;
+                state.packet = xstate.dwPacketNumber;
+
+                WORD xbuttons = xstate.Gamepad.wButtons;
+                state.buttons.a = (xbuttons & XINPUT_GAMEPAD_A) != 0;
+                state.buttons.b = (xbuttons & XINPUT_GAMEPAD_B) != 0;
+                state.buttons.x = (xbuttons & XINPUT_GAMEPAD_X) != 0;
+                state.buttons.y = (xbuttons & XINPUT_GAMEPAD_Y) != 0;
+                state.buttons.leftStick = (xbuttons & XINPUT_GAMEPAD_LEFT_THUMB) != 0;
+                state.buttons.rightStick = (xbuttons & XINPUT_GAMEPAD_RIGHT_THUMB) != 0;
+                state.buttons.leftShoulder = (xbuttons & XINPUT_GAMEPAD_LEFT_SHOULDER) != 0;
+                state.buttons.rightShoulder = (xbuttons & XINPUT_GAMEPAD_RIGHT_SHOULDER) != 0;
+                state.buttons.back = (xbuttons & XINPUT_GAMEPAD_BACK) != 0;
+                state.buttons.start = (xbuttons & XINPUT_GAMEPAD_START) != 0;
+
+                state.dpad.up = (xbuttons & XINPUT_GAMEPAD_DPAD_UP) != 0;
+                state.dpad.down = (xbuttons & XINPUT_GAMEPAD_DPAD_DOWN) != 0;
+                state.dpad.right = (xbuttons & XINPUT_GAMEPAD_DPAD_RIGHT) != 0;
+                state.dpad.left = (xbuttons & XINPUT_GAMEPAD_DPAD_LEFT) != 0;
+
+                if (deadZoneMode == DEAD_ZONE_NONE)
+                {
+                    state.triggers.left = ApplyLinearDeadZone(float(xstate.Gamepad.bLeftTrigger), 255.f, 0.f);
+                    state.triggers.right = ApplyLinearDeadZone(float(xstate.Gamepad.bRightTrigger), 255.f, 0.f);
+                }
+                else
+                {
+                    state.triggers.left = ApplyLinearDeadZone(float(xstate.Gamepad.bLeftTrigger), 255.f, float(XINPUT_GAMEPAD_TRIGGER_THRESHOLD));
+                    state.triggers.right = ApplyLinearDeadZone(float(xstate.Gamepad.bRightTrigger), 255.f, float(XINPUT_GAMEPAD_TRIGGER_THRESHOLD));
+                }
+
+                ApplyStickDeadZone(float(xstate.Gamepad.sThumbLX), float(xstate.Gamepad.sThumbLY),
+                    deadZoneMode, 32767.f, float(XINPUT_GAMEPAD_LEFT_THUMB_DEADZONE),
+                    state.thumbSticks.leftX, state.thumbSticks.leftY);
+
+                ApplyStickDeadZone(float(xstate.Gamepad.sThumbRX), float(xstate.Gamepad.sThumbRY),
+                    deadZoneMode, 32767.f, float(XINPUT_GAMEPAD_RIGHT_THUMB_DEADZONE),
+                    state.thumbSticks.rightX, state.thumbSticks.rightY);
+
+                return;
+            }
+        }
+
+        memset(&state, 0, sizeof(State));
+    }
+
+    // Fills 'caps' from XInputGetCapabilities for 'player' (or the most recently read pad
+    // when player == c_MostRecent). 'caps' is zeroed when the slot is invalid, throttled, or
+    // reported as not connected.
+    void GetCapabilities(int player, _Out_ Capabilities& caps)
+    {
+        if (player == c_MostRecent)
+            player = GetMostRecent();
+
+        ULONGLONG time = GetTickCount64();
+
+        if (!ThrottleRetry(player, time))
+        {
+            XINPUT_CAPABILITIES xcaps;
+            DWORD result = XInputGetCapabilities(DWORD(player), 0, &xcaps);
+            if (result == ERROR_DEVICE_NOT_CONNECTED)
+            {
+                ClearSlot(player, time);
+            }
+            else
+            {
+                if (!mConnected[player])
+                    mLastReadTime[player] = time;
+
+                mConnected[player] = true;
+
+                caps.connected = true;
+                caps.id = uint64_t(player);
+
+                // Always assign the type so the out-parameter is fully written even when the
+                // device type is not XINPUT_DEVTYPE_GAMEPAD.
+                caps.gamepadType = Capabilities::UNKNOWN;
+                if (xcaps.Type == XINPUT_DEVTYPE_GAMEPAD)
+                {
+                    // The cast below relies on the enum values matching the XInput subtypes.
+                    static_assert(Capabilities::GAMEPAD == XINPUT_DEVSUBTYPE_GAMEPAD, "xinput.h mismatch");
+                #if (_WIN32_WINNT >= _WIN32_WINNT_WIN8)
+                    static_assert(XINPUT_DEVSUBTYPE_WHEEL == Capabilities::WHEEL, "xinput.h mismatch");
+                    static_assert(XINPUT_DEVSUBTYPE_ARCADE_STICK == Capabilities::ARCADE_STICK, "xinput.h mismatch");
+                    static_assert(XINPUT_DEVSUBTYPE_FLIGHT_STICK == Capabilities::FLIGHT_STICK, "xinput.h mismatch");
+                    static_assert(XINPUT_DEVSUBTYPE_DANCE_PAD == Capabilities::DANCE_PAD, "xinput.h mismatch");
+                    static_assert(XINPUT_DEVSUBTYPE_GUITAR == Capabilities::GUITAR, "xinput.h mismatch");
+                    static_assert(XINPUT_DEVSUBTYPE_GUITAR_ALTERNATE == Capabilities::GUITAR_ALTERNATE, "xinput.h mismatch");
+                    static_assert(XINPUT_DEVSUBTYPE_DRUM_KIT == Capabilities::DRUM_KIT, "xinput.h mismatch");
+                    static_assert(XINPUT_DEVSUBTYPE_GUITAR_BASS == Capabilities::GUITAR_BASS, "xinput.h mismatch");
+                    static_assert(XINPUT_DEVSUBTYPE_ARCADE_PAD == Capabilities::ARCADE_PAD, "xinput.h mismatch");
+                #endif
+
+                    caps.gamepadType = Capabilities::Type(xcaps.SubType);
+                }
+
+                // Hard-coded VID/PID
+                caps.vid = 0x045E;
+            #if (_WIN32_WINNT >= _WIN32_WINNT_WIN8)
+                caps.pid = (xcaps.Flags & XINPUT_CAPS_WIRELESS) ? 0x0719 : 0;
+            #else
+                caps.pid = 0;
+            #endif
+
+                return;
+            }
+        }
+
+        memset(&caps, 0, sizeof(Capabilities));
+    }
+
+    // Sets the two rumble motors of 'player'. Returns true only when XInputSetState reports
+    // ERROR_SUCCESS (or, while suspended on pre-Windows 8 builds, when the slot is connected).
+    // Motor levels outside [0, 1] are saturated rather than converted out of range.
+    bool SetVibration(int player, float leftMotor, float rightMotor, float leftTrigger, float rightTrigger) noexcept
+    {
+        if (player == c_MostRecent)
+            player = GetMostRecent();
+
+        ULONGLONG time = GetTickCount64();
+
+        if (ThrottleRetry(player, time))
+        {
+            return false;
+        }
+
+        // XInput does not provide a way to set the left/right trigger impulse motors on the Xbox One Controller,
+        // and these motors are not present on the Xbox 360 Common Controller
+        UNREFERENCED_PARAMETER(leftTrigger);
+        UNREFERENCED_PARAMETER(rightTrigger);
+
+    #if (_WIN32_WINNT < _WIN32_WINNT_WIN8)
+        // Remember the requested levels so Resume() can re-apply them.
+        mLeftMotor[player] = leftMotor;
+        mRightMotor[player] = rightMotor;
+
+        if (mSuspended)
+            return mConnected[player];
+    #endif
+
+        XINPUT_VIBRATION xvibration;
+        xvibration.wLeftMotorSpeed = MotorSpeed(leftMotor);
+        xvibration.wRightMotorSpeed = MotorSpeed(rightMotor);
+        DWORD result = XInputSetState(DWORD(player), &xvibration);
+        if (result == ERROR_DEVICE_NOT_CONNECTED)
+        {
+            ClearSlot(player, time);
+            return false;
+        }
+        else
+        {
+            if (!mConnected[player])
+                mLastReadTime[player] = time;
+
+            mConnected[player] = true;
+            return (result == ERROR_SUCCESS);
+        }
+    }
+
+    // Stops input/vibration while the application is inactive.
+    void Suspend() noexcept
+    {
+    #if (_WIN32_WINNT >= _WIN32_WINNT_WIN10)
+        // XInput focus is handled automatically on Windows 10
+    #elif (_WIN32_WINNT >= _WIN32_WINNT_WIN8)
+        XInputEnable(FALSE);
+    #else
+        // For XInput 9.1.0, we have to emulate the behavior of XInputEnable( FALSE )
+        if (!mSuspended)
+        {
+            for (size_t j = 0; j < XUSER_MAX_COUNT; ++j)
+            {
+                if (mConnected[j])
+                {
+                    XINPUT_VIBRATION xvibration;
+                    xvibration.wLeftMotorSpeed = xvibration.wRightMotorSpeed = 0;
+                    (void)XInputSetState(DWORD(j), &xvibration);
+                }
+            }
+
+            mSuspended = true;
+        }
+    #endif
+    }
+
+    // Reverses Suspend(); on pre-Windows 8 builds re-applies the remembered motor levels.
+    void Resume() noexcept
+    {
+    #if (_WIN32_WINNT >= _WIN32_WINNT_WIN10)
+        // XInput focus is handled automatically on Windows 10
+    #elif (_WIN32_WINNT >= _WIN32_WINNT_WIN8)
+        XInputEnable(TRUE);
+    #else
+        // For XInput 9.1.0, we have to emulate the behavior of XInputEnable( TRUE )
+        if (mSuspended)
+        {
+            ULONGLONG time = GetTickCount64();
+
+            for (int j = 0; j < XUSER_MAX_COUNT; ++j)
+            {
+                if (mConnected[j])
+                {
+                    XINPUT_VIBRATION xvibration;
+                    xvibration.wLeftMotorSpeed = MotorSpeed(mLeftMotor[j]);
+                    xvibration.wRightMotorSpeed = MotorSpeed(mRightMotor[j]);
+                    DWORD result = XInputSetState(DWORD(j), &xvibration);
+                    if (result == ERROR_DEVICE_NOT_CONNECTED)
+                    {
+                        ClearSlot(j, time);
+                    }
+                }
+            }
+
+            mSuspended = false;
+        }
+    #endif
+    }
+
+    GamePad* mOwner;
+
+    static GamePad::Impl* s_gamePad;
+
+private:
+    bool mConnected[XUSER_MAX_COUNT];
+    ULONGLONG mLastReadTime[XUSER_MAX_COUNT];
+
+#if (_WIN32_WINNT < _WIN32_WINNT_WIN8)
+    // Variables for emulating XInputEnable on XInput 9.1.0
+    float mLeftMotor[XUSER_MAX_COUNT];
+    float mRightMotor[XUSER_MAX_COUNT];
+    bool mSuspended;
+#endif
+
+    // Converts a motor level to the 16-bit XInput speed. Levels at or below zero (and NaN)
+    // give 0 and levels at or above one give 0xFFFF, so the float-to-WORD conversion is never
+    // applied to a value that does not fit in a WORD. Levels inside (0, 1) convert exactly as
+    // WORD(level * 0xFFFF) did.
+    static WORD MotorSpeed(float level) noexcept
+    {
+        if (!(level > 0.f))
+            return 0;
+
+        if (level >= 1.f)
+            return 0xFFFF;
+
+        return WORD(level * 0xFFFF);
+    }
+
+    // Returns true when the caller should skip the XInput call for 'player' at tick 'time':
+    // either the index is out of range, or a disconnected slot was checked too recently.
+    bool ThrottleRetry(int player, ULONGLONG time)
+    {
+        // This function minimizes a potential performance issue with XInput on Windows when
+        // checking a disconnected controller slot which requires device enumeration.
+        // This throttling keeps checks for newly connected gamepads to about once a second
+
+        if ((player < 0) || (player >= XUSER_MAX_COUNT))
+            return true;
+
+        if (mConnected[player])
+            return false;
+
+        for (int j = 0; j < XUSER_MAX_COUNT; ++j)
+        {
+            if (!mConnected[j])
+            {
+                LONGLONG delta = LONGLONG(time) - LONGLONG(mLastReadTime[j]);
+
+                // The requested slot waits the full interval; a recent check of any other
+                // disconnected slot holds it back for a quarter of that.
+                LONGLONG interval = 1000;
+                if (j != player)
+                    interval /= 4;
+
+                if ((delta >= 0) && (delta < interval))
+                    return true;
+            }
+        }
+
+        return false;
+    }
+
+    // Marks 'player' as disconnected as of 'time' and forgets its remembered motor levels.
+    void ClearSlot(int player, ULONGLONG time)
+    {
+        mConnected[player] = false;
+        mLastReadTime[player] = time;
+    #if (_WIN32_WINNT < _WIN32_WINNT_WIN8)
+        mLeftMotor[player] = mRightMotor[player] = 0.f;
+    #endif
+    }
+
+    // Returns the connected slot with the largest last-read tick, or -1 if there is none.
+    int GetMostRecent()
+    {
+        int player = -1;
+        ULONGLONG time = 0;
+
+        for (size_t j = 0; j < XUSER_MAX_COUNT; ++j)
+        {
+            if (mConnected[j] && (mLastReadTime[j] > time))
+            {
+                time = mLastReadTime[j];
+                player = static_cast<int>(j);
+            }
+        }
+
+        return player;
+    }
+};
+
+GamePad::Impl* GamePad::Impl::s_gamePad = nullptr;
+
+#endif
+
+#pragma warning( disable : 4355 )
+
+// Public constructor.
+GamePad::GamePad() noexcept(false)
+    : pImpl(std::make_unique<Impl>(this))
+{
+}
+
+
+// Move constructor.
+GamePad::GamePad(GamePad&& moveFrom) noexcept
+    : pImpl(std::move(moveFrom.pImpl))
+{
+    // The source may itself be moved-from, in which case there is no Impl to re-parent.
+    if (pImpl)
+        pImpl->mOwner = this;
+}
+
+
+// Move assignment.
+GamePad& GamePad::operator= (GamePad&& moveFrom) noexcept
+{
+    pImpl = std::move(moveFrom.pImpl);
+
+    // The source may itself be moved-from, in which case there is no Impl to re-parent.
+    if (pImpl)
+        pImpl->mOwner = this;
+
+    return *this;
+}
+
+
+// Public destructor.
+GamePad::~GamePad()
+{
+}
+
+
+// Returns a snapshot of the given player's pad; all reads are forwarded to the Impl.
+GamePad::State GamePad::GetState(int player, DeadZone deadZoneMode)
+{
+    State state;
+    pImpl->GetState(player, state, deadZoneMode);
+    return state;
+}
+
+
+// Returns the capabilities reported for the given player's pad.
+GamePad::Capabilities GamePad::GetCapabilities(int player)
+{
+    Capabilities caps;
+    pImpl->GetCapabilities(player, caps);
+    return caps;
+}
+
+
+// Forwards the vibration request to the Impl and returns its result.
+bool GamePad::SetVibration(int player, float leftMotor, float rightMotor, float leftTrigger, float rightTrigger) noexcept
+{
+    return pImpl->SetVibration(player, leftMotor, rightMotor, leftTrigger, rightTrigger);
+}
+
+
+void GamePad::Suspend() noexcept
+{
+    pImpl->Suspend();
+}
+
+
+void GamePad::Resume() noexcept
+{
+    pImpl->Resume();
+}
+
+
+// Event registration. A null handle is stored as INVALID_HANDLE_VALUE.
+#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_GAMES)
+void GamePad::RegisterEvents(HANDLE ctrlChanged) noexcept
+{
+    pImpl->mCtrlChanged = (!ctrlChanged) ? INVALID_HANDLE_VALUE : ctrlChanged;
+}
+
+void GamePad::GetDevice(int player, _Outptr_ IGameInputDevice** device) noexcept
+{
+    pImpl->GetDevice(player, device);
+}
+#elif ((_WIN32_WINNT >= _WIN32_WINNT_WIN10) && !defined(_GAMING_DESKTOP)) || defined(_XBOX_ONE)
+void GamePad::RegisterEvents(HANDLE ctrlChanged, HANDLE userChanged) noexcept
+{
+    pImpl->mCtrlChanged = (!ctrlChanged) ? INVALID_HANDLE_VALUE : ctrlChanged;
+    pImpl->mUserChanged = (!userChanged) ? INVALID_HANDLE_VALUE : userChanged;
+}
+#endif
+
+
+// Returns the live GamePad instance.
+// Throws std::exception when no instance is currently registered (none constructed yet, or
+// the last one was destroyed). The message says so explicitly; the wording used when a
+// *second* instance is constructed does not describe this failure.
+GamePad& GamePad::Get()
+{
+    if (!Impl::s_gamePad || !Impl::s_gamePad->mOwner)
+        throw std::exception("GamePad singleton not created");
+
+    return *Impl::s_gamePad->mOwner;
+}
+
+
+
+//======================================================================================
+// ButtonStateTracker
+//======================================================================================
+
+// Encodes a button as two bits: bit 0 is the current pressed state and bit 1 is set when
+// that state differs from the previous frame (see the asserts in Update below).
+#define UPDATE_BUTTON_STATE(field) field = static_cast<ButtonState>( ( !!state.buttons.field ) | ( ( !!state.buttons.field ^ !!lastState.buttons.field ) << 1 ) );
+
+// Recomputes every tracked button from 'state' versus the previously supplied state, then
+// stores 'state' as the new previous state.
+void GamePad::ButtonStateTracker::Update(const GamePad::State& state) noexcept
+{
+    UPDATE_BUTTON_STATE(a)
+
+    assert((!state.buttons.a && !lastState.buttons.a) == (a == UP));
+    assert((state.buttons.a && lastState.buttons.a) == (a == HELD));
+    assert((!state.buttons.a && lastState.buttons.a) == (a == RELEASED));
+    assert((state.buttons.a && !lastState.buttons.a) == (a == PRESSED));
+
+    UPDATE_BUTTON_STATE(b)
+    UPDATE_BUTTON_STATE(x)
+    UPDATE_BUTTON_STATE(y)
+
+    UPDATE_BUTTON_STATE(leftStick)
+    UPDATE_BUTTON_STATE(rightStick)
+
+    UPDATE_BUTTON_STATE(leftShoulder)
+    UPDATE_BUTTON_STATE(rightShoulder)
+
+    UPDATE_BUTTON_STATE(back)
+    UPDATE_BUTTON_STATE(start)
+
+    dpadUp = static_cast<ButtonState>((!!state.dpad.up) | ((!!state.dpad.up ^ !!lastState.dpad.up) << 1));
+    dpadDown = static_cast<ButtonState>((!!state.dpad.down) | ((!!state.dpad.down ^ !!lastState.dpad.down) << 1));
+    dpadLeft = static_cast<ButtonState>((!!state.dpad.left) | ((!!state.dpad.left ^ !!lastState.dpad.left) << 1));
+    dpadRight = static_cast<ButtonState>((!!state.dpad.right) | ((!!state.dpad.right ^ !!lastState.dpad.right) << 1));
+
+    assert((!state.dpad.up && !lastState.dpad.up) == (dpadUp == UP));
+    assert((state.dpad.up && lastState.dpad.up) == (dpadUp == HELD));
+    assert((!state.dpad.up && lastState.dpad.up) == (dpadUp == RELEASED));
+    assert((state.dpad.up && !lastState.dpad.up) == (dpadUp == PRESSED));
+
+    // Handle 'threshold' tests which emulate buttons
+
+    bool threshold = state.IsLeftThumbStickUp();
+    leftStickUp = static_cast<ButtonState>((!!threshold) | ((!!threshold ^ !!lastState.IsLeftThumbStickUp()) << 1));
+
+    threshold = state.IsLeftThumbStickDown();
+    leftStickDown = static_cast<ButtonState>((!!threshold) | ((!!threshold ^ !!lastState.IsLeftThumbStickDown()) << 1));
+
+    threshold = state.IsLeftThumbStickLeft();
+    leftStickLeft = static_cast<ButtonState>((!!threshold) | ((!!threshold ^ !!lastState.IsLeftThumbStickLeft()) << 1));
+
+    threshold = state.IsLeftThumbStickRight();
+    leftStickRight = static_cast<ButtonState>((!!threshold) | ((!!threshold ^ !!lastState.IsLeftThumbStickRight()) << 1));
+
+    threshold = state.IsRightThumbStickUp();
+    rightStickUp = static_cast<ButtonState>((!!threshold) | ((!!threshold ^ !!lastState.IsRightThumbStickUp()) << 1));
+
+    threshold = state.IsRightThumbStickDown();
+    rightStickDown = static_cast<ButtonState>((!!threshold) | ((!!threshold ^ !!lastState.IsRightThumbStickDown()) << 1));
+
+    threshold = state.IsRightThumbStickLeft();
+    rightStickLeft = static_cast<ButtonState>((!!threshold) | ((!!threshold ^ !!lastState.IsRightThumbStickLeft()) << 1));
+
+    threshold = state.IsRightThumbStickRight();
+    rightStickRight = static_cast<ButtonState>((!!threshold) | ((!!threshold ^ !!lastState.IsRightThumbStickRight()) << 1));
+
+    threshold = state.IsLeftTriggerPressed();
+    leftTrigger = static_cast<ButtonState>((!!threshold) | ((!!threshold ^ !!lastState.IsLeftTriggerPressed()) << 1));
+
+    threshold = state.IsRightTriggerPressed();
+    rightTrigger = static_cast<ButtonState>((!!threshold) | ((!!threshold ^ !!lastState.IsRightTriggerPressed()) << 1));
+
+    lastState = state;
+}
+
+#undef UPDATE_BUTTON_STATE
+
+
+// Clears every tracked button and the remembered previous state to zero.
+void GamePad::ButtonStateTracker::Reset() noexcept
+{
+    memset(this, 0, sizeof(ButtonStateTracker));
+}
diff --git a/Sdk/External/DirectXTK/Src/GeometricPrimitive.cpp b/Sdk/External/DirectXTK/Src/GeometricPrimitive.cpp
new file mode 100644
index 0000000..692b189
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/GeometricPrimitive.cpp
@@ -0,0 +1,773 @@
+//--------------------------------------------------------------------------------------
+// File: GeometricPrimitive.cpp
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+//--------------------------------------------------------------------------------------
+
+#include "pch.h"
+#include "GeometricPrimitive.h"
+#include "BufferHelpers.h"
+#include "CommonStates.h"
+#include "DirectXHelpers.h"
+#include "Effects.h"
+#include "Geometry.h"
+#include "SharedResourcePool.h"
+
+using namespace DirectX;
+using Microsoft::WRL::ComPtr;
+
+
+// Internal GeometricPrimitive implementation class.
+// Owns the vertex/index buffers of one primitive; the effect, input layouts and state
+// objects live in a SharedResources object obtained from sharedResourcesPool.
+class GeometricPrimitive::Impl
+{
+public:
+    Impl() noexcept : mIndexCount(0) {}
+
+    void Initialize(_In_ ID3D11DeviceContext* deviceContext, const VertexCollection& vertices, const IndexCollection& indices);
+
+    void XM_CALLCONV Draw(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection,
+        FXMVECTOR color,
+        _In_opt_ ID3D11ShaderResourceView* texture,
+        bool wireframe,
+        std::function<void()>& setCustomState) const;
+
+    void Draw(_In_ IEffect* effect,
+        _In_ ID3D11InputLayout* inputLayout,
+        bool alpha, bool wireframe,
+        std::function<void()>& setCustomState) const;
+
+    void CreateInputLayout(_In_ IEffect* effect, _Outptr_ ID3D11InputLayout** inputLayout) const;
+
+private:
+    ComPtr<ID3D11Buffer> mVertexBuffer;
+    ComPtr<ID3D11Buffer> mIndexBuffer;
+
+    UINT mIndexCount;
+
+    // Only one of these helpers is allocated per D3D device context, even if there are multiple GeometricPrimitive instances.
+    class SharedResources
+    {
+    public:
+        SharedResources(_In_ ID3D11DeviceContext* deviceContext);
+
+        void PrepareForRendering(bool alpha, bool wireframe) const;
+
+        ComPtr<ID3D11DeviceContext> mDeviceContext;
+        std::unique_ptr<BasicEffect> effect;
+
+        ComPtr<ID3D11InputLayout> inputLayoutTextured;
+        ComPtr<ID3D11InputLayout> inputLayoutUntextured;
+
+        std::unique_ptr<CommonStates> stateObjects;
+    };
+
+
+    // Per-device-context data.
+    std::shared_ptr<SharedResources> mResources;
+
+    static SharedResourcePool<ID3D11DeviceContext*, SharedResources> sharedResourcesPool;
+};
+
+
+// Global pool of per-device-context GeometricPrimitive resources.
+SharedResourcePool<ID3D11DeviceContext*, GeometricPrimitive::Impl::SharedResources> GeometricPrimitive::Impl::sharedResourcesPool;
+
+
+// Per-device-context constructor.
+// Builds the effect, the state objects and one input layout for each of the textured and
+// untextured effect configurations. Throws (via ThrowIfFailed) if a layout cannot be created.
+GeometricPrimitive::Impl::SharedResources::SharedResources(_In_ ID3D11DeviceContext* deviceContext)
+    : mDeviceContext(deviceContext)
+{
+    ComPtr<ID3D11Device> device;
+    deviceContext->GetDevice(&device);
+
+    // Create the BasicEffect.
+    effect = std::make_unique<BasicEffect>(device.Get());
+
+    effect->EnableDefaultLighting();
+
+    // Create state objects.
+    stateObjects = std::make_unique<CommonStates>(device.Get());
+
+    // Create input layouts.
+    effect->SetTextureEnabled(true);
+    ThrowIfFailed(
+        CreateInputLayoutFromEffect(device.Get(), effect.get(), &inputLayoutTextured)
+    );
+
+    SetDebugObjectName(inputLayoutTextured.Get(), "DirectXTK:GeometricPrimitive");
+
+    effect->SetTextureEnabled(false);
+    ThrowIfFailed(
+        CreateInputLayoutFromEffect(device.Get(), effect.get(), &inputLayoutUntextured)
+    );
+
+    SetDebugObjectName(inputLayoutUntextured.Get(), "DirectXTK:GeometricPrimitive");
+}
+
+
+// Sets up D3D device state ready for drawing a primitive.
+// 'alpha' selects alpha-blend/depth-read instead of opaque/depth-default; 'wireframe' selects
+// the wireframe rasterizer state instead of counter-clockwise culling.
+void GeometricPrimitive::Impl::SharedResources::PrepareForRendering(bool alpha, bool wireframe) const
+{
+    // Set the blend and depth stencil state.
+    ID3D11BlendState* blendState;
+    ID3D11DepthStencilState* depthStencilState;
+
+    if (alpha)
+    {
+        // Alpha blended rendering.
+        blendState = stateObjects->AlphaBlend();
+        depthStencilState = stateObjects->DepthRead();
+    }
+    else
+    {
+        // Opaque rendering.
+        blendState = stateObjects->Opaque();
+        depthStencilState = stateObjects->DepthDefault();
+    }
+
+    mDeviceContext->OMSetBlendState(blendState, nullptr, 0xFFFFFFFF);
+    mDeviceContext->OMSetDepthStencilState(depthStencilState, 0);
+
+    // Set the rasterizer state.
+    if (wireframe)
+        mDeviceContext->RSSetState(stateObjects->Wireframe());
+    else
+        mDeviceContext->RSSetState(stateObjects->CullCounterClockwise());
+
+    ID3D11SamplerState* samplerState = stateObjects->LinearWrap();
+
+    mDeviceContext->PSSetSamplers(0, 1, &samplerState);
+}
+
+
+// Initializes a geometric primitive instance that will draw the specified vertex and index data.
+// Throws std::exception when the vertex or index count exceeds the limits checked below, and
+// (via ThrowIfFailed) when either buffer cannot be created.
+_Use_decl_annotations_
+void GeometricPrimitive::Impl::Initialize(ID3D11DeviceContext* deviceContext, const VertexCollection& vertices, const IndexCollection& indices)
+{
+    if (vertices.size() >= USHRT_MAX)
+        throw std::exception("Too many vertices for 16-bit index buffer");
+
+    if (indices.size() > UINT32_MAX)
+        throw std::exception("Too many indices");
+
+    mResources = sharedResourcesPool.DemandCreate(deviceContext);
+
+    ComPtr<ID3D11Device> device;
+    deviceContext->GetDevice(&device);
+
+    ThrowIfFailed(
+        CreateStaticBuffer(device.Get(), vertices, D3D11_BIND_VERTEX_BUFFER, mVertexBuffer.ReleaseAndGetAddressOf())
+    );
+
+    ThrowIfFailed(
+        CreateStaticBuffer(device.Get(), indices, D3D11_BIND_INDEX_BUFFER, mIndexBuffer.ReleaseAndGetAddressOf())
+    );
+
+    SetDebugObjectName(mVertexBuffer.Get(), "DirectXTK:GeometricPrimitive");
+    SetDebugObjectName(mIndexBuffer.Get(), "DirectXTK:GeometricPrimitive");
+
+    mIndexCount = static_cast<UINT>(indices.size());
+}
+
+
+// Draws the primitive.
+// This overload uses the shared built-in effect: it selects the textured or untextured
+// input layout, and treats the primitive as alpha-blended when color.w is below 1.
+_Use_decl_annotations_
+void XM_CALLCONV GeometricPrimitive::Impl::Draw(
+    FXMMATRIX world,
+    CXMMATRIX view,
+    CXMMATRIX projection,
+    FXMVECTOR color,
+    ID3D11ShaderResourceView* texture,
+    bool wireframe,
+    std::function<void()>& setCustomState) const
+{
+    assert(mResources);
+    auto effect = mResources->effect.get();
+    assert(effect != nullptr);
+
+    ID3D11InputLayout *inputLayout;
+    if (texture)
+    {
+        effect->SetTextureEnabled(true);
+        effect->SetTexture(texture);
+
+        inputLayout = mResources->inputLayoutTextured.Get();
+    }
+    else
+    {
+        effect->SetTextureEnabled(false);
+
+        inputLayout = mResources->inputLayoutUntextured.Get();
+    }
+
+    // Set effect parameters.
+    effect->SetMatrices(world, view, projection);
+
+    effect->SetColorAndAlpha(color);
+
+    float alpha = XMVectorGetW(color);
+    Draw(effect, inputLayout, (alpha < 1.f), wireframe, setCustomState);
+}
+
+
+// Draw the primitive using a custom effect.
+// Order matters: state objects, input layout and effect are applied first, then the buffers
+// are bound, and only then is the optional setCustomState hook invoked so it can override
+// anything set before it.
+_Use_decl_annotations_
+void GeometricPrimitive::Impl::Draw(
+    IEffect* effect,
+    ID3D11InputLayout* inputLayout,
+    bool alpha,
+    bool wireframe,
+    std::function<void()>& setCustomState) const
+{
+    assert(mResources);
+    auto deviceContext = mResources->mDeviceContext.Get();
+    assert(deviceContext != nullptr);
+
+    // Set state objects.
+    mResources->PrepareForRendering(alpha, wireframe);
+
+    // Set input layout.
+    assert(inputLayout != nullptr);
+    deviceContext->IASetInputLayout(inputLayout);
+
+    // Activate our shaders, constant buffers, texture, etc.
+    assert(effect != nullptr);
+    effect->Apply(deviceContext);
+
+    // Set the vertex and index buffer.
+    auto vertexBuffer = mVertexBuffer.Get();
+    UINT vertexStride = sizeof(VertexType);
+    UINT vertexOffset = 0;
+
+    deviceContext->IASetVertexBuffers(0, 1, &vertexBuffer, &vertexStride, &vertexOffset);
+
+    deviceContext->IASetIndexBuffer(mIndexBuffer.Get(), DXGI_FORMAT_R16_UINT, 0);
+
+    // Hook lets the caller replace our shaders or state settings with whatever else they see fit.
+    if (setCustomState)
+    {
+        setCustomState();
+    }
+
+    // Draw the primitive.
+    deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+
+    deviceContext->DrawIndexed(mIndexCount, 0, 0);
+}
+
+
+// Create input layout for drawing with a custom effect.
+// The device is taken from the shared device context. Throws (via ThrowIfFailed) if the
+// layout cannot be created; on return *inputLayout is non-null.
+_Use_decl_annotations_
+void GeometricPrimitive::Impl::CreateInputLayout(IEffect* effect, ID3D11InputLayout** inputLayout) const
+{
+    assert(effect != nullptr);
+    assert(inputLayout != nullptr);
+
+    assert(mResources);
+    auto deviceContext = mResources->mDeviceContext.Get();
+    assert(deviceContext != nullptr);
+
+    ComPtr<ID3D11Device> device;
+    deviceContext->GetDevice(&device);
+
+    ThrowIfFailed(
+        CreateInputLayoutFromEffect(device.Get(), effect, inputLayout)
+    );
+
+    assert(inputLayout != nullptr && *inputLayout != nullptr);
+    _Analysis_assume_(inputLayout != nullptr && *inputLayout != nullptr);
+
+    SetDebugObjectName(*inputLayout, "DirectXTK:GeometricPrimitive");
+}
+
+
+//--------------------------------------------------------------------------------------
+// GeometricPrimitive
+//--------------------------------------------------------------------------------------
+
+// Constructor.
+GeometricPrimitive::GeometricPrimitive() noexcept(false)
+    : pImpl(std::make_unique<Impl>())
+{
+}
+
+
+// Destructor.
+GeometricPrimitive::~GeometricPrimitive()
+{
+}
+
+
+// Public entrypoints.
+// Each of these forwards unchanged to the matching Impl method.
+_Use_decl_annotations_
+void XM_CALLCONV GeometricPrimitive::Draw(
+    FXMMATRIX world,
+    CXMMATRIX view,
+    CXMMATRIX projection,
+    FXMVECTOR color,
+    ID3D11ShaderResourceView* texture,
+    bool wireframe,
+    std::function<void()> setCustomState) const
+{
+    pImpl->Draw(world, view, projection, color, texture, wireframe, setCustomState);
+}
+
+
+_Use_decl_annotations_
+void GeometricPrimitive::Draw(
+    IEffect* effect,
+    ID3D11InputLayout* inputLayout,
+    bool alpha,
+    bool wireframe,
+    std::function<void()> setCustomState) const
+{
+    pImpl->Draw(effect, inputLayout, alpha, wireframe, setCustomState);
+}
+
+
+_Use_decl_annotations_
+void GeometricPrimitive::CreateInputLayout(IEffect* effect, ID3D11InputLayout** inputLayout) const
+{
+    pImpl->CreateInputLayout(effect, inputLayout);
+}
+
+
+//--------------------------------------------------------------------------------------
+// Cube (aka a Hexahedron) or Box
+//--------------------------------------------------------------------------------------
+
+// Creates a cube primitive: a box whose three extents all equal 'size', built with
+// ComputeBox and uploaded through Impl::Initialize.
+_Use_decl_annotations_
+std::unique_ptr<GeometricPrimitive> GeometricPrimitive::CreateCube(
+    ID3D11DeviceContext* deviceContext,
+    float size,
+    bool rhcoords)
+{
+    VertexCollection vertices;
+    IndexCollection indices;
+    ComputeBox(vertices, indices, XMFLOAT3(size, size, size), rhcoords, false);
+
+    // Create the primitive object.
+    std::unique_ptr<GeometricPrimitive> primitive(new GeometricPrimitive());
+
+    primitive->pImpl->Initialize(deviceContext, vertices, indices);
+
+    return primitive;
+}
+
+// Geometry-only variant: passes the caller's vertex/index containers to ComputeBox without
+// creating any D3D resources.
+void GeometricPrimitive::CreateCube(
+    std::vector<VertexType>& vertices,
+    std::vector<uint16_t>& indices,
+    float size,
+    bool rhcoords)
+{
+    ComputeBox(vertices, indices, XMFLOAT3(size, size, size), rhcoords, false);
+}
+
+
+// Creates a box primitive.
+_Use_decl_annotations_ +std::unique_ptr GeometricPrimitive::CreateBox( + ID3D11DeviceContext* deviceContext, + const XMFLOAT3& size, + bool rhcoords, + bool invertn) +{ + VertexCollection vertices; + IndexCollection indices; + ComputeBox(vertices, indices, size, rhcoords, invertn); + + // Create the primitive object. + std::unique_ptr primitive(new GeometricPrimitive()); + + primitive->pImpl->Initialize(deviceContext, vertices, indices); + + return primitive; +} + +void GeometricPrimitive::CreateBox( + std::vector& vertices, + std::vector& indices, + const XMFLOAT3& size, + bool rhcoords, + bool invertn) +{ + ComputeBox(vertices, indices, size, rhcoords, invertn); +} + + +//-------------------------------------------------------------------------------------- +// Sphere +//-------------------------------------------------------------------------------------- + +_Use_decl_annotations_ +std::unique_ptr GeometricPrimitive::CreateSphere( + ID3D11DeviceContext* deviceContext, + float diameter, + size_t tessellation, + bool rhcoords, + bool invertn) +{ + VertexCollection vertices; + IndexCollection indices; + ComputeSphere(vertices, indices, diameter, tessellation, rhcoords, invertn); + + // Create the primitive object. 
+ std::unique_ptr primitive(new GeometricPrimitive()); + + primitive->pImpl->Initialize(deviceContext, vertices, indices); + + return primitive; +} + +void GeometricPrimitive::CreateSphere( + std::vector& vertices, + std::vector& indices, + float diameter, + size_t tessellation, + bool rhcoords, + bool invertn) +{ + ComputeSphere(vertices, indices, diameter, tessellation, rhcoords, invertn); +} + + +//-------------------------------------------------------------------------------------- +// Geodesic sphere +//-------------------------------------------------------------------------------------- + +_Use_decl_annotations_ +std::unique_ptr GeometricPrimitive::CreateGeoSphere( + ID3D11DeviceContext* deviceContext, + float diameter, + size_t tessellation, + bool rhcoords) +{ + VertexCollection vertices; + IndexCollection indices; + ComputeGeoSphere(vertices, indices, diameter, tessellation, rhcoords); + + // Create the primitive object. + std::unique_ptr primitive(new GeometricPrimitive()); + + primitive->pImpl->Initialize(deviceContext, vertices, indices); + + return primitive; +} + +void GeometricPrimitive::CreateGeoSphere( + std::vector& vertices, + std::vector& indices, + float diameter, + size_t tessellation, bool rhcoords) +{ + ComputeGeoSphere(vertices, indices, diameter, tessellation, rhcoords); +} + + +//-------------------------------------------------------------------------------------- +// Cylinder / Cone +//-------------------------------------------------------------------------------------- + +// Creates a cylinder primitive. +_Use_decl_annotations_ +std::unique_ptr GeometricPrimitive::CreateCylinder( + ID3D11DeviceContext* deviceContext, + float height, + float diameter, + size_t tessellation, + bool rhcoords) +{ + VertexCollection vertices; + IndexCollection indices; + ComputeCylinder(vertices, indices, height, diameter, tessellation, rhcoords); + + // Create the primitive object. 
+ std::unique_ptr primitive(new GeometricPrimitive()); + + primitive->pImpl->Initialize(deviceContext, vertices, indices); + + return primitive; +} + +void GeometricPrimitive::CreateCylinder( + std::vector& vertices, + std::vector& indices, + float height, + float diameter, + size_t tessellation, + bool rhcoords) +{ + ComputeCylinder(vertices, indices, height, diameter, tessellation, rhcoords); +} + + +// Creates a cone primitive. +_Use_decl_annotations_ +std::unique_ptr GeometricPrimitive::CreateCone( + ID3D11DeviceContext* deviceContext, + float diameter, + float height, + size_t tessellation, + bool rhcoords) +{ + VertexCollection vertices; + IndexCollection indices; + ComputeCone(vertices, indices, diameter, height, tessellation, rhcoords); + + // Create the primitive object. + std::unique_ptr primitive(new GeometricPrimitive()); + + primitive->pImpl->Initialize(deviceContext, vertices, indices); + + return primitive; +} + +void GeometricPrimitive::CreateCone( + std::vector& vertices, + std::vector& indices, + float diameter, + float height, + size_t tessellation, + bool rhcoords) +{ + ComputeCone(vertices, indices, diameter, height, tessellation, rhcoords); +} + + +//-------------------------------------------------------------------------------------- +// Torus +//-------------------------------------------------------------------------------------- + +_Use_decl_annotations_ +std::unique_ptr GeometricPrimitive::CreateTorus( + ID3D11DeviceContext* deviceContext, + float diameter, + float thickness, + size_t tessellation, + bool rhcoords) +{ + VertexCollection vertices; + IndexCollection indices; + ComputeTorus(vertices, indices, diameter, thickness, tessellation, rhcoords); + + // Create the primitive object. 
+ std::unique_ptr primitive(new GeometricPrimitive()); + + primitive->pImpl->Initialize(deviceContext, vertices, indices); + + return primitive; +} + +void GeometricPrimitive::CreateTorus( + std::vector& vertices, + std::vector& indices, + float diameter, + float thickness, + size_t tessellation, + bool rhcoords) +{ + ComputeTorus(vertices, indices, diameter, thickness, tessellation, rhcoords); +} + + +//-------------------------------------------------------------------------------------- +// Tetrahedron +//-------------------------------------------------------------------------------------- + +_Use_decl_annotations_ +std::unique_ptr GeometricPrimitive::CreateTetrahedron( + ID3D11DeviceContext* deviceContext, + float size, + bool rhcoords) +{ + VertexCollection vertices; + IndexCollection indices; + ComputeTetrahedron(vertices, indices, size, rhcoords); + + // Create the primitive object. + std::unique_ptr primitive(new GeometricPrimitive()); + + primitive->pImpl->Initialize(deviceContext, vertices, indices); + + return primitive; +} + +void GeometricPrimitive::CreateTetrahedron( + std::vector& vertices, + std::vector& indices, + float size, + bool rhcoords) +{ + ComputeTetrahedron(vertices, indices, size, rhcoords); +} + + +//-------------------------------------------------------------------------------------- +// Octahedron +//-------------------------------------------------------------------------------------- + +_Use_decl_annotations_ +std::unique_ptr GeometricPrimitive::CreateOctahedron( + ID3D11DeviceContext* deviceContext, + float size, + bool rhcoords) +{ + VertexCollection vertices; + IndexCollection indices; + ComputeOctahedron(vertices, indices, size, rhcoords); + + // Create the primitive object. 
+ std::unique_ptr primitive(new GeometricPrimitive()); + + primitive->pImpl->Initialize(deviceContext, vertices, indices); + + return primitive; +} + +void GeometricPrimitive::CreateOctahedron( + std::vector& vertices, + std::vector& indices, + float size, + bool rhcoords) +{ + ComputeOctahedron(vertices, indices, size, rhcoords); +} + + +//-------------------------------------------------------------------------------------- +// Dodecahedron +//-------------------------------------------------------------------------------------- + +_Use_decl_annotations_ +std::unique_ptr GeometricPrimitive::CreateDodecahedron( + ID3D11DeviceContext* deviceContext, + float size, + bool rhcoords) +{ + VertexCollection vertices; + IndexCollection indices; + ComputeDodecahedron(vertices, indices, size, rhcoords); + + // Create the primitive object. + std::unique_ptr primitive(new GeometricPrimitive()); + + primitive->pImpl->Initialize(deviceContext, vertices, indices); + + return primitive; +} + +void GeometricPrimitive::CreateDodecahedron( + std::vector& vertices, + std::vector& indices, + float size, + bool rhcoords) +{ + ComputeDodecahedron(vertices, indices, size, rhcoords); +} + + +//-------------------------------------------------------------------------------------- +// Icosahedron +//-------------------------------------------------------------------------------------- + +_Use_decl_annotations_ +std::unique_ptr GeometricPrimitive::CreateIcosahedron( + ID3D11DeviceContext* deviceContext, + float size, + bool rhcoords) +{ + VertexCollection vertices; + IndexCollection indices; + ComputeIcosahedron(vertices, indices, size, rhcoords); + + // Create the primitive object. 
+ std::unique_ptr primitive(new GeometricPrimitive()); + + primitive->pImpl->Initialize(deviceContext, vertices, indices); + + return primitive; +} + +void GeometricPrimitive::CreateIcosahedron( + std::vector& vertices, + std::vector& indices, + float size, + bool rhcoords) +{ + ComputeIcosahedron(vertices, indices, size, rhcoords); +} + + +//-------------------------------------------------------------------------------------- +// Teapot +//-------------------------------------------------------------------------------------- + +_Use_decl_annotations_ +std::unique_ptr GeometricPrimitive::CreateTeapot( + ID3D11DeviceContext* deviceContext, + float size, + size_t tessellation, + bool rhcoords) +{ + VertexCollection vertices; + IndexCollection indices; + ComputeTeapot(vertices, indices, size, tessellation, rhcoords); + + // Create the primitive object. + std::unique_ptr primitive(new GeometricPrimitive()); + + primitive->pImpl->Initialize(deviceContext, vertices, indices); + + return primitive; +} + +void GeometricPrimitive::CreateTeapot( + std::vector& vertices, + std::vector& indices, + float size, + size_t tessellation, + bool rhcoords) +{ + ComputeTeapot(vertices, indices, size, tessellation, rhcoords); +} + + +//-------------------------------------------------------------------------------------- +// Custom +//-------------------------------------------------------------------------------------- + +_Use_decl_annotations_ +std::unique_ptr GeometricPrimitive::CreateCustom( + ID3D11DeviceContext* deviceContext, + const std::vector& vertices, + const std::vector& indices) +{ + // Extra validation + if (vertices.empty() || indices.empty()) + throw std::exception("Requires both vertices and indices"); + + if (indices.size() % 3) + throw std::exception("Expected triangular faces"); + + size_t nVerts = vertices.size(); + if (nVerts >= USHRT_MAX) + throw std::exception("Too many vertices for 16-bit index buffer"); + + for (auto it = indices.cbegin(); it != 
indices.cend(); ++it) + { + if (*it >= nVerts) + { + throw std::exception("Index not in vertices list"); + } + } + + // Create the primitive object. + std::unique_ptr primitive(new GeometricPrimitive()); + + primitive->pImpl->Initialize(deviceContext, vertices, indices); + + return primitive; +} diff --git a/Sdk/External/DirectXTK/Src/Geometry.cpp b/Sdk/External/DirectXTK/Src/Geometry.cpp new file mode 100644 index 0000000..e41bbc6 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Geometry.cpp @@ -0,0 +1,1195 @@ +//-------------------------------------------------------------------------------------- +// File: Geometry.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "Geometry.h" +#include "Bezier.h" + +using namespace DirectX; + +namespace +{ + constexpr float SQRT2 = 1.41421356237309504880f; + constexpr float SQRT3 = 1.73205080756887729352f; + constexpr float SQRT6 = 2.44948974278317809820f; + + inline void CheckIndexOverflow(size_t value) + { + // Use >=, not > comparison, because some D3D level 9_x hardware does not support 0xFFFF index values. + if (value >= USHRT_MAX) + throw std::exception("Index value out of range: cannot tesselate primitive so finely"); + } + + + // Collection types used when generating the geometry. + inline void index_push_back(IndexCollection& indices, size_t value) + { + CheckIndexOverflow(value); + indices.push_back(static_cast(value)); + } + + + // Helper for flipping winding of geometric primitives for LH vs. 
RH coords + inline void ReverseWinding(IndexCollection& indices, VertexCollection& vertices) + { + assert((indices.size() % 3) == 0); + for (auto it = indices.begin(); it != indices.end(); it += 3) + { + std::swap(*it, *(it + 2)); + } + + for (auto it = vertices.begin(); it != vertices.end(); ++it) + { + it->textureCoordinate.x = (1.f - it->textureCoordinate.x); + } + } + + + // Helper for inverting normals of geometric primitives for 'inside' vs. 'outside' viewing + inline void InvertNormals(VertexCollection& vertices) + { + for (auto it = vertices.begin(); it != vertices.end(); ++it) + { + it->normal.x = -it->normal.x; + it->normal.y = -it->normal.y; + it->normal.z = -it->normal.z; + } + } +} + + +//-------------------------------------------------------------------------------------- +// Cube (aka a Hexahedron) or Box +//-------------------------------------------------------------------------------------- +void DirectX::ComputeBox(VertexCollection& vertices, IndexCollection& indices, const XMFLOAT3& size, bool rhcoords, bool invertn) +{ + vertices.clear(); + indices.clear(); + + // A box has six faces, each one pointing in a different direction. + constexpr int FaceCount = 6; + + static const XMVECTORF32 faceNormals[FaceCount] = + { + { { { 0, 0, 1, 0 } } }, + { { { 0, 0, -1, 0 } } }, + { { { 1, 0, 0, 0 } } }, + { { { -1, 0, 0, 0 } } }, + { { { 0, 1, 0, 0 } } }, + { { { 0, -1, 0, 0 } } }, + }; + + static const XMVECTORF32 textureCoordinates[4] = + { + { { { 1, 0, 0, 0 } } }, + { { { 1, 1, 0, 0 } } }, + { { { 0, 1, 0, 0 } } }, + { { { 0, 0, 0, 0 } } }, + }; + + XMVECTOR tsize = XMLoadFloat3(&size); + tsize = XMVectorDivide(tsize, g_XMTwo); + + // Create each face in turn. + for (int i = 0; i < FaceCount; i++) + { + XMVECTOR normal = faceNormals[i]; + + // Get two vectors perpendicular both to the face normal and to each other. + XMVECTOR basis = (i >= 4) ? 
g_XMIdentityR2 : g_XMIdentityR1; + + XMVECTOR side1 = XMVector3Cross(normal, basis); + XMVECTOR side2 = XMVector3Cross(normal, side1); + + // Six indices (two triangles) per face. + size_t vbase = vertices.size(); + index_push_back(indices, vbase + 0); + index_push_back(indices, vbase + 1); + index_push_back(indices, vbase + 2); + + index_push_back(indices, vbase + 0); + index_push_back(indices, vbase + 2); + index_push_back(indices, vbase + 3); + + // Four vertices per face. + // (normal - side1 - side2) * tsize // normal // t0 + vertices.push_back(VertexPositionNormalTexture(XMVectorMultiply(XMVectorSubtract(XMVectorSubtract(normal, side1), side2), tsize), normal, textureCoordinates[0])); + + // (normal - side1 + side2) * tsize // normal // t1 + vertices.push_back(VertexPositionNormalTexture(XMVectorMultiply(XMVectorAdd(XMVectorSubtract(normal, side1), side2), tsize), normal, textureCoordinates[1])); + + // (normal + side1 + side2) * tsize // normal // t2 + vertices.push_back(VertexPositionNormalTexture(XMVectorMultiply(XMVectorAdd(normal, XMVectorAdd(side1, side2)), tsize), normal, textureCoordinates[2])); + + // (normal + side1 - side2) * tsize // normal // t3 + vertices.push_back(VertexPositionNormalTexture(XMVectorMultiply(XMVectorSubtract(XMVectorAdd(normal, side1), side2), tsize), normal, textureCoordinates[3])); + } + + // Build RH above + if (!rhcoords) + ReverseWinding(indices, vertices); + + if (invertn) + InvertNormals(vertices); +} + + +//-------------------------------------------------------------------------------------- +// Sphere +//-------------------------------------------------------------------------------------- +void DirectX::ComputeSphere(VertexCollection& vertices, IndexCollection& indices, float diameter, size_t tessellation, bool rhcoords, bool invertn) +{ + vertices.clear(); + indices.clear(); + + if (tessellation < 3) + throw std::out_of_range("tesselation parameter out of range"); + + size_t verticalSegments = tessellation; + size_t 
horizontalSegments = tessellation * 2; + + float radius = diameter / 2; + + // Create rings of vertices at progressively higher latitudes. + for (size_t i = 0; i <= verticalSegments; i++) + { + float v = 1 - float(i) / float(verticalSegments); + + float latitude = (float(i) * XM_PI / float(verticalSegments)) - XM_PIDIV2; + float dy, dxz; + + XMScalarSinCos(&dy, &dxz, latitude); + + // Create a single ring of vertices at this latitude. + for (size_t j = 0; j <= horizontalSegments; j++) + { + float u = float(j) / float(horizontalSegments); + + float longitude = float(j) * XM_2PI / float(horizontalSegments); + float dx, dz; + + XMScalarSinCos(&dx, &dz, longitude); + + dx *= dxz; + dz *= dxz; + + XMVECTOR normal = XMVectorSet(dx, dy, dz, 0); + XMVECTOR textureCoordinate = XMVectorSet(u, v, 0, 0); + + vertices.push_back(VertexPositionNormalTexture(XMVectorScale(normal, radius), normal, textureCoordinate)); + } + } + + // Fill the index buffer with triangles joining each pair of latitude rings. 
+ size_t stride = horizontalSegments + 1; + + for (size_t i = 0; i < verticalSegments; i++) + { + for (size_t j = 0; j <= horizontalSegments; j++) + { + size_t nextI = i + 1; + size_t nextJ = (j + 1) % stride; + + index_push_back(indices, i * stride + j); + index_push_back(indices, nextI * stride + j); + index_push_back(indices, i * stride + nextJ); + + index_push_back(indices, i * stride + nextJ); + index_push_back(indices, nextI * stride + j); + index_push_back(indices, nextI * stride + nextJ); + } + } + + // Build RH above + if (!rhcoords) + ReverseWinding(indices, vertices); + + if (invertn) + InvertNormals(vertices); +} + + +//-------------------------------------------------------------------------------------- +// Geodesic sphere +//-------------------------------------------------------------------------------------- +void DirectX::ComputeGeoSphere(VertexCollection& vertices, IndexCollection& indices, float diameter, size_t tessellation, bool rhcoords) +{ + vertices.clear(); + indices.clear(); + + // An undirected edge between two vertices, represented by a pair of indexes into a vertex array. + // Becuse this edge is undirected, (a,b) is the same as (b,a). + using UndirectedEdge = std::pair; + + // Makes an undirected edge. Rather than overloading comparison operators to give us the (a,b)==(b,a) property, + // we'll just ensure that the larger of the two goes first. This'll simplify things greatly. + auto makeUndirectedEdge = [](uint16_t a, uint16_t b) noexcept + { + return std::make_pair(std::max(a, b), std::min(a, b)); + }; + + // Key: an edge + // Value: the index of the vertex which lies midway between the two vertices pointed to by the key value + // This map is used to avoid duplicating vertices when subdividing triangles along edges. 
+ using EdgeSubdivisionMap = std::map; + + + static const XMFLOAT3 OctahedronVertices[] = + { + // when looking down the negative z-axis (into the screen) + XMFLOAT3(0, 1, 0), // 0 top + XMFLOAT3(0, 0, -1), // 1 front + XMFLOAT3(1, 0, 0), // 2 right + XMFLOAT3(0, 0, 1), // 3 back + XMFLOAT3(-1, 0, 0), // 4 left + XMFLOAT3(0, -1, 0), // 5 bottom + }; + static const uint16_t OctahedronIndices[] = + { + 0, 1, 2, // top front-right face + 0, 2, 3, // top back-right face + 0, 3, 4, // top back-left face + 0, 4, 1, // top front-left face + 5, 1, 4, // bottom front-left face + 5, 4, 3, // bottom back-left face + 5, 3, 2, // bottom back-right face + 5, 2, 1, // bottom front-right face + }; + + const float radius = diameter / 2.0f; + + // Start with an octahedron; copy the data into the vertex/index collection. + + std::vector vertexPositions(std::begin(OctahedronVertices), std::end(OctahedronVertices)); + + indices.insert(indices.begin(), std::begin(OctahedronIndices), std::end(OctahedronIndices)); + + // We know these values by looking at the above index list for the octahedron. Despite the subdivisions that are + // about to go on, these values aren't ever going to change because the vertices don't move around in the array. + // We'll need these values later on to fix the singularities that show up at the poles. + const uint16_t northPoleIndex = 0; + const uint16_t southPoleIndex = 5; + + for (size_t iSubdivision = 0; iSubdivision < tessellation; ++iSubdivision) + { + assert(indices.size() % 3 == 0); // sanity + + // We use this to keep track of which edges have already been subdivided. + EdgeSubdivisionMap subdividedEdges; + + // The new index collection after subdivision. + IndexCollection newIndices; + + const size_t triangleCount = indices.size() / 3; + for (size_t iTriangle = 0; iTriangle < triangleCount; ++iTriangle) + { + // For each edge on this triangle, create a new vertex in the middle of that edge. 
+ // The winding order of the triangles we output are the same as the winding order of the inputs. + + // Indices of the vertices making up this triangle + uint16_t iv0 = indices[iTriangle * 3 + 0]; + uint16_t iv1 = indices[iTriangle * 3 + 1]; + uint16_t iv2 = indices[iTriangle * 3 + 2]; + + // Get the new vertices + XMFLOAT3 v01; // vertex on the midpoint of v0 and v1 + XMFLOAT3 v12; // ditto v1 and v2 + XMFLOAT3 v20; // ditto v2 and v0 + uint16_t iv01; // index of v01 + uint16_t iv12; // index of v12 + uint16_t iv20; // index of v20 + + // Function that, when given the index of two vertices, creates a new vertex at the midpoint of those vertices. + auto divideEdge = [&](uint16_t i0, uint16_t i1, XMFLOAT3& outVertex, uint16_t& outIndex) + { + const UndirectedEdge edge = makeUndirectedEdge(i0, i1); + + // Check to see if we've already generated this vertex + auto it = subdividedEdges.find(edge); + if (it != subdividedEdges.end()) + { + // We've already generated this vertex before + outIndex = it->second; // the index of this vertex + outVertex = vertexPositions[outIndex]; // and the vertex itself + } + else + { + // Haven't generated this vertex before: so add it now + + // outVertex = (vertices[i0] + vertices[i1]) / 2 + XMStoreFloat3( + &outVertex, + XMVectorScale( + XMVectorAdd(XMLoadFloat3(&vertexPositions[i0]), XMLoadFloat3(&vertexPositions[i1])), + 0.5f + ) + ); + + outIndex = static_cast(vertexPositions.size()); + CheckIndexOverflow(outIndex); + vertexPositions.push_back(outVertex); + + // Now add it to the map. + auto entry = std::make_pair(edge, outIndex); + subdividedEdges.insert(entry); + } + }; + + // Add/get new vertices and their indices + divideEdge(iv0, iv1, v01, iv01); + divideEdge(iv1, iv2, v12, iv12); + divideEdge(iv0, iv2, v20, iv20); + + // Add the new indices. 
We have four new triangles from our original one: + // v0 + // o + // /a\ + // v20 o---o v01 + // /b\c/d\ + // v2 o---o---o v1 + // v12 + const uint16_t indicesToAdd[] = + { + iv0, iv01, iv20, // a + iv20, iv12, iv2, // b + iv20, iv01, iv12, // c + iv01, iv1, iv12, // d + }; + newIndices.insert(newIndices.end(), std::begin(indicesToAdd), std::end(indicesToAdd)); + } + + indices = std::move(newIndices); + } + + // Now that we've completed subdivision, fill in the final vertex collection + vertices.reserve(vertexPositions.size()); + for (auto it = vertexPositions.begin(); it != vertexPositions.end(); ++it) + { + const auto& vertexValue = *it; + + auto normal = XMVector3Normalize(XMLoadFloat3(&vertexValue)); + auto pos = XMVectorScale(normal, radius); + + XMFLOAT3 normalFloat3; + XMStoreFloat3(&normalFloat3, normal); + + // calculate texture coordinates for this vertex + float longitude = atan2f(normalFloat3.x, -normalFloat3.z); + float latitude = acosf(normalFloat3.y); + + float u = longitude / XM_2PI + 0.5f; + float v = latitude / XM_PI; + + auto texcoord = XMVectorSet(1.0f - u, v, 0.0f, 0.0f); + vertices.push_back(VertexPositionNormalTexture(pos, normal, texcoord)); + } + + // There are a couple of fixes to do. One is a texture coordinate wraparound fixup. At some point, there will be + // a set of triangles somewhere in the mesh with texture coordinates such that the wraparound across 0.0/1.0 + // occurs across that triangle. Eg. when the left hand side of the triangle has a U coordinate of 0.98 and the + // right hand side has a U coordinate of 0.0. The intent is that such a triangle should render with a U of 0.98 to + // 1.0, not 0.98 to 0.0. If we don't do this fixup, there will be a visible seam across one side of the sphere. + // + // Luckily this is relatively easy to fix. There is a straight edge which runs down the prime meridian of the + // completed sphere. 
If you imagine the vertices along that edge, they circumscribe a semicircular arc starting at + // y=1 and ending at y=-1, and sweeping across the range of z=0 to z=1. x stays zero. It's along this edge that we + // need to duplicate our vertices - and provide the correct texture coordinates. + size_t preFixupVertexCount = vertices.size(); + for (size_t i = 0; i < preFixupVertexCount; ++i) + { + // This vertex is on the prime meridian if position.x and texcoord.u are both zero (allowing for small epsilon). + bool isOnPrimeMeridian = XMVector2NearEqual( + XMVectorSet(vertices[i].position.x, vertices[i].textureCoordinate.x, 0.0f, 0.0f), + XMVectorZero(), + XMVectorSplatEpsilon()); + + if (isOnPrimeMeridian) + { + size_t newIndex = vertices.size(); // the index of this vertex that we're about to add + CheckIndexOverflow(newIndex); + + // copy this vertex, correct the texture coordinate, and add the vertex + VertexPositionNormalTexture v = vertices[i]; + v.textureCoordinate.x = 1.0f; + vertices.push_back(v); + + // Now find all the triangles which contain this vertex and update them if necessary + for (size_t j = 0; j < indices.size(); j += 3) + { + uint16_t* triIndex0 = &indices[j + 0]; + uint16_t* triIndex1 = &indices[j + 1]; + uint16_t* triIndex2 = &indices[j + 2]; + + if (*triIndex0 == i) + { + // nothing; just keep going + } + else if (*triIndex1 == i) + { + std::swap(triIndex0, triIndex1); // swap the pointers (not the values) + } + else if (*triIndex2 == i) + { + std::swap(triIndex0, triIndex2); // swap the pointers (not the values) + } + else + { + // this triangle doesn't use the vertex we're interested in + continue; + } + + // If we got to this point then triIndex0 is the pointer to the index to the vertex we're looking at + assert(*triIndex0 == i); + assert(*triIndex1 != i && *triIndex2 != i); // assume no degenerate triangles + + const VertexPositionNormalTexture& v0 = vertices[*triIndex0]; + const VertexPositionNormalTexture& v1 = vertices[*triIndex1]; + 
const VertexPositionNormalTexture& v2 = vertices[*triIndex2]; + + // check the other two vertices to see if we might need to fix this triangle + + if (abs(v0.textureCoordinate.x - v1.textureCoordinate.x) > 0.5f || + abs(v0.textureCoordinate.x - v2.textureCoordinate.x) > 0.5f) + { + // yep; replace the specified index to point to the new, corrected vertex + *triIndex0 = static_cast(newIndex); + } + } + } + } + + // And one last fix we need to do: the poles. A common use-case of a sphere mesh is to map a rectangular texture onto + // it. If that happens, then the poles become singularities which map the entire top and bottom rows of the texture + // onto a single point. In general there's no real way to do that right. But to match the behavior of non-geodesic + // spheres, we need to duplicate the pole vertex for every triangle that uses it. This will introduce seams near the + // poles, but reduce stretching. + auto fixPole = [&](size_t poleIndex) + { + const auto& poleVertex = vertices[poleIndex]; + bool overwrittenPoleVertex = false; // overwriting the original pole vertex saves us one vertex + + for (size_t i = 0; i < indices.size(); i += 3) + { + // These pointers point to the three indices which make up this triangle. pPoleIndex is the pointer to the + // entry in the index array which represents the pole index, and the other two pointers point to the other + // two indices making up this triangle. 
+ uint16_t* pPoleIndex; + uint16_t* pOtherIndex0; + uint16_t* pOtherIndex1; + if (indices[i + 0] == poleIndex) + { + pPoleIndex = &indices[i + 0]; + pOtherIndex0 = &indices[i + 1]; + pOtherIndex1 = &indices[i + 2]; + } + else if (indices[i + 1] == poleIndex) + { + pPoleIndex = &indices[i + 1]; + pOtherIndex0 = &indices[i + 2]; + pOtherIndex1 = &indices[i + 0]; + } + else if (indices[i + 2] == poleIndex) + { + pPoleIndex = &indices[i + 2]; + pOtherIndex0 = &indices[i + 0]; + pOtherIndex1 = &indices[i + 1]; + } + else + { + continue; + } + + const auto& otherVertex0 = vertices[*pOtherIndex0]; + const auto& otherVertex1 = vertices[*pOtherIndex1]; + + // Calculate the texcoords for the new pole vertex, add it to the vertices and update the index + VertexPositionNormalTexture newPoleVertex = poleVertex; + newPoleVertex.textureCoordinate.x = (otherVertex0.textureCoordinate.x + otherVertex1.textureCoordinate.x) / 2; + newPoleVertex.textureCoordinate.y = poleVertex.textureCoordinate.y; + + if (!overwrittenPoleVertex) + { + vertices[poleIndex] = newPoleVertex; + overwrittenPoleVertex = true; + } + else + { + CheckIndexOverflow(vertices.size()); + + *pPoleIndex = static_cast(vertices.size()); + vertices.push_back(newPoleVertex); + } + } + }; + + fixPole(northPoleIndex); + fixPole(southPoleIndex); + + // Build RH above + if (!rhcoords) + ReverseWinding(indices, vertices); +} + + +//-------------------------------------------------------------------------------------- +// Cylinder / Cone +//-------------------------------------------------------------------------------------- +namespace +{ + // Helper computes a point on a unit circle, aligned to the x/z plane and centered on the origin. 
+ inline XMVECTOR GetCircleVector(size_t i, size_t tessellation) noexcept + { + float angle = float(i) * XM_2PI / float(tessellation); + float dx, dz; + + XMScalarSinCos(&dx, &dz, angle); + + XMVECTORF32 v = { { { dx, 0, dz, 0 } } }; + return v; + } + + inline XMVECTOR GetCircleTangent(size_t i, size_t tessellation) noexcept + { + float angle = (float(i) * XM_2PI / float(tessellation)) + XM_PIDIV2; + float dx, dz; + + XMScalarSinCos(&dx, &dz, angle); + + XMVECTORF32 v = { { { dx, 0, dz, 0 } } }; + return v; + } + + + // Helper creates a triangle fan to close the end of a cylinder / cone + void CreateCylinderCap(VertexCollection& vertices, IndexCollection& indices, size_t tessellation, float height, float radius, bool isTop) + { + // Create cap indices. + for (size_t i = 0; i < tessellation - 2; i++) + { + size_t i1 = (i + 1) % tessellation; + size_t i2 = (i + 2) % tessellation; + + if (isTop) + { + std::swap(i1, i2); + } + + size_t vbase = vertices.size(); + index_push_back(indices, vbase); + index_push_back(indices, vbase + i1); + index_push_back(indices, vbase + i2); + } + + // Which end of the cylinder is this? + XMVECTOR normal = g_XMIdentityR1; + XMVECTOR textureScale = g_XMNegativeOneHalf; + + if (!isTop) + { + normal = XMVectorNegate(normal); + textureScale = XMVectorMultiply(textureScale, g_XMNegateX); + } + + // Create cap vertices. 
+ for (size_t i = 0; i < tessellation; i++) + { + XMVECTOR circleVector = GetCircleVector(i, tessellation); + + XMVECTOR position = XMVectorAdd(XMVectorScale(circleVector, radius), XMVectorScale(normal, height)); + + XMVECTOR textureCoordinate = XMVectorMultiplyAdd(XMVectorSwizzle<0, 2, 3, 3>(circleVector), textureScale, g_XMOneHalf); + + vertices.push_back(VertexPositionNormalTexture(position, normal, textureCoordinate)); + } + } +} + +void DirectX::ComputeCylinder(VertexCollection& vertices, IndexCollection& indices, float height, float diameter, size_t tessellation, bool rhcoords) +{ + vertices.clear(); + indices.clear(); + + if (tessellation < 3) + throw std::out_of_range("tesselation parameter out of range"); + + height /= 2; + + XMVECTOR topOffset = XMVectorScale(g_XMIdentityR1, height); + + float radius = diameter / 2; + size_t stride = tessellation + 1; + + // Create a ring of triangles around the outside of the cylinder. + for (size_t i = 0; i <= tessellation; i++) + { + XMVECTOR normal = GetCircleVector(i, tessellation); + + XMVECTOR sideOffset = XMVectorScale(normal, radius); + + float u = float(i) / float(tessellation); + + XMVECTOR textureCoordinate = XMLoadFloat(&u); + + vertices.push_back(VertexPositionNormalTexture(XMVectorAdd(sideOffset, topOffset), normal, textureCoordinate)); + vertices.push_back(VertexPositionNormalTexture(XMVectorSubtract(sideOffset, topOffset), normal, XMVectorAdd(textureCoordinate, g_XMIdentityR1))); + + index_push_back(indices, i * 2); + index_push_back(indices, (i * 2 + 2) % (stride * 2)); + index_push_back(indices, i * 2 + 1); + + index_push_back(indices, i * 2 + 1); + index_push_back(indices, (i * 2 + 2) % (stride * 2)); + index_push_back(indices, (i * 2 + 3) % (stride * 2)); + } + + // Create flat triangle fan caps to seal the top and bottom. 
+ CreateCylinderCap(vertices, indices, tessellation, height, radius, true); + CreateCylinderCap(vertices, indices, tessellation, height, radius, false); + + // Build RH above + if (!rhcoords) + ReverseWinding(indices, vertices); +} + + +// Creates a cone primitive. +void DirectX::ComputeCone(VertexCollection& vertices, IndexCollection& indices, float diameter, float height, size_t tessellation, bool rhcoords) +{ + vertices.clear(); + indices.clear(); + + if (tessellation < 3) + throw std::out_of_range("tesselation parameter out of range"); + + height /= 2; + + XMVECTOR topOffset = XMVectorScale(g_XMIdentityR1, height); + + float radius = diameter / 2; + size_t stride = tessellation + 1; + + // Create a ring of triangles around the outside of the cone. + for (size_t i = 0; i <= tessellation; i++) + { + XMVECTOR circlevec = GetCircleVector(i, tessellation); + + XMVECTOR sideOffset = XMVectorScale(circlevec, radius); + + float u = float(i) / float(tessellation); + + XMVECTOR textureCoordinate = XMLoadFloat(&u); + + XMVECTOR pt = XMVectorSubtract(sideOffset, topOffset); + + XMVECTOR normal = XMVector3Cross( + GetCircleTangent(i, tessellation), + XMVectorSubtract(topOffset, pt)); + normal = XMVector3Normalize(normal); + + // Duplicate the top vertex for distinct normals + vertices.push_back(VertexPositionNormalTexture(topOffset, normal, g_XMZero)); + vertices.push_back(VertexPositionNormalTexture(pt, normal, XMVectorAdd(textureCoordinate, g_XMIdentityR1))); + + index_push_back(indices, i * 2); + index_push_back(indices, (i * 2 + 3) % (stride * 2)); + index_push_back(indices, (i * 2 + 1) % (stride * 2)); + } + + // Create flat triangle fan caps to seal the bottom. 
+ CreateCylinderCap(vertices, indices, tessellation, height, radius, false); + + // Build RH above + if (!rhcoords) + ReverseWinding(indices, vertices); +} + + +//-------------------------------------------------------------------------------------- +// Torus +//-------------------------------------------------------------------------------------- +void DirectX::ComputeTorus(VertexCollection& vertices, IndexCollection& indices, float diameter, float thickness, size_t tessellation, bool rhcoords) +{ + vertices.clear(); + indices.clear(); + + if (tessellation < 3) + throw std::out_of_range("tesselation parameter out of range"); + + size_t stride = tessellation + 1; + + // First we loop around the main ring of the torus. + for (size_t i = 0; i <= tessellation; i++) + { + float u = float(i) / float(tessellation); + + float outerAngle = float(i) * XM_2PI / float(tessellation) - XM_PIDIV2; + + // Create a transform matrix that will align geometry to + // slice perpendicularly though the current ring position. + XMMATRIX transform = XMMatrixTranslation(diameter / 2, 0, 0) * XMMatrixRotationY(outerAngle); + + // Now we loop along the other axis, around the side of the tube. + for (size_t j = 0; j <= tessellation; j++) + { + float v = 1 - float(j) / float(tessellation); + + float innerAngle = float(j) * XM_2PI / float(tessellation) + XM_PI; + float dx, dy; + + XMScalarSinCos(&dy, &dx, innerAngle); + + // Create a vertex. + XMVECTOR normal = XMVectorSet(dx, dy, 0, 0); + XMVECTOR position = XMVectorScale(normal, thickness / 2); + XMVECTOR textureCoordinate = XMVectorSet(u, v, 0, 0); + + position = XMVector3Transform(position, transform); + normal = XMVector3TransformNormal(normal, transform); + + vertices.push_back(VertexPositionNormalTexture(position, normal, textureCoordinate)); + + // And create indices for two triangles. 
+ size_t nextI = (i + 1) % stride; + size_t nextJ = (j + 1) % stride; + + index_push_back(indices, i * stride + j); + index_push_back(indices, i * stride + nextJ); + index_push_back(indices, nextI * stride + j); + + index_push_back(indices, i * stride + nextJ); + index_push_back(indices, nextI * stride + nextJ); + index_push_back(indices, nextI * stride + j); + } + } + + // Build RH above + if (!rhcoords) + ReverseWinding(indices, vertices); +} + + +//-------------------------------------------------------------------------------------- +// Tetrahedron +//-------------------------------------------------------------------------------------- +void DirectX::ComputeTetrahedron(VertexCollection& vertices, IndexCollection& indices, float size, bool rhcoords) +{ + vertices.clear(); + indices.clear(); + + static const XMVECTORF32 verts[4] = + { + { { { 0.f, 0.f, 1.f, 0 } } }, + { { { 2.f*SQRT2 / 3.f, 0.f, -1.f / 3.f, 0 } } }, + { { { -SQRT2 / 3.f, SQRT6 / 3.f, -1.f / 3.f, 0 } } }, + { { { -SQRT2 / 3.f, -SQRT6 / 3.f, -1.f / 3.f, 0 } } } + }; + + static const uint32_t faces[4 * 3] = + { + 0, 1, 2, + 0, 2, 3, + 0, 3, 1, + 1, 3, 2, + }; + + for (size_t j = 0; j < _countof(faces); j += 3) + { + uint32_t v0 = faces[j]; + uint32_t v1 = faces[j + 1]; + uint32_t v2 = faces[j + 2]; + + XMVECTOR normal = XMVector3Cross( + XMVectorSubtract(verts[v1].v, verts[v0].v), + XMVectorSubtract(verts[v2].v, verts[v0].v)); + normal = XMVector3Normalize(normal); + + size_t base = vertices.size(); + index_push_back(indices, base); + index_push_back(indices, base + 1); + index_push_back(indices, base + 2); + + // Duplicate vertices to use face normals + XMVECTOR position = XMVectorScale(verts[v0], size); + vertices.push_back(VertexPositionNormalTexture(position, normal, g_XMZero /* 0, 0 */)); + + position = XMVectorScale(verts[v1], size); + vertices.push_back(VertexPositionNormalTexture(position, normal, g_XMIdentityR0 /* 1, 0 */)); + + position = XMVectorScale(verts[v2], size); + 
vertices.push_back(VertexPositionNormalTexture(position, normal, g_XMIdentityR1 /* 0, 1 */)); + } + + // Built LH above + if (rhcoords) + ReverseWinding(indices, vertices); + + assert(vertices.size() == 4 * 3); + assert(indices.size() == 4 * 3); +} + + +//-------------------------------------------------------------------------------------- +// Octahedron +//-------------------------------------------------------------------------------------- +void DirectX::ComputeOctahedron(VertexCollection& vertices, IndexCollection& indices, float size, bool rhcoords) +{ + vertices.clear(); + indices.clear(); + + static const XMVECTORF32 verts[6] = + { + { { { 1, 0, 0, 0 } } }, + { { { -1, 0, 0, 0 } } }, + { { { 0, 1, 0, 0 } } }, + { { { 0, -1, 0, 0 } } }, + { { { 0, 0, 1, 0 } } }, + { { { 0, 0, -1, 0 } } } + }; + + static const uint32_t faces[8 * 3] = + { + 4, 0, 2, + 4, 2, 1, + 4, 1, 3, + 4, 3, 0, + 5, 2, 0, + 5, 1, 2, + 5, 3, 1, + 5, 0, 3 + }; + + for (size_t j = 0; j < _countof(faces); j += 3) + { + uint32_t v0 = faces[j]; + uint32_t v1 = faces[j + 1]; + uint32_t v2 = faces[j + 2]; + + XMVECTOR normal = XMVector3Cross( + XMVectorSubtract(verts[v1].v, verts[v0].v), + XMVectorSubtract(verts[v2].v, verts[v0].v)); + normal = XMVector3Normalize(normal); + + size_t base = vertices.size(); + index_push_back(indices, base); + index_push_back(indices, base + 1); + index_push_back(indices, base + 2); + + // Duplicate vertices to use face normals + XMVECTOR position = XMVectorScale(verts[v0], size); + vertices.push_back(VertexPositionNormalTexture(position, normal, g_XMZero /* 0, 0 */)); + + position = XMVectorScale(verts[v1], size); + vertices.push_back(VertexPositionNormalTexture(position, normal, g_XMIdentityR0 /* 1, 0 */)); + + position = XMVectorScale(verts[v2], size); + vertices.push_back(VertexPositionNormalTexture(position, normal, g_XMIdentityR1 /* 0, 1*/)); + } + + // Built LH above + if (rhcoords) + ReverseWinding(indices, vertices); + + assert(vertices.size() == 8 * 3); + 
assert(indices.size() == 8 * 3); +} + + +//-------------------------------------------------------------------------------------- +// Dodecahedron +//-------------------------------------------------------------------------------------- +void DirectX::ComputeDodecahedron(VertexCollection& vertices, IndexCollection& indices, float size, bool rhcoords) +{ + vertices.clear(); + indices.clear(); + + static const float a = 1.f / SQRT3; + static const float b = 0.356822089773089931942f; // sqrt( ( 3 - sqrt(5) ) / 6 ) + static const float c = 0.934172358962715696451f; // sqrt( ( 3 + sqrt(5) ) / 6 ); + + static const XMVECTORF32 verts[20] = + { + { { { a, a, a, 0 } } }, + { { { a, a, -a, 0 } } }, + { { { a, -a, a, 0 } } }, + { { { a, -a, -a, 0 } } }, + { { { -a, a, a, 0 } } }, + { { { -a, a, -a, 0 } } }, + { { { -a, -a, a, 0 } } }, + { { { -a, -a, -a, 0 } } }, + { { { b, c, 0, 0 } } }, + { { { -b, c, 0, 0 } } }, + { { { b, -c, 0, 0 } } }, + { { { -b, -c, 0, 0 } } }, + { { { c, 0, b, 0 } } }, + { { { c, 0, -b, 0 } } }, + { { { -c, 0, b, 0 } } }, + { { { -c, 0, -b, 0 } } }, + { { { 0, b, c, 0 } } }, + { { { 0, -b, c, 0 } } }, + { { { 0, b, -c, 0 } } }, + { { { 0, -b, -c, 0 } } } + }; + + static const uint32_t faces[12 * 5] = + { + 0, 8, 9, 4, 16, + 0, 16, 17, 2, 12, + 12, 2, 10, 3, 13, + 9, 5, 15, 14, 4, + 3, 19, 18, 1, 13, + 7, 11, 6, 14, 15, + 0, 12, 13, 1, 8, + 8, 1, 18, 5, 9, + 16, 4, 14, 6, 17, + 6, 11, 10, 2, 17, + 7, 15, 5, 18, 19, + 7, 19, 3, 10, 11, + }; + + static const XMVECTORF32 textureCoordinates[5] = + { + { { { 0.654508f, 0.0244717f, 0, 0 } } }, + { { { 0.0954915f, 0.206107f, 0, 0 } } }, + { { { 0.0954915f, 0.793893f, 0, 0 } } }, + { { { 0.654508f, 0.975528f, 0, 0 } } }, + { { { 1.f, 0.5f, 0, 0 } } } + }; + + static const uint32_t textureIndex[12][5] = + { + { 0, 1, 2, 3, 4 }, + { 2, 3, 4, 0, 1 }, + { 4, 0, 1, 2, 3 }, + { 1, 2, 3, 4, 0 }, + { 2, 3, 4, 0, 1 }, + { 0, 1, 2, 3, 4 }, + { 1, 2, 3, 4, 0 }, + { 4, 0, 1, 2, 3 }, + { 4, 0, 1, 2, 3 }, + { 1, 2, 3, 4, 0 
}, + { 0, 1, 2, 3, 4 }, + { 2, 3, 4, 0, 1 }, + }; + + size_t t = 0; + for (size_t j = 0; j < _countof(faces); j += 5, ++t) + { + uint32_t v0 = faces[j]; + uint32_t v1 = faces[j + 1]; + uint32_t v2 = faces[j + 2]; + uint32_t v3 = faces[j + 3]; + uint32_t v4 = faces[j + 4]; + + XMVECTOR normal = XMVector3Cross( + XMVectorSubtract(verts[v1].v, verts[v0].v), + XMVectorSubtract(verts[v2].v, verts[v0].v)); + normal = XMVector3Normalize(normal); + + size_t base = vertices.size(); + + index_push_back(indices, base); + index_push_back(indices, base + 1); + index_push_back(indices, base + 2); + + index_push_back(indices, base); + index_push_back(indices, base + 2); + index_push_back(indices, base + 3); + + index_push_back(indices, base); + index_push_back(indices, base + 3); + index_push_back(indices, base + 4); + + // Duplicate vertices to use face normals + XMVECTOR position = XMVectorScale(verts[v0], size); + vertices.push_back(VertexPositionNormalTexture(position, normal, textureCoordinates[textureIndex[t][0]])); + + position = XMVectorScale(verts[v1], size); + vertices.push_back(VertexPositionNormalTexture(position, normal, textureCoordinates[textureIndex[t][1]])); + + position = XMVectorScale(verts[v2], size); + vertices.push_back(VertexPositionNormalTexture(position, normal, textureCoordinates[textureIndex[t][2]])); + + position = XMVectorScale(verts[v3], size); + vertices.push_back(VertexPositionNormalTexture(position, normal, textureCoordinates[textureIndex[t][3]])); + + position = XMVectorScale(verts[v4], size); + vertices.push_back(VertexPositionNormalTexture(position, normal, textureCoordinates[textureIndex[t][4]])); + } + + // Built LH above + if (rhcoords) + ReverseWinding(indices, vertices); + + assert(vertices.size() == 12 * 5); + assert(indices.size() == 12 * 3 * 3); +} + + +//-------------------------------------------------------------------------------------- +// Icosahedron 
+//-------------------------------------------------------------------------------------- +void DirectX::ComputeIcosahedron(VertexCollection& vertices, IndexCollection& indices, float size, bool rhcoords) +{ + vertices.clear(); + indices.clear(); + + static const float t = 1.618033988749894848205f; // (1 + sqrt(5)) / 2 + static const float t2 = 1.519544995837552493271f; // sqrt( 1 + sqr( (1 + sqrt(5)) / 2 ) ) + + static const XMVECTORF32 verts[12] = + { + { { { t / t2, 1.f / t2, 0, 0 } } }, + { { { -t / t2, 1.f / t2, 0, 0 } } }, + { { { t / t2, -1.f / t2, 0, 0 } } }, + { { { -t / t2, -1.f / t2, 0, 0 } } }, + { { { 1.f / t2, 0, t / t2, 0 } } }, + { { { 1.f / t2, 0, -t / t2, 0 } } }, + { { { -1.f / t2, 0, t / t2, 0 } } }, + { { { -1.f / t2, 0, -t / t2, 0 } } }, + { { { 0, t / t2, 1.f / t2, 0 } } }, + { { { 0, -t / t2, 1.f / t2, 0 } } }, + { { { 0, t / t2, -1.f / t2, 0 } } }, + { { { 0, -t / t2, -1.f / t2, 0 } } } + }; + + static const uint32_t faces[20 * 3] = + { + 0, 8, 4, + 0, 5, 10, + 2, 4, 9, + 2, 11, 5, + 1, 6, 8, + 1, 10, 7, + 3, 9, 6, + 3, 7, 11, + 0, 10, 8, + 1, 8, 10, + 2, 9, 11, + 3, 11, 9, + 4, 2, 0, + 5, 0, 2, + 6, 1, 3, + 7, 3, 1, + 8, 6, 4, + 9, 4, 6, + 10, 5, 7, + 11, 7, 5 + }; + + for (size_t j = 0; j < _countof(faces); j += 3) + { + uint32_t v0 = faces[j]; + uint32_t v1 = faces[j + 1]; + uint32_t v2 = faces[j + 2]; + + XMVECTOR normal = XMVector3Cross( + XMVectorSubtract(verts[v1].v, verts[v0].v), + XMVectorSubtract(verts[v2].v, verts[v0].v)); + normal = XMVector3Normalize(normal); + + size_t base = vertices.size(); + index_push_back(indices, base); + index_push_back(indices, base + 1); + index_push_back(indices, base + 2); + + // Duplicate vertices to use face normals + XMVECTOR position = XMVectorScale(verts[v0], size); + vertices.push_back(VertexPositionNormalTexture(position, normal, g_XMZero /* 0, 0 */)); + + position = XMVectorScale(verts[v1], size); + vertices.push_back(VertexPositionNormalTexture(position, normal, g_XMIdentityR0 /* 1, 0 */)); 
+ + position = XMVectorScale(verts[v2], size); + vertices.push_back(VertexPositionNormalTexture(position, normal, g_XMIdentityR1 /* 0, 1 */)); + } + + // Built LH above + if (rhcoords) + ReverseWinding(indices, vertices); + + assert(vertices.size() == 20 * 3); + assert(indices.size() == 20 * 3); +} + + +//-------------------------------------------------------------------------------------- +// Teapot +//-------------------------------------------------------------------------------------- + +// Include the teapot control point data. +namespace +{ +#include "TeapotData.inc" + + // Tessellates the specified bezier patch. + void XM_CALLCONV TessellatePatch(VertexCollection& vertices, IndexCollection& indices, TeapotPatch const& patch, size_t tessellation, FXMVECTOR scale, bool isMirrored) + { + // Look up the 16 control points for this patch. + XMVECTOR controlPoints[16] = {}; + + for (int i = 0; i < 16; i++) + { + controlPoints[i] = XMVectorMultiply(TeapotControlPoints[patch.indices[i]], scale); + } + + // Create the index data. + size_t vbase = vertices.size(); + Bezier::CreatePatchIndices(tessellation, isMirrored, [&](size_t index) + { + index_push_back(indices, vbase + index); + }); + + // Create the vertex data. + Bezier::CreatePatchVertices(controlPoints, tessellation, isMirrored, [&](FXMVECTOR position, FXMVECTOR normal, FXMVECTOR textureCoordinate) + { + vertices.push_back(VertexPositionNormalTexture(position, normal, textureCoordinate)); + }); + } +} + + +// Creates a teapot primitive. 
+void DirectX::ComputeTeapot(VertexCollection& vertices, IndexCollection& indices, float size, size_t tessellation, bool rhcoords) +{ + vertices.clear(); + indices.clear(); + + if (tessellation < 1) + throw std::out_of_range("tesselation parameter out of range"); + + XMVECTOR scaleVector = XMVectorReplicate(size); + + XMVECTOR scaleNegateX = XMVectorMultiply(scaleVector, g_XMNegateX); + XMVECTOR scaleNegateZ = XMVectorMultiply(scaleVector, g_XMNegateZ); + XMVECTOR scaleNegateXZ = XMVectorMultiply(scaleVector, XMVectorMultiply(g_XMNegateX, g_XMNegateZ)); + + for (size_t i = 0; i < _countof(TeapotPatches); i++) + { + TeapotPatch const& patch = TeapotPatches[i]; + + // Because the teapot is symmetrical from left to right, we only store + // data for one side, then tessellate each patch twice, mirroring in X. + TessellatePatch(vertices, indices, patch, tessellation, scaleVector, false); + TessellatePatch(vertices, indices, patch, tessellation, scaleNegateX, true); + + if (patch.mirrorZ) + { + // Some parts of the teapot (the body, lid, and rim, but not the + // handle or spout) are also symmetrical from front to back, so + // we tessellate them four times, mirroring in Z as well as X. + TessellatePatch(vertices, indices, patch, tessellation, scaleNegateZ, true); + TessellatePatch(vertices, indices, patch, tessellation, scaleNegateXZ, false); + } + } + + // Built RH above + if (!rhcoords) + ReverseWinding(indices, vertices); +} diff --git a/Sdk/External/DirectXTK/Src/Geometry.h b/Sdk/External/DirectXTK/Src/Geometry.h new file mode 100644 index 0000000..73969df --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Geometry.h @@ -0,0 +1,29 @@ +//-------------------------------------------------------------------------------------- +// File: Geometry.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#include "VertexTypes.h" + +namespace DirectX +{ + using VertexCollection = std::vector; + using IndexCollection = std::vector; + + void ComputeBox(VertexCollection& vertices, IndexCollection& indices, const XMFLOAT3& size, bool rhcoords, bool invertn); + void ComputeSphere(VertexCollection& vertices, IndexCollection& indices, float diameter, size_t tessellation, bool rhcoords, bool invertn); + void ComputeGeoSphere(VertexCollection& vertices, IndexCollection& indices, float diameter, size_t tessellation, bool rhcoords); + void ComputeCylinder(VertexCollection& vertices, IndexCollection& indices, float height, float diameter, size_t tessellation, bool rhcoords); + void ComputeCone(VertexCollection& vertices, IndexCollection& indices, float diameter, float height, size_t tessellation, bool rhcoords); + void ComputeTorus(VertexCollection& vertices, IndexCollection& indices, float diameter, float thickness, size_t tessellation, bool rhcoords); + void ComputeTetrahedron(VertexCollection& vertices, IndexCollection& indices, float size, bool rhcoords); + void ComputeOctahedron(VertexCollection& vertices, IndexCollection& indices, float size, bool rhcoords); + void ComputeDodecahedron(VertexCollection& vertices, IndexCollection& indices, float size, bool rhcoords); + void ComputeIcosahedron(VertexCollection& vertices, IndexCollection& indices, float size, bool rhcoords); + void ComputeTeapot(VertexCollection& vertices, IndexCollection& indices, float size, size_t tessellation, bool rhcoords); +} diff --git a/Sdk/External/DirectXTK/Src/GraphicsMemory.cpp b/Sdk/External/DirectXTK/Src/GraphicsMemory.cpp new file mode 100644 index 0000000..15a0773 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/GraphicsMemory.cpp @@ -0,0 +1,327 @@ 
+//-------------------------------------------------------------------------------------- +// File: GraphicsMemory.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" + +#include "GraphicsMemory.h" +#include "DirectXHelpers.h" +#include "PlatformHelpers.h" + +using namespace DirectX; +using Microsoft::WRL::ComPtr; + + +#if defined(_XBOX_ONE) && defined(_TITLE) + +//====================================================================================== +// Xbox One Direct3D 11.x +//====================================================================================== + +class GraphicsMemory::Impl +{ +public: + Impl(GraphicsMemory* owner) : + mOwner(owner), + mCurrentFrame(0) + { + if (s_graphicsMemory) + { + throw std::exception("GraphicsMemory is a singleton"); + } + + s_graphicsMemory = this; + } + + ~Impl() + { + if (mDevice && mDeviceContext) + { + UINT64 finalFence = mDeviceContext->InsertFence(0); + + while (mDevice->IsFencePending(finalFence)) + { + SwitchToThread(); + } + + mDeviceContext.Reset(); + mDevice.Reset(); + } + + s_graphicsMemory = nullptr; + } + + void Initialize(_In_ ID3D11DeviceX* device, unsigned int backBufferCount) + { + assert(device != nullptr); + mDevice = device; + + device->GetImmediateContextX(mDeviceContext.GetAddressOf()); + + mFrames.resize(backBufferCount); + } + + void* Allocate(_In_opt_ ID3D11DeviceContext* deviceContext, size_t size, int alignment) + { + // Currently use a single global allocator instead of a per-context allocator + UNREFERENCED_PARAMETER(deviceContext); + + std::lock_guard lock(mGuard); + + return mFrames[mCurrentFrame].Allocate(size, alignment); + } + + void Commit() + { + std::lock_guard lock(mGuard); + + mFrames[mCurrentFrame].mFence = mDeviceContext->InsertFence(D3D11_INSERT_FENCE_NO_KICKOFF); 
+ + ++mCurrentFrame; + if (mCurrentFrame >= mFrames.size()) + { + mCurrentFrame = 0; + } + + mFrames[mCurrentFrame].WaitOnFence(mDevice.Get()); + + mFrames[mCurrentFrame].Clear(); + } + + GraphicsMemory* mOwner; + + std::mutex mGuard; + + struct MemoryPage + { + MemoryPage() noexcept : mPageSize(0), mGrfxMemory(nullptr) {} + + void Initialize(size_t reqSize) + { + mPageSize = 0x100000; // 1 MB general pages for Xbox One + if (mPageSize < reqSize) + { + mPageSize = AlignUp(reqSize, 65536); + } + + mGrfxMemory = VirtualAlloc(nullptr, mPageSize, + MEM_LARGE_PAGES | MEM_GRAPHICS | MEM_RESERVE | MEM_COMMIT, + PAGE_WRITECOMBINE | PAGE_READWRITE | PAGE_GPU_READONLY); + if (!mGrfxMemory) + throw std::bad_alloc(); + } + + size_t mPageSize; + void* mGrfxMemory; + }; + + struct MemoryFrame + { + MemoryFrame() noexcept : mCurOffset(0), mFence(0) {} + + ~MemoryFrame() { Clear(); } + + UINT mCurOffset; + + UINT64 mFence; + + void* Allocate(size_t size, size_t alignment) + { + size_t alignedSize = AlignUp(size, alignment); + + if (mPages.empty()) + { + MemoryPage newPage; + newPage.Initialize(alignedSize); + + mCurOffset = 0; + + mPages.emplace_back(newPage); + } + else + { + mCurOffset = AlignUp(mCurOffset, alignment); + + if (mCurOffset + alignedSize > mPages.front().mPageSize) + { + MemoryPage newPage; + newPage.Initialize(alignedSize); + + mCurOffset = 0; + + mPages.emplace_front(newPage); + } + } + + void* ptr = static_cast(mPages.front().mGrfxMemory) + mCurOffset; + + mCurOffset += static_cast(alignedSize); + + return ptr; + } + + void WaitOnFence(ID3D11DeviceX* device) + { + if (mFence) + { + while (device->IsFencePending(mFence)) + { + SwitchToThread(); + } + + mFence = 0; + } + } + + void Clear() + { + for (auto it = mPages.begin(); it != mPages.end(); ++it) + { + if (it->mGrfxMemory) + { + VirtualFree(it->mGrfxMemory, 0, MEM_RELEASE); + it->mGrfxMemory = nullptr; + } + } + + mPages.clear(); + + mCurOffset = 0; + } + + std::list mPages; + }; + + UINT mCurrentFrame; + 
std::vector mFrames; + + ComPtr mDevice; + ComPtr mDeviceContext; + + static GraphicsMemory::Impl* s_graphicsMemory; +}; + +GraphicsMemory::Impl* GraphicsMemory::Impl::s_graphicsMemory = nullptr; + +#else + +//====================================================================================== +// Null allocator for standard Direct3D +//====================================================================================== + +class GraphicsMemory::Impl +{ +public: + Impl(GraphicsMemory* owner) : + mOwner(owner) + { + if (s_graphicsMemory) + { + throw std::exception("GraphicsMemory is a singleton"); + } + + s_graphicsMemory = this; + } + + Impl(Impl&&) = default; + Impl& operator= (Impl&&) = default; + + Impl(Impl const&) = delete; + Impl& operator= (Impl const&) = delete; + + ~Impl() + { + s_graphicsMemory = nullptr; + } + + void Initialize(_In_ ID3D11Device* device, unsigned int backBufferCount) noexcept + { + UNREFERENCED_PARAMETER(device); + UNREFERENCED_PARAMETER(backBufferCount); + } + + void* Allocate(_In_opt_ ID3D11DeviceContext* context, size_t size, int alignment) noexcept + { + UNREFERENCED_PARAMETER(context); + UNREFERENCED_PARAMETER(size); + UNREFERENCED_PARAMETER(alignment); + return nullptr; + } + + void Commit() noexcept + { + } + + GraphicsMemory* mOwner; + + static GraphicsMemory::Impl* s_graphicsMemory; +}; + +GraphicsMemory::Impl* GraphicsMemory::Impl::s_graphicsMemory = nullptr; + +#endif + + +//-------------------------------------------------------------------------------------- + +#pragma warning( disable : 4355 ) + +// Public constructor. +#if defined(_XBOX_ONE) && defined(_TITLE) +GraphicsMemory::GraphicsMemory(_In_ ID3D11DeviceX* device, unsigned int backBufferCount) +#else +GraphicsMemory::GraphicsMemory(_In_ ID3D11Device* device, unsigned int backBufferCount) +#endif + : pImpl(std::make_unique(this)) +{ + pImpl->Initialize(device, backBufferCount); +} + + +// Move constructor. 
+GraphicsMemory::GraphicsMemory(GraphicsMemory&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ + pImpl->mOwner = this; +} + + +// Move assignment. +GraphicsMemory& GraphicsMemory::operator= (GraphicsMemory&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + pImpl->mOwner = this; + return *this; +} + + +// Public destructor. +GraphicsMemory::~GraphicsMemory() +{ +} + + +void* GraphicsMemory::Allocate(_In_opt_ ID3D11DeviceContext* context, size_t size, int alignment) +{ + return pImpl->Allocate(context, size, alignment); +} + + +void GraphicsMemory::Commit() +{ + pImpl->Commit(); +} + + +GraphicsMemory& GraphicsMemory::Get() +{ + if (!Impl::s_graphicsMemory || !Impl::s_graphicsMemory->mOwner) + throw std::exception("GraphicsMemory singleton not created"); + + return *Impl::s_graphicsMemory->mOwner; +} diff --git a/Sdk/External/DirectXTK/Src/Keyboard.cpp b/Sdk/External/DirectXTK/Src/Keyboard.cpp new file mode 100644 index 0000000..88439b1 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Keyboard.cpp @@ -0,0 +1,642 @@ +//-------------------------------------------------------------------------------------- +// File: Keyboard.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "Keyboard.h" + +#include "PlatformHelpers.h" + +using namespace DirectX; +using Microsoft::WRL::ComPtr; + +static_assert(sizeof(Keyboard::State) == (256 / 8), "Size mismatch for State"); + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wunused-function" +#endif + +namespace +{ + inline void KeyDown(int key, Keyboard::State& state) noexcept + { + if (key < 0 || key > 0xfe) + return; + + auto ptr = reinterpret_cast(&state); + + unsigned int bf = 1u << (key & 0x1f); + ptr[(key >> 5)] |= bf; + } + + inline void KeyUp(int key, Keyboard::State& state) noexcept + { + if (key < 0 || key > 0xfe) + return; + + auto ptr = reinterpret_cast(&state); + + unsigned int bf = 1u << (key & 0x1f); + ptr[(key >> 5)] &= ~bf; + } +} + + +#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_GAMES) + +#include + +//====================================================================================== +// GameInput +//====================================================================================== + +class Keyboard::Impl +{ +public: + Impl(Keyboard* owner) : + mOwner(owner), + mConnected(0), + mDeviceToken(0), + mKeyState{} + { + if (s_keyboard) + { + throw std::exception("Keyboard is a singleton"); + } + + s_keyboard = this; + + ThrowIfFailed(GameInputCreate(mGameInput.GetAddressOf())); + + ThrowIfFailed(mGameInput->RegisterDeviceCallback( + nullptr, + GameInputKindKeyboard, + GameInputDeviceConnected, + GameInputBlockingEnumeration, + this, + OnGameInputDevice, + &mDeviceToken)); + } + + Impl(Impl&&) = default; + Impl& operator= (Impl&&) = default; + + Impl(Impl const&) = delete; + Impl& operator= (Impl const&) = delete; + + ~Impl() + { + if (mDeviceToken) + { + if (mGameInput) + { + HRESULT hr = mGameInput->UnregisterCallback(mDeviceToken, 
UINT64_MAX); + if (FAILED(hr)) + { + DebugTrace("ERROR: GameInput::UnregisterCallback [keyboard] failed (%08X)", static_cast(hr)); + } + } + + mDeviceToken = 0; + } + + s_keyboard = nullptr; + } + + void GetState(State& state) const + { + state = {}; + + ComPtr reading; + if (SUCCEEDED(mGameInput->GetCurrentReading(GameInputKindKeyboard, nullptr, reading.GetAddressOf()))) + { + uint32_t readCount = reading->GetKeyState(c_MaxSimultaneousKeys, mKeyState); + for (size_t j = 0; j < readCount; ++j) + { + int vk = static_cast(mKeyState[j].virtualKey); + KeyDown(vk, state); + } + } + } + + void Reset() noexcept + { + } + + bool IsConnected() const + { + return mConnected > 0; + } + + Keyboard* mOwner; + uint32_t mConnected; + + static Keyboard::Impl* s_keyboard; + +private: + static constexpr size_t c_MaxSimultaneousKeys = 16; + + ComPtr mGameInput; + GameInputCallbackToken mDeviceToken; + + mutable GameInputKeyState mKeyState[c_MaxSimultaneousKeys]; + + static void CALLBACK OnGameInputDevice( + _In_ GameInputCallbackToken, + _In_ void * context, + _In_ IGameInputDevice *, + _In_ uint64_t, + _In_ GameInputDeviceStatus currentStatus, + _In_ GameInputDeviceStatus) noexcept + { + auto impl = reinterpret_cast(context); + + if (currentStatus & GameInputDeviceConnected) + { + ++impl->mConnected; + } + else if (impl->mConnected > 0) + { + --impl->mConnected; + } + } +}; + + +Keyboard::Impl* Keyboard::Impl::s_keyboard = nullptr; + + +#elif !defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP) + +//====================================================================================== +// Win32 desktop implementation +//====================================================================================== + +// +// For a Win32 desktop application, call this function from your Window Message Procedure +// +// LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) +// { +// switch (message) +// { +// +// case WM_ACTIVATEAPP: +// 
Keyboard::ProcessMessage(message, wParam, lParam); +// break; +// +// case WM_KEYDOWN: +// case WM_SYSKEYDOWN: +// case WM_KEYUP: +// case WM_SYSKEYUP: +// Keyboard::ProcessMessage(message, wParam, lParam); +// break; +// +// } +// } +// + +class Keyboard::Impl +{ +public: + Impl(Keyboard* owner) : + mState{}, + mOwner(owner) + { + if (s_keyboard) + { + throw std::exception("Keyboard is a singleton"); + } + + s_keyboard = this; + } + + Impl(Impl&&) = default; + Impl& operator= (Impl&&) = default; + + Impl(Impl const&) = delete; + Impl& operator= (Impl const&) = delete; + + ~Impl() + { + s_keyboard = nullptr; + } + + void GetState(State& state) const + { + memcpy(&state, &mState, sizeof(State)); + } + + void Reset() noexcept + { + memset(&mState, 0, sizeof(State)); + } + + bool IsConnected() const + { + return true; + } + + State mState; + Keyboard* mOwner; + + static Keyboard::Impl* s_keyboard; +}; + + +Keyboard::Impl* Keyboard::Impl::s_keyboard = nullptr; + + +void Keyboard::ProcessMessage(UINT message, WPARAM wParam, LPARAM lParam) +{ + auto pImpl = Impl::s_keyboard; + + if (!pImpl) + return; + + bool down = false; + + switch (message) + { + case WM_ACTIVATEAPP: + pImpl->Reset(); + return; + + case WM_KEYDOWN: + case WM_SYSKEYDOWN: + down = true; + break; + + case WM_KEYUP: + case WM_SYSKEYUP: + break; + + default: + return; + } + + int vk = static_cast(wParam); + switch (vk) + { + case VK_SHIFT: + vk = static_cast( + MapVirtualKey((static_cast(lParam) & 0x00ff0000) >> 16u, + MAPVK_VSC_TO_VK_EX)); + if (!down) + { + // Workaround to ensure left vs. right shift get cleared when both were pressed at same time + KeyUp(VK_LSHIFT, pImpl->mState); + KeyUp(VK_RSHIFT, pImpl->mState); + } + break; + + case VK_CONTROL: + vk = (static_cast(lParam) & 0x01000000) ? VK_RCONTROL : VK_LCONTROL; + break; + + case VK_MENU: + vk = (static_cast(lParam) & 0x01000000) ? 
VK_RMENU : VK_LMENU; + break; + } + + if (down) + { + KeyDown(vk, pImpl->mState); + } + else + { + KeyUp(vk, pImpl->mState); + } +} + + +#else + +//====================================================================================== +// Windows Store or Universal Windows Platform (UWP) app implementation +//====================================================================================== + +// +// For a Windows Store app or Universal Windows Platform (UWP) app, add the following: +// +// void App::SetWindow(CoreWindow^ window ) +// { +// m_keyboard->SetWindow(window); +// } +// + +#include + +class Keyboard::Impl +{ +public: + Impl(Keyboard* owner) : + mState{}, + mOwner(owner), + mAcceleratorKeyToken{}, + mActivatedToken{} + { + if (s_keyboard) + { + throw std::exception("Keyboard is a singleton"); + } + + s_keyboard = this; + } + + ~Impl() + { + s_keyboard = nullptr; + + RemoveHandlers(); + } + + void GetState(State& state) const + { + memcpy(&state, &mState, sizeof(State)); + } + + void Reset() noexcept + { + memset(&mState, 0, sizeof(State)); + } + + bool IsConnected() const + { + using namespace Microsoft::WRL; + using namespace Microsoft::WRL::Wrappers; + using namespace ABI::Windows::Devices::Input; + using namespace ABI::Windows::Foundation; + + ComPtr caps; + HRESULT hr = RoActivateInstance(HStringReference(RuntimeClass_Windows_Devices_Input_KeyboardCapabilities).Get(), &caps); + ThrowIfFailed(hr); + + INT32 value; + if (SUCCEEDED(caps->get_KeyboardPresent(&value))) + { + return value != 0; + } + + return false; + } + + void SetWindow(ABI::Windows::UI::Core::ICoreWindow* window) + { + using namespace Microsoft::WRL; + using namespace Microsoft::WRL::Wrappers; + using namespace ABI::Windows::UI::Core; + + if (mWindow.Get() == window) + return; + + RemoveHandlers(); + + mWindow = window; + + if (!window) + return; + + typedef __FITypedEventHandler_2_Windows__CUI__CCore__CCoreWindow_Windows__CUI__CCore__CWindowActivatedEventArgs ActivatedHandler; + 
HRESULT hr = window->add_Activated(Callback(Activated).Get(), &mActivatedToken); + ThrowIfFailed(hr); + + ComPtr dispatcher; + hr = window->get_Dispatcher(dispatcher.GetAddressOf()); + ThrowIfFailed(hr); + + ComPtr keys; + hr = dispatcher.As(&keys); + ThrowIfFailed(hr); + + typedef __FITypedEventHandler_2_Windows__CUI__CCore__CCoreDispatcher_Windows__CUI__CCore__CAcceleratorKeyEventArgs AcceleratorKeyHandler; + hr = keys->add_AcceleratorKeyActivated(Callback(AcceleratorKeyEvent).Get(), &mAcceleratorKeyToken); + ThrowIfFailed(hr); + } + + State mState; + Keyboard* mOwner; + + static Keyboard::Impl* s_keyboard; + +private: + ComPtr mWindow; + + EventRegistrationToken mAcceleratorKeyToken; + EventRegistrationToken mActivatedToken; + + void RemoveHandlers() + { + if (mWindow) + { + using namespace ABI::Windows::UI::Core; + + ComPtr dispatcher; + HRESULT hr = mWindow->get_Dispatcher(dispatcher.GetAddressOf()); + ThrowIfFailed(hr); + + (void)mWindow->remove_Activated(mActivatedToken); + mActivatedToken.value = 0; + + ComPtr keys; + hr = dispatcher.As(&keys); + ThrowIfFailed(hr); + + (void)keys->remove_AcceleratorKeyActivated(mAcceleratorKeyToken); + mAcceleratorKeyToken.value = 0; + } + } + + static HRESULT Activated(IInspectable *, ABI::Windows::UI::Core::IWindowActivatedEventArgs*) + { + auto pImpl = Impl::s_keyboard; + + if (!pImpl) + return S_OK; + + pImpl->Reset(); + + return S_OK; + } + + static HRESULT AcceleratorKeyEvent(IInspectable *, ABI::Windows::UI::Core::IAcceleratorKeyEventArgs* args) + { + using namespace ABI::Windows::System; + using namespace ABI::Windows::UI::Core; + + auto pImpl = Impl::s_keyboard; + + if (!pImpl) + return S_OK; + + CoreAcceleratorKeyEventType evtType; + HRESULT hr = args->get_EventType(&evtType); + ThrowIfFailed(hr); + + bool down = false; + + switch (evtType) + { + case CoreAcceleratorKeyEventType_KeyDown: + case CoreAcceleratorKeyEventType_SystemKeyDown: + down = true; + break; + + case CoreAcceleratorKeyEventType_KeyUp: + case 
CoreAcceleratorKeyEventType_SystemKeyUp: + break; + + default: + return S_OK; + } + + CorePhysicalKeyStatus status; + hr = args->get_KeyStatus(&status); + ThrowIfFailed(hr); + + VirtualKey virtualKey; + hr = args->get_VirtualKey(&virtualKey); + ThrowIfFailed(hr); + + int vk = static_cast(virtualKey); + + switch (vk) + { + case VK_SHIFT: + vk = (status.ScanCode == 0x36) ? VK_RSHIFT : VK_LSHIFT; + if (!down) + { + // Workaround to ensure left vs. right shift get cleared when both were pressed at same time + KeyUp(VK_LSHIFT, pImpl->mState); + KeyUp(VK_RSHIFT, pImpl->mState); + } + break; + + case VK_CONTROL: + vk = (status.IsExtendedKey) ? VK_RCONTROL : VK_LCONTROL; + break; + + case VK_MENU: + vk = (status.IsExtendedKey) ? VK_RMENU : VK_LMENU; + break; + } + + if (down) + { + KeyDown(vk, pImpl->mState); + } + else + { + KeyUp(vk, pImpl->mState); + } + + return S_OK; + } +}; + + +Keyboard::Impl* Keyboard::Impl::s_keyboard = nullptr; + + +void Keyboard::SetWindow(ABI::Windows::UI::Core::ICoreWindow* window) +{ + pImpl->SetWindow(window); +} + +#endif + +#pragma warning( disable : 4355 ) + +// Public constructor. +Keyboard::Keyboard() noexcept(false) + : pImpl(std::make_unique(this)) +{ +} + + +// Move constructor. +Keyboard::Keyboard(Keyboard&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ + pImpl->mOwner = this; +} + + +// Move assignment. +Keyboard& Keyboard::operator= (Keyboard&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + pImpl->mOwner = this; + return *this; +} + + +// Public destructor. 
+Keyboard::~Keyboard() +{ +} + + +Keyboard::State Keyboard::GetState() const +{ + State state; + pImpl->GetState(state); + return state; +} + + +void Keyboard::Reset() noexcept +{ + pImpl->Reset(); +} + + +bool Keyboard::IsConnected() const +{ + return pImpl->IsConnected(); +} + +Keyboard& Keyboard::Get() +{ + if (!Impl::s_keyboard || !Impl::s_keyboard->mOwner) + throw std::exception("Keyboard is a singleton"); + + return *Impl::s_keyboard->mOwner; +} + + + +//====================================================================================== +// KeyboardStateTracker +//====================================================================================== + +void Keyboard::KeyboardStateTracker::Update(const State& state) noexcept +{ + auto currPtr = reinterpret_cast(&state); + auto prevPtr = reinterpret_cast(&lastState); + auto releasedPtr = reinterpret_cast(&released); + auto pressedPtr = reinterpret_cast(&pressed); + for (size_t j = 0; j < (256 / 32); ++j) + { + *pressedPtr = *currPtr & ~(*prevPtr); + *releasedPtr = ~(*currPtr) & *prevPtr; + + ++currPtr; + ++prevPtr; + ++releasedPtr; + ++pressedPtr; + } + + lastState = state; +} + + +void Keyboard::KeyboardStateTracker::Reset() noexcept +{ + memset(this, 0, sizeof(KeyboardStateTracker)); +} diff --git a/Sdk/External/DirectXTK/Src/LoaderHelpers.h b/Sdk/External/DirectXTK/Src/LoaderHelpers.h new file mode 100644 index 0000000..d8aac62 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/LoaderHelpers.h @@ -0,0 +1,1020 @@ +//-------------------------------------------------------------------------------------- +// File: LoaderHelpers.h +// +// Helper functions for texture loaders and screen grabber +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#pragma once + +#include "DDS.h" +#include "DDSTextureLoader.h" +#include "PlatformHelpers.h" + + +namespace DirectX +{ + namespace LoaderHelpers + { + //-------------------------------------------------------------------------------------- + // Return the BPP for a particular format + //-------------------------------------------------------------------------------------- + inline size_t BitsPerPixel(_In_ DXGI_FORMAT fmt) noexcept + { + switch (fmt) + { + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R32G32B32A32_UINT: + case DXGI_FORMAT_R32G32B32A32_SINT: + return 128; + + case DXGI_FORMAT_R32G32B32_TYPELESS: + case DXGI_FORMAT_R32G32B32_FLOAT: + case DXGI_FORMAT_R32G32B32_UINT: + case DXGI_FORMAT_R32G32B32_SINT: + return 96; + + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + case DXGI_FORMAT_R16G16B16A16_UNORM: + case DXGI_FORMAT_R16G16B16A16_UINT: + case DXGI_FORMAT_R16G16B16A16_SNORM: + case DXGI_FORMAT_R16G16B16A16_SINT: + case DXGI_FORMAT_R32G32_TYPELESS: + case DXGI_FORMAT_R32G32_FLOAT: + case DXGI_FORMAT_R32G32_UINT: + case DXGI_FORMAT_R32G32_SINT: + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + case DXGI_FORMAT_Y416: + case DXGI_FORMAT_Y210: + case DXGI_FORMAT_Y216: + return 64; + + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R10G10B10A2_UINT: + case DXGI_FORMAT_R11G11B10_FLOAT: + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_R8G8B8A8_UINT: + case DXGI_FORMAT_R8G8B8A8_SNORM: + case DXGI_FORMAT_R8G8B8A8_SINT: + case 
DXGI_FORMAT_R16G16_TYPELESS: + case DXGI_FORMAT_R16G16_FLOAT: + case DXGI_FORMAT_R16G16_UNORM: + case DXGI_FORMAT_R16G16_UINT: + case DXGI_FORMAT_R16G16_SNORM: + case DXGI_FORMAT_R16G16_SINT: + case DXGI_FORMAT_R32_TYPELESS: + case DXGI_FORMAT_D32_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_R32_UINT: + case DXGI_FORMAT_R32_SINT: + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: + case DXGI_FORMAT_R8G8_B8G8_UNORM: + case DXGI_FORMAT_G8R8_G8B8_UNORM: + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM: + case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: + case DXGI_FORMAT_B8G8R8A8_TYPELESS: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8X8_TYPELESS: + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + case DXGI_FORMAT_AYUV: + case DXGI_FORMAT_Y410: + case DXGI_FORMAT_YUY2: +#if (defined(_XBOX_ONE) && defined(_TITLE)) || defined(_GAMING_XBOX) + case DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT: + case DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT: + case DXGI_FORMAT_R10G10B10_SNORM_A2_UNORM: +#endif + return 32; + + case DXGI_FORMAT_P010: + case DXGI_FORMAT_P016: +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN10) + case DXGI_FORMAT_V408: +#endif +#if (defined(_XBOX_ONE) && defined(_TITLE)) || defined(_GAMING_XBOX) + case DXGI_FORMAT_D16_UNORM_S8_UINT: + case DXGI_FORMAT_R16_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X16_TYPELESS_G8_UINT: +#endif + return 24; + + case DXGI_FORMAT_R8G8_TYPELESS: + case DXGI_FORMAT_R8G8_UNORM: + case DXGI_FORMAT_R8G8_UINT: + case DXGI_FORMAT_R8G8_SNORM: + case DXGI_FORMAT_R8G8_SINT: + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_R16_FLOAT: + case DXGI_FORMAT_D16_UNORM: + case DXGI_FORMAT_R16_UNORM: + case DXGI_FORMAT_R16_UINT: + case DXGI_FORMAT_R16_SNORM: + case DXGI_FORMAT_R16_SINT: + case DXGI_FORMAT_B5G6R5_UNORM: + case DXGI_FORMAT_B5G5R5A1_UNORM: + case DXGI_FORMAT_A8P8: + case 
DXGI_FORMAT_B4G4R4A4_UNORM: +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN10) + case DXGI_FORMAT_P208: + case DXGI_FORMAT_V208: +#endif + return 16; + + case DXGI_FORMAT_NV12: + case DXGI_FORMAT_420_OPAQUE: + case DXGI_FORMAT_NV11: + return 12; + + case DXGI_FORMAT_R8_TYPELESS: + case DXGI_FORMAT_R8_UNORM: + case DXGI_FORMAT_R8_UINT: + case DXGI_FORMAT_R8_SNORM: + case DXGI_FORMAT_R8_SINT: + case DXGI_FORMAT_A8_UNORM: + case DXGI_FORMAT_BC2_TYPELESS: + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_TYPELESS: + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_BC5_TYPELESS: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC5_SNORM: + case DXGI_FORMAT_BC6H_TYPELESS: + case DXGI_FORMAT_BC6H_UF16: + case DXGI_FORMAT_BC6H_SF16: + case DXGI_FORMAT_BC7_TYPELESS: + case DXGI_FORMAT_BC7_UNORM: + case DXGI_FORMAT_BC7_UNORM_SRGB: + case DXGI_FORMAT_AI44: + case DXGI_FORMAT_IA44: + case DXGI_FORMAT_P8: +#if (defined(_XBOX_ONE) && defined(_TITLE)) || defined(_GAMING_XBOX) + case DXGI_FORMAT_R4G4_UNORM: +#endif + return 8; + + case DXGI_FORMAT_R1_UNORM: + return 1; + + case DXGI_FORMAT_BC1_TYPELESS: + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC4_TYPELESS: + case DXGI_FORMAT_BC4_UNORM: + case DXGI_FORMAT_BC4_SNORM: + return 4; + + case DXGI_FORMAT_UNKNOWN: + case DXGI_FORMAT_FORCE_UINT: + default: + return 0; + } + } + + //-------------------------------------------------------------------------------------- + inline DXGI_FORMAT MakeSRGB(_In_ DXGI_FORMAT format) noexcept + { + switch (format) + { + case DXGI_FORMAT_R8G8B8A8_UNORM: + return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB; + + case DXGI_FORMAT_BC1_UNORM: + return DXGI_FORMAT_BC1_UNORM_SRGB; + + case DXGI_FORMAT_BC2_UNORM: + return DXGI_FORMAT_BC2_UNORM_SRGB; + + case DXGI_FORMAT_BC3_UNORM: + return DXGI_FORMAT_BC3_UNORM_SRGB; + + case DXGI_FORMAT_B8G8R8A8_UNORM: + return DXGI_FORMAT_B8G8R8A8_UNORM_SRGB; + + case 
DXGI_FORMAT_B8G8R8X8_UNORM: + return DXGI_FORMAT_B8G8R8X8_UNORM_SRGB; + + case DXGI_FORMAT_BC7_UNORM: + return DXGI_FORMAT_BC7_UNORM_SRGB; + + default: + return format; + } + } + + //-------------------------------------------------------------------------------------- + inline bool IsCompressed(_In_ DXGI_FORMAT fmt) noexcept + { + switch (fmt) + { + case DXGI_FORMAT_BC1_TYPELESS: + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC2_TYPELESS: + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_TYPELESS: + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_BC4_TYPELESS: + case DXGI_FORMAT_BC4_UNORM: + case DXGI_FORMAT_BC4_SNORM: + case DXGI_FORMAT_BC5_TYPELESS: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC5_SNORM: + case DXGI_FORMAT_BC6H_TYPELESS: + case DXGI_FORMAT_BC6H_UF16: + case DXGI_FORMAT_BC6H_SF16: + case DXGI_FORMAT_BC7_TYPELESS: + case DXGI_FORMAT_BC7_UNORM: + case DXGI_FORMAT_BC7_UNORM_SRGB: + return true; + + default: + return false; + } + } + + //-------------------------------------------------------------------------------------- + inline DXGI_FORMAT EnsureNotTypeless(DXGI_FORMAT fmt) noexcept + { + // Assumes UNORM or FLOAT; doesn't use UINT or SINT + switch (fmt) + { + case DXGI_FORMAT_R32G32B32A32_TYPELESS: return DXGI_FORMAT_R32G32B32A32_FLOAT; + case DXGI_FORMAT_R32G32B32_TYPELESS: return DXGI_FORMAT_R32G32B32_FLOAT; + case DXGI_FORMAT_R16G16B16A16_TYPELESS: return DXGI_FORMAT_R16G16B16A16_UNORM; + case DXGI_FORMAT_R32G32_TYPELESS: return DXGI_FORMAT_R32G32_FLOAT; + case DXGI_FORMAT_R10G10B10A2_TYPELESS: return DXGI_FORMAT_R10G10B10A2_UNORM; + case DXGI_FORMAT_R8G8B8A8_TYPELESS: return DXGI_FORMAT_R8G8B8A8_UNORM; + case DXGI_FORMAT_R16G16_TYPELESS: return DXGI_FORMAT_R16G16_UNORM; + case DXGI_FORMAT_R32_TYPELESS: return DXGI_FORMAT_R32_FLOAT; + case DXGI_FORMAT_R8G8_TYPELESS: return DXGI_FORMAT_R8G8_UNORM; + case DXGI_FORMAT_R16_TYPELESS: 
return DXGI_FORMAT_R16_UNORM; + case DXGI_FORMAT_R8_TYPELESS: return DXGI_FORMAT_R8_UNORM; + case DXGI_FORMAT_BC1_TYPELESS: return DXGI_FORMAT_BC1_UNORM; + case DXGI_FORMAT_BC2_TYPELESS: return DXGI_FORMAT_BC2_UNORM; + case DXGI_FORMAT_BC3_TYPELESS: return DXGI_FORMAT_BC3_UNORM; + case DXGI_FORMAT_BC4_TYPELESS: return DXGI_FORMAT_BC4_UNORM; + case DXGI_FORMAT_BC5_TYPELESS: return DXGI_FORMAT_BC5_UNORM; + case DXGI_FORMAT_B8G8R8A8_TYPELESS: return DXGI_FORMAT_B8G8R8A8_UNORM; + case DXGI_FORMAT_B8G8R8X8_TYPELESS: return DXGI_FORMAT_B8G8R8X8_UNORM; + case DXGI_FORMAT_BC7_TYPELESS: return DXGI_FORMAT_BC7_UNORM; + default: return fmt; + } + } + + //-------------------------------------------------------------------------------------- + inline HRESULT LoadTextureDataFromMemory( + _In_reads_(ddsDataSize) const uint8_t* ddsData, + size_t ddsDataSize, + const DDS_HEADER** header, + const uint8_t** bitData, + size_t* bitSize) noexcept + { + if (!header || !bitData || !bitSize) + { + return E_POINTER; + } + + if (ddsDataSize > UINT32_MAX) + { + return E_FAIL; + } + + if (ddsDataSize < (sizeof(uint32_t) + sizeof(DDS_HEADER))) + { + return E_FAIL; + } + + // DDS files always start with the same magic number ("DDS ") + auto dwMagicNumber = *reinterpret_cast(ddsData); + if (dwMagicNumber != DDS_MAGIC) + { + return E_FAIL; + } + + auto hdr = reinterpret_cast(ddsData + sizeof(uint32_t)); + + // Verify header to validate DDS file + if (hdr->size != sizeof(DDS_HEADER) || + hdr->ddspf.size != sizeof(DDS_PIXELFORMAT)) + { + return E_FAIL; + } + + // Check for DX10 extension + bool bDXT10Header = false; + if ((hdr->ddspf.flags & DDS_FOURCC) && + (MAKEFOURCC('D', 'X', '1', '0') == hdr->ddspf.fourCC)) + { + // Must be long enough for both headers and magic value + if (ddsDataSize < (sizeof(DDS_HEADER) + sizeof(uint32_t) + sizeof(DDS_HEADER_DXT10))) + { + return E_FAIL; + } + + bDXT10Header = true; + } + + // setup the pointers in the process request + *header = hdr; + auto offset = 
sizeof(uint32_t) + + sizeof(DDS_HEADER) + + (bDXT10Header ? sizeof(DDS_HEADER_DXT10) : 0u); + *bitData = ddsData + offset; + *bitSize = ddsDataSize - offset; + + return S_OK; + } + + //-------------------------------------------------------------------------------------- + inline HRESULT LoadTextureDataFromFile( + _In_z_ const wchar_t* fileName, + std::unique_ptr& ddsData, + const DDS_HEADER** header, + const uint8_t** bitData, + size_t* bitSize) noexcept + { + if (!header || !bitData || !bitSize) + { + return E_POINTER; + } + + // open the file + #if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) + ScopedHandle hFile(safe_handle(CreateFile2(fileName, + GENERIC_READ, + FILE_SHARE_READ, + OPEN_EXISTING, + nullptr))); + #else + ScopedHandle hFile(safe_handle(CreateFileW(fileName, + GENERIC_READ, + FILE_SHARE_READ, + nullptr, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + nullptr))); + #endif + + if (!hFile) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + // Get the file size + FILE_STANDARD_INFO fileInfo; + if (!GetFileInformationByHandleEx(hFile.get(), FileStandardInfo, &fileInfo, sizeof(fileInfo))) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + // File is too big for 32-bit allocation, so reject read + if (fileInfo.EndOfFile.HighPart > 0) + { + return E_FAIL; + } + + // Need at least enough data to fill the header and magic number to be a valid DDS + if (fileInfo.EndOfFile.LowPart < (sizeof(uint32_t) + sizeof(DDS_HEADER))) + { + return E_FAIL; + } + + // create enough space for the file data + ddsData.reset(new (std::nothrow) uint8_t[fileInfo.EndOfFile.LowPart]); + if (!ddsData) + { + return E_OUTOFMEMORY; + } + + // read the data in + DWORD BytesRead = 0; + if (!ReadFile(hFile.get(), + ddsData.get(), + fileInfo.EndOfFile.LowPart, + &BytesRead, + nullptr + )) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + if (BytesRead < fileInfo.EndOfFile.LowPart) + { + return E_FAIL; + } + + // DDS files always start with the same magic number ("DDS ") + auto 
dwMagicNumber = *reinterpret_cast(ddsData.get()); + if (dwMagicNumber != DDS_MAGIC) + { + return E_FAIL; + } + + auto hdr = reinterpret_cast(ddsData.get() + sizeof(uint32_t)); + + // Verify header to validate DDS file + if (hdr->size != sizeof(DDS_HEADER) || + hdr->ddspf.size != sizeof(DDS_PIXELFORMAT)) + { + return E_FAIL; + } + + // Check for DX10 extension + bool bDXT10Header = false; + if ((hdr->ddspf.flags & DDS_FOURCC) && + (MAKEFOURCC('D', 'X', '1', '0') == hdr->ddspf.fourCC)) + { + // Must be long enough for both headers and magic value + if (fileInfo.EndOfFile.LowPart < (sizeof(DDS_HEADER) + sizeof(uint32_t) + sizeof(DDS_HEADER_DXT10))) + { + return E_FAIL; + } + + bDXT10Header = true; + } + + // setup the pointers in the process request + *header = hdr; + auto offset = sizeof(uint32_t) + sizeof(DDS_HEADER) + + (bDXT10Header ? sizeof(DDS_HEADER_DXT10) : 0u); + *bitData = ddsData.get() + offset; + *bitSize = fileInfo.EndOfFile.LowPart - offset; + + return S_OK; + } + + //-------------------------------------------------------------------------------------- + // Get surface information for a particular format + //-------------------------------------------------------------------------------------- + inline HRESULT GetSurfaceInfo( + _In_ size_t width, + _In_ size_t height, + _In_ DXGI_FORMAT fmt, + _Out_opt_ size_t* outNumBytes, + _Out_opt_ size_t* outRowBytes, + _Out_opt_ size_t* outNumRows) noexcept + { + uint64_t numBytes = 0; + uint64_t rowBytes = 0; + uint64_t numRows = 0; + + bool bc = false; + bool packed = false; + bool planar = false; + size_t bpe = 0; + switch (fmt) + { + case DXGI_FORMAT_BC1_TYPELESS: + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC4_TYPELESS: + case DXGI_FORMAT_BC4_UNORM: + case DXGI_FORMAT_BC4_SNORM: + bc = true; + bpe = 8; + break; + + case DXGI_FORMAT_BC2_TYPELESS: + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_TYPELESS: + case 
DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_BC5_TYPELESS: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC5_SNORM: + case DXGI_FORMAT_BC6H_TYPELESS: + case DXGI_FORMAT_BC6H_UF16: + case DXGI_FORMAT_BC6H_SF16: + case DXGI_FORMAT_BC7_TYPELESS: + case DXGI_FORMAT_BC7_UNORM: + case DXGI_FORMAT_BC7_UNORM_SRGB: + bc = true; + bpe = 16; + break; + + case DXGI_FORMAT_R8G8_B8G8_UNORM: + case DXGI_FORMAT_G8R8_G8B8_UNORM: + case DXGI_FORMAT_YUY2: + packed = true; + bpe = 4; + break; + + case DXGI_FORMAT_Y210: + case DXGI_FORMAT_Y216: + packed = true; + bpe = 8; + break; + + case DXGI_FORMAT_NV12: + case DXGI_FORMAT_420_OPAQUE: + #if (_WIN32_WINNT >= _WIN32_WINNT_WIN10) + case DXGI_FORMAT_P208: + #endif + planar = true; + bpe = 2; + break; + + case DXGI_FORMAT_P010: + case DXGI_FORMAT_P016: + planar = true; + bpe = 4; + break; + + #if (defined(_XBOX_ONE) && defined(_TITLE)) || defined(_GAMING_XBOX) + + case DXGI_FORMAT_D16_UNORM_S8_UINT: + case DXGI_FORMAT_R16_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X16_TYPELESS_G8_UINT: + planar = true; + bpe = 4; + break; + + #endif + + default: + break; + } + + if (bc) + { + uint64_t numBlocksWide = 0; + if (width > 0) + { + numBlocksWide = std::max(1u, (uint64_t(width) + 3u) / 4u); + } + uint64_t numBlocksHigh = 0; + if (height > 0) + { + numBlocksHigh = std::max(1u, (uint64_t(height) + 3u) / 4u); + } + rowBytes = numBlocksWide * bpe; + numRows = numBlocksHigh; + numBytes = rowBytes * numBlocksHigh; + } + else if (packed) + { + rowBytes = ((uint64_t(width) + 1u) >> 1) * bpe; + numRows = uint64_t(height); + numBytes = rowBytes * height; + } + else if (fmt == DXGI_FORMAT_NV11) + { + rowBytes = ((uint64_t(width) + 3u) >> 2) * 4u; + numRows = uint64_t(height) * 2u; // Direct3D makes this simplifying assumption, although it is larger than the 4:1:1 data + numBytes = rowBytes * numRows; + } + else if (planar) + { + rowBytes = ((uint64_t(width) + 1u) >> 1) * bpe; + numBytes = (rowBytes * uint64_t(height)) + 
((rowBytes * uint64_t(height) + 1u) >> 1); + numRows = height + ((uint64_t(height) + 1u) >> 1); + } + else + { + size_t bpp = BitsPerPixel(fmt); + if (!bpp) + return E_INVALIDARG; + + rowBytes = (uint64_t(width) * bpp + 7u) / 8u; // round up to nearest byte + numRows = uint64_t(height); + numBytes = rowBytes * height; + } + + #if defined(_M_IX86) || defined(_M_ARM) || defined(_M_HYBRID_X86_ARM64) + static_assert(sizeof(size_t) == 4, "Not a 32-bit platform!"); + if (numBytes > UINT32_MAX || rowBytes > UINT32_MAX || numRows > UINT32_MAX) + return HRESULT_FROM_WIN32(ERROR_ARITHMETIC_OVERFLOW); + #else + static_assert(sizeof(size_t) == 8, "Not a 64-bit platform!"); + #endif + + if (outNumBytes) + { + *outNumBytes = static_cast(numBytes); + } + if (outRowBytes) + { + *outRowBytes = static_cast(rowBytes); + } + if (outNumRows) + { + *outNumRows = static_cast(numRows); + } + + return S_OK; + } + + //-------------------------------------------------------------------------------------- + #define ISBITMASK( r,g,b,a ) ( ddpf.RBitMask == r && ddpf.GBitMask == g && ddpf.BBitMask == b && ddpf.ABitMask == a ) + + inline DXGI_FORMAT GetDXGIFormat(const DDS_PIXELFORMAT& ddpf) noexcept + { + if (ddpf.flags & DDS_RGB) + { + // Note that sRGB formats are written using the "DX10" extended header + + switch (ddpf.RGBBitCount) + { + case 32: + if (ISBITMASK(0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000)) + { + return DXGI_FORMAT_R8G8B8A8_UNORM; + } + + if (ISBITMASK(0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000)) + { + return DXGI_FORMAT_B8G8R8A8_UNORM; + } + + if (ISBITMASK(0x00ff0000, 0x0000ff00, 0x000000ff, 0)) + { + return DXGI_FORMAT_B8G8R8X8_UNORM; + } + + // No DXGI format maps to ISBITMASK(0x000000ff,0x0000ff00,0x00ff0000,0) aka D3DFMT_X8B8G8R8 + + // Note that many common DDS reader/writers (including D3DX) swap the + // the RED/BLUE masks for 10:10:10:2 formats. We assume + // below that the 'backwards' header mask is being used since it is most + // likely written by D3DX. 
The more robust solution is to use the 'DX10' + // header extension and specify the DXGI_FORMAT_R10G10B10A2_UNORM format directly + + // For 'correct' writers, this should be 0x000003ff,0x000ffc00,0x3ff00000 for RGB data + if (ISBITMASK(0x3ff00000, 0x000ffc00, 0x000003ff, 0xc0000000)) + { + return DXGI_FORMAT_R10G10B10A2_UNORM; + } + + // No DXGI format maps to ISBITMASK(0x000003ff,0x000ffc00,0x3ff00000,0xc0000000) aka D3DFMT_A2R10G10B10 + + if (ISBITMASK(0x0000ffff, 0xffff0000, 0, 0)) + { + return DXGI_FORMAT_R16G16_UNORM; + } + + if (ISBITMASK(0xffffffff, 0, 0, 0)) + { + // Only 32-bit color channel format in D3D9 was R32F + return DXGI_FORMAT_R32_FLOAT; // D3DX writes this out as a FourCC of 114 + } + break; + + case 24: + // No 24bpp DXGI formats aka D3DFMT_R8G8B8 + break; + + case 16: + if (ISBITMASK(0x7c00, 0x03e0, 0x001f, 0x8000)) + { + return DXGI_FORMAT_B5G5R5A1_UNORM; + } + if (ISBITMASK(0xf800, 0x07e0, 0x001f, 0)) + { + return DXGI_FORMAT_B5G6R5_UNORM; + } + + // No DXGI format maps to ISBITMASK(0x7c00,0x03e0,0x001f,0) aka D3DFMT_X1R5G5B5 + + if (ISBITMASK(0x0f00, 0x00f0, 0x000f, 0xf000)) + { + return DXGI_FORMAT_B4G4R4A4_UNORM; + } + + // No DXGI format maps to ISBITMASK(0x0f00,0x00f0,0x000f,0) aka D3DFMT_X4R4G4B4 + + // No 3:3:2, 3:3:2:8, or paletted DXGI formats aka D3DFMT_A8R3G3B2, D3DFMT_R3G3B2, D3DFMT_P8, D3DFMT_A8P8, etc. 
+ break; + } + } + else if (ddpf.flags & DDS_LUMINANCE) + { + if (8 == ddpf.RGBBitCount) + { + if (ISBITMASK(0xff, 0, 0, 0)) + { + return DXGI_FORMAT_R8_UNORM; // D3DX10/11 writes this out as DX10 extension + } + + // No DXGI format maps to ISBITMASK(0x0f,0,0,0xf0) aka D3DFMT_A4L4 + + if (ISBITMASK(0x00ff, 0, 0, 0xff00)) + { + return DXGI_FORMAT_R8G8_UNORM; // Some DDS writers assume the bitcount should be 8 instead of 16 + } + } + + if (16 == ddpf.RGBBitCount) + { + if (ISBITMASK(0xffff, 0, 0, 0)) + { + return DXGI_FORMAT_R16_UNORM; // D3DX10/11 writes this out as DX10 extension + } + if (ISBITMASK(0x00ff, 0, 0, 0xff00)) + { + return DXGI_FORMAT_R8G8_UNORM; // D3DX10/11 writes this out as DX10 extension + } + } + } + else if (ddpf.flags & DDS_ALPHA) + { + if (8 == ddpf.RGBBitCount) + { + return DXGI_FORMAT_A8_UNORM; + } + } + else if (ddpf.flags & DDS_BUMPDUDV) + { + if (16 == ddpf.RGBBitCount) + { + if (ISBITMASK(0x00ff, 0xff00, 0, 0)) + { + return DXGI_FORMAT_R8G8_SNORM; // D3DX10/11 writes this out as DX10 extension + } + } + + if (32 == ddpf.RGBBitCount) + { + if (ISBITMASK(0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000)) + { + return DXGI_FORMAT_R8G8B8A8_SNORM; // D3DX10/11 writes this out as DX10 extension + } + if (ISBITMASK(0x0000ffff, 0xffff0000, 0, 0)) + { + return DXGI_FORMAT_R16G16_SNORM; // D3DX10/11 writes this out as DX10 extension + } + + // No DXGI format maps to ISBITMASK(0x3ff00000, 0x000ffc00, 0x000003ff, 0xc0000000) aka D3DFMT_A2W10V10U10 + } + + // No DXGI format maps to DDPF_BUMPLUMINANCE aka D3DFMT_L6V5U5, D3DFMT_X8L8V8U8 + } + else if (ddpf.flags & DDS_FOURCC) + { + if (MAKEFOURCC('D', 'X', 'T', '1') == ddpf.fourCC) + { + return DXGI_FORMAT_BC1_UNORM; + } + if (MAKEFOURCC('D', 'X', 'T', '3') == ddpf.fourCC) + { + return DXGI_FORMAT_BC2_UNORM; + } + if (MAKEFOURCC('D', 'X', 'T', '5') == ddpf.fourCC) + { + return DXGI_FORMAT_BC3_UNORM; + } + + // While pre-multiplied alpha isn't directly supported by the DXGI formats, + // they are basically 
the same as these BC formats so they can be mapped + if (MAKEFOURCC('D', 'X', 'T', '2') == ddpf.fourCC) + { + return DXGI_FORMAT_BC2_UNORM; + } + if (MAKEFOURCC('D', 'X', 'T', '4') == ddpf.fourCC) + { + return DXGI_FORMAT_BC3_UNORM; + } + + if (MAKEFOURCC('A', 'T', 'I', '1') == ddpf.fourCC) + { + return DXGI_FORMAT_BC4_UNORM; + } + if (MAKEFOURCC('B', 'C', '4', 'U') == ddpf.fourCC) + { + return DXGI_FORMAT_BC4_UNORM; + } + if (MAKEFOURCC('B', 'C', '4', 'S') == ddpf.fourCC) + { + return DXGI_FORMAT_BC4_SNORM; + } + + if (MAKEFOURCC('A', 'T', 'I', '2') == ddpf.fourCC) + { + return DXGI_FORMAT_BC5_UNORM; + } + if (MAKEFOURCC('B', 'C', '5', 'U') == ddpf.fourCC) + { + return DXGI_FORMAT_BC5_UNORM; + } + if (MAKEFOURCC('B', 'C', '5', 'S') == ddpf.fourCC) + { + return DXGI_FORMAT_BC5_SNORM; + } + + // BC6H and BC7 are written using the "DX10" extended header + + if (MAKEFOURCC('R', 'G', 'B', 'G') == ddpf.fourCC) + { + return DXGI_FORMAT_R8G8_B8G8_UNORM; + } + if (MAKEFOURCC('G', 'R', 'G', 'B') == ddpf.fourCC) + { + return DXGI_FORMAT_G8R8_G8B8_UNORM; + } + + if (MAKEFOURCC('Y', 'U', 'Y', '2') == ddpf.fourCC) + { + return DXGI_FORMAT_YUY2; + } + + // Check for D3DFORMAT enums being set here + switch (ddpf.fourCC) + { + case 36: // D3DFMT_A16B16G16R16 + return DXGI_FORMAT_R16G16B16A16_UNORM; + + case 110: // D3DFMT_Q16W16V16U16 + return DXGI_FORMAT_R16G16B16A16_SNORM; + + case 111: // D3DFMT_R16F + return DXGI_FORMAT_R16_FLOAT; + + case 112: // D3DFMT_G16R16F + return DXGI_FORMAT_R16G16_FLOAT; + + case 113: // D3DFMT_A16B16G16R16F + return DXGI_FORMAT_R16G16B16A16_FLOAT; + + case 114: // D3DFMT_R32F + return DXGI_FORMAT_R32_FLOAT; + + case 115: // D3DFMT_G32R32F + return DXGI_FORMAT_R32G32_FLOAT; + + case 116: // D3DFMT_A32B32G32R32F + return DXGI_FORMAT_R32G32B32A32_FLOAT; + + // No DXGI format maps to D3DFMT_CxV8U8 + } + } + + return DXGI_FORMAT_UNKNOWN; + } + + #undef ISBITMASK + + //-------------------------------------------------------------------------------------- + 
inline DirectX::DDS_ALPHA_MODE GetAlphaMode(_In_ const DDS_HEADER* header) noexcept + { + if (header->ddspf.flags & DDS_FOURCC) + { + if (MAKEFOURCC('D', 'X', '1', '0') == header->ddspf.fourCC) + { + auto d3d10ext = reinterpret_cast(reinterpret_cast(header) + sizeof(DDS_HEADER)); + auto mode = static_cast(d3d10ext->miscFlags2 & DDS_MISC_FLAGS2_ALPHA_MODE_MASK); + switch (mode) + { + case DDS_ALPHA_MODE_STRAIGHT: + case DDS_ALPHA_MODE_PREMULTIPLIED: + case DDS_ALPHA_MODE_OPAQUE: + case DDS_ALPHA_MODE_CUSTOM: + return mode; + + case DDS_ALPHA_MODE_UNKNOWN: + default: + break; + } + } + else if ((MAKEFOURCC('D', 'X', 'T', '2') == header->ddspf.fourCC) + || (MAKEFOURCC('D', 'X', 'T', '4') == header->ddspf.fourCC)) + { + return DDS_ALPHA_MODE_PREMULTIPLIED; + } + } + + return DDS_ALPHA_MODE_UNKNOWN; + } + + //-------------------------------------------------------------------------------------- + class auto_delete_file + { + public: + auto_delete_file(HANDLE hFile) noexcept : m_handle(hFile) {} + + auto_delete_file(const auto_delete_file&) = delete; + auto_delete_file& operator=(const auto_delete_file&) = delete; + + auto_delete_file(const auto_delete_file&&) = delete; + auto_delete_file& operator=(const auto_delete_file&&) = delete; + + ~auto_delete_file() + { + if (m_handle) + { + FILE_DISPOSITION_INFO info = {}; + info.DeleteFile = TRUE; + (void)SetFileInformationByHandle(m_handle, FileDispositionInfo, &info, sizeof(info)); + } + } + + void clear() noexcept { m_handle = nullptr; } + + private: + HANDLE m_handle; + }; + + class auto_delete_file_wic + { + public: + auto_delete_file_wic(Microsoft::WRL::ComPtr& hFile, LPCWSTR szFile) noexcept : m_filename(szFile), m_handle(hFile) {} + + auto_delete_file_wic(const auto_delete_file_wic&) = delete; + auto_delete_file_wic& operator=(const auto_delete_file_wic&) = delete; + + auto_delete_file_wic(const auto_delete_file_wic&&) = delete; + auto_delete_file_wic& operator=(const auto_delete_file_wic&&) = delete; + + 
~auto_delete_file_wic() + { + if (m_filename) + { + m_handle.Reset(); + DeleteFileW(m_filename); + } + } + + void clear() noexcept { m_filename = nullptr; } + + private: + LPCWSTR m_filename; + Microsoft::WRL::ComPtr& m_handle; + }; + + inline uint32_t CountMips(uint32_t width, uint32_t height) noexcept + { + if (width == 0 || height == 0) + return 0; + + uint32_t count = 1; + while (width > 1 || height > 1) + { + width >>= 1; + height >>= 1; + count++; + } + return count; + } + + inline void FitPowerOf2(UINT origx, UINT origy, UINT& targetx, UINT& targety, size_t maxsize) + { + float origAR = float(origx) / float(origy); + + if (origx > origy) + { + size_t x; + for (x = maxsize; x > 1; x >>= 1) { if (x <= targetx) break; } + targetx = UINT(x); + + float bestScore = FLT_MAX; + for (size_t y = maxsize; y > 0; y >>= 1) + { + float score = fabsf((float(x) / float(y)) - origAR); + if (score < bestScore) + { + bestScore = score; + targety = UINT(y); + } + } + } + else + { + size_t y; + for (y = maxsize; y > 1; y >>= 1) { if (y <= targety) break; } + targety = UINT(y); + + float bestScore = FLT_MAX; + for (size_t x = maxsize; x > 0; x >>= 1) + { + float score = fabsf((float(x) / float(y)) - origAR); + if (score < bestScore) + { + bestScore = score; + targetx = UINT(x); + } + } + } + } + } +} diff --git a/Sdk/External/DirectXTK/Src/Model.cpp b/Sdk/External/DirectXTK/Src/Model.cpp new file mode 100644 index 0000000..d56d518 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Model.cpp @@ -0,0 +1,327 @@ +//-------------------------------------------------------------------------------------- +// File: Model.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "Model.h" +#include "CommonStates.h" +#include "DirectXHelpers.h" +#include "Effects.h" +#include "PlatformHelpers.h" + +using namespace DirectX; + +#ifndef _CPPRTTI +#error Model requires RTTI +#endif + +//-------------------------------------------------------------------------------------- +// ModelMeshPart +//-------------------------------------------------------------------------------------- + +ModelMeshPart::ModelMeshPart() noexcept : + indexCount(0), + startIndex(0), + vertexOffset(0), + vertexStride(0), + primitiveType(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST), + indexFormat(DXGI_FORMAT_R16_UINT), + isAlpha(false) +{ +} + + +ModelMeshPart::~ModelMeshPart() +{ +} + + +_Use_decl_annotations_ +void ModelMeshPart::Draw( + ID3D11DeviceContext* deviceContext, + IEffect* ieffect, + ID3D11InputLayout* iinputLayout, + std::function setCustomState) const +{ + deviceContext->IASetInputLayout(iinputLayout); + + auto vb = vertexBuffer.Get(); + UINT vbStride = vertexStride; + UINT vbOffset = 0; + deviceContext->IASetVertexBuffers(0, 1, &vb, &vbStride, &vbOffset); + + // Note that if indexFormat is DXGI_FORMAT_R32_UINT, this model mesh part requires a Feature Level 9.2 or greater device + deviceContext->IASetIndexBuffer(indexBuffer.Get(), indexFormat, 0); + + assert(ieffect != nullptr); + ieffect->Apply(deviceContext); + + // Hook lets the caller replace our shaders or state settings with whatever else they see fit. + if (setCustomState) + { + setCustomState(); + } + + // Draw the primitive. 
+ deviceContext->IASetPrimitiveTopology(primitiveType); + + deviceContext->DrawIndexed(indexCount, startIndex, vertexOffset); +} + + +_Use_decl_annotations_ +void ModelMeshPart::DrawInstanced( + ID3D11DeviceContext* deviceContext, + IEffect* ieffect, + ID3D11InputLayout* iinputLayout, + uint32_t instanceCount, uint32_t startInstanceLocation, + std::function setCustomState) const +{ + deviceContext->IASetInputLayout(iinputLayout); + + auto vb = vertexBuffer.Get(); + UINT vbStride = vertexStride; + UINT vbOffset = 0; + deviceContext->IASetVertexBuffers(0, 1, &vb, &vbStride, &vbOffset); + + // Note that if indexFormat is DXGI_FORMAT_R32_UINT, this model mesh part requires a Feature Level 9.2 or greater device + deviceContext->IASetIndexBuffer(indexBuffer.Get(), indexFormat, 0); + + assert(ieffect != nullptr); + ieffect->Apply(deviceContext); + + // Hook lets the caller replace our shaders or state settings with whatever else they see fit. + if (setCustomState) + { + setCustomState(); + } + + // Draw the primitive. 
+ deviceContext->IASetPrimitiveTopology(primitiveType); + + deviceContext->DrawIndexedInstanced( + indexCount, instanceCount, startIndex, + vertexOffset, + startInstanceLocation); +} + + +_Use_decl_annotations_ +void ModelMeshPart::CreateInputLayout(ID3D11Device* d3dDevice, IEffect* ieffect, ID3D11InputLayout** iinputLayout) const +{ + if (iinputLayout) + { + *iinputLayout = nullptr; + } + + if (!vbDecl || vbDecl->empty()) + throw std::exception("Model mesh part missing vertex buffer input elements data"); + + if (vbDecl->size() > D3D11_IA_VERTEX_INPUT_STRUCTURE_ELEMENT_COUNT) + throw std::exception("Model mesh part input layout size is too large for DirectX 11"); + + ThrowIfFailed( + CreateInputLayoutFromEffect(d3dDevice, ieffect, vbDecl->data(), vbDecl->size(), iinputLayout) + ); + + assert(iinputLayout != nullptr && *iinputLayout != nullptr); + _Analysis_assume_(iinputLayout != nullptr && *iinputLayout != nullptr); +} + + +_Use_decl_annotations_ +void ModelMeshPart::ModifyEffect(ID3D11Device* d3dDevice, std::shared_ptr& ieffect, bool isalpha) +{ + if (!vbDecl || vbDecl->empty()) + throw std::exception("Model mesh part missing vertex buffer input elements data"); + + if (vbDecl->size() > D3D11_IA_VERTEX_INPUT_STRUCTURE_ELEMENT_COUNT) + throw std::exception("Model mesh part input layout size is too large for DirectX 11"); + + assert(ieffect != nullptr); + this->effect = ieffect; + this->isAlpha = isalpha; + + assert(d3dDevice != nullptr); + + ThrowIfFailed( + CreateInputLayoutFromEffect(d3dDevice, effect.get(), vbDecl->data(), vbDecl->size(), inputLayout.ReleaseAndGetAddressOf()) + ); +} + + +//-------------------------------------------------------------------------------------- +// ModelMesh +//-------------------------------------------------------------------------------------- + +ModelMesh::ModelMesh() noexcept : + ccw(true), + pmalpha(true) +{ +} + + +ModelMesh::~ModelMesh() +{ +} + + +_Use_decl_annotations_ +void ModelMesh::PrepareForRendering( + 
ID3D11DeviceContext* deviceContext, + const CommonStates& states, + bool alpha, + bool wireframe) const +{ + assert(deviceContext != nullptr); + + // Set the blend and depth stencil state. + ID3D11BlendState* blendState; + ID3D11DepthStencilState* depthStencilState; + + if (alpha) + { + if (pmalpha) + { + blendState = states.AlphaBlend(); + depthStencilState = states.DepthRead(); + } + else + { + blendState = states.NonPremultiplied(); + depthStencilState = states.DepthRead(); + } + } + else + { + blendState = states.Opaque(); + depthStencilState = states.DepthDefault(); + } + + deviceContext->OMSetBlendState(blendState, nullptr, 0xFFFFFFFF); + deviceContext->OMSetDepthStencilState(depthStencilState, 0); + + // Set the rasterizer state. + if (wireframe) + deviceContext->RSSetState(states.Wireframe()); + else + deviceContext->RSSetState(ccw ? states.CullCounterClockwise() : states.CullClockwise()); + + // Set sampler state. + ID3D11SamplerState* samplers[] = + { + states.LinearWrap(), + states.LinearWrap(), + }; + + deviceContext->PSSetSamplers(0, 2, samplers); +} + + +_Use_decl_annotations_ +void XM_CALLCONV ModelMesh::Draw( + ID3D11DeviceContext* deviceContext, + FXMMATRIX world, + CXMMATRIX view, + CXMMATRIX projection, + bool alpha, + std::function setCustomState) const +{ + assert(deviceContext != nullptr); + + for (auto it = meshParts.cbegin(); it != meshParts.cend(); ++it) + { + auto part = (*it).get(); + assert(part != nullptr); + + if (part->isAlpha != alpha) + { + // Skip alpha parts when drawing opaque or skip opaque parts if drawing alpha + continue; + } + + auto imatrices = dynamic_cast(part->effect.get()); + if (imatrices) + { + imatrices->SetMatrices(world, view, projection); + } + + part->Draw(deviceContext, part->effect.get(), part->inputLayout.Get(), setCustomState); + } +} + + +//-------------------------------------------------------------------------------------- +// Model 
+//-------------------------------------------------------------------------------------- + +Model::~Model() +{ +} + + +_Use_decl_annotations_ +void XM_CALLCONV Model::Draw( + ID3D11DeviceContext* deviceContext, + const CommonStates& states, + FXMMATRIX world, + CXMMATRIX view, + CXMMATRIX projection, + bool wireframe, std::function setCustomState) const +{ + assert(deviceContext != nullptr); + + // Draw opaque parts + for (auto it = meshes.cbegin(); it != meshes.cend(); ++it) + { + auto mesh = it->get(); + assert(mesh != nullptr); + + mesh->PrepareForRendering(deviceContext, states, false, wireframe); + + mesh->Draw(deviceContext, world, view, projection, false, setCustomState); + } + + // Draw alpha parts + for (auto it = meshes.cbegin(); it != meshes.cend(); ++it) + { + auto mesh = it->get(); + assert(mesh != nullptr); + + mesh->PrepareForRendering(deviceContext, states, true, wireframe); + + mesh->Draw(deviceContext, world, view, projection, true, setCustomState); + } +} + + +void Model::UpdateEffects(_In_ std::function setEffect) +{ + if (mEffectCache.empty()) + { + // This cache ensures we only set each effect once (could be shared) + for (auto mit = meshes.cbegin(); mit != meshes.cend(); ++mit) + { + auto mesh = mit->get(); + assert(mesh != nullptr); + + for (auto it = mesh->meshParts.cbegin(); it != mesh->meshParts.cend(); ++it) + { + if ((*it)->effect) + mEffectCache.insert((*it)->effect.get()); + } + } + } + + assert(setEffect != nullptr); + + for (auto it = mEffectCache.begin(); it != mEffectCache.end(); ++it) + { + setEffect(*it); + } +} diff --git a/Sdk/External/DirectXTK/Src/ModelLoadCMO.cpp b/Sdk/External/DirectXTK/Src/ModelLoadCMO.cpp new file mode 100644 index 0000000..9fed745 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/ModelLoadCMO.cpp @@ -0,0 +1,914 @@ +//-------------------------------------------------------------------------------------- +// File: ModelLoadCMO.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "Model.h" +#include "DDSTextureLoader.h" +#include "DirectXHelpers.h" +#include "Effects.h" +#include "VertexTypes.h" +#include "BinaryReader.h" +#include "PlatformHelpers.h" + +using namespace DirectX; +using Microsoft::WRL::ComPtr; + + +//-------------------------------------------------------------------------------------- +// .CMO files are built by Visual Studio 2012 and an example renderer is provided +// in the VS Direct3D Starter Kit +// http://code.msdn.microsoft.com/Visual-Studio-3D-Starter-455a15f1 +//-------------------------------------------------------------------------------------- + +namespace VSD3DStarter +{ + // .CMO files + + // UINT - Mesh count + // { [Mesh count] + // UINT - Length of name + // wchar_t[] - Name of mesh (if length > 0) + // UINT - Material count + // { [Material count] + // UINT - Length of material name + // wchar_t[] - Name of material (if length > 0) + // Material structure + // UINT - Length of pixel shader name + // wchar_t[] - Name of pixel shader (if length > 0) + // { [8] + // UINT - Length of texture name + // wchar_t[] - Name of texture (if length > 0) + // } + // } + // BYTE - 1 if there is skeletal animation data present + // UINT - SubMesh count + // { [SubMesh count] + // SubMesh structure + // } + // UINT - IB Count + // { [IB Count] + // UINT - Number of USHORTs in IB + // USHORT[] - Array of indices + // } + // UINT - VB Count + // { [VB Count] + // UINT - Number of verts in VB + // Vertex[] - Array of vertices + // } + // UINT - Skinning VB Count + // { [Skinning VB Count] + // UINT - Number of verts in Skinning VB + // SkinningVertex[] - Array of skinning verts + // } + // MeshExtents structure + // [If skeleton animation data is not present, file ends here] + // UINT - Bone count + // { [Bone count] + 
// UINT - Length of bone name + // wchar_t[] - Bone name (if length > 0) + // Bone structure + // } + // UINT - Animation clip count + // { [Animation clip count] + // UINT - Length of clip name + // wchar_t[] - Clip name (if length > 0) + // float - Start time + // float - End time + // UINT - Keyframe count + // { [Keyframe count] + // Keyframe structure + // } + // } + // } + +#pragma pack(push,1) + + struct Material + { + DirectX::XMFLOAT4 Ambient; + DirectX::XMFLOAT4 Diffuse; + DirectX::XMFLOAT4 Specular; + float SpecularPower; + DirectX::XMFLOAT4 Emissive; + DirectX::XMFLOAT4X4 UVTransform; + }; + + const uint32_t MAX_TEXTURE = 8; + + struct SubMesh + { + UINT MaterialIndex; + UINT IndexBufferIndex; + UINT VertexBufferIndex; + UINT StartIndex; + UINT PrimCount; + }; + + const uint32_t NUM_BONE_INFLUENCES = 4; + + static_assert(sizeof(VertexPositionNormalTangentColorTexture) == 52, "mismatch with CMO vertex type"); + + struct SkinningVertex + { + UINT boneIndex[NUM_BONE_INFLUENCES]; + float boneWeight[NUM_BONE_INFLUENCES]; + }; + + struct MeshExtents + { + float CenterX, CenterY, CenterZ; + float Radius; + + float MinX, MinY, MinZ; + float MaxX, MaxY, MaxZ; + }; + + struct Bone + { + INT ParentIndex; + DirectX::XMFLOAT4X4 InvBindPos; + DirectX::XMFLOAT4X4 BindPos; + DirectX::XMFLOAT4X4 LocalTransform; + }; + + struct Clip + { + float StartTime; + float EndTime; + UINT keys; + }; + + struct Keyframe + { + UINT BoneIndex; + float Time; + DirectX::XMFLOAT4X4 Transform; + }; + +#pragma pack(pop) + + const Material s_defMaterial = + { + { 0.2f, 0.2f, 0.2f, 1.f }, + { 0.8f, 0.8f, 0.8f, 1.f }, + { 0.0f, 0.0f, 0.0f, 1.f }, + 1.f, + { 0.0f, 0.0f, 0.0f, 1.0f }, + { 1.f, 0.f, 0.f, 0.f, + 0.f, 1.f, 0.f, 0.f, + 0.f, 0.f, 1.f, 0.f, + 0.f, 0.f, 0.f, 1.f }, + }; +} // namespace + +static_assert(sizeof(VSD3DStarter::Material) == 132, "CMO Mesh structure size incorrect"); +static_assert(sizeof(VSD3DStarter::SubMesh) == 20, "CMO Mesh structure size incorrect"); 
+static_assert(sizeof(VSD3DStarter::SkinningVertex) == 32, "CMO Mesh structure size incorrect"); +static_assert(sizeof(VSD3DStarter::MeshExtents) == 40, "CMO Mesh structure size incorrect"); +static_assert(sizeof(VSD3DStarter::Bone) == 196, "CMO Mesh structure size incorrect"); +static_assert(sizeof(VSD3DStarter::Clip) == 12, "CMO Mesh structure size incorrect"); +static_assert(sizeof(VSD3DStarter::Keyframe) == 72, "CMO Mesh structure size incorrect"); + +namespace +{ + //---------------------------------------------------------------------------------- + struct MaterialRecordCMO + { + const VSD3DStarter::Material* pMaterial; + std::wstring name; + std::wstring pixelShader; + std::wstring texture[VSD3DStarter::MAX_TEXTURE]; + std::shared_ptr effect; + ComPtr il; + + MaterialRecordCMO() noexcept : + pMaterial(nullptr), + texture{} {} + }; + + // Helper for creating a D3D input layout. + void CreateCMOInputLayout(_In_ ID3D11Device* device, _In_ IEffect* effect, _Outptr_ ID3D11InputLayout** pInputLayout, bool skinning) + { + if (skinning) + { + ThrowIfFailed( + CreateInputLayoutFromEffect(device, effect, pInputLayout) + ); + } + else + { + ThrowIfFailed( + CreateInputLayoutFromEffect(device, effect, pInputLayout) + ); + } + + assert(pInputLayout != nullptr && *pInputLayout != nullptr); + _Analysis_assume_(pInputLayout != nullptr && *pInputLayout != nullptr); + + SetDebugObjectName(*pInputLayout, "ModelCMO"); + } + + // Shared VB input element description + INIT_ONCE g_InitOnce = INIT_ONCE_STATIC_INIT; + std::shared_ptr> g_vbdecl; + std::shared_ptr> g_vbdeclSkinning; + + BOOL CALLBACK InitializeDecl(PINIT_ONCE initOnce, PVOID Parameter, PVOID *lpContext) + { + UNREFERENCED_PARAMETER(initOnce); + UNREFERENCED_PARAMETER(Parameter); + UNREFERENCED_PARAMETER(lpContext); + + g_vbdecl = std::make_shared>( + VertexPositionNormalTangentColorTexture::InputElements, + VertexPositionNormalTangentColorTexture::InputElements + 
VertexPositionNormalTangentColorTexture::InputElementCount); + + g_vbdeclSkinning = std::make_shared>( + VertexPositionNormalTangentColorTextureSkinning::InputElements, + VertexPositionNormalTangentColorTextureSkinning::InputElements + VertexPositionNormalTangentColorTextureSkinning::InputElementCount); + return TRUE; + } + + inline XMFLOAT3 GetMaterialColor(float r, float g, float b, bool srgb) + { + if (srgb) + { + XMVECTOR v = XMVectorSet(r, g, b, 1.f); + v = XMColorSRGBToRGB(v); + + XMFLOAT3 result; + XMStoreFloat3(&result, v); + return result; + } + else + { + return XMFLOAT3(r, g, b); + } + } +} + + +//====================================================================================== +// Model Loader +//====================================================================================== + +_Use_decl_annotations_ +std::unique_ptr DirectX::Model::CreateFromCMO( + ID3D11Device* device, + const uint8_t* meshData, size_t dataSize, + IEffectFactory& fxFactory, + ModelLoaderFlags flags) +{ + if (!InitOnceExecuteOnce(&g_InitOnce, InitializeDecl, nullptr, nullptr)) + throw std::exception("One-time initialization failed"); + + if (!device || !meshData) + throw std::exception("Device and meshData cannot be null"); + + auto fxFactoryDGSL = dynamic_cast(&fxFactory); + + // Meshes + auto nMesh = reinterpret_cast(meshData); + size_t usedSize = sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + if (!*nMesh) + throw std::exception("No meshes found"); + + auto model = std::make_unique(); + + for (UINT meshIndex = 0; meshIndex < *nMesh; ++meshIndex) + { + // Mesh name + auto nName = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + auto meshName = reinterpret_cast(meshData + usedSize); + + usedSize += sizeof(wchar_t)*(*nName); + if (dataSize < usedSize) + throw std::exception("End of file"); + + auto mesh = std::make_shared(); + 
mesh->name.assign(meshName, *nName); + mesh->ccw = (flags & ModelLoader_CounterClockwise) != 0; + mesh->pmalpha = (flags & ModelLoader_PremultipledAlpha) != 0; + + // Materials + auto nMats = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + std::vector materials; + materials.reserve(*nMats); + for (UINT j = 0; j < *nMats; ++j) + { + MaterialRecordCMO m; + + // Material name + nName = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + auto matName = reinterpret_cast(meshData + usedSize); + + usedSize += sizeof(wchar_t)*(*nName); + if (dataSize < usedSize) + throw std::exception("End of file"); + + m.name.assign(matName, *nName); + + // Material settings + auto matSetting = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(VSD3DStarter::Material); + if (dataSize < usedSize) + throw std::exception("End of file"); + + m.pMaterial = matSetting; + + // Pixel shader name + nName = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + auto psName = reinterpret_cast(meshData + usedSize); + + usedSize += sizeof(wchar_t)*(*nName); + if (dataSize < usedSize) + throw std::exception("End of file"); + + m.pixelShader.assign(psName, *nName); + + for (UINT t = 0; t < VSD3DStarter::MAX_TEXTURE; ++t) + { + nName = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + auto txtName = reinterpret_cast(meshData + usedSize); + + usedSize += sizeof(wchar_t)*(*nName); + if (dataSize < usedSize) + throw std::exception("End of file"); + + m.texture[t].assign(txtName, *nName); + } + + materials.emplace_back(m); + } + + assert(materials.size() == *nMats); + + if (materials.empty()) + { + // Add default material if none defined + MaterialRecordCMO m; + 
m.pMaterial = &VSD3DStarter::s_defMaterial; + m.name = L"Default"; + materials.emplace_back(m); + } + + // Skeletal data? + const BYTE* bSkeleton = meshData + usedSize; + usedSize += sizeof(BYTE); + if (dataSize < usedSize) + throw std::exception("End of file"); + + // Submeshes + auto nSubmesh = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + if (!*nSubmesh) + throw std::exception("No submeshes found\n"); + + auto subMesh = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(VSD3DStarter::SubMesh) * (*nSubmesh); + if (dataSize < usedSize) + throw std::exception("End of file"); + + // Index buffers + auto nIBs = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + if (!*nIBs) + throw std::exception("No index buffers found\n"); + + struct IBData + { + size_t nIndices; + const USHORT* ptr; + }; + + std::vector ibData; + ibData.reserve(*nIBs); + + std::vector> ibs; + ibs.resize(*nIBs); + + for (UINT j = 0; j < *nIBs; ++j) + { + auto nIndexes = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + if (!*nIndexes) + throw std::exception("Empty index buffer found\n"); + + uint64_t sizeInBytes = uint64_t(*(nIndexes)) * sizeof(USHORT); + + if (sizeInBytes > UINT32_MAX) + throw std::exception("IB too large"); + + if (!(flags & ModelLoader_AllowLargeModels)) + { + if (sizeInBytes > (D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM * 1024u * 1024u)) + throw std::exception("IB too large for DirectX 11"); + } + + auto ibBytes = static_cast(sizeInBytes); + + auto indexes = reinterpret_cast(meshData + usedSize); + usedSize += ibBytes; + if (dataSize < usedSize) + throw std::exception("End of file"); + + IBData ib; + ib.nIndices = *nIndexes; + ib.ptr = indexes; + ibData.emplace_back(ib); + + D3D11_BUFFER_DESC desc = 
{}; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.ByteWidth = static_cast(ibBytes); + desc.BindFlags = D3D11_BIND_INDEX_BUFFER; + + D3D11_SUBRESOURCE_DATA initData = { indexes, 0, 0 }; + + ThrowIfFailed( + device->CreateBuffer(&desc, &initData, &ibs[j]) + ); + + SetDebugObjectName(ibs[j].Get(), "ModelCMO"); + } + + assert(ibData.size() == *nIBs); + assert(ibs.size() == *nIBs); + + // Vertex buffers + auto nVBs = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + if (!*nVBs) + throw std::exception("No vertex buffers found\n"); + + struct VBData + { + size_t nVerts; + const VertexPositionNormalTangentColorTexture* ptr; + const VSD3DStarter::SkinningVertex* skinPtr; + }; + + std::vector vbData; + vbData.reserve(*nVBs); + for (UINT j = 0; j < *nVBs; ++j) + { + auto nVerts = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + if (!*nVerts) + throw std::exception("Empty vertex buffer found\n"); + + size_t vbBytes = sizeof(VertexPositionNormalTangentColorTexture) * (*(nVerts)); + + auto verts = reinterpret_cast(meshData + usedSize); + usedSize += vbBytes; + if (dataSize < usedSize) + throw std::exception("End of file"); + + VBData vb; + vb.nVerts = *nVerts; + vb.ptr = verts; + vb.skinPtr = nullptr; + vbData.emplace_back(vb); + } + + assert(vbData.size() == *nVBs); + + // Skinning vertex buffers + auto nSkinVBs = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + if (*nSkinVBs) + { + if (*nSkinVBs != *nVBs) + throw std::exception("Number of VBs not equal to number of skin VBs"); + + for (UINT j = 0; j < *nSkinVBs; ++j) + { + auto nVerts = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + if (!*nVerts) + throw std::exception("Empty 
skinning vertex buffer found\n"); + + if (vbData[j].nVerts != *nVerts) + throw std::exception("Mismatched number of verts for skin VBs"); + + size_t vbBytes = sizeof(VSD3DStarter::SkinningVertex) * (*(nVerts)); + + auto verts = reinterpret_cast(meshData + usedSize); + usedSize += vbBytes; + if (dataSize < usedSize) + throw std::exception("End of file"); + + vbData[j].skinPtr = verts; + } + } + + // Extents + auto extents = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(VSD3DStarter::MeshExtents); + if (dataSize < usedSize) + throw std::exception("End of file"); + + mesh->boundingSphere.Center.x = extents->CenterX; + mesh->boundingSphere.Center.y = extents->CenterY; + mesh->boundingSphere.Center.z = extents->CenterZ; + mesh->boundingSphere.Radius = extents->Radius; + + XMVECTOR min = XMVectorSet(extents->MinX, extents->MinY, extents->MinZ, 0.f); + XMVECTOR max = XMVectorSet(extents->MaxX, extents->MaxY, extents->MaxZ, 0.f); + BoundingBox::CreateFromPoints(mesh->boundingBox, min, max); + + #if 0 + // Animation data + if (*bSkeleton) + { + // Bones + auto nBones = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + if (!*nBones) + throw std::exception("Animation bone data is missing\n"); + + for (UINT j = 0; j < *nBones; ++j) + { + // Bone name + nName = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + auto boneName = reinterpret_cast(meshData + usedSize); + + usedSize += sizeof(wchar_t)*(*nName); + if (dataSize < usedSize) + throw std::exception("End of file"); + + // TODO - What to do with bone name? + boneName; + + // Bone settings + auto bones = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(VSD3DStarter::Bone); + if (dataSize < usedSize) + throw std::exception("End of file"); + + // TODO - What to do with bone data? 
+ bones; + } + + // Animation Clips + auto nClips = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + for (UINT j = 0; j < *nClips; ++j) + { + // Clip name + nName = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(UINT); + if (dataSize < usedSize) + throw std::exception("End of file"); + + auto clipName = reinterpret_cast(meshData + usedSize); + + usedSize += sizeof(wchar_t)*(*nName); + if (dataSize < usedSize) + throw std::exception("End of file"); + + // TODO - What to do with clip name? + clipName; + + auto clip = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(VSD3DStarter::Clip); + if (dataSize < usedSize) + throw std::exception("End of file"); + + if (!clip->keys) + throw std::exception("Keyframes missing in clip"); + + auto keys = reinterpret_cast(meshData + usedSize); + usedSize += sizeof(VSD3DStarter::Keyframe) * clip->keys; + if (dataSize < usedSize) + throw std::exception("End of file"); + + // TODO - What to do with keys and clip->StartTime, clip->EndTime? + keys; + } + } + #else + UNREFERENCED_PARAMETER(bSkeleton); + #endif + + bool enableSkinning = (*nSkinVBs) != 0; + + // Build vertex buffers + std::vector> vbs; + vbs.resize(*nVBs); + + const size_t stride = enableSkinning ? 
sizeof(VertexPositionNormalTangentColorTextureSkinning) + : sizeof(VertexPositionNormalTangentColorTexture); + + for (UINT j = 0; j < *nVBs; ++j) + { + size_t nVerts = vbData[j].nVerts; + + uint64_t sizeInBytes = uint64_t(stride) * uint64_t(nVerts); + + if (sizeInBytes > UINT32_MAX) + throw std::exception("VB too large"); + + if (!(flags & ModelLoader_AllowLargeModels)) + { + if (sizeInBytes > uint64_t(D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM * 1024u * 1024u)) + throw std::exception("VB too large for DirectX 11"); + } + + size_t bytes = static_cast(sizeInBytes); + + D3D11_BUFFER_DESC desc = {}; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.ByteWidth = static_cast(bytes); + desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + + if (fxFactoryDGSL && !enableSkinning) + { + // Can use CMO vertex data directly + D3D11_SUBRESOURCE_DATA initData = { vbData[j].ptr, 0, 0 }; + + ThrowIfFailed( + device->CreateBuffer(&desc, &initData, &vbs[j]) + ); + } + else + { + auto temp = std::make_unique(bytes + (sizeof(UINT) * nVerts)); + + auto visited = reinterpret_cast(temp.get() + bytes); + memset(visited, 0xff, sizeof(UINT) * nVerts); + + assert(vbData[j].ptr != nullptr); + + if (enableSkinning) + { + // Combine CMO multi-stream data into a single stream + auto skinptr = vbData[j].skinPtr; + assert(skinptr != nullptr); + + uint8_t* ptr = temp.get(); + + auto sptr = vbData[j].ptr; + + for (size_t v = 0; v < nVerts; ++v) + { + *reinterpret_cast(ptr) = sptr[v]; + + auto skinv = reinterpret_cast(ptr); + skinv->SetBlendIndices(*reinterpret_cast(skinptr[v].boneIndex)); + skinv->SetBlendWeights(*reinterpret_cast(skinptr[v].boneWeight)); + + ptr += stride; + } + } + else + { + memcpy(temp.get(), vbData[j].ptr, bytes); + } + + if (!fxFactoryDGSL) + { + // Need to fix up VB tex coords for UV transform which is not supported by basic effects + for (UINT k = 0; k < *nSubmesh; ++k) + { + auto& sm = subMesh[k]; + + if (sm.VertexBufferIndex != j) + continue; + + if ((sm.IndexBufferIndex >= 
*nIBs) + || (sm.MaterialIndex >= materials.size())) + throw std::exception("Invalid submesh found\n"); + + XMMATRIX uvTransform = XMLoadFloat4x4(&materials[sm.MaterialIndex].pMaterial->UVTransform); + + auto ib = ibData[sm.IndexBufferIndex].ptr; + + size_t count = ibData[sm.IndexBufferIndex].nIndices; + + for (size_t q = 0; q < count; ++q) + { + size_t v = ib[q]; + + if (v >= nVerts) + throw std::exception("Invalid index found\n"); + + auto verts = reinterpret_cast(temp.get() + (v * stride)); + if (visited[v] == UINT(-1)) + { + visited[v] = sm.MaterialIndex; + + XMVECTOR t = XMLoadFloat2(&verts->textureCoordinate); + + t = XMVectorSelect(g_XMIdentityR3, t, g_XMSelect1110); + + t = XMVector4Transform(t, uvTransform); + + XMStoreFloat2(&verts->textureCoordinate, t); + } + else if (visited[v] != sm.MaterialIndex) + { + #ifdef _DEBUG + XMMATRIX uv2 = XMLoadFloat4x4(&materials[visited[v]].pMaterial->UVTransform); + + if (XMVector4NotEqual(uvTransform.r[0], uv2.r[0]) + || XMVector4NotEqual(uvTransform.r[1], uv2.r[1]) + || XMVector4NotEqual(uvTransform.r[2], uv2.r[2]) + || XMVector4NotEqual(uvTransform.r[3], uv2.r[3])) + { + DebugTrace("WARNING: %ls - mismatched UV transforms for the same vertex; texture coordinates may not be correct\n", mesh->name.c_str()); + } + #endif + } + } + } + } + + // Create vertex buffer from temporary buffer + D3D11_SUBRESOURCE_DATA initData = { temp.get(), 0, 0 }; + + ThrowIfFailed( + device->CreateBuffer(&desc, &initData, &vbs[j]) + ); + } + + SetDebugObjectName(vbs[j].Get(), "ModelCMO"); + } + + assert(vbs.size() == *nVBs); + + // Create Effects + bool srgb = (flags & ModelLoader_MaterialColorsSRGB) != 0; + + for (size_t j = 0; j < materials.size(); ++j) + { + auto& m = materials[j]; + + if (fxFactoryDGSL) + { + DGSLEffectFactory::DGSLEffectInfo info; + info.name = m.name.c_str(); + info.specularPower = m.pMaterial->SpecularPower; + info.perVertexColor = true; + info.enableSkinning = enableSkinning; + info.alpha = m.pMaterial->Diffuse.w; + 
info.ambientColor = GetMaterialColor(m.pMaterial->Ambient.x, m.pMaterial->Ambient.y, m.pMaterial->Ambient.z, srgb); + info.diffuseColor = GetMaterialColor(m.pMaterial->Diffuse.x, m.pMaterial->Diffuse.y, m.pMaterial->Diffuse.z, srgb); + info.specularColor = GetMaterialColor(m.pMaterial->Specular.x, m.pMaterial->Specular.y, m.pMaterial->Specular.z, srgb); + info.emissiveColor = GetMaterialColor(m.pMaterial->Emissive.x, m.pMaterial->Emissive.y, m.pMaterial->Emissive.z, srgb); + info.diffuseTexture = m.texture[0].empty() ? nullptr : m.texture[0].c_str(); + info.specularTexture = m.texture[1].empty() ? nullptr : m.texture[1].c_str(); + info.normalTexture = m.texture[2].empty() ? nullptr : m.texture[2].c_str(); + info.emissiveTexture = m.texture[3].empty() ? nullptr : m.texture[3].c_str(); + info.pixelShader = m.pixelShader.c_str(); + + constexpr int offset = DGSLEffectFactory::DGSLEffectInfo::BaseTextureOffset; + for (int i = 0; i < (DGSLEffect::MaxTextures - offset); ++i) + { + info.textures[i] = m.texture[i + offset].empty() ? 
nullptr : m.texture[i + offset].c_str(); + } + + m.effect = fxFactoryDGSL->CreateDGSLEffect(info, nullptr); + + auto dgslEffect = static_cast(m.effect.get()); + dgslEffect->SetUVTransform(XMLoadFloat4x4(&m.pMaterial->UVTransform)); + } + else + { + EffectFactory::EffectInfo info; + info.name = m.name.c_str(); + info.specularPower = m.pMaterial->SpecularPower; + info.perVertexColor = true; + info.enableSkinning = enableSkinning; + info.alpha = m.pMaterial->Diffuse.w; + info.ambientColor = GetMaterialColor(m.pMaterial->Ambient.x, m.pMaterial->Ambient.y, m.pMaterial->Ambient.z, srgb); + info.diffuseColor = GetMaterialColor(m.pMaterial->Diffuse.x, m.pMaterial->Diffuse.y, m.pMaterial->Diffuse.z, srgb); + info.specularColor = GetMaterialColor(m.pMaterial->Specular.x, m.pMaterial->Specular.y, m.pMaterial->Specular.z, srgb); + info.emissiveColor = GetMaterialColor(m.pMaterial->Emissive.x, m.pMaterial->Emissive.y, m.pMaterial->Emissive.z, srgb); + info.diffuseTexture = m.texture[0].c_str(); + + m.effect = fxFactory.CreateEffect(info, nullptr); + } + + CreateCMOInputLayout(device, m.effect.get(), &m.il, enableSkinning); + } + + // Build mesh parts + for (UINT j = 0; j < *nSubmesh; ++j) + { + auto& sm = subMesh[j]; + + if ((sm.IndexBufferIndex >= *nIBs) + || (sm.VertexBufferIndex >= *nVBs) + || (sm.MaterialIndex >= materials.size())) + throw std::exception("Invalid submesh found\n"); + + auto& mat = materials[sm.MaterialIndex]; + + auto part = new ModelMeshPart(); + + if (mat.pMaterial->Diffuse.w < 1) + part->isAlpha = true; + + part->indexCount = sm.PrimCount * 3; + part->startIndex = sm.StartIndex; + part->vertexStride = static_cast(stride); + part->inputLayout = mat.il; + part->indexBuffer = ibs[sm.IndexBufferIndex]; + part->vertexBuffer = vbs[sm.VertexBufferIndex]; + part->effect = mat.effect; + part->vbDecl = enableSkinning ? 
g_vbdeclSkinning : g_vbdecl; + + mesh->meshParts.emplace_back(part); + } + + model->meshes.emplace_back(mesh); + } + + return model; +} + + +//-------------------------------------------------------------------------------------- +_Use_decl_annotations_ +std::unique_ptr DirectX::Model::CreateFromCMO( + ID3D11Device* device, + const wchar_t* szFileName, + IEffectFactory& fxFactory, + ModelLoaderFlags flags) +{ + size_t dataSize = 0; + std::unique_ptr data; + HRESULT hr = BinaryReader::ReadEntireFile(szFileName, data, &dataSize); + if (FAILED(hr)) + { + DebugTrace("ERROR: CreateFromCMO failed (%08X) loading '%ls'\n", + static_cast(hr), szFileName); + throw std::exception("CreateFromCMO"); + } + + auto model = CreateFromCMO(device, data.get(), dataSize, fxFactory, flags); + + model->name = szFileName; + + return model; +} diff --git a/Sdk/External/DirectXTK/Src/ModelLoadSDKMESH.cpp b/Sdk/External/DirectXTK/Src/ModelLoadSDKMESH.cpp new file mode 100644 index 0000000..6df64ce --- /dev/null +++ b/Sdk/External/DirectXTK/Src/ModelLoadSDKMESH.cpp @@ -0,0 +1,737 @@ +//-------------------------------------------------------------------------------------- +// File: ModelLoadSDKMESH.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "Model.h" +#include "DirectXHelpers.h" +#include "Effects.h" +#include "VertexTypes.h" +#include "BinaryReader.h" +#include "PlatformHelpers.h" +#include "SDKMesh.h" + +using namespace DirectX; +using Microsoft::WRL::ComPtr; + +namespace +{ + enum : unsigned int + { + PER_VERTEX_COLOR = 0x1, + SKINNING = 0x2, + DUAL_TEXTURE = 0x4, + NORMAL_MAPS = 0x8, + BIASED_VERTEX_NORMALS = 0x10, + USES_OBSOLETE_DEC3N = 0x20, + }; + + struct MaterialRecordSDKMESH + { + std::shared_ptr effect; + bool alpha; + + MaterialRecordSDKMESH() noexcept : alpha(false) {} + }; + + inline XMFLOAT3 GetMaterialColor(float r, float g, float b, bool srgb) + { + if (srgb) + { + XMVECTOR v = XMVectorSet(r, g, b, 1.f); + v = XMColorSRGBToRGB(v); + + XMFLOAT3 result; + XMStoreFloat3(&result, v); + return result; + } + else + { + return XMFLOAT3(r, g, b); + } + } + + void LoadMaterial(const DXUT::SDKMESH_MATERIAL& mh, + unsigned int flags, + IEffectFactory& fxFactory, + MaterialRecordSDKMESH& m, + bool srgb) + { + wchar_t matName[DXUT::MAX_MATERIAL_NAME] = {}; + MultiByteToWideChar(CP_UTF8, 0, mh.Name, -1, matName, DXUT::MAX_MATERIAL_NAME); + + wchar_t diffuseName[DXUT::MAX_TEXTURE_NAME] = {}; + MultiByteToWideChar(CP_UTF8, 0, mh.DiffuseTexture, -1, diffuseName, DXUT::MAX_TEXTURE_NAME); + + wchar_t specularName[DXUT::MAX_TEXTURE_NAME] = {}; + MultiByteToWideChar(CP_UTF8, 0, mh.SpecularTexture, -1, specularName, DXUT::MAX_TEXTURE_NAME); + + wchar_t normalName[DXUT::MAX_TEXTURE_NAME] = {}; + MultiByteToWideChar(CP_UTF8, 0, mh.NormalTexture, -1, normalName, DXUT::MAX_TEXTURE_NAME); + + if (flags & DUAL_TEXTURE && !mh.SpecularTexture[0]) + { + DebugTrace("WARNING: Material '%s' has multiple texture coords but not multiple textures\n", mh.Name); + flags &= ~static_cast(DUAL_TEXTURE); + } + + if (flags & NORMAL_MAPS) + { + if 
(!mh.NormalTexture[0]) + { + flags &= ~static_cast(NORMAL_MAPS); + *normalName = 0; + } + } + else if (mh.NormalTexture[0]) + { + DebugTrace("WARNING: Material '%s' has a normal map, but vertex buffer is missing tangents\n", mh.Name); + *normalName = 0; + } + + EffectFactory::EffectInfo info; + info.name = matName; + info.perVertexColor = (flags & PER_VERTEX_COLOR) != 0; + info.enableSkinning = (flags & SKINNING) != 0; + info.enableDualTexture = (flags & DUAL_TEXTURE) != 0; + info.enableNormalMaps = (flags & NORMAL_MAPS) != 0; + info.biasedVertexNormals = (flags & BIASED_VERTEX_NORMALS) != 0; + + if (mh.Ambient.x == 0 && mh.Ambient.y == 0 && mh.Ambient.z == 0 && mh.Ambient.w == 0 + && mh.Diffuse.x == 0 && mh.Diffuse.y == 0 && mh.Diffuse.z == 0 && mh.Diffuse.w == 0) + { + // SDKMESH material color block is uninitalized; assume defaults + info.diffuseColor = XMFLOAT3(1.f, 1.f, 1.f); + info.alpha = 1.f; + } + else + { + info.ambientColor = GetMaterialColor(mh.Ambient.x, mh.Ambient.y, mh.Ambient.z, srgb); + info.diffuseColor = GetMaterialColor(mh.Diffuse.x, mh.Diffuse.y, mh.Diffuse.z, srgb); + info.emissiveColor = GetMaterialColor(mh.Emissive.x, mh.Emissive.y, mh.Emissive.z, srgb); + + if (mh.Diffuse.w != 1.f && mh.Diffuse.w != 0.f) + { + info.alpha = mh.Diffuse.w; + } + else + info.alpha = 1.f; + + if (mh.Power > 0) + { + info.specularPower = mh.Power; + info.specularColor = XMFLOAT3(mh.Specular.x, mh.Specular.y, mh.Specular.z); + } + } + + info.diffuseTexture = diffuseName; + info.specularTexture = specularName; + info.normalTexture = normalName; + + m.effect = fxFactory.CreateEffect(info, nullptr); + m.alpha = (info.alpha < 1.f); + } + + void LoadMaterial(const DXUT::SDKMESH_MATERIAL_V2& mh, + unsigned int flags, + IEffectFactory& fxFactory, + MaterialRecordSDKMESH& m) + { + wchar_t matName[DXUT::MAX_MATERIAL_NAME] = {}; + MultiByteToWideChar(CP_UTF8, 0, mh.Name, -1, matName, DXUT::MAX_MATERIAL_NAME); + + wchar_t albetoTexture[DXUT::MAX_TEXTURE_NAME] = {}; + 
MultiByteToWideChar(CP_UTF8, 0, mh.AlbetoTexture, -1, albetoTexture, DXUT::MAX_TEXTURE_NAME); + + wchar_t normalName[DXUT::MAX_TEXTURE_NAME] = {}; + MultiByteToWideChar(CP_UTF8, 0, mh.NormalTexture, -1, normalName, DXUT::MAX_TEXTURE_NAME); + + wchar_t rmaName[DXUT::MAX_TEXTURE_NAME] = {}; + MultiByteToWideChar(CP_UTF8, 0, mh.RMATexture, -1, rmaName, DXUT::MAX_TEXTURE_NAME); + + wchar_t emissiveName[DXUT::MAX_TEXTURE_NAME] = {}; + MultiByteToWideChar(CP_UTF8, 0, mh.EmissiveTexture, -1, emissiveName, DXUT::MAX_TEXTURE_NAME); + + EffectFactory::EffectInfo info; + info.name = matName; + info.perVertexColor = false; + info.enableSkinning = false; + info.enableDualTexture = false; + info.enableNormalMaps = true; + info.biasedVertexNormals = (flags & BIASED_VERTEX_NORMALS) != 0; + info.alpha = (mh.Alpha == 0.f) ? 1.f : mh.Alpha; + + info.diffuseTexture = albetoTexture; + info.specularTexture = rmaName; + info.normalTexture = normalName; + info.emissiveTexture = emissiveName; + + m.effect = fxFactory.CreateEffect(info, nullptr); + m.alpha = (info.alpha < 1.f); + } + + + //-------------------------------------------------------------------------------------- + // Direct3D 9 Vertex Declaration to Direct3D 11 Input Layout mapping + + static_assert(D3D11_IA_VERTEX_INPUT_STRUCTURE_ELEMENT_COUNT >= 32, "SDKMESH supports decls up to 32 entries"); + + unsigned int GetInputLayoutDesc( + _In_reads_(32) const DXUT::D3DVERTEXELEMENT9 decl[], + std::vector& inputDesc) + { + static const D3D11_INPUT_ELEMENT_DESC s_elements[] = + { + { "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_B8G8R8A8_UNORM, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TANGENT", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { 
"BINORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "BLENDINDICES", 0, DXGI_FORMAT_R8G8B8A8_UINT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "BLENDWEIGHT", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + }; + + using namespace DXUT; + + uint32_t offset = 0; + uint32_t texcoords = 0; + unsigned int flags = 0; + + bool posfound = false; + + for (uint32_t index = 0; index < DXUT::MAX_VERTEX_ELEMENTS; ++index) + { + if (decl[index].Usage == 0xFF) + break; + + if (decl[index].Type == D3DDECLTYPE_UNUSED) + break; + + if (decl[index].Offset != offset) + break; + + if (decl[index].Usage == D3DDECLUSAGE_POSITION) + { + if (decl[index].Type == D3DDECLTYPE_FLOAT3) + { + inputDesc.push_back(s_elements[0]); + offset += 12; + posfound = true; + } + else + break; + } + else if (decl[index].Usage == D3DDECLUSAGE_NORMAL + || decl[index].Usage == D3DDECLUSAGE_TANGENT + || decl[index].Usage == D3DDECLUSAGE_BINORMAL) + { + size_t base = 1; + if (decl[index].Usage == D3DDECLUSAGE_TANGENT) + base = 3; + else if (decl[index].Usage == D3DDECLUSAGE_BINORMAL) + base = 4; + + D3D11_INPUT_ELEMENT_DESC desc = s_elements[base]; + + bool unk = false; + switch (decl[index].Type) + { + case D3DDECLTYPE_FLOAT3: assert(desc.Format == DXGI_FORMAT_R32G32B32_FLOAT); offset += 12; break; + case D3DDECLTYPE_UBYTE4N: desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; flags |= BIASED_VERTEX_NORMALS; offset += 4; break; + case D3DDECLTYPE_SHORT4N: desc.Format = DXGI_FORMAT_R16G16B16A16_SNORM; offset += 8; break; + case D3DDECLTYPE_FLOAT16_4: desc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; offset += 8; break; + case D3DDECLTYPE_DXGI_R10G10B10A2_UNORM: desc.Format = DXGI_FORMAT_R10G10B10A2_UNORM; flags |= BIASED_VERTEX_NORMALS; offset += 4; break; + case 
D3DDECLTYPE_DXGI_R11G11B10_FLOAT: desc.Format = DXGI_FORMAT_R11G11B10_FLOAT; flags |= BIASED_VERTEX_NORMALS; offset += 4; break; + case D3DDECLTYPE_DXGI_R8G8B8A8_SNORM: desc.Format = DXGI_FORMAT_R8G8B8A8_SNORM; offset += 4; break; + + #if defined(_XBOX_ONE) && defined(_TITLE) + case D3DDECLTYPE_DEC3N: desc.Format = DXGI_FORMAT_R10G10B10_SNORM_A2_UNORM; offset += 4; break; + case (32 + DXGI_FORMAT_R10G10B10_SNORM_A2_UNORM): desc.Format = DXGI_FORMAT_R10G10B10_SNORM_A2_UNORM; offset += 4; break; + #else + case D3DDECLTYPE_DEC3N: desc.Format = DXGI_FORMAT_R10G10B10A2_UNORM; flags |= USES_OBSOLETE_DEC3N; offset += 4; break; + #endif + + default: + unk = true; + break; + } + + if (unk) + break; + + if (decl[index].Usage == D3DDECLUSAGE_TANGENT) + { + flags |= NORMAL_MAPS; + } + + inputDesc.push_back(desc); + } + else if (decl[index].Usage == D3DDECLUSAGE_COLOR) + { + D3D11_INPUT_ELEMENT_DESC desc = s_elements[2]; + + bool unk = false; + switch (decl[index].Type) + { + case D3DDECLTYPE_FLOAT4: desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; offset += 16; break; + case D3DDECLTYPE_D3DCOLOR: assert(desc.Format == DXGI_FORMAT_B8G8R8A8_UNORM); offset += 4; break; + case D3DDECLTYPE_UBYTE4N: desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; offset += 4; break; + case D3DDECLTYPE_FLOAT16_4: desc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; offset += 8; break; + case D3DDECLTYPE_DXGI_R10G10B10A2_UNORM: desc.Format = DXGI_FORMAT_R10G10B10A2_UNORM; offset += 4; break; + case D3DDECLTYPE_DXGI_R11G11B10_FLOAT: desc.Format = DXGI_FORMAT_R11G11B10_FLOAT; offset += 4; break; + + default: + unk = true; + break; + } + + if (unk) + break; + + flags |= PER_VERTEX_COLOR; + + inputDesc.push_back(desc); + } + else if (decl[index].Usage == D3DDECLUSAGE_TEXCOORD) + { + D3D11_INPUT_ELEMENT_DESC desc = s_elements[5]; + desc.SemanticIndex = decl[index].UsageIndex; + + bool unk = false; + switch (decl[index].Type) + { + case D3DDECLTYPE_FLOAT1: desc.Format = DXGI_FORMAT_R32_FLOAT; offset += 4; break; + case 
D3DDECLTYPE_FLOAT2: assert(desc.Format == DXGI_FORMAT_R32G32_FLOAT); offset += 8; break; + case D3DDECLTYPE_FLOAT3: desc.Format = DXGI_FORMAT_R32G32B32_FLOAT; offset += 12; break; + case D3DDECLTYPE_FLOAT4: desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; offset += 16; break; + case D3DDECLTYPE_FLOAT16_2: desc.Format = DXGI_FORMAT_R16G16_FLOAT; offset += 4; break; + case D3DDECLTYPE_FLOAT16_4: desc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; offset += 8; break; + + default: + unk = true; + break; + } + + if (unk) + break; + + ++texcoords; + + inputDesc.push_back(desc); + } + else if (decl[index].Usage == D3DDECLUSAGE_BLENDINDICES) + { + if (decl[index].Type == D3DDECLTYPE_UBYTE4) + { + flags |= SKINNING; + inputDesc.push_back(s_elements[6]); + offset += 4; + } + else + break; + } + else if (decl[index].Usage == D3DDECLUSAGE_BLENDWEIGHT) + { + if (decl[index].Type == D3DDECLTYPE_UBYTE4N) + { + flags |= SKINNING; + inputDesc.push_back(s_elements[7]); + offset += 4; + } + else + break; + } + else + break; + } + + if (!posfound) + throw std::exception("SV_Position is required"); + + if (texcoords == 2) + { + flags |= DUAL_TEXTURE; + } + + return flags; + } +} + + +//====================================================================================== +// Model Loader +//====================================================================================== + +_Use_decl_annotations_ +std::unique_ptr DirectX::Model::CreateFromSDKMESH( + ID3D11Device* d3dDevice, + const uint8_t* meshData, + size_t idataSize, + IEffectFactory& fxFactory, + ModelLoaderFlags flags) +{ + if (!d3dDevice || !meshData) + throw std::exception("Device and meshData cannot be null"); + + uint64_t dataSize = idataSize; + + // File Headers + if (dataSize < sizeof(DXUT::SDKMESH_HEADER)) + throw std::exception("End of file"); + auto header = reinterpret_cast(meshData); + + size_t headerSize = sizeof(DXUT::SDKMESH_HEADER) + + header->NumVertexBuffers * sizeof(DXUT::SDKMESH_VERTEX_BUFFER_HEADER) + + 
header->NumIndexBuffers * sizeof(DXUT::SDKMESH_INDEX_BUFFER_HEADER); + if (header->HeaderSize != headerSize) + throw std::exception("Not a valid SDKMESH file"); + + if (dataSize < header->HeaderSize) + throw std::exception("End of file"); + + if (header->Version != DXUT::SDKMESH_FILE_VERSION && header->Version != DXUT::SDKMESH_FILE_VERSION_V2) + throw std::exception("Not a supported SDKMESH version"); + + if (header->IsBigEndian) + throw std::exception("Loading BigEndian SDKMESH files not supported"); + + if (!header->NumMeshes) + throw std::exception("No meshes found"); + + if (!header->NumVertexBuffers) + throw std::exception("No vertex buffers found"); + + if (!header->NumIndexBuffers) + throw std::exception("No index buffers found"); + + if (!header->NumTotalSubsets) + throw std::exception("No subsets found"); + + if (!header->NumMaterials) + throw std::exception("No materials found"); + + // Sub-headers + if (dataSize < header->VertexStreamHeadersOffset + || (dataSize < (header->VertexStreamHeadersOffset + uint64_t(header->NumVertexBuffers) * sizeof(DXUT::SDKMESH_VERTEX_BUFFER_HEADER)))) + throw std::exception("End of file"); + auto vbArray = reinterpret_cast(meshData + header->VertexStreamHeadersOffset); + + if (dataSize < header->IndexStreamHeadersOffset + || (dataSize < (header->IndexStreamHeadersOffset + uint64_t(header->NumIndexBuffers) * sizeof(DXUT::SDKMESH_INDEX_BUFFER_HEADER)))) + throw std::exception("End of file"); + auto ibArray = reinterpret_cast(meshData + header->IndexStreamHeadersOffset); + + if (dataSize < header->MeshDataOffset + || (dataSize < (header->MeshDataOffset + uint64_t(header->NumMeshes) * sizeof(DXUT::SDKMESH_MESH)))) + throw std::exception("End of file"); + auto meshArray = reinterpret_cast(meshData + header->MeshDataOffset); + + if (dataSize < header->SubsetDataOffset + || (dataSize < (header->SubsetDataOffset + uint64_t(header->NumTotalSubsets) * sizeof(DXUT::SDKMESH_SUBSET)))) + throw std::exception("End of file"); + auto 
subsetArray = reinterpret_cast(meshData + header->SubsetDataOffset); + + if (dataSize < header->FrameDataOffset + || (dataSize < (header->FrameDataOffset + uint64_t(header->NumFrames) * sizeof(DXUT::SDKMESH_FRAME)))) + throw std::exception("End of file"); + // TODO - auto frameArray = reinterpret_cast( meshData + header->FrameDataOffset ); + + if (dataSize < header->MaterialDataOffset + || (dataSize < (header->MaterialDataOffset + uint64_t(header->NumMaterials) * sizeof(DXUT::SDKMESH_MATERIAL)))) + throw std::exception("End of file"); + + const DXUT::SDKMESH_MATERIAL* materialArray = nullptr; + const DXUT::SDKMESH_MATERIAL_V2* materialArray_v2 = nullptr; + if (header->Version == DXUT::SDKMESH_FILE_VERSION_V2) + { + materialArray_v2 = reinterpret_cast(meshData + header->MaterialDataOffset); + } + else + { + materialArray = reinterpret_cast(meshData + header->MaterialDataOffset); + } + + // Buffer data + uint64_t bufferDataOffset = header->HeaderSize + header->NonBufferDataSize; + if ((dataSize < bufferDataOffset) + || (dataSize < bufferDataOffset + header->BufferDataSize)) + throw std::exception("End of file"); + const uint8_t* bufferData = meshData + bufferDataOffset; + + // Create vertex buffers + std::vector> vbs; + vbs.resize(header->NumVertexBuffers); + + std::vector>> vbDecls; + vbDecls.resize(header->NumVertexBuffers); + + std::vector materialFlags; + materialFlags.resize(header->NumVertexBuffers); + + bool dec3nwarning = false; + for (UINT j = 0; j < header->NumVertexBuffers; ++j) + { + auto& vh = vbArray[j]; + + if (vh.SizeBytes > UINT32_MAX) + throw std::exception("VB too large"); + + if (!(flags & ModelLoader_AllowLargeModels)) + { + if (vh.SizeBytes > (D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM * 1024u * 1024u)) + throw std::exception("VB too large for DirectX 11"); + } + + if (dataSize < vh.DataOffset + || (dataSize < vh.DataOffset + vh.SizeBytes)) + throw std::exception("End of file"); + + vbDecls[j] = std::make_shared>(); + unsigned int 
ilflags = GetInputLayoutDesc(vh.Decl, *vbDecls[j].get()); + + if (ilflags & SKINNING) + { + ilflags &= ~static_cast(DUAL_TEXTURE | NORMAL_MAPS); + } + if (ilflags & DUAL_TEXTURE) + { + ilflags &= ~static_cast(NORMAL_MAPS); + } + + if (ilflags & USES_OBSOLETE_DEC3N) + { + dec3nwarning = true; + } + + materialFlags[j] = ilflags; + + auto verts = bufferData + (vh.DataOffset - bufferDataOffset); + + D3D11_BUFFER_DESC desc = {}; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.ByteWidth = static_cast(vh.SizeBytes); + desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + + D3D11_SUBRESOURCE_DATA initData = { verts, 0, 0 }; + + ThrowIfFailed( + d3dDevice->CreateBuffer(&desc, &initData, &vbs[j]) + ); + + SetDebugObjectName(vbs[j].Get(), "ModelSDKMESH"); + } + + if (dec3nwarning) + { + DebugTrace("WARNING: Vertex declaration uses legacy Direct3D 9 D3DDECLTYPE_DEC3N which has no DXGI equivalent\n" + " (treating as DXGI_FORMAT_R10G10B10A2_UNORM which is not a signed format)\n"); + } + + // Create index buffers + std::vector> ibs; + ibs.resize(header->NumIndexBuffers); + + for (UINT j = 0; j < header->NumIndexBuffers; ++j) + { + auto& ih = ibArray[j]; + + if (ih.SizeBytes > UINT32_MAX) + throw std::exception("IB too large"); + + if (!(flags & ModelLoader_AllowLargeModels)) + { + if (ih.SizeBytes > (D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM * 1024u * 1024u)) + throw std::exception("IB too large for DirectX 11"); + } + + if (dataSize < ih.DataOffset + || (dataSize < ih.DataOffset + ih.SizeBytes)) + throw std::exception("End of file"); + + if (ih.IndexType != DXUT::IT_16BIT && ih.IndexType != DXUT::IT_32BIT) + throw std::exception("Invalid index buffer type found"); + + auto indices = bufferData + (ih.DataOffset - bufferDataOffset); + + D3D11_BUFFER_DESC desc = {}; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.ByteWidth = static_cast(ih.SizeBytes); + desc.BindFlags = D3D11_BIND_INDEX_BUFFER; + + D3D11_SUBRESOURCE_DATA initData = { indices, 0, 0 }; + + ThrowIfFailed( + 
d3dDevice->CreateBuffer(&desc, &initData, &ibs[j]) + ); + + SetDebugObjectName(ibs[j].Get(), "ModelSDKMESH"); + } + + // Create meshes + std::vector materials; + materials.resize(header->NumMaterials); + + auto model = std::make_unique(); + model->meshes.reserve(header->NumMeshes); + + for (UINT meshIndex = 0; meshIndex < header->NumMeshes; ++meshIndex) + { + auto& mh = meshArray[meshIndex]; + + if (!mh.NumSubsets + || !mh.NumVertexBuffers + || mh.IndexBuffer >= header->NumIndexBuffers + || mh.VertexBuffers[0] >= header->NumVertexBuffers) + throw std::exception("Invalid mesh found"); + + // mh.NumVertexBuffers is sometimes not what you'd expect, so we skip validating it + + if (dataSize < mh.SubsetOffset + || (dataSize < mh.SubsetOffset + uint64_t(mh.NumSubsets) * sizeof(UINT))) + throw std::exception("End of file"); + + auto subsets = reinterpret_cast(meshData + mh.SubsetOffset); + + if (mh.NumFrameInfluences > 0) + { + if (dataSize < mh.FrameInfluenceOffset + || (dataSize < mh.FrameInfluenceOffset + uint64_t(mh.NumFrameInfluences) * sizeof(UINT))) + throw std::exception("End of file"); + + // TODO - auto influences = reinterpret_cast( meshData + mh.FrameInfluenceOffset ); + } + + auto mesh = std::make_shared(); + wchar_t meshName[DXUT::MAX_MESH_NAME] = {}; + MultiByteToWideChar(CP_UTF8, 0, mh.Name, -1, meshName, DXUT::MAX_MESH_NAME); + mesh->name = meshName; + mesh->ccw = (flags & ModelLoader_CounterClockwise) != 0; + mesh->pmalpha = (flags & ModelLoader_PremultipledAlpha) != 0; + + // Extents + mesh->boundingBox.Center = mh.BoundingBoxCenter; + mesh->boundingBox.Extents = mh.BoundingBoxExtents; + BoundingSphere::CreateFromBoundingBox(mesh->boundingSphere, mesh->boundingBox); + + // Create subsets + mesh->meshParts.reserve(mh.NumSubsets); + for (UINT j = 0; j < mh.NumSubsets; ++j) + { + auto sIndex = subsets[j]; + if (sIndex >= header->NumTotalSubsets) + throw std::exception("Invalid mesh found"); + + auto& subset = subsetArray[sIndex]; + + 
D3D11_PRIMITIVE_TOPOLOGY primType; + switch (subset.PrimitiveType) + { + case DXUT::PT_TRIANGLE_LIST: primType = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; + case DXUT::PT_TRIANGLE_STRIP: primType = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; + case DXUT::PT_LINE_LIST: primType = D3D11_PRIMITIVE_TOPOLOGY_LINELIST; break; + case DXUT::PT_LINE_STRIP: primType = D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP; break; + case DXUT::PT_POINT_LIST: primType = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; break; + case DXUT::PT_TRIANGLE_LIST_ADJ: primType = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ; break; + case DXUT::PT_TRIANGLE_STRIP_ADJ: primType = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ; break; + case DXUT::PT_LINE_LIST_ADJ: primType = D3D11_PRIMITIVE_TOPOLOGY_LINELIST_ADJ; break; + case DXUT::PT_LINE_STRIP_ADJ: primType = D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ; break; + + case DXUT::PT_QUAD_PATCH_LIST: + case DXUT::PT_TRIANGLE_PATCH_LIST: + throw std::exception("Direct3D9 era tessellation not supported"); + + default: + throw std::exception("Unknown primitive type"); + } + + if (subset.MaterialID >= header->NumMaterials) + throw std::exception("Invalid mesh found"); + + auto& mat = materials[subset.MaterialID]; + + if (!mat.effect) + { + size_t vi = mh.VertexBuffers[0]; + + if (materialArray_v2) + { + LoadMaterial( + materialArray_v2[subset.MaterialID], + materialFlags[vi], + fxFactory, + mat); + } + else + { + LoadMaterial( + materialArray[subset.MaterialID], + materialFlags[vi], + fxFactory, + mat, + (flags & ModelLoader_MaterialColorsSRGB) != 0); + } + } + + ComPtr il; + ThrowIfFailed( + CreateInputLayoutFromEffect(d3dDevice, mat.effect.get(), + vbDecls[mh.VertexBuffers[0]]->data(), vbDecls[mh.VertexBuffers[0]]->size(), il.GetAddressOf()) + ); + + SetDebugObjectName(il.Get(), "ModelSDKMESH"); + + auto part = new ModelMeshPart(); + part->isAlpha = mat.alpha; + + part->indexCount = static_cast(subset.IndexCount); + part->startIndex = static_cast(subset.IndexStart); + 
part->vertexOffset = static_cast(subset.VertexStart); + part->vertexStride = static_cast(vbArray[mh.VertexBuffers[0]].StrideBytes); + part->indexFormat = (ibArray[mh.IndexBuffer].IndexType == DXUT::IT_32BIT) ? DXGI_FORMAT_R32_UINT : DXGI_FORMAT_R16_UINT; + part->primitiveType = primType; + part->inputLayout = il; + part->indexBuffer = ibs[mh.IndexBuffer]; + part->vertexBuffer = vbs[mh.VertexBuffers[0]]; + part->effect = mat.effect; + part->vbDecl = vbDecls[mh.VertexBuffers[0]]; + + mesh->meshParts.emplace_back(part); + } + + model->meshes.emplace_back(mesh); + } + + return model; +} + + +//-------------------------------------------------------------------------------------- +_Use_decl_annotations_ +std::unique_ptr DirectX::Model::CreateFromSDKMESH( + ID3D11Device* device, + const wchar_t* szFileName, + IEffectFactory& fxFactory, + ModelLoaderFlags flags) +{ + size_t dataSize = 0; + std::unique_ptr data; + HRESULT hr = BinaryReader::ReadEntireFile(szFileName, data, &dataSize); + if (FAILED(hr)) + { + DebugTrace("ERROR: CreateFromSDKMESH failed (%08X) loading '%ls'\n", + static_cast(hr), szFileName); + throw std::exception("CreateFromSDKMESH"); + } + + auto model = CreateFromSDKMESH(device, data.get(), dataSize, fxFactory, flags); + + model->name = szFileName; + + return model; +} diff --git a/Sdk/External/DirectXTK/Src/ModelLoadVBO.cpp b/Sdk/External/DirectXTK/Src/ModelLoadVBO.cpp new file mode 100644 index 0000000..bde3f16 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/ModelLoadVBO.cpp @@ -0,0 +1,202 @@ +//-------------------------------------------------------------------------------------- +// File: ModelLoadVBO.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "Model.h" +#include "DirectXHelpers.h" +#include "Effects.h" +#include "VertexTypes.h" +#include "BinaryReader.h" +#include "PlatformHelpers.h" + +#include "vbo.h" + +using namespace DirectX; +using Microsoft::WRL::ComPtr; + +static_assert(sizeof(VertexPositionNormalTexture) == 32, "VBO vertex size mismatch"); + +namespace +{ + //-------------------------------------------------------------------------------------- + // Shared VB input element description + INIT_ONCE g_InitOnce = INIT_ONCE_STATIC_INIT; + std::shared_ptr> g_vbdecl; + + BOOL CALLBACK InitializeDecl(PINIT_ONCE initOnce, PVOID Parameter, PVOID *lpContext) + { + UNREFERENCED_PARAMETER(initOnce); + UNREFERENCED_PARAMETER(Parameter); + UNREFERENCED_PARAMETER(lpContext); + + g_vbdecl = std::make_shared>( + VertexPositionNormalTexture::InputElements, + VertexPositionNormalTexture::InputElements + VertexPositionNormalTexture::InputElementCount); + + return TRUE; + } +} + + +//-------------------------------------------------------------------------------------- +_Use_decl_annotations_ +std::unique_ptr DirectX::Model::CreateFromVBO( + ID3D11Device* device, + const uint8_t* meshData, size_t dataSize, + std::shared_ptr ieffect, + ModelLoaderFlags flags) +{ + if (!InitOnceExecuteOnce(&g_InitOnce, InitializeDecl, nullptr, nullptr)) + throw std::exception("One-time initialization failed"); + + if (!device || !meshData) + throw std::exception("Device and meshData cannot be null"); + + // File Header + if (dataSize < sizeof(VBO::header_t)) + throw std::exception("End of file"); + auto header = reinterpret_cast(meshData); + + if (!header->numVertices || !header->numIndices) + throw std::exception("No vertices or indices found"); + + uint64_t sizeInBytes = uint64_t(header->numVertices) * sizeof(VertexPositionNormalTexture); + + if (sizeInBytes > 
UINT32_MAX) + throw std::exception("VB too large"); + + if (!(flags & ModelLoader_AllowLargeModels)) + { + if (sizeInBytes > uint64_t(D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM * 1024u * 1024u)) + throw std::exception("VB too large for DirectX 11"); + } + + auto vertSize = static_cast(sizeInBytes); + + if (dataSize < (vertSize + sizeof(VBO::header_t))) + throw std::exception("End of file"); + auto verts = reinterpret_cast(meshData + sizeof(VBO::header_t)); + + sizeInBytes = uint64_t(header->numIndices) * sizeof(uint16_t); + + if (sizeInBytes > UINT32_MAX) + throw std::exception("IB too large"); + + if (!(flags & ModelLoader_AllowLargeModels)) + { + if (sizeInBytes > uint64_t(D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM * 1024u * 1024u)) + throw std::exception("IB too large for DirectX 11"); + } + + auto indexSize = static_cast(sizeInBytes); + + if (dataSize < (sizeof(VBO::header_t) + vertSize + indexSize)) + throw std::exception("End of file"); + auto indices = reinterpret_cast(meshData + sizeof(VBO::header_t) + vertSize); + + // Create vertex buffer + ComPtr vb; + { + D3D11_BUFFER_DESC desc = {}; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.ByteWidth = static_cast(vertSize); + desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + + D3D11_SUBRESOURCE_DATA initData = { verts, 0, 0 }; + + ThrowIfFailed( + device->CreateBuffer(&desc, &initData, vb.GetAddressOf()) + ); + + SetDebugObjectName(vb.Get(), "ModelVBO"); + } + + // Create index buffer + ComPtr ib; + { + D3D11_BUFFER_DESC desc = {}; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.ByteWidth = static_cast(indexSize); + desc.BindFlags = D3D11_BIND_INDEX_BUFFER; + + D3D11_SUBRESOURCE_DATA initData = { indices, 0, 0 }; + + ThrowIfFailed( + device->CreateBuffer(&desc, &initData, ib.GetAddressOf()) + ); + + SetDebugObjectName(ib.Get(), "ModelVBO"); + } + + // Create input layout and effect + if (!ieffect) + { + auto effect = std::make_shared(device); + effect->EnableDefaultLighting(); + 
effect->SetLightingEnabled(true); + + ieffect = effect; + } + + ComPtr il; + + ThrowIfFailed( + CreateInputLayoutFromEffect(device, ieffect.get(), il.GetAddressOf()) + ); + + SetDebugObjectName(il.Get(), "ModelVBO"); + + auto part = new ModelMeshPart(); + part->indexCount = header->numIndices; + part->startIndex = 0; + part->vertexStride = static_cast(sizeof(VertexPositionNormalTexture)); + part->inputLayout = il; + part->indexBuffer = ib; + part->vertexBuffer = vb; + part->effect = ieffect; + part->vbDecl = g_vbdecl; + + auto mesh = std::make_shared(); + mesh->ccw = (flags & ModelLoader_CounterClockwise) != 0; + mesh->pmalpha = (flags & ModelLoader_PremultipledAlpha) != 0; + BoundingSphere::CreateFromPoints(mesh->boundingSphere, header->numVertices, &verts->position, sizeof(VertexPositionNormalTexture)); + BoundingBox::CreateFromPoints(mesh->boundingBox, header->numVertices, &verts->position, sizeof(VertexPositionNormalTexture)); + mesh->meshParts.emplace_back(part); + + auto model = std::make_unique(); + model->meshes.emplace_back(mesh); + + return model; +} + + +//-------------------------------------------------------------------------------------- +_Use_decl_annotations_ +std::unique_ptr DirectX::Model::CreateFromVBO( + ID3D11Device* device, + const wchar_t* szFileName, + std::shared_ptr ieffect, + ModelLoaderFlags flags) +{ + size_t dataSize = 0; + std::unique_ptr data; + HRESULT hr = BinaryReader::ReadEntireFile(szFileName, data, &dataSize); + if (FAILED(hr)) + { + DebugTrace("ERROR: CreateFromVBO failed (%08X) loading '%ls'\n", + static_cast(hr), szFileName); + throw std::exception("CreateFromVBO"); + } + + auto model = CreateFromVBO(device, data.get(), dataSize, ieffect, flags); + + model->name = szFileName; + + return model; +} diff --git a/Sdk/External/DirectXTK/Src/Mouse.cpp b/Sdk/External/DirectXTK/Src/Mouse.cpp new file mode 100644 index 0000000..96f5974 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Mouse.cpp @@ -0,0 +1,1502 @@ 
+//-------------------------------------------------------------------------------------- +// File: Mouse.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "Mouse.h" + +#include "PlatformHelpers.h" + +using namespace DirectX; +using Microsoft::WRL::ComPtr; + + +#if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_GAMES) + +#include + +//====================================================================================== +// Win32 + GameInput implementation +//====================================================================================== + +// +// Call this static function from your Window Message Procedure +// +// LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) +// { +// switch (message) +// { +// case WM_ACTIVATEAPP: +// case WM_MOUSEMOVE: +// case WM_LBUTTONDOWN: +// case WM_LBUTTONUP: +// case WM_RBUTTONDOWN: +// case WM_RBUTTONUP: +// case WM_MBUTTONDOWN: +// case WM_MBUTTONUP: +// case WM_MOUSEWHEEL: +// case WM_XBUTTONDOWN: +// case WM_XBUTTONUP: +// Mouse::ProcessMessage(message, wParam, lParam); +// break; +// +// } +// } +// + +class Mouse::Impl +{ +public: + explicit Impl(Mouse* owner) noexcept(false) : + mState{}, + mOwner(owner), + mIs4k(false), + mConnected(0), + mDeviceToken(0), + mMode(MODE_ABSOLUTE), + mScrollWheelCurrent(0), + mRelativeX(INT64_MAX), + mRelativeY(INT64_MAX), + mRelativeWheelY(INT64_MAX) + { + if (s_mouse) + { + throw std::exception("Mouse is a singleton"); + } + + s_mouse = this; + + ThrowIfFailed(GameInputCreate(mGameInput.GetAddressOf())); + + ThrowIfFailed(mGameInput->RegisterDeviceCallback( + nullptr, + GameInputKindMouse, + GameInputDeviceConnected, + GameInputBlockingEnumeration, + this, + 
OnGameInputDevice, + &mDeviceToken)); + + mScrollWheelValue.reset(CreateEventEx(nullptr, nullptr, CREATE_EVENT_MANUAL_RESET, EVENT_MODIFY_STATE | SYNCHRONIZE)); + if (!mScrollWheelValue) + { + throw std::exception("CreateEventEx"); + } + } + + Impl(Impl&&) = default; + Impl& operator= (Impl&&) = default; + + Impl(Impl const&) = delete; + Impl& operator= (Impl const&) = delete; + + ~Impl() + { + if (mDeviceToken) + { + if (mGameInput) + { + HRESULT hr = mGameInput->UnregisterCallback(mDeviceToken, UINT64_MAX); + if (FAILED(hr)) + { + DebugTrace("ERROR: GameInput::UnregisterCallback [mouse] failed (%08X)", static_cast(hr)); + } + } + + mDeviceToken = 0; + } + + s_mouse = nullptr; + } + + void GetState(State& state) const + { + memcpy(&state, &mState, sizeof(State)); + state.positionMode = mMode; + + DWORD result = WaitForSingleObjectEx(mScrollWheelValue.get(), 0, FALSE); + if (result == WAIT_FAILED) + throw std::exception("WaitForSingleObjectEx"); + + if (result == WAIT_OBJECT_0) + { + mScrollWheelCurrent = 0; + } + + if (state.positionMode == MODE_RELATIVE) + { + state.x = state.y = 0; + + ComPtr reading; + if (SUCCEEDED(mGameInput->GetCurrentReading(GameInputKindMouse, nullptr, reading.GetAddressOf()))) + { + GameInputMouseState mouse; + if (reading->GetMouseState(&mouse)) + { + state.leftButton = (mouse.buttons & GameInputMouseLeftButton) != 0; + state.middleButton = (mouse.buttons & GameInputMouseMiddleButton) != 0; + state.rightButton = (mouse.buttons & GameInputMouseRightButton) != 0; + state.xButton1 = (mouse.buttons & GameInputMouseButton4) != 0; + state.xButton2 = (mouse.buttons & GameInputMouseButton5) != 0; + + if (mRelativeX != INT64_MAX) + { + state.x = static_cast(mouse.positionX - mRelativeX); + state.y = static_cast(mouse.positionY - mRelativeY); + int scrollDelta = static_cast(mouse.wheelY - mRelativeWheelY); + mScrollWheelCurrent += scrollDelta; + } + + mRelativeX = mouse.positionX; + mRelativeY = mouse.positionY; + mRelativeWheelY = mouse.wheelY; + 
} + } + } + + state.scrollWheelValue = mScrollWheelCurrent; + } + + void ResetScrollWheelValue() noexcept + { + SetEvent(mScrollWheelValue.get()); + } + + void SetMode(Mode mode) + { + if (mMode == mode) + return; + + mMode = mode; + mRelativeX = INT64_MAX; + mRelativeY = INT64_MAX; + mRelativeWheelY = INT64_MAX; + + ShowCursor((mode == MODE_ABSOLUTE) ? TRUE : FALSE); + } + + bool IsConnected() const noexcept + { + return mConnected > 0; + } + + bool IsVisible() const noexcept + { + if (mMode == MODE_RELATIVE) + return false; + + CURSORINFO info = { sizeof(CURSORINFO), 0, nullptr, {} }; + if (!GetCursorInfo(&info)) + return false; + + return (info.flags & CURSOR_SHOWING) != 0; + } + + void SetVisible(bool visible) + { + if (mMode == MODE_RELATIVE) + return; + + CURSORINFO info = { sizeof(CURSORINFO), 0, nullptr, {} }; + if (!GetCursorInfo(&info)) + { + throw std::exception("GetCursorInfo"); + } + + bool isvisible = (info.flags & CURSOR_SHOWING) != 0; + if (isvisible != visible) + { + ShowCursor(visible); + } + } + + State mState; + Mouse* mOwner; + bool mIs4k; + uint32_t mConnected; + + static Mouse::Impl* s_mouse; + +private: + ComPtr mGameInput; + GameInputCallbackToken mDeviceToken; + + Mode mMode; + ScopedHandle mScrollWheelValue; + + mutable int mScrollWheelCurrent; + mutable int64_t mRelativeX; + mutable int64_t mRelativeY; + mutable int64_t mRelativeWheelY; + + friend void Mouse::ProcessMessage(UINT message, WPARAM wParam, LPARAM lParam); + + static void CALLBACK OnGameInputDevice( + _In_ GameInputCallbackToken, + _In_ void * context, + _In_ IGameInputDevice *, + _In_ uint64_t, + _In_ GameInputDeviceStatus currentStatus, + _In_ GameInputDeviceStatus) noexcept + { + auto impl = reinterpret_cast(context); + + if (currentStatus & GameInputDeviceConnected) + { + ++impl->mConnected; + } + else if (impl->mConnected > 0) + { + --impl->mConnected; + } + } +}; + + +Mouse::Impl* Mouse::Impl::s_mouse = nullptr; + + +void Mouse::ProcessMessage(UINT message, WPARAM 
wParam, LPARAM lParam) +{ + auto pImpl = Impl::s_mouse; + + if (!pImpl) + return; + + DWORD result = WaitForSingleObjectEx(pImpl->mScrollWheelValue.get(), 0, FALSE); + if (result == WAIT_FAILED) + throw std::exception("WaitForSingleObjectEx"); + + if (result == WAIT_OBJECT_0) + { + pImpl->mScrollWheelCurrent = 0; + } + + switch (message) + { + case WM_ACTIVATEAPP: + if (wParam) + { + if (pImpl->mMode == MODE_RELATIVE) + { + pImpl->mRelativeX = INT64_MAX; + pImpl->mRelativeY = INT64_MAX; + + ShowCursor(FALSE); + } + } + else + { + memset(&pImpl->mState, 0, sizeof(State)); + } + return; + + case WM_MOUSEMOVE: + break; + + case WM_LBUTTONDOWN: + pImpl->mState.leftButton = true; + break; + + case WM_LBUTTONUP: + pImpl->mState.leftButton = false; + break; + + case WM_RBUTTONDOWN: + pImpl->mState.rightButton = true; + break; + + case WM_RBUTTONUP: + pImpl->mState.rightButton = false; + break; + + case WM_MBUTTONDOWN: + pImpl->mState.middleButton = true; + break; + + case WM_MBUTTONUP: + pImpl->mState.middleButton = false; + break; + + case WM_MOUSEWHEEL: + if (pImpl->mMode == MODE_ABSOLUTE) + { + pImpl->mScrollWheelCurrent += GET_WHEEL_DELTA_WPARAM(wParam); + } + return; + + case WM_XBUTTONDOWN: + switch (GET_XBUTTON_WPARAM(wParam)) + { + case XBUTTON1: + pImpl->mState.xButton1 = true; + break; + + case XBUTTON2: + pImpl->mState.xButton2 = true; + break; + } + break; + + case WM_XBUTTONUP: + switch (GET_XBUTTON_WPARAM(wParam)) + { + case XBUTTON1: + pImpl->mState.xButton1 = false; + break; + + case XBUTTON2: + pImpl->mState.xButton2 = false; + break; + } + break; + + default: + // Not a mouse message, so exit + return; + } + + if (pImpl->mMode == MODE_ABSOLUTE) + { + // All mouse messages provide a new pointer position + int xPos = static_cast(LOWORD(lParam)); // GET_X_LPARAM(lParam); + int yPos = static_cast(HIWORD(lParam)); // GET_Y_LPARAM(lParam); + + if (pImpl->mIs4k) + { + pImpl->mState.x = static_cast(xPos) * 2; + pImpl->mState.y = static_cast(yPos) * 2; + } + else 
+ { + pImpl->mState.x = static_cast(xPos); + pImpl->mState.y = static_cast(yPos); + } + } +} + + +void Mouse::SetResolution(bool use4k) +{ + auto pImpl = Impl::s_mouse; + + if (!pImpl) + return; + + pImpl->mIs4k = use4k; +} + + +#elif !defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP) + +//====================================================================================== +// Win32 desktop implementation +//====================================================================================== + +// +// For a Win32 desktop application, in your window setup be sure to call this method: +// +// m_mouse->SetWindow(hwnd); +// +// And call this static function from your Window Message Procedure +// +// LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) +// { +// switch (message) +// { +// case WM_ACTIVATEAPP: +// case WM_INPUT: +// case WM_MOUSEMOVE: +// case WM_LBUTTONDOWN: +// case WM_LBUTTONUP: +// case WM_RBUTTONDOWN: +// case WM_RBUTTONUP: +// case WM_MBUTTONDOWN: +// case WM_MBUTTONUP: +// case WM_MOUSEWHEEL: +// case WM_XBUTTONDOWN: +// case WM_XBUTTONUP: +// case WM_MOUSEHOVER: +// Mouse::ProcessMessage(message, wParam, lParam); +// break; +// +// } +// } +// + +class Mouse::Impl +{ +public: + explicit Impl(Mouse* owner) noexcept(false) : + mState{}, + mOwner(owner), + mWindow(nullptr), + mMode(MODE_ABSOLUTE), + mLastX(0), + mLastY(0), + mRelativeX(INT32_MAX), + mRelativeY(INT32_MAX), + mInFocus(true) + { + if (s_mouse) + { + throw std::exception("Mouse is a singleton"); + } + + s_mouse = this; + + mScrollWheelValue.reset(CreateEventEx(nullptr, nullptr, CREATE_EVENT_MANUAL_RESET, EVENT_MODIFY_STATE | SYNCHRONIZE)); + mRelativeRead.reset(CreateEventEx(nullptr, nullptr, CREATE_EVENT_MANUAL_RESET, EVENT_MODIFY_STATE | SYNCHRONIZE)); + mAbsoluteMode.reset(CreateEventEx(nullptr, nullptr, 0, EVENT_MODIFY_STATE | SYNCHRONIZE)); + mRelativeMode.reset(CreateEventEx(nullptr, nullptr, 0, EVENT_MODIFY_STATE | 
SYNCHRONIZE)); + if (!mScrollWheelValue + || !mRelativeRead + || !mAbsoluteMode + || !mRelativeMode) + { + throw std::exception("CreateEventEx"); + } + } + + Impl(Impl&&) = default; + Impl& operator= (Impl&&) = default; + + Impl(Impl const&) = delete; + Impl& operator= (Impl const&) = delete; + + ~Impl() + { + s_mouse = nullptr; + } + + void GetState(State& state) const + { + memcpy(&state, &mState, sizeof(State)); + state.positionMode = mMode; + + DWORD result = WaitForSingleObjectEx(mScrollWheelValue.get(), 0, FALSE); + if (result == WAIT_FAILED) + throw std::exception("WaitForSingleObjectEx"); + + if (result == WAIT_OBJECT_0) + { + state.scrollWheelValue = 0; + } + + if (state.positionMode == MODE_RELATIVE) + { + result = WaitForSingleObjectEx(mRelativeRead.get(), 0, FALSE); + + if (result == WAIT_FAILED) + throw std::exception("WaitForSingleObjectEx"); + + if (result == WAIT_OBJECT_0) + { + state.x = 0; + state.y = 0; + } + else + { + SetEvent(mRelativeRead.get()); + } + } + } + + void ResetScrollWheelValue() noexcept + { + SetEvent(mScrollWheelValue.get()); + } + + void SetMode(Mode mode) + { + if (mMode == mode) + return; + + SetEvent((mode == MODE_ABSOLUTE) ? 
mAbsoluteMode.get() : mRelativeMode.get()); + + assert(mWindow != nullptr); + + TRACKMOUSEEVENT tme; + tme.cbSize = sizeof(tme); + tme.dwFlags = TME_HOVER; + tme.hwndTrack = mWindow; + tme.dwHoverTime = 1; + if (!TrackMouseEvent(&tme)) + { + throw std::exception("TrackMouseEvent"); + } + } + + bool IsConnected() const noexcept + { + return GetSystemMetrics(SM_MOUSEPRESENT) != 0; + } + + bool IsVisible() const noexcept + { + if (mMode == MODE_RELATIVE) + return false; + + CURSORINFO info = { sizeof(CURSORINFO), 0, nullptr, {} }; + if (!GetCursorInfo(&info)) + return false; + + return (info.flags & CURSOR_SHOWING) != 0; + } + + void SetVisible(bool visible) + { + if (mMode == MODE_RELATIVE) + return; + + CURSORINFO info = { sizeof(CURSORINFO), 0, nullptr, {} }; + if (!GetCursorInfo(&info)) + { + throw std::exception("GetCursorInfo"); + } + + bool isvisible = (info.flags & CURSOR_SHOWING) != 0; + if (isvisible != visible) + { + ShowCursor(visible); + } + } + + void SetWindow(HWND window) + { + if (mWindow == window) + return; + + assert(window != nullptr); + + RAWINPUTDEVICE Rid; + Rid.usUsagePage = 0x1 /* HID_USAGE_PAGE_GENERIC */; + Rid.usUsage = 0x2 /* HID_USAGE_GENERIC_MOUSE */; + Rid.dwFlags = RIDEV_INPUTSINK; + Rid.hwndTarget = window; + if (!RegisterRawInputDevices(&Rid, 1, sizeof(RAWINPUTDEVICE))) + { + throw std::exception("RegisterRawInputDevices"); + } + + mWindow = window; + } + + State mState; + + Mouse* mOwner; + + static Mouse::Impl* s_mouse; + +private: + HWND mWindow; + Mode mMode; + + ScopedHandle mScrollWheelValue; + ScopedHandle mRelativeRead; + ScopedHandle mAbsoluteMode; + ScopedHandle mRelativeMode; + + int mLastX; + int mLastY; + int mRelativeX; + int mRelativeY; + + bool mInFocus; + + friend void Mouse::ProcessMessage(UINT message, WPARAM wParam, LPARAM lParam); + + void ClipToWindow() noexcept + { + assert(mWindow != nullptr); + + RECT rect; + GetClientRect(mWindow, &rect); + + POINT ul; + ul.x = rect.left; + ul.y = rect.top; + + POINT lr; + 
lr.x = rect.right; + lr.y = rect.bottom; + + MapWindowPoints(mWindow, nullptr, &ul, 1); + MapWindowPoints(mWindow, nullptr, &lr, 1); + + rect.left = ul.x; + rect.top = ul.y; + + rect.right = lr.x; + rect.bottom = lr.y; + + ClipCursor(&rect); + } +}; + + +Mouse::Impl* Mouse::Impl::s_mouse = nullptr; + + +void Mouse::SetWindow(HWND window) +{ + pImpl->SetWindow(window); +} + + +void Mouse::ProcessMessage(UINT message, WPARAM wParam, LPARAM lParam) +{ + auto pImpl = Impl::s_mouse; + + if (!pImpl) + return; + + HANDLE events[3] = { pImpl->mScrollWheelValue.get(), pImpl->mAbsoluteMode.get(), pImpl->mRelativeMode.get() }; + switch (WaitForMultipleObjectsEx(_countof(events), events, FALSE, 0, FALSE)) + { + default: + case WAIT_TIMEOUT: + break; + + case WAIT_OBJECT_0: + pImpl->mState.scrollWheelValue = 0; + ResetEvent(events[0]); + break; + + case (WAIT_OBJECT_0 + 1): + { + pImpl->mMode = MODE_ABSOLUTE; + ClipCursor(nullptr); + + POINT point; + point.x = pImpl->mLastX; + point.y = pImpl->mLastY; + + // We show the cursor before moving it to support Remote Desktop + ShowCursor(TRUE); + + if (MapWindowPoints(pImpl->mWindow, nullptr, &point, 1)) + { + SetCursorPos(point.x, point.y); + } + pImpl->mState.x = pImpl->mLastX; + pImpl->mState.y = pImpl->mLastY; + } + break; + + case (WAIT_OBJECT_0 + 2): + { + ResetEvent(pImpl->mRelativeRead.get()); + + pImpl->mMode = MODE_RELATIVE; + pImpl->mState.x = pImpl->mState.y = 0; + pImpl->mRelativeX = INT32_MAX; + pImpl->mRelativeY = INT32_MAX; + + ShowCursor(FALSE); + + pImpl->ClipToWindow(); + } + break; + + case WAIT_FAILED: + throw std::exception("WaitForMultipleObjectsEx"); + } + + switch (message) + { + case WM_ACTIVATEAPP: + if (wParam) + { + pImpl->mInFocus = true; + + if (pImpl->mMode == MODE_RELATIVE) + { + pImpl->mState.x = pImpl->mState.y = 0; + + ShowCursor(FALSE); + + pImpl->ClipToWindow(); + } + } + else + { + int scrollWheel = pImpl->mState.scrollWheelValue; + memset(&pImpl->mState, 0, sizeof(State)); + 
pImpl->mState.scrollWheelValue = scrollWheel; + + pImpl->mInFocus = false; + } + return; + + case WM_INPUT: + if (pImpl->mInFocus && pImpl->mMode == MODE_RELATIVE) + { + RAWINPUT raw; + UINT rawSize = sizeof(raw); + + UINT resultData = GetRawInputData(reinterpret_cast(lParam), RID_INPUT, &raw, &rawSize, sizeof(RAWINPUTHEADER)); + if (resultData == UINT(-1)) + { + throw std::exception("GetRawInputData"); + } + + if (raw.header.dwType == RIM_TYPEMOUSE) + { + if (!(raw.data.mouse.usFlags & MOUSE_MOVE_ABSOLUTE)) + { + pImpl->mState.x = raw.data.mouse.lLastX; + pImpl->mState.y = raw.data.mouse.lLastY; + + ResetEvent(pImpl->mRelativeRead.get()); + } + else if (raw.data.mouse.usFlags & MOUSE_VIRTUAL_DESKTOP) + { + // This is used to make Remote Desktop sessons work + const int width = GetSystemMetrics(SM_CXVIRTUALSCREEN); + const int height = GetSystemMetrics(SM_CYVIRTUALSCREEN); + + int x = static_cast((float(raw.data.mouse.lLastX) / 65535.0f) * float(width)); + int y = static_cast((float(raw.data.mouse.lLastY) / 65535.0f) * float(height)); + + if (pImpl->mRelativeX == INT32_MAX) + { + pImpl->mState.x = pImpl->mState.y = 0; + } + else + { + pImpl->mState.x = x - pImpl->mRelativeX; + pImpl->mState.y = y - pImpl->mRelativeY; + } + + pImpl->mRelativeX = x; + pImpl->mRelativeY = y; + + ResetEvent(pImpl->mRelativeRead.get()); + } + } + } + return; + + case WM_MOUSEMOVE: + break; + + case WM_LBUTTONDOWN: + pImpl->mState.leftButton = true; + break; + + case WM_LBUTTONUP: + pImpl->mState.leftButton = false; + break; + + case WM_RBUTTONDOWN: + pImpl->mState.rightButton = true; + break; + + case WM_RBUTTONUP: + pImpl->mState.rightButton = false; + break; + + case WM_MBUTTONDOWN: + pImpl->mState.middleButton = true; + break; + + case WM_MBUTTONUP: + pImpl->mState.middleButton = false; + break; + + case WM_MOUSEWHEEL: + pImpl->mState.scrollWheelValue += GET_WHEEL_DELTA_WPARAM(wParam); + return; + + case WM_XBUTTONDOWN: + switch (GET_XBUTTON_WPARAM(wParam)) + { + case XBUTTON1: + 
pImpl->mState.xButton1 = true; + break; + + case XBUTTON2: + pImpl->mState.xButton2 = true; + break; + } + break; + + case WM_XBUTTONUP: + switch (GET_XBUTTON_WPARAM(wParam)) + { + case XBUTTON1: + pImpl->mState.xButton1 = false; + break; + + case XBUTTON2: + pImpl->mState.xButton2 = false; + break; + } + break; + + case WM_MOUSEHOVER: + break; + + default: + // Not a mouse message, so exit + return; + } + + if (pImpl->mMode == MODE_ABSOLUTE) + { + // All mouse messages provide a new pointer position + int xPos = static_cast(LOWORD(lParam)); // GET_X_LPARAM(lParam); + int yPos = static_cast(HIWORD(lParam)); // GET_Y_LPARAM(lParam); + + pImpl->mState.x = pImpl->mLastX = xPos; + pImpl->mState.y = pImpl->mLastY = yPos; + } +} + + +#elif defined(_XBOX_ONE) && (!defined(_TITLE) || (_XDK_VER < 0x42D907D1)) + +//====================================================================================== +// Null device +//====================================================================================== + +class Mouse::Impl +{ +public: + explicit Impl(Mouse* owner) noexcept(false) : + mOwner(owner) + { + if (s_mouse) + { + throw std::exception("Mouse is a singleton"); + } + + s_mouse = this; + } + + ~Impl() + { + s_mouse = nullptr; + } + + void GetState(State& state) const + { + memset(&state, 0, sizeof(State)); + } + + void ResetScrollWheelValue() noexcept + { + } + + void SetMode(Mode) + { + } + + bool IsConnected() const + { + return false; + } + + bool IsVisible() const noexcept + { + return false; + } + + void SetVisible(bool) + { + } + + Mouse* mOwner; + + static Mouse::Impl* s_mouse; +}; + +Mouse::Impl* Mouse::Impl::s_mouse = nullptr; + + +#else + +//====================================================================================== +// Windows Store or Universal Windows Platform (UWP) app implementation +//====================================================================================== + +// +// For a Windows Store app or Universal Windows Platform (UWP) 
app, add the following to your existing +// application methods: +// +// void App::SetWindow(CoreWindow^ window ) +// { +// m_mouse->SetWindow(window); +// } +// +// void App::OnDpiChanged(DisplayInformation^ sender, Object^ args) +// { +// m_mouse->SetDpi(sender->LogicalDpi); +// } +// + +#include + +class Mouse::Impl +{ +public: + explicit Impl(Mouse* owner) noexcept(false) : + mState{}, + mOwner(owner), + mDPI(96.f), + mMode(MODE_ABSOLUTE), + mPointerPressedToken{}, + mPointerReleasedToken{}, + mPointerMovedToken{}, + mPointerWheelToken{}, + mPointerMouseMovedToken{} + { + if (s_mouse) + { + throw std::exception("Mouse is a singleton"); + } + + s_mouse = this; + + mScrollWheelValue.reset(CreateEventEx(nullptr, nullptr, CREATE_EVENT_MANUAL_RESET, EVENT_MODIFY_STATE | SYNCHRONIZE)); + mRelativeRead.reset(CreateEventEx(nullptr, nullptr, CREATE_EVENT_MANUAL_RESET, EVENT_MODIFY_STATE | SYNCHRONIZE)); + if (!mScrollWheelValue + || !mRelativeRead) + { + throw std::exception("CreateEventEx"); + } + } + + ~Impl() + { + s_mouse = nullptr; + + RemoveHandlers(); + } + + void GetState(State& state) const + { + memcpy(&state, &mState, sizeof(State)); + + DWORD result = WaitForSingleObjectEx(mScrollWheelValue.get(), 0, FALSE); + if (result == WAIT_FAILED) + throw std::exception("WaitForSingleObjectEx"); + + if (result == WAIT_OBJECT_0) + { + state.scrollWheelValue = 0; + } + + if (mMode == MODE_RELATIVE) + { + result = WaitForSingleObjectEx(mRelativeRead.get(), 0, FALSE); + + if (result == WAIT_FAILED) + throw std::exception("WaitForSingleObjectEx"); + + if (result == WAIT_OBJECT_0) + { + state.x = 0; + state.y = 0; + } + else + { + SetEvent(mRelativeRead.get()); + } + } + + state.positionMode = mMode; + } + + void ResetScrollWheelValue() noexcept + { + SetEvent(mScrollWheelValue.get()); + } + + void SetMode(Mode mode) + { + using namespace Microsoft::WRL; + using namespace Microsoft::WRL::Wrappers; + using namespace ABI::Windows::UI::Core; + using namespace 
ABI::Windows::Foundation; + + if (mMode == mode) + return; + + ComPtr statics; + HRESULT hr = GetActivationFactory(HStringReference(RuntimeClass_Windows_UI_Core_CoreWindow).Get(), statics.GetAddressOf()); + ThrowIfFailed(hr); + + ComPtr window; + hr = statics->GetForCurrentThread(window.GetAddressOf()); + ThrowIfFailed(hr); + + if (mode == MODE_RELATIVE) + { + hr = window->get_PointerCursor(mCursor.ReleaseAndGetAddressOf()); + ThrowIfFailed(hr); + + hr = window->put_PointerCursor(nullptr); + ThrowIfFailed(hr); + + SetEvent(mRelativeRead.get()); + + mMode = MODE_RELATIVE; + } + else + { + if (!mCursor) + { + ComPtr factory; + hr = GetActivationFactory(HStringReference(RuntimeClass_Windows_UI_Core_CoreCursor).Get(), factory.GetAddressOf()); + ThrowIfFailed(hr); + + hr = factory->CreateCursor(CoreCursorType_Arrow, 0, mCursor.GetAddressOf()); + ThrowIfFailed(hr); + } + + hr = window->put_PointerCursor(mCursor.Get()); + ThrowIfFailed(hr); + + mCursor.Reset(); + + mMode = MODE_ABSOLUTE; + } + } + + bool IsConnected() const + { + using namespace Microsoft::WRL; + using namespace Microsoft::WRL::Wrappers; + using namespace ABI::Windows::Devices::Input; + using namespace ABI::Windows::Foundation; + + ComPtr caps; + HRESULT hr = RoActivateInstance(HStringReference(RuntimeClass_Windows_Devices_Input_MouseCapabilities).Get(), &caps); + ThrowIfFailed(hr); + + INT32 value; + if (SUCCEEDED(caps->get_MousePresent(&value))) + { + return value != 0; + } + + return false; + } + + bool IsVisible() const noexcept + { + if (mMode == MODE_RELATIVE) + return false; + + ComPtr cursor; + if (FAILED(mWindow->get_PointerCursor(cursor.GetAddressOf()))) + return false; + + return cursor != 0; + } + + void SetVisible(bool visible) + { + using namespace Microsoft::WRL::Wrappers; + using namespace ABI::Windows::Foundation; + using namespace ABI::Windows::UI::Core; + + if (mMode == MODE_RELATIVE) + return; + + if (visible) + { + if (!mCursor) + { + ComPtr factory; + HRESULT hr = 
GetActivationFactory(HStringReference(RuntimeClass_Windows_UI_Core_CoreCursor).Get(), factory.GetAddressOf()); + ThrowIfFailed(hr); + + hr = factory->CreateCursor(CoreCursorType_Arrow, 0, mCursor.GetAddressOf()); + ThrowIfFailed(hr); + } + + HRESULT hr = mWindow->put_PointerCursor(mCursor.Get()); + ThrowIfFailed(hr); + } + else + { + HRESULT hr = mWindow->put_PointerCursor(nullptr); + ThrowIfFailed(hr); + } + } + + void SetWindow(ABI::Windows::UI::Core::ICoreWindow* window) + { + using namespace Microsoft::WRL; + using namespace Microsoft::WRL::Wrappers; + using namespace ABI::Windows::Foundation; + using namespace ABI::Windows::Devices::Input; + + if (mWindow.Get() == window) + return; + + RemoveHandlers(); + + mWindow = window; + + if (!window) + { + mCursor.Reset(); + mMouse.Reset(); + return; + } + + ComPtr mouseStatics; + HRESULT hr = GetActivationFactory(HStringReference(RuntimeClass_Windows_Devices_Input_MouseDevice).Get(), mouseStatics.GetAddressOf()); + ThrowIfFailed(hr); + + hr = mouseStatics->GetForCurrentView(mMouse.ReleaseAndGetAddressOf()); + ThrowIfFailed(hr); + + typedef __FITypedEventHandler_2_Windows__CDevices__CInput__CMouseDevice_Windows__CDevices__CInput__CMouseEventArgs MouseMovedHandler; + hr = mMouse->add_MouseMoved(Callback(MouseMovedEvent).Get(), &mPointerMouseMovedToken); + ThrowIfFailed(hr); + + typedef __FITypedEventHandler_2_Windows__CUI__CCore__CCoreWindow_Windows__CUI__CCore__CPointerEventArgs PointerHandler; + auto cb = Callback(PointerEvent); + + hr = window->add_PointerPressed(cb.Get(), &mPointerPressedToken); + ThrowIfFailed(hr); + + hr = window->add_PointerReleased(cb.Get(), &mPointerReleasedToken); + ThrowIfFailed(hr); + + hr = window->add_PointerMoved(cb.Get(), &mPointerMovedToken); + ThrowIfFailed(hr); + + hr = window->add_PointerWheelChanged(Callback(PointerWheel).Get(), &mPointerWheelToken); + ThrowIfFailed(hr); + } + + State mState; + Mouse* mOwner; + float mDPI; + + static Mouse::Impl* s_mouse; + +private: + Mode mMode; + 
+ ComPtr mWindow; + ComPtr mMouse; + ComPtr mCursor; + + ScopedHandle mScrollWheelValue; + ScopedHandle mRelativeRead; + + EventRegistrationToken mPointerPressedToken; + EventRegistrationToken mPointerReleasedToken; + EventRegistrationToken mPointerMovedToken; + EventRegistrationToken mPointerWheelToken; + EventRegistrationToken mPointerMouseMovedToken; + + void RemoveHandlers() + { + if (mWindow) + { + (void)mWindow->remove_PointerPressed(mPointerPressedToken); + mPointerPressedToken.value = 0; + + (void)mWindow->remove_PointerReleased(mPointerReleasedToken); + mPointerReleasedToken.value = 0; + + (void)mWindow->remove_PointerMoved(mPointerMovedToken); + mPointerMovedToken.value = 0; + + (void)mWindow->remove_PointerWheelChanged(mPointerWheelToken); + mPointerWheelToken.value = 0; + } + + if (mMouse) + { + (void)mMouse->remove_MouseMoved(mPointerMouseMovedToken); + mPointerMouseMovedToken.value = 0; + } + } + + static HRESULT PointerEvent(IInspectable *, ABI::Windows::UI::Core::IPointerEventArgs*args) + { + using namespace ABI::Windows::Foundation; + using namespace ABI::Windows::UI::Input; + using namespace ABI::Windows::Devices::Input; + + if (!s_mouse) + return S_OK; + + ComPtr currentPoint; + HRESULT hr = args->get_CurrentPoint(currentPoint.GetAddressOf()); + ThrowIfFailed(hr); + + ComPtr pointerDevice; + hr = currentPoint->get_PointerDevice(pointerDevice.GetAddressOf()); + ThrowIfFailed(hr); + + PointerDeviceType devType; + hr = pointerDevice->get_PointerDeviceType(&devType); + ThrowIfFailed(hr); + + if (devType == PointerDeviceType::PointerDeviceType_Mouse) + { + ComPtr props; + hr = currentPoint->get_Properties(props.GetAddressOf()); + ThrowIfFailed(hr); + + boolean value; + hr = props->get_IsLeftButtonPressed(&value); + ThrowIfFailed(hr); + s_mouse->mState.leftButton = value != 0; + + hr = props->get_IsRightButtonPressed(&value); + ThrowIfFailed(hr); + s_mouse->mState.rightButton = value != 0; + + hr = props->get_IsMiddleButtonPressed(&value); + 
ThrowIfFailed(hr); + s_mouse->mState.middleButton = value != 0; + + hr = props->get_IsXButton1Pressed(&value); + ThrowIfFailed(hr); + s_mouse->mState.xButton1 = value != 0; + + hr = props->get_IsXButton2Pressed(&value); + ThrowIfFailed(hr); + s_mouse->mState.xButton2 = value != 0; + } + + if (s_mouse->mMode == MODE_ABSOLUTE) + { + Point pos; + hr = currentPoint->get_Position(&pos); + ThrowIfFailed(hr); + + float dpi = s_mouse->mDPI; + + s_mouse->mState.x = static_cast(pos.X * dpi / 96.f + 0.5f); + s_mouse->mState.y = static_cast(pos.Y * dpi / 96.f + 0.5f); + } + + return S_OK; + } + + static HRESULT PointerWheel(IInspectable *, ABI::Windows::UI::Core::IPointerEventArgs*args) + { + using namespace ABI::Windows::Foundation; + using namespace ABI::Windows::UI::Input; + using namespace ABI::Windows::Devices::Input; + + if (!s_mouse) + return S_OK; + + ComPtr currentPoint; + HRESULT hr = args->get_CurrentPoint(currentPoint.GetAddressOf()); + ThrowIfFailed(hr); + + ComPtr pointerDevice; + hr = currentPoint->get_PointerDevice(pointerDevice.GetAddressOf()); + ThrowIfFailed(hr); + + PointerDeviceType devType; + hr = pointerDevice->get_PointerDeviceType(&devType); + ThrowIfFailed(hr); + + if (devType == PointerDeviceType::PointerDeviceType_Mouse) + { + ComPtr props; + hr = currentPoint->get_Properties(props.GetAddressOf()); + ThrowIfFailed(hr); + + INT32 value; + hr = props->get_MouseWheelDelta(&value); + ThrowIfFailed(hr); + + HANDLE evt = s_mouse->mScrollWheelValue.get(); + if (WaitForSingleObjectEx(evt, 0, FALSE) == WAIT_OBJECT_0) + { + s_mouse->mState.scrollWheelValue = 0; + ResetEvent(evt); + } + + s_mouse->mState.scrollWheelValue += value; + + if (s_mouse->mMode == MODE_ABSOLUTE) + { + Point pos; + hr = currentPoint->get_Position(&pos); + ThrowIfFailed(hr); + + float dpi = s_mouse->mDPI; + + s_mouse->mState.x = static_cast(pos.X * dpi / 96.f + 0.5f); + s_mouse->mState.y = static_cast(pos.Y * dpi / 96.f + 0.5f); + } + } + + return S_OK; + } + + static HRESULT 
MouseMovedEvent(IInspectable *, ABI::Windows::Devices::Input::IMouseEventArgs* args) + { + using namespace ABI::Windows::Devices::Input; + + if (!s_mouse) + return S_OK; + + if (s_mouse->mMode == MODE_RELATIVE) + { + MouseDelta delta; + HRESULT hr = args->get_MouseDelta(&delta); + ThrowIfFailed(hr); + + s_mouse->mState.x = delta.X; + s_mouse->mState.y = delta.Y; + + ResetEvent(s_mouse->mRelativeRead.get()); + } + + return S_OK; + } +}; + + +Mouse::Impl* Mouse::Impl::s_mouse = nullptr; + + +void Mouse::SetWindow(ABI::Windows::UI::Core::ICoreWindow* window) +{ + pImpl->SetWindow(window); +} + + +void Mouse::SetDpi(float dpi) +{ + auto pImpl = Impl::s_mouse; + + if (!pImpl) + return; + + pImpl->mDPI = dpi; +} + +#endif + +#pragma warning( disable : 4355 ) + +// Public constructor. +Mouse::Mouse() noexcept(false) + : pImpl(std::make_unique(this)) +{ +} + + +// Move constructor. +Mouse::Mouse(Mouse&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ + pImpl->mOwner = this; +} + + +// Move assignment. +Mouse& Mouse::operator= (Mouse&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + pImpl->mOwner = this; + return *this; +} + + +// Public destructor. 
+Mouse::~Mouse() +{ +} + + +Mouse::State Mouse::GetState() const +{ + State state; + pImpl->GetState(state); + return state; +} + + +void Mouse::ResetScrollWheelValue() noexcept +{ + pImpl->ResetScrollWheelValue(); +} + + +void Mouse::SetMode(Mode mode) +{ + pImpl->SetMode(mode); +} + + +bool Mouse::IsConnected() const +{ + return pImpl->IsConnected(); +} + +bool Mouse::IsVisible() const noexcept +{ + return pImpl->IsVisible(); +} + +void Mouse::SetVisible(bool visible) +{ + pImpl->SetVisible(visible); +} + +Mouse& Mouse::Get() +{ + if (!Impl::s_mouse || !Impl::s_mouse->mOwner) + throw std::exception("Mouse is a singleton"); + + return *Impl::s_mouse->mOwner; +} + + + +//====================================================================================== +// ButtonStateTracker +//====================================================================================== + +#define UPDATE_BUTTON_STATE(field) field = static_cast( ( !!state.field ) | ( ( !!state.field ^ !!lastState.field ) << 1 ) ); + +void Mouse::ButtonStateTracker::Update(const Mouse::State& state) noexcept +{ + UPDATE_BUTTON_STATE(leftButton) + + assert((!state.leftButton && !lastState.leftButton) == (leftButton == UP)); + assert((state.leftButton && lastState.leftButton) == (leftButton == HELD)); + assert((!state.leftButton && lastState.leftButton) == (leftButton == RELEASED)); + assert((state.leftButton && !lastState.leftButton) == (leftButton == PRESSED)); + + UPDATE_BUTTON_STATE(middleButton) + UPDATE_BUTTON_STATE(rightButton) + UPDATE_BUTTON_STATE(xButton1) + UPDATE_BUTTON_STATE(xButton2) + + lastState = state; +} + +#undef UPDATE_BUTTON_STATE + + +void Mouse::ButtonStateTracker::Reset() noexcept +{ + memset(this, 0, sizeof(ButtonStateTracker)); +} diff --git a/Sdk/External/DirectXTK/Src/NormalMapEffect.cpp b/Sdk/External/DirectXTK/Src/NormalMapEffect.cpp new file mode 100644 index 0000000..5e12474 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/NormalMapEffect.cpp @@ -0,0 +1,507 @@ 
+//-------------------------------------------------------------------------------------- +// File: NormalMapEffect.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "EffectCommon.h" + +using namespace DirectX; + + +// Constant buffer layout. Must match the shader! +struct NormalMapEffectConstants +{ + XMVECTOR diffuseColor; + XMVECTOR emissiveColor; + XMVECTOR specularColorAndPower; + + XMVECTOR lightDirection[IEffectLights::MaxDirectionalLights]; + XMVECTOR lightDiffuseColor[IEffectLights::MaxDirectionalLights]; + XMVECTOR lightSpecularColor[IEffectLights::MaxDirectionalLights]; + + XMVECTOR eyePosition; + + XMVECTOR fogColor; + XMVECTOR fogVector; + + XMMATRIX world; + XMVECTOR worldInverseTranspose[3]; + XMMATRIX worldViewProj; +}; + +static_assert((sizeof(NormalMapEffectConstants) % 16) == 0, "CB size not padded correctly"); + + +// Traits type describes our characteristics to the EffectBase template. +struct NormalMapEffectTraits +{ + using ConstantBufferType = NormalMapEffectConstants; + + static constexpr int VertexShaderCount = 4; + static constexpr int PixelShaderCount = 4; + static constexpr int ShaderPermutationCount = 16; +}; + + +// Internal NormalMapEffect implementation class. +class NormalMapEffect::Impl : public EffectBase +{ +public: + Impl(_In_ ID3D11Device* device); + + Microsoft::WRL::ComPtr specularTexture; + Microsoft::WRL::ComPtr normalTexture; + + bool vertexColorEnabled; + bool biasedVertexNormals; + + EffectLights lights; + + int GetCurrentShaderPermutation() const noexcept; + + void Apply(_In_ ID3D11DeviceContext* deviceContext); +}; + + +// Include the precompiled shader code. 
+namespace +{ +#if defined(_XBOX_ONE) && defined(_TITLE) + #include "Shaders/Compiled/XboxOneNormalMapEffect_VSNormalPixelLightingTx.inc" + #include "Shaders/Compiled/XboxOneNormalMapEffect_VSNormalPixelLightingTxVc.inc" + + #include "Shaders/Compiled/XboxOneNormalMapEffect_VSNormalPixelLightingTxBn.inc" + #include "Shaders/Compiled/XboxOneNormalMapEffect_VSNormalPixelLightingTxVcBn.inc" + + #include "Shaders/Compiled/XboxOneNormalMapEffect_PSNormalPixelLightingTx.inc" + #include "Shaders/Compiled/XboxOneNormalMapEffect_PSNormalPixelLightingTxNoFog.inc" + #include "Shaders/Compiled/XboxOneNormalMapEffect_PSNormalPixelLightingTxNoSpec.inc" + #include "Shaders/Compiled/XboxOneNormalMapEffect_PSNormalPixelLightingTxNoFogSpec.inc" +#else + #include "Shaders/Compiled/NormalMapEffect_VSNormalPixelLightingTx.inc" + #include "Shaders/Compiled/NormalMapEffect_VSNormalPixelLightingTxVc.inc" + + #include "Shaders/Compiled/NormalMapEffect_VSNormalPixelLightingTxBn.inc" + #include "Shaders/Compiled/NormalMapEffect_VSNormalPixelLightingTxVcBn.inc" + + #include "Shaders/Compiled/NormalMapEffect_PSNormalPixelLightingTx.inc" + #include "Shaders/Compiled/NormalMapEffect_PSNormalPixelLightingTxNoFog.inc" + #include "Shaders/Compiled/NormalMapEffect_PSNormalPixelLightingTxNoSpec.inc" + #include "Shaders/Compiled/NormalMapEffect_PSNormalPixelLightingTxNoFogSpec.inc" +#endif +} + + +template<> +const ShaderBytecode EffectBase::VertexShaderBytecode[] = +{ + { NormalMapEffect_VSNormalPixelLightingTx, sizeof(NormalMapEffect_VSNormalPixelLightingTx) }, + { NormalMapEffect_VSNormalPixelLightingTxVc, sizeof(NormalMapEffect_VSNormalPixelLightingTxVc) }, + + { NormalMapEffect_VSNormalPixelLightingTxBn, sizeof(NormalMapEffect_VSNormalPixelLightingTxBn) }, + { NormalMapEffect_VSNormalPixelLightingTxVcBn, sizeof(NormalMapEffect_VSNormalPixelLightingTxVcBn) }, +}; + + +template<> +const int EffectBase::VertexShaderIndices[] = +{ + 0, // pixel lighting + texture + 0, // pixel lighting + texture, no 
fog + 1, // pixel lighting + texture + vertex color + 1, // pixel lighting + texture + vertex color, no fog + + 0, // pixel lighting + texture, no specular + 0, // pixel lighting + texture, no fog or specular + 1, // pixel lighting + texture + vertex color, no specular + 1, // pixel lighting + texture + vertex color, no fog or specular + + 2, // pixel lighting (biased vertex normal) + texture + 2, // pixel lighting (biased vertex normal) + texture, no fog + 3, // pixel lighting (biased vertex normal) + texture + vertex color + 3, // pixel lighting (biased vertex normal) + texture + vertex color, no fog + + 2, // pixel lighting (biased vertex normal) + texture, no specular + 2, // pixel lighting (biased vertex normal) + texture, no fog or specular + 3, // pixel lighting (biased vertex normal) + texture + vertex color, no specular + 3, // pixel lighting (biased vertex normal) + texture + vertex color, no fog or specular +}; + + +template<> +const ShaderBytecode EffectBase::PixelShaderBytecode[] = +{ + { NormalMapEffect_PSNormalPixelLightingTx, sizeof(NormalMapEffect_PSNormalPixelLightingTx) }, + { NormalMapEffect_PSNormalPixelLightingTxNoFog, sizeof(NormalMapEffect_PSNormalPixelLightingTxNoFog) }, + { NormalMapEffect_PSNormalPixelLightingTxNoSpec, sizeof(NormalMapEffect_PSNormalPixelLightingTxNoSpec) }, + { NormalMapEffect_PSNormalPixelLightingTxNoFogSpec, sizeof(NormalMapEffect_PSNormalPixelLightingTxNoFogSpec) }, +}; + + +template<> +const int EffectBase::PixelShaderIndices[] = +{ + 0, // pixel lighting + texture + 1, // pixel lighting + texture, no fog + 0, // pixel lighting + texture + vertex color + 1, // pixel lighting + texture + vertex color, no fog + + 2, // pixel lighting + texture, no specular + 3, // pixel lighting + texture, no fog or specular + 2, // pixel lighting + texture + vertex color, no specular + 3, // pixel lighting + texture + vertex color, no fog or specular + + 0, // pixel lighting (biased vertex normal) + texture + 1, // pixel lighting 
(biased vertex normal) + texture, no fog + 0, // pixel lighting (biased vertex normal) + texture + vertex color + 1, // pixel lighting (biased vertex normal) + texture + vertex color, no fog + + 2, // pixel lighting (biased vertex normal) + texture, no specular + 3, // pixel lighting (biased vertex normal) + texture, no fog or specular + 2, // pixel lighting (biased vertex normal) + texture + vertex color, no specular + 3, // pixel lighting (biased vertex normal) + texture + vertex color, no fog or specular +}; + + +// Global pool of per-device NormalMapEffect resources. +template<> +SharedResourcePool::DeviceResources> EffectBase::deviceResourcesPool = {}; + + +// Constructor. +NormalMapEffect::Impl::Impl(_In_ ID3D11Device* device) + : EffectBase(device), + vertexColorEnabled(false), + biasedVertexNormals(false) +{ + if (device->GetFeatureLevel() < D3D_FEATURE_LEVEL_10_0) + { + throw std::exception("NormalMapEffect requires Feature Level 10.0 or later"); + } + + static_assert(_countof(EffectBase::VertexShaderIndices) == NormalMapEffectTraits::ShaderPermutationCount, "array/max mismatch"); + static_assert(_countof(EffectBase::VertexShaderBytecode) == NormalMapEffectTraits::VertexShaderCount, "array/max mismatch"); + static_assert(_countof(EffectBase::PixelShaderBytecode) == NormalMapEffectTraits::PixelShaderCount, "array/max mismatch"); + static_assert(_countof(EffectBase::PixelShaderIndices) == NormalMapEffectTraits::ShaderPermutationCount, "array/max mismatch"); + + lights.InitializeConstants(constants.specularColorAndPower, constants.lightDirection, constants.lightDiffuseColor, constants.lightSpecularColor); +} + + +int NormalMapEffect::Impl::GetCurrentShaderPermutation() const noexcept +{ + int permutation = 0; + + // Use optimized shaders if fog is disabled. + if (!fog.enabled) + { + permutation += 1; + } + + // Support vertex coloring? + if (vertexColorEnabled) + { + permutation += 2; + } + + // Specular map? 
+ if (!specularTexture) + { + permutation += 4; + } + + if (biasedVertexNormals) + { + // Compressed normals need to be scaled and biased in the vertex shader. + permutation += 8; + } + + return permutation; +} + + +// Sets our state onto the D3D device. +void NormalMapEffect::Impl::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + // Compute derived parameter values. + matrices.SetConstants(dirtyFlags, constants.worldViewProj); + + fog.SetConstants(dirtyFlags, matrices.worldView, constants.fogVector); + + lights.SetConstants(dirtyFlags, matrices, constants.world, constants.worldInverseTranspose, constants.eyePosition, constants.diffuseColor, constants.emissiveColor, true); + + // Set the textures + ID3D11ShaderResourceView* textures[] = { texture.Get(), specularTexture.Get(), normalTexture.Get()}; + deviceContext->PSSetShaderResources(0, _countof(textures), textures); + + // Set shaders and constant buffers. + ApplyShaders(deviceContext, GetCurrentShaderPermutation()); +} + + +// Public constructor. +NormalMapEffect::NormalMapEffect(_In_ ID3D11Device* device) + : pImpl(std::make_unique(device)) +{ +} + + +// Move constructor. +NormalMapEffect::NormalMapEffect(NormalMapEffect&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +NormalMapEffect& NormalMapEffect::operator= (NormalMapEffect&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +NormalMapEffect::~NormalMapEffect() +{ +} + + +// IEffect methods. +void NormalMapEffect::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + pImpl->Apply(deviceContext); +} + + +void NormalMapEffect::GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) +{ + pImpl->GetVertexShaderBytecode(pImpl->GetCurrentShaderPermutation(), pShaderByteCode, pByteCodeLength); +} + + +// Camera settings. 
+void XM_CALLCONV NormalMapEffect::SetWorld(FXMMATRIX value) +{ + pImpl->matrices.world = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose | EffectDirtyFlags::FogVector; +} + + +void XM_CALLCONV NormalMapEffect::SetView(FXMMATRIX value) +{ + pImpl->matrices.view = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::EyePosition | EffectDirtyFlags::FogVector; +} + + +void XM_CALLCONV NormalMapEffect::SetProjection(FXMMATRIX value) +{ + pImpl->matrices.projection = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj; +} + + +void XM_CALLCONV NormalMapEffect::SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) +{ + pImpl->matrices.world = world; + pImpl->matrices.view = view; + pImpl->matrices.projection = projection; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose | EffectDirtyFlags::EyePosition | EffectDirtyFlags::FogVector; +} + + +// Material settings. +void XM_CALLCONV NormalMapEffect::SetDiffuseColor(FXMVECTOR value) +{ + pImpl->lights.diffuseColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void XM_CALLCONV NormalMapEffect::SetEmissiveColor(FXMVECTOR value) +{ + pImpl->lights.emissiveColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void XM_CALLCONV NormalMapEffect::SetSpecularColor(FXMVECTOR value) +{ + // Set xyz to new value, but preserve existing w (specular power). + pImpl->constants.specularColorAndPower = XMVectorSelect(pImpl->constants.specularColorAndPower, value, g_XMSelect1110); + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void NormalMapEffect::SetSpecularPower(float value) +{ + // Set w to new value, but preserve existing xyz (specular color). 
+ pImpl->constants.specularColorAndPower = XMVectorSetW(pImpl->constants.specularColorAndPower, value); + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void NormalMapEffect::DisableSpecular() +{ + // Set specular color to black, power to 1 + // Note: Don't use a power of 0 or the shader will generate strange highlights on non-specular materials + + pImpl->constants.specularColorAndPower = g_XMIdentityR3; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void NormalMapEffect::SetAlpha(float value) +{ + pImpl->lights.alpha = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void XM_CALLCONV NormalMapEffect::SetColorAndAlpha(FXMVECTOR value) +{ + pImpl->lights.diffuseColor = value; + pImpl->lights.alpha = XMVectorGetW(value); + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +// Light settings. +void NormalMapEffect::SetLightingEnabled(bool value) +{ + if (!value) + { + throw std::exception("NormalMapEffect does not support turning off lighting"); + } +} + + +void NormalMapEffect::SetPerPixelLighting(bool) +{ + // Unsupported interface method. 
+} + + +void XM_CALLCONV NormalMapEffect::SetAmbientLightColor(FXMVECTOR value) +{ + pImpl->lights.ambientLightColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void NormalMapEffect::SetLightEnabled(int whichLight, bool value) +{ + pImpl->dirtyFlags |= pImpl->lights.SetLightEnabled(whichLight, value, pImpl->constants.lightDiffuseColor, pImpl->constants.lightSpecularColor); +} + + +void XM_CALLCONV NormalMapEffect::SetLightDirection(int whichLight, FXMVECTOR value) +{ + EffectLights::ValidateLightIndex(whichLight); + + pImpl->constants.lightDirection[whichLight] = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void XM_CALLCONV NormalMapEffect::SetLightDiffuseColor(int whichLight, FXMVECTOR value) +{ + pImpl->dirtyFlags |= pImpl->lights.SetLightDiffuseColor(whichLight, value, pImpl->constants.lightDiffuseColor); +} + + +void XM_CALLCONV NormalMapEffect::SetLightSpecularColor(int whichLight, FXMVECTOR value) +{ + pImpl->dirtyFlags |= pImpl->lights.SetLightSpecularColor(whichLight, value, pImpl->constants.lightSpecularColor); +} + + +void NormalMapEffect::EnableDefaultLighting() +{ + EffectLights::EnableDefaultLighting(this); +} + + +// Fog settings. +void NormalMapEffect::SetFogEnabled(bool value) +{ + pImpl->fog.enabled = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::FogEnable; +} + + +void NormalMapEffect::SetFogStart(float value) +{ + pImpl->fog.start = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::FogVector; +} + + +void NormalMapEffect::SetFogEnd(float value) +{ + pImpl->fog.end = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::FogVector; +} + + +void XM_CALLCONV NormalMapEffect::SetFogColor(FXMVECTOR value) +{ + pImpl->constants.fogColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +// Vertex color setting. +void NormalMapEffect::SetVertexColorEnabled(bool value) +{ + pImpl->vertexColorEnabled = value; +} + + +// Texture settings. 
+void NormalMapEffect::SetTexture(_In_opt_ ID3D11ShaderResourceView* value) +{ + pImpl->texture = value; +} + + +void NormalMapEffect::SetNormalTexture(_In_opt_ ID3D11ShaderResourceView* value) +{ + pImpl->normalTexture = value; +} + + +void NormalMapEffect::SetSpecularTexture(_In_opt_ ID3D11ShaderResourceView* value) +{ + pImpl->specularTexture = value; +} + + +// Normal compression settings. +void NormalMapEffect::SetBiasedVertexNormals(bool value) +{ + pImpl->biasedVertexNormals = value; +} diff --git a/Sdk/External/DirectXTK/Src/PBREffect.cpp b/Sdk/External/DirectXTK/Src/PBREffect.cpp new file mode 100644 index 0000000..dcb8928 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/PBREffect.cpp @@ -0,0 +1,532 @@ +//-------------------------------------------------------------------------------------- +// File: PBREffect.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "EffectCommon.h" + +using namespace DirectX; + + +// Constant buffer layout. Must match the shader! +struct PBREffectConstants +{ + XMVECTOR eyePosition; + XMMATRIX world; + XMVECTOR worldInverseTranspose[3]; + XMMATRIX worldViewProj; + XMMATRIX prevWorldViewProj; // for velocity generation + + XMVECTOR lightDirection[IEffectLights::MaxDirectionalLights]; + XMVECTOR lightDiffuseColor[IEffectLights::MaxDirectionalLights]; + + // PBR Parameters + XMVECTOR Albedo; + float Metallic; + float Roughness; + int numRadianceMipLevels; + + // Size of render target + float targetWidth; + float targetHeight; +}; + +static_assert((sizeof(PBREffectConstants) % 16) == 0, "CB size not padded correctly"); + + +// Traits type describes our characteristics to the EffectBase template. 
+struct PBREffectTraits +{ + using ConstantBufferType = PBREffectConstants; + + static constexpr int VertexShaderCount = 4; + static constexpr int PixelShaderCount = 5; + static constexpr int ShaderPermutationCount = 10; + static constexpr int RootSignatureCount = 1; +}; + + +// Internal PBREffect implementation class. +class PBREffect::Impl : public EffectBase +{ +public: + Impl(_In_ ID3D11Device* device); + + Microsoft::WRL::ComPtr albedoTexture; + Microsoft::WRL::ComPtr normalTexture; + Microsoft::WRL::ComPtr rmaTexture; + Microsoft::WRL::ComPtr emissiveTexture; + + Microsoft::WRL::ComPtr radianceTexture; + Microsoft::WRL::ComPtr irradianceTexture; + + bool biasedVertexNormals; + bool velocityEnabled; + + XMVECTOR lightColor[MaxDirectionalLights]; + + int GetCurrentShaderPermutation() const noexcept; + + void Apply(_In_ ID3D11DeviceContext* deviceContext); +}; + + +// Include the precompiled shader code. +namespace +{ +#if defined(_XBOX_ONE) && defined(_TITLE) + #include "Shaders/Compiled/XboxOnePBREffect_VSConstant.inc" + #include "Shaders/Compiled/XboxOnePBREffect_VSConstantVelocity.inc" + #include "Shaders/Compiled/XboxOnePBREffect_VSConstantBn.inc" + #include "Shaders/Compiled/XboxOnePBREffect_VSConstantVelocityBn.inc" + + #include "Shaders/Compiled/XboxOnePBREffect_PSConstant.inc" + #include "Shaders/Compiled/XboxOnePBREffect_PSTextured.inc" + #include "Shaders/Compiled/XboxOnePBREffect_PSTexturedEmissive.inc" + #include "Shaders/Compiled/XboxOnePBREffect_PSTexturedVelocity.inc" + #include "Shaders/Compiled/XboxOnePBREffect_PSTexturedEmissiveVelocity.inc" +#else + #include "Shaders/Compiled/PBREffect_VSConstant.inc" + #include "Shaders/Compiled/PBREffect_VSConstantVelocity.inc" + #include "Shaders/Compiled/PBREffect_VSConstantBn.inc" + #include "Shaders/Compiled/PBREffect_VSConstantVelocityBn.inc" + + #include "Shaders/Compiled/PBREffect_PSConstant.inc" + #include "Shaders/Compiled/PBREffect_PSTextured.inc" + #include
"Shaders/Compiled/PBREffect_PSTexturedEmissive.inc" + #include "Shaders/Compiled/PBREffect_PSTexturedVelocity.inc" + #include "Shaders/Compiled/PBREffect_PSTexturedEmissiveVelocity.inc" +#endif +} + + +template<> +const ShaderBytecode EffectBase::VertexShaderBytecode[] = +{ + { PBREffect_VSConstant, sizeof(PBREffect_VSConstant) }, + { PBREffect_VSConstantVelocity, sizeof(PBREffect_VSConstantVelocity) }, + { PBREffect_VSConstantBn, sizeof(PBREffect_VSConstantBn) }, + { PBREffect_VSConstantVelocityBn, sizeof(PBREffect_VSConstantVelocityBn) }, +}; + + +template<> +const int EffectBase::VertexShaderIndices[] = +{ + 0, // constant + 0, // textured + 0, // textured + emissive + 1, // textured + velocity + 1, // textured + emissive + velocity + + 2, // constant (biased vertex normals) + 2, // textured (biased vertex normals) + 2, // textured + emissive (biased vertex normals) + 3, // textured + velocity (biased vertex normals) + 3, // textured + emissive + velocity (biased vertex normals) +}; + + +template<> +const ShaderBytecode EffectBase::PixelShaderBytecode[] = +{ + { PBREffect_PSConstant, sizeof(PBREffect_PSConstant) }, + { PBREffect_PSTextured, sizeof(PBREffect_PSTextured) }, + { PBREffect_PSTexturedEmissive, sizeof(PBREffect_PSTexturedEmissive) }, + { PBREffect_PSTexturedVelocity, sizeof(PBREffect_PSTexturedVelocity) }, + { PBREffect_PSTexturedEmissiveVelocity, sizeof(PBREffect_PSTexturedEmissiveVelocity) } +}; + + +template<> +const int EffectBase::PixelShaderIndices[] = +{ + 0, // constant + 1, // textured + 2, // textured + emissive + 3, // textured + velocity + 4, // textured + emissive + velocity + + 0, // constant (biased vertex normals) + 1, // textured (biased vertex normals) + 2, // textured + emissive (biased vertex normals) + 3, // textured + velocity (biased vertex normals) + 4, // textured + emissive + velocity (biased vertex normals) +}; + +// Global pool of per-device PBREffect resources. Required by EffectBase<>, but not used.
+template<> +SharedResourcePool::DeviceResources> EffectBase::deviceResourcesPool = {}; + +// Constructor. +PBREffect::Impl::Impl(_In_ ID3D11Device* device) + : EffectBase(device), + biasedVertexNormals(false), + velocityEnabled(false), + lightColor{} +{ + if (device->GetFeatureLevel() < D3D_FEATURE_LEVEL_10_0) + { + throw std::exception("PBREffect requires Feature Level 10.0 or later"); + } + + static_assert(_countof(EffectBase::VertexShaderIndices) == PBREffectTraits::ShaderPermutationCount, "array/max mismatch"); + static_assert(_countof(EffectBase::VertexShaderBytecode) == PBREffectTraits::VertexShaderCount, "array/max mismatch"); + static_assert(_countof(EffectBase::PixelShaderBytecode) == PBREffectTraits::PixelShaderCount, "array/max mismatch"); + static_assert(_countof(EffectBase::PixelShaderIndices) == PBREffectTraits::ShaderPermutationCount, "array/max mismatch"); + + // Lighting + static const XMVECTORF32 defaultLightDirection = { { { 0, -1, 0, 0 } } }; + for (int i = 0; i < MaxDirectionalLights; i++) + { + lightColor[i] = g_XMOne; + constants.lightDirection[i] = defaultLightDirection; + constants.lightDiffuseColor[i] = g_XMZero; + } + + // Default PBR values + constants.Albedo = g_XMOne; + constants.Metallic = 0.5f; + constants.Roughness = 0.2f; + constants.numRadianceMipLevels = 1; +} + + +int PBREffect::Impl::GetCurrentShaderPermutation() const noexcept +{ + int permutation = 0; + + // Textured RMA vs. constant albedo/roughness/metalness? + if (velocityEnabled) + { + // Optional velocity buffer (implies textured RMA)? + permutation = 3; + } + else if (albedoTexture) + { + permutation = 1; + } + + // Using an emissive texture? + if (emissiveTexture) + { + permutation += 1; + } + + if (biasedVertexNormals) + { + // Compressed normals need to be scaled and biased in the vertex shader. + permutation += 5; + } + + return permutation; +} + + +// Sets our state onto the D3D device. 
+void PBREffect::Impl::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + // Store old wvp for velocity calculation in shader + constants.prevWorldViewProj = constants.worldViewProj; + + // Compute derived parameter values. + matrices.SetConstants(dirtyFlags, constants.worldViewProj); + + // World inverse transpose matrix: rebuilt only while the WorldInverseTranspose flag is set; the flag is then cleared and ConstantBuffer is flagged instead. + if (dirtyFlags & EffectDirtyFlags::WorldInverseTranspose) + { + constants.world = XMMatrixTranspose(matrices.world); + + XMMATRIX worldInverse = XMMatrixInverse(nullptr, matrices.world); + + constants.worldInverseTranspose[0] = worldInverse.r[0]; + constants.worldInverseTranspose[1] = worldInverse.r[1]; + constants.worldInverseTranspose[2] = worldInverse.r[2]; + + dirtyFlags &= ~EffectDirtyFlags::WorldInverseTranspose; + dirtyFlags |= EffectDirtyFlags::ConstantBuffer; + } + + // Eye position vector: row 3 of the inverse view matrix, refreshed only while the EyePosition flag is set. + if (dirtyFlags & EffectDirtyFlags::EyePosition) + { + XMMATRIX viewInverse = XMMatrixInverse(nullptr, matrices.view); + + constants.eyePosition = viewInverse.r[3]; + + dirtyFlags &= ~EffectDirtyFlags::EyePosition; + dirtyFlags |= EffectDirtyFlags::ConstantBuffer; + } + + // Set the textures: without an albedo texture the four surface slots (0-3) are bound as null and only the radiance/irradiance views (slots 4-5) are supplied. + if (albedoTexture) + { + ID3D11ShaderResourceView* textures[] = { + albedoTexture.Get(), normalTexture.Get(), rmaTexture.Get(), + emissiveTexture.Get(), + radianceTexture.Get(), irradianceTexture.Get() }; + deviceContext->PSSetShaderResources(0, _countof(textures), textures); + } + else + { + ID3D11ShaderResourceView* textures[] = { + nullptr, nullptr, nullptr, + nullptr, + radianceTexture.Get(), irradianceTexture.Get() }; + deviceContext->PSSetShaderResources(0, _countof(textures), textures); + } + + // Set shaders and constant buffers. + ApplyShaders(deviceContext, GetCurrentShaderPermutation()); +} + +// Public constructor: creates the private Impl for the given device (Impl's constructor throws std::exception below Feature Level 10.0). +PBREffect::PBREffect(_In_ ID3D11Device* device) + : pImpl(std::make_unique<Impl>(device)) +{ +} + + +// Move constructor.
+PBREffect::PBREffect(PBREffect&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +PBREffect& PBREffect::operator= (PBREffect&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +PBREffect::~PBREffect() +{ +} + + +// IEffect methods. +void PBREffect::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + pImpl->Apply(deviceContext); +} + + +void PBREffect::GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) +{ + pImpl->GetVertexShaderBytecode(pImpl->GetCurrentShaderPermutation(), pShaderByteCode, pByteCodeLength); +} + + +// Camera settings. +void XM_CALLCONV PBREffect::SetWorld(FXMMATRIX value) +{ + pImpl->matrices.world = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose; +} + + +void XM_CALLCONV PBREffect::SetView(FXMMATRIX value) +{ + pImpl->matrices.view = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::EyePosition; +} + + +void XM_CALLCONV PBREffect::SetProjection(FXMMATRIX value) +{ + pImpl->matrices.projection = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj; +} + + +void XM_CALLCONV PBREffect::SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) +{ + pImpl->matrices.world = world; + pImpl->matrices.view = view; + pImpl->matrices.projection = projection; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose | EffectDirtyFlags::EyePosition; +} + + +// Light settings +void PBREffect::SetLightingEnabled(bool value) +{ + if (!value) + { + throw std::exception("PBREffect does not support turning off lighting"); + } +} + + +void PBREffect::SetPerPixelLighting(bool) +{ + // Unsupported interface method. +} + + +void XM_CALLCONV PBREffect::SetAmbientLightColor(FXMVECTOR) +{ + // Unsupported interface. 
+} + + +void PBREffect::SetLightEnabled(int whichLight, bool value) +{ + EffectLights::ValidateLightIndex(whichLight); + + pImpl->constants.lightDiffuseColor[whichLight] = (value) ? pImpl->lightColor[whichLight] : g_XMZero; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void XM_CALLCONV PBREffect::SetLightDirection(int whichLight, FXMVECTOR value) +{ + EffectLights::ValidateLightIndex(whichLight); + + pImpl->constants.lightDirection[whichLight] = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void XM_CALLCONV PBREffect::SetLightDiffuseColor(int whichLight, FXMVECTOR value) +{ + EffectLights::ValidateLightIndex(whichLight); + + pImpl->lightColor[whichLight] = value; + pImpl->constants.lightDiffuseColor[whichLight] = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void XM_CALLCONV PBREffect::SetLightSpecularColor(int, FXMVECTOR) +{ + // Unsupported interface. +} + + +void PBREffect::EnableDefaultLighting() +{ + EffectLights::EnableDefaultLighting(this); +} + + +// PBR Settings +void PBREffect::SetAlpha(float value) +{ + // Set w to new value, but preserve existing xyz (constant albedo). + pImpl->constants.Albedo = XMVectorSetW(pImpl->constants.Albedo, value); + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void PBREffect::SetConstantAlbedo(FXMVECTOR value) +{ + // Set xyz to new value, but preserve existing w (alpha). + pImpl->constants.Albedo = XMVectorSelect(pImpl->constants.Albedo, value, g_XMSelect1110); + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void PBREffect::SetConstantMetallic(float value) +{ + pImpl->constants.Metallic = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void PBREffect::SetConstantRoughness(float value) +{ + pImpl->constants.Roughness = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +// Texture settings. 
+void PBREffect::SetAlbedoTexture(_In_opt_ ID3D11ShaderResourceView* value) +{ + pImpl->albedoTexture = value; +} + +/* Stores the normal-map view. Impl::Apply binds it only when an albedo texture is also set; otherwise the surface slots are bound as null. */ +void PBREffect::SetNormalTexture(_In_opt_ ID3D11ShaderResourceView* value) +{ + pImpl->normalTexture = value; +} + +/* Stores the roughness/metallic/ambient-occlusion view; like the normal map it is only bound when an albedo texture is set. */ +void PBREffect::SetRMATexture(_In_opt_ ID3D11ShaderResourceView* value) +{ + pImpl->rmaTexture = value; +} + +void PBREffect::SetEmissiveTexture(_In_opt_ ID3D11ShaderResourceView* value) +{ + pImpl->emissiveTexture = value; +} + +/* Convenience setter: replaces the albedo, normal and RMA views in one call (the emissive view is left unchanged). */ +void PBREffect::SetSurfaceTextures( + _In_opt_ ID3D11ShaderResourceView* albedo, + _In_opt_ ID3D11ShaderResourceView* normal, + _In_opt_ ID3D11ShaderResourceView* roughnessMetallicAmbientOcclusion) +{ + pImpl->albedoTexture = albedo; + pImpl->normalTexture = normal; + pImpl->rmaTexture = roughnessMetallicAmbientOcclusion; +} + +/* Stores the radiance and irradiance views and writes numRadianceMips into the constant buffer, flagging ConstantBuffer as dirty. */ +void PBREffect::SetIBLTextures( + _In_opt_ ID3D11ShaderResourceView* radiance, + int numRadianceMips, + _In_opt_ ID3D11ShaderResourceView* irradiance) +{ + pImpl->radianceTexture = radiance; + pImpl->irradianceTexture = irradiance; + + pImpl->constants.numRadianceMipLevels = numRadianceMips; + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +// Normal compression settings: when enabled, GetCurrentShaderPermutation offsets the permutation by 5, which selects the Bn (biased vertex normal) vertex shaders. +void PBREffect::SetBiasedVertexNormals(bool value) +{ + pImpl->biasedVertexNormals = value; +} + + +// Additional settings.
+void PBREffect::SetVelocityGeneration(bool value) +{ + pImpl->velocityEnabled = value; /* read by Impl::GetCurrentShaderPermutation, which then starts from the velocity permutation (3) */ +} + +/* Writes the render target size into the constant buffer. The int arguments are converted explicitly because targetWidth and targetHeight are float members of PBREffectConstants. */ +void PBREffect::SetRenderTargetSizeInPixels(int width, int height) +{ + pImpl->constants.targetWidth = static_cast<float>(width); + pImpl->constants.targetHeight = static_cast<float>(height); + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} diff --git a/Sdk/External/DirectXTK/Src/PBREffectFactory.cpp b/Sdk/External/DirectXTK/Src/PBREffectFactory.cpp new file mode 100644 index 0000000..c2534a5 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/PBREffectFactory.cpp @@ -0,0 +1,301 @@ +//-------------------------------------------------------------------------------------- +// File: PBREffectFactory.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "Effects.h" +#include "DemandCreate.h" +#include "SharedResourcePool.h" + +#include "DDSTextureLoader.h" +#include "WICTextureLoader.h" + +using namespace DirectX; +using Microsoft::WRL::ComPtr; + +// Internal PBREffectFactory implementation class. Only one of these helpers is allocated +// per D3D device, even if there are multiple public facing PBREffectFactory instances.
+class PBREffectFactory::Impl +{ +public: + Impl(_In_ ID3D11Device* device) + : mPath{}, + mDevice(device), + mSharing(true), + mForceSRGB(false) + {} + + std::shared_ptr CreateEffect(_In_ IEffectFactory* factory, _In_ const IEffectFactory::EffectInfo& info, _In_opt_ ID3D11DeviceContext* deviceContext); + void CreateTexture(_In_z_ const wchar_t* texture, _In_opt_ ID3D11DeviceContext* deviceContext, _Outptr_ ID3D11ShaderResourceView** textureView); + + void ReleaseCache(); + void SetSharing(bool enabled) noexcept { mSharing = enabled; } + void EnableForceSRGB(bool forceSRGB) noexcept { mForceSRGB = forceSRGB; } + + static SharedResourcePool instancePool; + + wchar_t mPath[MAX_PATH]; + + ComPtr mDevice; + +private: + using EffectCache = std::map< std::wstring, std::shared_ptr >; + using TextureCache = std::map< std::wstring, ComPtr >; + + EffectCache mEffectCache; + TextureCache mTextureCache; + + bool mSharing; + bool mForceSRGB; + + std::mutex mutex; +}; + + +// Global instance pool. +SharedResourcePool PBREffectFactory::Impl::instancePool; + + +_Use_decl_annotations_ +std::shared_ptr PBREffectFactory::Impl::CreateEffect(IEffectFactory* factory, const IEffectFactory::EffectInfo& info, ID3D11DeviceContext* deviceContext) +{ + if (mSharing && info.name && *info.name) + { + auto it = mEffectCache.find(info.name); + if (mSharing && it != mEffectCache.end()) + { + return it->second; + } + } + + auto effect = std::make_shared(mDevice.Get()); + + // We don't use EnableDefaultLighting generally for PBR as it uses Image-Based Lighting instead. 
+ + effect->SetAlpha(info.alpha); + + ComPtr albetoSrv; + if (info.diffuseTexture && *info.diffuseTexture) + { + factory->CreateTexture(info.diffuseTexture, deviceContext, albetoSrv.GetAddressOf()); + } + + ComPtr normalSrv; + if (info.normalTexture && *info.normalTexture) + { + factory->CreateTexture(info.normalTexture, deviceContext, normalSrv.GetAddressOf()); + } + + ComPtr rmaSrv; + if (info.specularTexture && *info.specularTexture) + { + // We use the specular texture for the roughness/metalness/ambient-occlusion texture + factory->CreateTexture(info.specularTexture, deviceContext, rmaSrv.GetAddressOf()); + } + + effect->SetSurfaceTextures(albetoSrv.Get(), normalSrv.Get(), rmaSrv.Get()); + + if (info.emissiveTexture && *info.emissiveTexture) + { + ComPtr srv; + factory->CreateTexture(info.emissiveTexture, deviceContext, srv.GetAddressOf()); + + effect->SetEmissiveTexture(srv.Get()); + } + + if (info.biasedVertexNormals) + { + effect->SetBiasedVertexNormals(true); + } + + if (mSharing && info.name && *info.name) + { + std::lock_guard lock(mutex); + EffectCache::value_type v(info.name, effect); + mEffectCache.insert(v); + } + + return std::move(effect); +} + +_Use_decl_annotations_ +void PBREffectFactory::Impl::CreateTexture(const wchar_t* name, ID3D11DeviceContext* deviceContext, ID3D11ShaderResourceView** textureView) +{ + if (!name || !textureView) + throw std::exception("invalid arguments"); + +#if defined(_XBOX_ONE) && defined(_TITLE) + UNREFERENCED_PARAMETER(deviceContext); +#endif + + auto it = mTextureCache.find(name); + + if (mSharing && it != mTextureCache.end()) + { + ID3D11ShaderResourceView* srv = it->second.Get(); + srv->AddRef(); + *textureView = srv; + } + else + { + wchar_t fullName[MAX_PATH] = {}; + wcscpy_s(fullName, mPath); + wcscat_s(fullName, name); + + WIN32_FILE_ATTRIBUTE_DATA fileAttr = {}; + if (!GetFileAttributesExW(fullName, GetFileExInfoStandard, &fileAttr)) + { + // Try Current Working Directory (CWD) + wcscpy_s(fullName, name); + 
if (!GetFileAttributesExW(fullName, GetFileExInfoStandard, &fileAttr)) + { + DebugTrace("ERROR: PBREffectFactory could not find texture file '%ls'\n", name); + throw std::exception("CreateTexture"); + } + } + + wchar_t ext[_MAX_EXT]; + _wsplitpath_s(name, nullptr, 0, nullptr, 0, nullptr, 0, ext, _MAX_EXT); + + if (_wcsicmp(ext, L".dds") == 0) + { + HRESULT hr = CreateDDSTextureFromFileEx( + mDevice.Get(), fullName, 0, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + mForceSRGB, nullptr, textureView); + if (FAILED(hr)) + { + DebugTrace("ERROR: CreateDDSTextureFromFile failed (%08X) for '%ls'\n", + static_cast(hr), fullName); + throw std::exception("CreateDDSTextureFromFile"); + } + } + #if !defined(_XBOX_ONE) || !defined(_TITLE) + else if (deviceContext) + { + std::lock_guard lock(mutex); + HRESULT hr = CreateWICTextureFromFileEx( + mDevice.Get(), deviceContext, fullName, 0, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + mForceSRGB ? WIC_LOADER_FORCE_SRGB : WIC_LOADER_DEFAULT, nullptr, textureView); + if (FAILED(hr)) + { + DebugTrace("ERROR: CreateWICTextureFromFile failed (%08X) for '%ls'\n", + static_cast(hr), fullName); + throw std::exception("CreateWICTextureFromFile"); + } + } + #endif + else + { + HRESULT hr = CreateWICTextureFromFileEx( + mDevice.Get(), fullName, 0, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + mForceSRGB ? 
WIC_LOADER_FORCE_SRGB : WIC_LOADER_DEFAULT, nullptr, textureView); + if (FAILED(hr)) + { + DebugTrace("ERROR: CreateWICTextureFromFile failed (%08X) for '%ls'\n", + static_cast(hr), fullName); + throw std::exception("CreateWICTextureFromFile"); + } + } + + if (mSharing && *name && it == mTextureCache.end()) + { + std::lock_guard lock(mutex); + TextureCache::value_type v(name, *textureView); + mTextureCache.insert(v); + } + } +} + +void PBREffectFactory::Impl::ReleaseCache() +{ + std::lock_guard lock(mutex); + mEffectCache.clear(); + mTextureCache.clear(); +} + + + +//-------------------------------------------------------------------------------------- +// PBREffectFactory +//-------------------------------------------------------------------------------------- + +PBREffectFactory::PBREffectFactory(_In_ ID3D11Device* device) + : pImpl(Impl::instancePool.DemandCreate(device)) +{ +} + +PBREffectFactory::~PBREffectFactory() +{ +} + + +PBREffectFactory::PBREffectFactory(PBREffectFactory&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + +PBREffectFactory& PBREffectFactory::operator= (PBREffectFactory&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + +_Use_decl_annotations_ +std::shared_ptr PBREffectFactory::CreateEffect(const EffectInfo& info, ID3D11DeviceContext* deviceContext) +{ + return pImpl->CreateEffect(this, info, deviceContext); +} + +_Use_decl_annotations_ +void PBREffectFactory::CreateTexture(const wchar_t* name, ID3D11DeviceContext* deviceContext, ID3D11ShaderResourceView** textureView) +{ + return pImpl->CreateTexture(name, deviceContext, textureView); +} + +void PBREffectFactory::ReleaseCache() +{ + pImpl->ReleaseCache(); +} + +void PBREffectFactory::SetSharing(bool enabled) noexcept +{ + pImpl->SetSharing(enabled); +} + +void PBREffectFactory::EnableForceSRGB(bool forceSRGB) noexcept +{ + pImpl->EnableForceSRGB(forceSRGB); +} + +void PBREffectFactory::SetDirectory(_In_opt_z_ const wchar_t* path) noexcept +{ 
+ if (path && *path != 0) + { + wcscpy_s(pImpl->mPath, path); + size_t len = wcsnlen(pImpl->mPath, MAX_PATH); + if (len > 0 && len < (MAX_PATH - 1)) + { + // Ensure it has a trailing slash + if (pImpl->mPath[len - 1] != L'\\') + { + pImpl->mPath[len] = L'\\'; + pImpl->mPath[len + 1] = 0; + } + } + } + else + *pImpl->mPath = 0; +} + +ID3D11Device* PBREffectFactory::GetDevice() const noexcept +{ + return pImpl->mDevice.Get(); +} diff --git a/Sdk/External/DirectXTK/Src/PlatformHelpers.h b/Sdk/External/DirectXTK/Src/PlatformHelpers.h new file mode 100644 index 0000000..1083225 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/PlatformHelpers.h @@ -0,0 +1,86 @@ +//-------------------------------------------------------------------------------------- +// File: PlatformHelpers.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#pragma once + +#pragma warning(disable : 4324) + +#include +#include + +#ifndef MAKEFOURCC + #define MAKEFOURCC(ch0, ch1, ch2, ch3) \ + (static_cast(static_cast(ch0)) \ + | (static_cast(static_cast(ch1)) << 8) \ + | (static_cast(static_cast(ch2)) << 16) \ + | (static_cast(static_cast(ch3)) << 24)) +#endif /* defined(MAKEFOURCC) */ + +namespace DirectX +{ + // Helper class for COM exceptions + class com_exception : public std::exception + { + public: + com_exception(HRESULT hr) noexcept : result(hr) {} + + const char* what() const override + { + static char s_str[64] = {}; + sprintf_s(s_str, "Failure with HRESULT of %08X", static_cast(result)); + return s_str; + } + + HRESULT get_result() const noexcept { return result; } + + private: + HRESULT result; + }; + + // Helper utility converts D3D API failures into exceptions. 
+ inline void ThrowIfFailed(HRESULT hr) noexcept(false) + { + if (FAILED(hr)) + { + throw com_exception(hr); + } + } + + + // Helper for output debug tracing + inline void DebugTrace(_In_z_ _Printf_format_string_ const char* format, ...) noexcept + { + #ifdef _DEBUG + va_list args; + va_start(args, format); + + char buff[1024] = {}; + vsprintf_s(buff, format, args); + OutputDebugStringA(buff); + va_end(args); + #else + UNREFERENCED_PARAMETER(format); + #endif + } + + + // Helper smart-pointers +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN10) || (defined(_XBOX_ONE) && defined(_TITLE)) || !defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP) + struct virtual_deleter { void operator()(void* p) noexcept { if (p) VirtualFree(p, 0, MEM_RELEASE); } }; +#endif + + struct aligned_deleter { void operator()(void* p) noexcept { _aligned_free(p); } }; + + struct handle_closer { void operator()(HANDLE h) noexcept { if (h) CloseHandle(h); } }; + + using ScopedHandle = std::unique_ptr; + + inline HANDLE safe_handle(HANDLE h) noexcept { return (h == INVALID_HANDLE_VALUE) ? nullptr : h; } +} diff --git a/Sdk/External/DirectXTK/Src/PrimitiveBatch.cpp b/Sdk/External/DirectXTK/Src/PrimitiveBatch.cpp new file mode 100644 index 0000000..c1c23d4 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/PrimitiveBatch.cpp @@ -0,0 +1,465 @@ +//-------------------------------------------------------------------------------------- +// File: PrimitiveBatch.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "PrimitiveBatch.h" +#include "DirectXHelpers.h" +#include "GraphicsMemory.h" +#include "PlatformHelpers.h" + +using namespace DirectX; +using namespace DirectX::Internal; +using Microsoft::WRL::ComPtr; + + +// Internal PrimitiveBatch implementation class. 
+class PrimitiveBatchBase::Impl +{ +public: + Impl(_In_ ID3D11DeviceContext* deviceContext, size_t maxIndices, size_t maxVertices, size_t vertexSize); + + void Begin(); + void End(); + + void Draw(D3D11_PRIMITIVE_TOPOLOGY topology, bool isIndexed, _In_opt_count_(indexCount) uint16_t const* indices, size_t indexCount, size_t vertexCount, _Out_ void** pMappedVertices); + +private: + void FlushBatch(); + +#if defined(_XBOX_ONE) && defined(_TITLE) + ComPtr mDeviceContext; +#else + ComPtr mDeviceContext; +#endif + ComPtr mIndexBuffer; + ComPtr mVertexBuffer; + + size_t mMaxIndices; + size_t mMaxVertices; + size_t mVertexSize; + + D3D11_PRIMITIVE_TOPOLOGY mCurrentTopology; + bool mInBeginEndPair; + bool mCurrentlyIndexed; + + size_t mCurrentIndex; + size_t mCurrentVertex; + + size_t mBaseIndex; + size_t mBaseVertex; + +#if defined(_XBOX_ONE) && defined(_TITLE) + void *grfxMemoryIB; + void *grfxMemoryVB; +#else + D3D11_MAPPED_SUBRESOURCE mMappedIndices; + D3D11_MAPPED_SUBRESOURCE mMappedVertices; +#endif +}; + + +namespace +{ + // Helper for creating a D3D vertex or index buffer. 
+#if defined(_XBOX_ONE) && defined(_TITLE) + void CreateDynamicBuffer(_In_ ID3D11DeviceX* device, uint32_t bufferSize, D3D11_BIND_FLAG bindFlag, _Outptr_ ID3D11Buffer** pBuffer) + { + D3D11_BUFFER_DESC desc = {}; + + desc.ByteWidth = bufferSize; + desc.BindFlags = bindFlag; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + + ThrowIfFailed( + device->CreatePlacementBuffer(&desc, nullptr, pBuffer) + ); + + SetDebugObjectName(*pBuffer, "DirectXTK:PrimitiveBatch"); + } +#else + void CreateDynamicBuffer(_In_ ID3D11Device* device, uint32_t bufferSize, D3D11_BIND_FLAG bindFlag, _Outptr_ ID3D11Buffer** pBuffer) + { + D3D11_BUFFER_DESC desc = {}; + + desc.ByteWidth = bufferSize; + desc.BindFlags = bindFlag; + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + + ThrowIfFailed( + device->CreateBuffer(&desc, nullptr, pBuffer) + ); + + assert(pBuffer != nullptr && *pBuffer != nullptr); + _Analysis_assume_(pBuffer != nullptr && *pBuffer != nullptr); + + SetDebugObjectName(*pBuffer, "DirectXTK:PrimitiveBatch"); + } +#endif +} + + +// Constructor. 
+PrimitiveBatchBase::Impl::Impl(_In_ ID3D11DeviceContext* deviceContext, size_t maxIndices, size_t maxVertices, size_t vertexSize) + : mMaxIndices(maxIndices), + mMaxVertices(maxVertices), + mVertexSize(vertexSize), + mCurrentTopology(D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED), + mInBeginEndPair(false), + mCurrentlyIndexed(false), + mCurrentIndex(0), + mCurrentVertex(0), + mBaseIndex(0), + mBaseVertex(0), +#if defined(_XBOX_ONE) && defined(_TITLE) + grfxMemoryIB(nullptr), + grfxMemoryVB(nullptr) +#else + mMappedIndices{}, + mMappedVertices{} +#endif +{ + ComPtr device; + deviceContext->GetDevice(&device); + + if (!maxVertices) + throw std::exception("maxVertices must be greater than 0"); + + if (vertexSize > D3D11_REQ_MULTI_ELEMENT_STRUCTURE_SIZE_IN_BYTES) + throw std::exception("Vertex size is too large for DirectX 11"); + + uint64_t ibBytes = uint64_t(maxIndices) * sizeof(uint16_t); + if (ibBytes > uint64_t(D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM * 1024u * 1024u) + || ibBytes > UINT32_MAX) + throw std::exception("IB too large for DirectX 11"); + + uint64_t vbBytes = uint64_t(maxVertices) * uint64_t(vertexSize); + if (vbBytes > uint64_t(D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM * 1024u * 1024u) + || vbBytes > UINT32_MAX) + throw std::exception("VB too large for DirectX 11"); + +#if defined(_XBOX_ONE) && defined(_TITLE) + ThrowIfFailed(deviceContext->QueryInterface(IID_GRAPHICS_PPV_ARGS(mDeviceContext.GetAddressOf()))); + + ComPtr deviceX; + ThrowIfFailed(device.As(&deviceX)); + + // If you only intend to draw non-indexed geometry, specify maxIndices = 0 to skip creating the index buffer. + if (maxIndices > 0) + { + CreateDynamicBuffer(deviceX.Get(), static_cast(ibBytes), D3D11_BIND_INDEX_BUFFER, &mIndexBuffer); + } + + // Create the vertex buffer. 
+ CreateDynamicBuffer(deviceX.Get(), static_cast(vbBytes), D3D11_BIND_VERTEX_BUFFER, &mVertexBuffer); + + grfxMemoryIB = grfxMemoryVB = nullptr; +#else + mDeviceContext = deviceContext; + + // If you only intend to draw non-indexed geometry, specify maxIndices = 0 to skip creating the index buffer. + if (maxIndices > 0) + { + CreateDynamicBuffer(device.Get(), static_cast(ibBytes), D3D11_BIND_INDEX_BUFFER, &mIndexBuffer); + } + + // Create the vertex buffer. + CreateDynamicBuffer(device.Get(), static_cast(vbBytes), D3D11_BIND_VERTEX_BUFFER, &mVertexBuffer); +#endif +} + + +// Begins a batch of primitive drawing operations. +void PrimitiveBatchBase::Impl::Begin() +{ + if (mInBeginEndPair) + throw std::exception("Cannot nest Begin calls"); + +#if defined(_XBOX_ONE) && defined(_TITLE) + mDeviceContext->IASetIndexBuffer(nullptr, DXGI_FORMAT_UNKNOWN, 0); +#else + // Bind the index buffer. + if (mMaxIndices > 0) + { + mDeviceContext->IASetIndexBuffer(mIndexBuffer.Get(), DXGI_FORMAT_R16_UINT, 0); + } + + // Bind the vertex buffer. + auto vertexBuffer = mVertexBuffer.Get(); + UINT vertexStride = static_cast(mVertexSize); + UINT vertexOffset = 0; + + mDeviceContext->IASetVertexBuffers(0, 1, &vertexBuffer, &vertexStride, &vertexOffset); +#endif + + // If this is a deferred D3D context, reset position so the first Map calls will use D3D11_MAP_WRITE_DISCARD. + if (mDeviceContext->GetType() == D3D11_DEVICE_CONTEXT_DEFERRED) + { + mCurrentIndex = 0; + mCurrentVertex = 0; + } + + mInBeginEndPair = true; +} + + +// Ends a batch of primitive drawing operations. +void PrimitiveBatchBase::Impl::End() +{ + if (!mInBeginEndPair) + throw std::exception("Begin must be called before End"); + + FlushBatch(); + + mInBeginEndPair = false; +} + + +namespace +{ + // Can we combine adjacent primitives using this topology into a single draw call? 
+ bool CanBatchPrimitives(D3D11_PRIMITIVE_TOPOLOGY topology) noexcept + { + switch (topology) + { + case D3D11_PRIMITIVE_TOPOLOGY_POINTLIST: + case D3D11_PRIMITIVE_TOPOLOGY_LINELIST: + case D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST: + // Lists can easily be merged. + return true; + + default: + // Strips cannot. + return false; + } + + // We could also merge indexed strips by inserting degenerates, + // but that's not always a perf win, so let's keep things simple. + } + + +#if !defined(_XBOX_ONE) || !defined(_TITLE) + // Helper for locking a vertex or index buffer. + void LockBuffer(_In_ ID3D11DeviceContext* deviceContext, _In_ ID3D11Buffer* buffer, size_t currentPosition, _Out_ size_t* basePosition, _Out_ D3D11_MAPPED_SUBRESOURCE* mappedResource) + { + D3D11_MAP mapType = (currentPosition == 0) ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE; + + ThrowIfFailed( + deviceContext->Map(buffer, 0, mapType, 0, mappedResource) + ); + + *basePosition = currentPosition; + } +#endif +} + + +// Adds new geometry to the batch. +_Use_decl_annotations_ +void PrimitiveBatchBase::Impl::Draw(D3D11_PRIMITIVE_TOPOLOGY topology, bool isIndexed, uint16_t const* indices, size_t indexCount, size_t vertexCount, void** pMappedVertices) +{ + if (isIndexed && !indices) + throw std::exception("Indices cannot be null"); + + if (indexCount >= mMaxIndices) + throw std::exception("Too many indices"); + + if (vertexCount >= mMaxVertices) + throw std::exception("Too many vertices"); + + if (!mInBeginEndPair) + throw std::exception("Begin must be called before Draw"); + + // Can we merge this primitive in with an existing batch, or must we flush first? 
+ bool wrapIndexBuffer = (mCurrentIndex + indexCount > mMaxIndices); + bool wrapVertexBuffer = (mCurrentVertex + vertexCount > mMaxVertices); + + if ((topology != mCurrentTopology) || + (isIndexed != mCurrentlyIndexed) || + !CanBatchPrimitives(topology) || + wrapIndexBuffer || wrapVertexBuffer) + { + FlushBatch(); + } + +#if defined(_XBOX_ONE) && defined(_TITLE) + if (mCurrentTopology == D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED) + { + auto& grfxMem = GraphicsMemory::Get(); + + if (isIndexed) + { + grfxMemoryIB = grfxMem.Allocate(mDeviceContext.Get(), mMaxIndices * sizeof(uint16_t), 64); + } + + grfxMemoryVB = grfxMem.Allocate(mDeviceContext.Get(), mMaxVertices * mVertexSize, 64); + + mCurrentTopology = topology; + mCurrentlyIndexed = isIndexed; + mCurrentIndex = mCurrentVertex = 0; + } + + // Copy over the index data. + if (isIndexed) + { + assert(grfxMemoryIB != nullptr); + auto outputIndices = reinterpret_cast(grfxMemoryIB) + mCurrentIndex; + + for (size_t i = 0; i < indexCount; i++) + { + outputIndices[i] = (uint16_t)(indices[i] + mCurrentVertex); + } + + mCurrentIndex += indexCount; + } + + // Return the output vertex data location. + assert(grfxMemoryVB != nullptr); + *pMappedVertices = reinterpret_cast(grfxMemoryVB) + (mCurrentVertex * mVertexSize); + + mCurrentVertex += vertexCount; +#else + if (wrapIndexBuffer) + mCurrentIndex = 0; + + if (wrapVertexBuffer) + mCurrentVertex = 0; + + // If we are not already in a batch, lock the buffers. + if (mCurrentTopology == D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED) + { + if (isIndexed) + { + LockBuffer(mDeviceContext.Get(), mIndexBuffer.Get(), mCurrentIndex, &mBaseIndex, &mMappedIndices); + } + + LockBuffer(mDeviceContext.Get(), mVertexBuffer.Get(), mCurrentVertex, &mBaseVertex, &mMappedVertices); + + mCurrentTopology = topology; + mCurrentlyIndexed = isIndexed; + } + + // Copy over the index data. 
+ if (isIndexed) + { + auto outputIndices = static_cast(mMappedIndices.pData) + mCurrentIndex; + + for (size_t i = 0; i < indexCount; i++) + { + outputIndices[i] = static_cast(indices[i] + mCurrentVertex - mBaseVertex); + } + + mCurrentIndex += indexCount; + } + + // Return the output vertex data location. + *pMappedVertices = static_cast(mMappedVertices.pData) + (mCurrentVertex * mVertexSize); + + mCurrentVertex += vertexCount; +#endif +} + + +// Sends queued primitives to the graphics device. +void PrimitiveBatchBase::Impl::FlushBatch() +{ + // Early out if there is nothing to flush. + if (mCurrentTopology == D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED) + return; + + mDeviceContext->IASetPrimitiveTopology(mCurrentTopology); + +#if defined(_XBOX_ONE) && defined(_TITLE) + if (mCurrentlyIndexed) + { + // Draw indexed geometry. + mDeviceContext->IASetPlacementIndexBuffer(mIndexBuffer.Get(), grfxMemoryIB, DXGI_FORMAT_R16_UINT); + mDeviceContext->IASetPlacementVertexBuffer(0, mVertexBuffer.Get(), grfxMemoryVB, (UINT)mVertexSize); + + mDeviceContext->DrawIndexed((UINT)mCurrentIndex, 0, 0); + } + else + { + // Draw non-indexed geometry. + mDeviceContext->IASetPlacementVertexBuffer(0, mVertexBuffer.Get(), grfxMemoryVB, (UINT)mVertexSize); + + mDeviceContext->Draw((UINT)mCurrentVertex, 0); + } + + grfxMemoryIB = grfxMemoryVB = nullptr; +#else + mDeviceContext->Unmap(mVertexBuffer.Get(), 0); + + if (mCurrentlyIndexed) + { + // Draw indexed geometry. + mDeviceContext->Unmap(mIndexBuffer.Get(), 0); + + mDeviceContext->DrawIndexed( + static_cast(mCurrentIndex - mBaseIndex), + static_cast(mBaseIndex), + static_cast(mBaseVertex)); + } + else + { + // Draw non-indexed geometry. + mDeviceContext->Draw(static_cast(mCurrentVertex - mBaseVertex), static_cast(mBaseVertex)); + } +#endif + + mCurrentTopology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; +} + + +// Public constructor. 
+PrimitiveBatchBase::PrimitiveBatchBase(_In_ ID3D11DeviceContext* deviceContext, size_t maxIndices, size_t maxVertices, size_t vertexSize) + : pImpl(std::make_unique(deviceContext, maxIndices, maxVertices, vertexSize)) +{ +} + + +// Move constructor. +PrimitiveBatchBase::PrimitiveBatchBase(PrimitiveBatchBase&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +PrimitiveBatchBase& PrimitiveBatchBase::operator= (PrimitiveBatchBase&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +PrimitiveBatchBase::~PrimitiveBatchBase() +{ +} + + +void PrimitiveBatchBase::Begin() +{ + pImpl->Begin(); +} + + +void PrimitiveBatchBase::End() +{ + pImpl->End(); +} + + +_Use_decl_annotations_ +void PrimitiveBatchBase::Draw(D3D11_PRIMITIVE_TOPOLOGY topology, bool isIndexed, uint16_t const* indices, size_t indexCount, size_t vertexCount, void** pMappedVertices) +{ + pImpl->Draw(topology, isIndexed, indices, indexCount, vertexCount, pMappedVertices); +} diff --git a/Sdk/External/DirectXTK/Src/SDKMesh.h b/Sdk/External/DirectXTK/Src/SDKMesh.h new file mode 100644 index 0000000..54c11c8 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/SDKMesh.h @@ -0,0 +1,338 @@ +//-------------------------------------------------------------------------------------- +// File: SDKMesh.h +// +// SDKMESH format is generated by the legacy DirectX SDK's Content Exporter and +// originally rendered by the DXUT helper class SDKMesh +// +// http://go.microsoft.com/fwlink/?LinkId=226208 +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#pragma once + +#include + +namespace DXUT +{ + // .SDKMESH files + + // SDKMESH_HEADER + // SDKMESH_VERTEX_BUFFER_HEADER header->VertexStreamHeadersOffset + // SDKMESH_INDEX_BUFFER_HEADER header->IndexStreamHeadersOffset + // SDKMESH_MESH header->MeshDataOffset + // SDKMESH_SUBSET header->SubsetDataOffset + // SDKMESH_FRAME header->FrameDataOffset + // SDKMESH_MATERIAL header->MaterialDataOffset + // [header->NonBufferDataSize] + // { [ header->NumVertexBuffers] + // VB data + // } + // { [ header->NumIndexBuffers] + // IB data + // } + + + // .SDDKANIM files + + // SDKANIMATION_FILE_HEADER + // uint8_t[] - Length of fileheader->AnimationDataSize + + // .SDKMESH uses Direct3D 9 decls, but only a subset of these is ever generated by the legacy DirectX SDK Content Exporter + + // D3DDECLUSAGE_POSITION / D3DDECLTYPE_FLOAT3 + // (D3DDECLUSAGE_BLENDWEIGHT / D3DDECLTYPE_UBYTE4N + // D3DDECLUSAGE_BLENDINDICES / D3DDECLTYPE_UBYTE4)? + // (D3DDECLUSAGE_NORMAL / D3DDECLTYPE_FLOAT3, D3DDECLTYPE_FLOAT16_4, D3DDECLTYPE_SHORT4N, D3DDECLTYPE_UBYTE4N, or D3DDECLTYPE_DEC3N)? + // (D3DDECLUSAGE_COLOR / D3DDECLTYPE_D3DCOLOR)? + // (D3DDECLUSAGE_TEXCOORD / D3DDECLTYPE_FLOAT1, D3DDECLTYPE_FLOAT2 or D3DDECLTYPE_FLOAT16_2, D3DDECLTYPE_FLOAT3 or D3DDECLTYPE_FLOAT16_4, D3DDECLTYPE_FLOAT4 or D3DDECLTYPE_FLOAT16_4)* + // (D3DDECLUSAGE_TANGENT / same as D3DDECLUSAGE_NORMAL)? + // (D3DDECLUSAGE_BINORMAL / same as D3DDECLUSAGE_NORMAL)? + + enum D3DDECLUSAGE + { + D3DDECLUSAGE_POSITION = 0, + D3DDECLUSAGE_BLENDWEIGHT =1, + D3DDECLUSAGE_BLENDINDICES =2, + D3DDECLUSAGE_NORMAL =3, + D3DDECLUSAGE_TEXCOORD = 5, + D3DDECLUSAGE_TANGENT = 6, + D3DDECLUSAGE_BINORMAL = 7, + D3DDECLUSAGE_COLOR = 10, + }; + + enum D3DDECLTYPE + { + D3DDECLTYPE_FLOAT1 = 0, // 1D float expanded to (value, 0., 0., 1.) 
+ D3DDECLTYPE_FLOAT2 = 1, // 2D float expanded to (value, value, 0., 1.) + D3DDECLTYPE_FLOAT3 = 2, // 3D float expanded to (value, value, value, 1.) + D3DDECLTYPE_FLOAT4 = 3, // 4D float + D3DDECLTYPE_D3DCOLOR = 4, // 4D packed unsigned bytes mapped to 0. to 1. range + // Input is in D3DCOLOR format (ARGB) expanded to (R, G, B, A) + D3DDECLTYPE_UBYTE4 = 5, // 4D unsigned uint8_t + D3DDECLTYPE_UBYTE4N = 8, // Each of 4 bytes is normalized by dividing to 255.0 + D3DDECLTYPE_SHORT4N = 10, // 4D signed short normalized (v[0]/32767.0,v[1]/32767.0,v[2]/32767.0,v[3]/32767.0) + D3DDECLTYPE_DEC3N = 14, // 3D signed normalized (v[0]/511.0, v[1]/511.0, v[2]/511.0, 1.) + // Note: There is no equivalent to D3DDECLTYPE_DEC3N (14) as a DXGI_FORMAT + D3DDECLTYPE_FLOAT16_2 = 15, // Two 16-bit floating point values, expanded to (value, value, 0, 1) + D3DDECLTYPE_FLOAT16_4 = 16, // Four 16-bit floating point values + + D3DDECLTYPE_UNUSED = 17, // When the type field in a decl is unused. + + // These are extensions for DXGI-based versions of Direct3D + D3DDECLTYPE_DXGI_R10G10B10A2_UNORM = 32 + DXGI_FORMAT_R10G10B10A2_UNORM, + D3DDECLTYPE_DXGI_R11G11B10_FLOAT = 32 + DXGI_FORMAT_R11G11B10_FLOAT, + D3DDECLTYPE_DXGI_R8G8B8A8_SNORM = 32 + DXGI_FORMAT_R8G8B8A8_SNORM, + }; + + #pragma pack(push,4) + + struct D3DVERTEXELEMENT9 + { + uint16_t Stream; // Stream index + uint16_t Offset; // Offset in the stream in bytes + uint8_t Type; // Data type + uint8_t Method; // Processing method + uint8_t Usage; // Semantics + uint8_t UsageIndex; // Semantic index + }; + + #pragma pack(pop) + + //-------------------------------------------------------------------------------------- + // Hard Defines for the various structures + //-------------------------------------------------------------------------------------- + constexpr uint32_t SDKMESH_FILE_VERSION = 101; + constexpr uint32_t SDKMESH_FILE_VERSION_V2 = 200; + + constexpr uint32_t MAX_VERTEX_ELEMENTS = 32; + constexpr uint32_t MAX_VERTEX_STREAMS = 
16; + constexpr uint32_t MAX_FRAME_NAME = 100; + constexpr uint32_t MAX_MESH_NAME = 100; + constexpr uint32_t MAX_SUBSET_NAME = 100; + constexpr uint32_t MAX_MATERIAL_NAME = 100; + constexpr uint32_t MAX_TEXTURE_NAME = MAX_PATH; + constexpr uint32_t MAX_MATERIAL_PATH = MAX_PATH; + constexpr uint32_t INVALID_FRAME = uint32_t(-1); + constexpr uint32_t INVALID_MESH = uint32_t(-1); + constexpr uint32_t INVALID_MATERIAL = uint32_t(-1); + constexpr uint32_t INVALID_SUBSET = uint32_t(-1); + constexpr uint32_t INVALID_ANIMATION_DATA = uint32_t(-1); + + //-------------------------------------------------------------------------------------- + // Enumerated Types. + //-------------------------------------------------------------------------------------- + enum SDKMESH_PRIMITIVE_TYPE + { + PT_TRIANGLE_LIST = 0, + PT_TRIANGLE_STRIP, + PT_LINE_LIST, + PT_LINE_STRIP, + PT_POINT_LIST, + PT_TRIANGLE_LIST_ADJ, + PT_TRIANGLE_STRIP_ADJ, + PT_LINE_LIST_ADJ, + PT_LINE_STRIP_ADJ, + PT_QUAD_PATCH_LIST, + PT_TRIANGLE_PATCH_LIST, + }; + + enum SDKMESH_INDEX_TYPE + { + IT_16BIT = 0, + IT_32BIT, + }; + + enum FRAME_TRANSFORM_TYPE + { + FTT_RELATIVE = 0, + FTT_ABSOLUTE, //This is not currently used but is here to support absolute transformations in the future + }; + + //-------------------------------------------------------------------------------------- + // Structures. 
+ //-------------------------------------------------------------------------------------- + #pragma pack(push,8) + + struct SDKMESH_HEADER + { + //Basic Info and sizes + uint32_t Version; + uint8_t IsBigEndian; + uint64_t HeaderSize; + uint64_t NonBufferDataSize; + uint64_t BufferDataSize; + + //Stats + uint32_t NumVertexBuffers; + uint32_t NumIndexBuffers; + uint32_t NumMeshes; + uint32_t NumTotalSubsets; + uint32_t NumFrames; + uint32_t NumMaterials; + + //Offsets to Data + uint64_t VertexStreamHeadersOffset; + uint64_t IndexStreamHeadersOffset; + uint64_t MeshDataOffset; + uint64_t SubsetDataOffset; + uint64_t FrameDataOffset; + uint64_t MaterialDataOffset; + }; + + struct SDKMESH_VERTEX_BUFFER_HEADER + { + uint64_t NumVertices; + uint64_t SizeBytes; + uint64_t StrideBytes; + D3DVERTEXELEMENT9 Decl[MAX_VERTEX_ELEMENTS]; + uint64_t DataOffset; + }; + + struct SDKMESH_INDEX_BUFFER_HEADER + { + uint64_t NumIndices; + uint64_t SizeBytes; + uint32_t IndexType; + uint64_t DataOffset; + }; + + struct SDKMESH_MESH + { + char Name[MAX_MESH_NAME]; + uint8_t NumVertexBuffers; + uint32_t VertexBuffers[MAX_VERTEX_STREAMS]; + uint32_t IndexBuffer; + uint32_t NumSubsets; + uint32_t NumFrameInfluences; //aka bones + + DirectX::XMFLOAT3 BoundingBoxCenter; + DirectX::XMFLOAT3 BoundingBoxExtents; + + union + { + uint64_t SubsetOffset; + INT* pSubsets; + }; + union + { + uint64_t FrameInfluenceOffset; + uint32_t* pFrameInfluences; + }; + }; + + struct SDKMESH_SUBSET + { + char Name[MAX_SUBSET_NAME]; + uint32_t MaterialID; + uint32_t PrimitiveType; + uint64_t IndexStart; + uint64_t IndexCount; + uint64_t VertexStart; + uint64_t VertexCount; + }; + + struct SDKMESH_FRAME + { + char Name[MAX_FRAME_NAME]; + uint32_t Mesh; + uint32_t ParentFrame; + uint32_t ChildFrame; + uint32_t SiblingFrame; + DirectX::XMFLOAT4X4 Matrix; + uint32_t AnimationDataIndex; //Used to index which set of keyframes transforms this frame + }; + + struct SDKMESH_MATERIAL + { + char Name[MAX_MATERIAL_NAME]; + + 
// Use MaterialInstancePath + char MaterialInstancePath[MAX_MATERIAL_PATH]; + + // Or fall back to d3d8-type materials + char DiffuseTexture[MAX_TEXTURE_NAME]; + char NormalTexture[MAX_TEXTURE_NAME]; + char SpecularTexture[MAX_TEXTURE_NAME]; + + DirectX::XMFLOAT4 Diffuse; + DirectX::XMFLOAT4 Ambient; + DirectX::XMFLOAT4 Specular; + DirectX::XMFLOAT4 Emissive; + float Power; + + uint64_t Force64_1; + uint64_t Force64_2; + uint64_t Force64_3; + uint64_t Force64_4; + uint64_t Force64_5; + uint64_t Force64_6; + }; + + struct SDKMESH_MATERIAL_V2 + { + char Name[MAX_MATERIAL_NAME]; + + // PBR materials + char RMATexture[MAX_TEXTURE_NAME]; + char AlbetoTexture[MAX_TEXTURE_NAME]; + char NormalTexture[MAX_TEXTURE_NAME]; + char EmissiveTexture[MAX_TEXTURE_NAME]; + + float Alpha; + + char Reserved[60]; + + uint64_t Force64_1; + uint64_t Force64_2; + uint64_t Force64_3; + uint64_t Force64_4; + uint64_t Force64_5; + uint64_t Force64_6; + }; + + struct SDKANIMATION_FILE_HEADER + { + uint32_t Version; + uint8_t IsBigEndian; + uint32_t FrameTransformType; + uint32_t NumFrames; + uint32_t NumAnimationKeys; + uint32_t AnimationFPS; + uint64_t AnimationDataSize; + uint64_t AnimationDataOffset; + }; + + struct SDKANIMATION_DATA + { + DirectX::XMFLOAT3 Translation; + DirectX::XMFLOAT4 Orientation; + DirectX::XMFLOAT3 Scaling; + }; + + struct SDKANIMATION_FRAME_DATA + { + char FrameName[MAX_FRAME_NAME]; + uint64_t DataOffset; + }; + + #pragma pack(pop) + +} // namespace + +static_assert( sizeof(DXUT::D3DVERTEXELEMENT9) == 8, "Direct3D9 Decl structure size incorrect" ); +static_assert( sizeof(DXUT::SDKMESH_HEADER)== 104, "SDK Mesh structure size incorrect" ); +static_assert( sizeof(DXUT::SDKMESH_VERTEX_BUFFER_HEADER) == 288, "SDK Mesh structure size incorrect" ); +static_assert( sizeof(DXUT::SDKMESH_INDEX_BUFFER_HEADER) == 32, "SDK Mesh structure size incorrect" ); +static_assert( sizeof(DXUT::SDKMESH_MESH) == 224, "SDK Mesh structure size incorrect" ); +static_assert( 
sizeof(DXUT::SDKMESH_SUBSET) == 144, "SDK Mesh structure size incorrect" ); +static_assert( sizeof(DXUT::SDKMESH_FRAME) == 184, "SDK Mesh structure size incorrect" ); +static_assert( sizeof(DXUT::SDKMESH_MATERIAL) == 1256, "SDK Mesh structure size incorrect" ); +static_assert( sizeof(DXUT::SDKMESH_MATERIAL_V2) == sizeof(DXUT::SDKMESH_MATERIAL), "SDK Mesh structure size incorrect" ); +static_assert( sizeof(DXUT::SDKANIMATION_FILE_HEADER) == 40, "SDK Mesh structure size incorrect" ); +static_assert( sizeof(DXUT::SDKANIMATION_DATA) == 40, "SDK Mesh structure size incorrect" ); +static_assert( sizeof(DXUT::SDKANIMATION_FRAME_DATA) == 112, "SDK Mesh structure size incorrect" ); diff --git a/Sdk/External/DirectXTK/Src/ScreenGrab.cpp b/Sdk/External/DirectXTK/Src/ScreenGrab.cpp new file mode 100644 index 0000000..8ab78b3 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/ScreenGrab.cpp @@ -0,0 +1,678 @@ +//-------------------------------------------------------------------------------------- +// File: ScreenGrab.cpp +// +// Function for capturing a 2D texture and saving it to a file (aka a 'screenshot' +// when used on a Direct3D Render Target). +// +// Note these functions are useful as a light-weight runtime screen grabber. For +// full-featured texture capture, DDS writer, and texture processing pipeline, +// see the 'Texconv' sample and the 'DirectXTex' library. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248926 +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +// Does not capture 1D textures or 3D textures (volume maps) + +// Does not capture mipmap chains, only the top-most texture level is saved + +// For 2D array textures and cubemaps, it captures only the first image in the array + +#include "pch.h" + +#include "ScreenGrab.h" +#include "DirectXHelpers.h" + +#include "PlatformHelpers.h" +#include "DDS.h" +#include "LoaderHelpers.h" + +using Microsoft::WRL::ComPtr; +using namespace DirectX; +using namespace DirectX::LoaderHelpers; + +namespace +{ + //-------------------------------------------------------------------------------------- + HRESULT CaptureTexture( + _In_ ID3D11DeviceContext* pContext, + _In_ ID3D11Resource* pSource, + D3D11_TEXTURE2D_DESC& desc, + ComPtr& pStaging) noexcept + { + if (!pContext || !pSource) + return E_INVALIDARG; + + D3D11_RESOURCE_DIMENSION resType = D3D11_RESOURCE_DIMENSION_UNKNOWN; + pSource->GetType(&resType); + + if (resType != D3D11_RESOURCE_DIMENSION_TEXTURE2D) + { + DebugTrace("ERROR: ScreenGrab does not support 1D or volume textures. Consider using DirectXTex instead.\n"); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + ComPtr pTexture; + HRESULT hr = pSource->QueryInterface(IID_GRAPHICS_PPV_ARGS(pTexture.GetAddressOf())); + if (FAILED(hr)) + return hr; + + assert(pTexture); + + pTexture->GetDesc(&desc); + + if (desc.ArraySize > 1 || desc.MipLevels > 1) + { + DebugTrace("WARNING: ScreenGrab does not support 2D arrays, cubemaps, or mipmaps; only the first surface is written. 
Consider using DirectXTex instead.\n"); + } + + ComPtr d3dDevice; + pContext->GetDevice(d3dDevice.GetAddressOf()); + + if (desc.SampleDesc.Count > 1) + { + // MSAA content must be resolved before being copied to a staging texture + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + + ComPtr pTemp; + hr = d3dDevice->CreateTexture2D(&desc, nullptr, pTemp.GetAddressOf()); + if (FAILED(hr)) + return hr; + + assert(pTemp); + + DXGI_FORMAT fmt = EnsureNotTypeless(desc.Format); + + UINT support = 0; + hr = d3dDevice->CheckFormatSupport(fmt, &support); + if (FAILED(hr)) + return hr; + + if (!(support & D3D11_FORMAT_SUPPORT_MULTISAMPLE_RESOLVE)) + return E_FAIL; + + for (UINT item = 0; item < desc.ArraySize; ++item) + { + for (UINT level = 0; level < desc.MipLevels; ++level) + { + UINT index = D3D11CalcSubresource(level, item, desc.MipLevels); + pContext->ResolveSubresource(pTemp.Get(), index, pSource, index, fmt); + } + } + + desc.BindFlags = 0; + desc.MiscFlags &= D3D11_RESOURCE_MISC_TEXTURECUBE; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + desc.Usage = D3D11_USAGE_STAGING; + + hr = d3dDevice->CreateTexture2D(&desc, nullptr, pStaging.ReleaseAndGetAddressOf()); + if (FAILED(hr)) + return hr; + + assert(pStaging); + + pContext->CopyResource(pStaging.Get(), pTemp.Get()); + } + else if ((desc.Usage == D3D11_USAGE_STAGING) && (desc.CPUAccessFlags & D3D11_CPU_ACCESS_READ)) + { + // Handle case where the source is already a staging texture we can use directly + pStaging = pTexture; + } + else + { + // Otherwise, create a staging texture from the non-MSAA source + desc.BindFlags = 0; + desc.MiscFlags &= D3D11_RESOURCE_MISC_TEXTURECUBE; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + desc.Usage = D3D11_USAGE_STAGING; + + hr = d3dDevice->CreateTexture2D(&desc, nullptr, pStaging.ReleaseAndGetAddressOf()); + if (FAILED(hr)) + return hr; + + assert(pStaging); + + pContext->CopyResource(pStaging.Get(), pSource); + } + + #if defined(_XBOX_ONE) && defined(_TITLE) + + if 
(d3dDevice->GetCreationFlags() & D3D11_CREATE_DEVICE_IMMEDIATE_CONTEXT_FAST_SEMANTICS) + { + ComPtr d3dDeviceX; + hr = d3dDevice.As(&d3dDeviceX); + if (FAILED(hr)) + return hr; + + ComPtr d3dContextX; + hr = pContext->QueryInterface(IID_GRAPHICS_PPV_ARGS(d3dContextX.GetAddressOf())); + if (FAILED(hr)) + return hr; + + UINT64 copyFence = d3dContextX->InsertFence(0); + + while (d3dDeviceX->IsFencePending(copyFence)) + { + SwitchToThread(); + } + } + + #endif + + return S_OK; + } +} // anonymous namespace + + +//-------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT DirectX::SaveDDSTextureToFile( + ID3D11DeviceContext* pContext, + ID3D11Resource* pSource, + const wchar_t* fileName) noexcept +{ + if (!fileName) + return E_INVALIDARG; + + D3D11_TEXTURE2D_DESC desc = {}; + ComPtr pStaging; + HRESULT hr = CaptureTexture(pContext, pSource, desc, pStaging); + if (FAILED(hr)) + return hr; + + // Create file +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) + ScopedHandle hFile(safe_handle(CreateFile2(fileName, GENERIC_WRITE | DELETE, 0, CREATE_ALWAYS, nullptr))); +#else + ScopedHandle hFile(safe_handle(CreateFileW(fileName, GENERIC_WRITE | DELETE, 0, nullptr, CREATE_ALWAYS, 0, nullptr))); +#endif + if (!hFile) + return HRESULT_FROM_WIN32(GetLastError()); + + auto_delete_file delonfail(hFile.get()); + + // Setup header + const size_t MAX_HEADER_SIZE = sizeof(uint32_t) + sizeof(DDS_HEADER) + sizeof(DDS_HEADER_DXT10); + uint8_t fileHeader[MAX_HEADER_SIZE] = {}; + + *reinterpret_cast(&fileHeader[0]) = DDS_MAGIC; + + auto header = reinterpret_cast(&fileHeader[0] + sizeof(uint32_t)); + size_t headerSize = sizeof(uint32_t) + sizeof(DDS_HEADER); + header->size = sizeof(DDS_HEADER); + header->flags = DDS_HEADER_FLAGS_TEXTURE | DDS_HEADER_FLAGS_MIPMAP; + header->height = desc.Height; + header->width = desc.Width; + header->mipMapCount = 1; + header->caps = DDS_SURFACE_FLAGS_TEXTURE; + + // Try to use a legacy .DDS pixel format 
for better tools support, otherwise fallback to 'DX10' header extension + DDS_HEADER_DXT10* extHeader = nullptr; + switch (desc.Format) + { + case DXGI_FORMAT_R8G8B8A8_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_A8B8G8R8, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_R16G16_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_G16R16, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_R8G8_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_A8L8, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_R16_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_L16, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_R8_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_L8, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_A8_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_A8, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_R8G8_B8G8_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_R8G8_B8G8, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_G8R8_G8B8_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_G8R8_G8B8, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_BC1_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_DXT1, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_BC2_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_DXT3, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_BC3_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_DXT5, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_BC4_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_BC4_UNORM, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_BC4_SNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_BC4_SNORM, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_BC5_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_BC5_UNORM, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_BC5_SNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), 
&DDSPF_BC5_SNORM, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_B5G6R5_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_R5G6B5, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_B5G5R5A1_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_A1R5G5B5, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_R8G8_SNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_V8U8, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_R8G8B8A8_SNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_Q8W8V8U8, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_R16G16_SNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_V16U16, sizeof(DDS_PIXELFORMAT)); break; + case DXGI_FORMAT_B8G8R8A8_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_A8R8G8B8, sizeof(DDS_PIXELFORMAT)); break; // DXGI 1.1 + case DXGI_FORMAT_B8G8R8X8_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_X8R8G8B8, sizeof(DDS_PIXELFORMAT)); break; // DXGI 1.1 + case DXGI_FORMAT_YUY2: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_YUY2, sizeof(DDS_PIXELFORMAT)); break; // DXGI 1.2 + case DXGI_FORMAT_B4G4R4A4_UNORM: memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_A4R4G4B4, sizeof(DDS_PIXELFORMAT)); break; // DXGI 1.2 + + // Legacy D3DX formats using D3DFMT enum value as FourCC + case DXGI_FORMAT_R32G32B32A32_FLOAT: header->ddspf.size = sizeof(DDS_PIXELFORMAT); header->ddspf.flags = DDS_FOURCC; header->ddspf.fourCC = 116; break; // D3DFMT_A32B32G32R32F + case DXGI_FORMAT_R16G16B16A16_FLOAT: header->ddspf.size = sizeof(DDS_PIXELFORMAT); header->ddspf.flags = DDS_FOURCC; header->ddspf.fourCC = 113; break; // D3DFMT_A16B16G16R16F + case DXGI_FORMAT_R16G16B16A16_UNORM: header->ddspf.size = sizeof(DDS_PIXELFORMAT); header->ddspf.flags = DDS_FOURCC; header->ddspf.fourCC = 36; break; // D3DFMT_A16B16G16R16 + case DXGI_FORMAT_R16G16B16A16_SNORM: header->ddspf.size = sizeof(DDS_PIXELFORMAT); header->ddspf.flags = DDS_FOURCC; header->ddspf.fourCC 
= 110; break; // D3DFMT_Q16W16V16U16 + case DXGI_FORMAT_R32G32_FLOAT: header->ddspf.size = sizeof(DDS_PIXELFORMAT); header->ddspf.flags = DDS_FOURCC; header->ddspf.fourCC = 115; break; // D3DFMT_G32R32F + case DXGI_FORMAT_R16G16_FLOAT: header->ddspf.size = sizeof(DDS_PIXELFORMAT); header->ddspf.flags = DDS_FOURCC; header->ddspf.fourCC = 112; break; // D3DFMT_G16R16F + case DXGI_FORMAT_R32_FLOAT: header->ddspf.size = sizeof(DDS_PIXELFORMAT); header->ddspf.flags = DDS_FOURCC; header->ddspf.fourCC = 114; break; // D3DFMT_R32F + case DXGI_FORMAT_R16_FLOAT: header->ddspf.size = sizeof(DDS_PIXELFORMAT); header->ddspf.flags = DDS_FOURCC; header->ddspf.fourCC = 111; break; // D3DFMT_R16F + + case DXGI_FORMAT_AI44: + case DXGI_FORMAT_IA44: + case DXGI_FORMAT_P8: + case DXGI_FORMAT_A8P8: + DebugTrace("ERROR: ScreenGrab does not support video textures. Consider using DirectXTex.\n"); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + + default: + memcpy_s(&header->ddspf, sizeof(header->ddspf), &DDSPF_DX10, sizeof(DDS_PIXELFORMAT)); + + headerSize += sizeof(DDS_HEADER_DXT10); + extHeader = reinterpret_cast(fileHeader + sizeof(uint32_t) + sizeof(DDS_HEADER)); + extHeader->dxgiFormat = desc.Format; + extHeader->resourceDimension = D3D11_RESOURCE_DIMENSION_TEXTURE2D; + extHeader->arraySize = 1; + break; + } + + size_t rowPitch, slicePitch, rowCount; + hr = GetSurfaceInfo(desc.Width, desc.Height, desc.Format, &slicePitch, &rowPitch, &rowCount); + if (FAILED(hr)) + return hr; + + if (rowPitch > UINT32_MAX || slicePitch > UINT32_MAX) + return HRESULT_FROM_WIN32(ERROR_ARITHMETIC_OVERFLOW); + + if (IsCompressed(desc.Format)) + { + header->flags |= DDS_HEADER_FLAGS_LINEARSIZE; + header->pitchOrLinearSize = static_cast(slicePitch); + } + else + { + header->flags |= DDS_HEADER_FLAGS_PITCH; + header->pitchOrLinearSize = static_cast(rowPitch); + } + + // Setup pixels + std::unique_ptr pixels(new (std::nothrow) uint8_t[slicePitch]); + if (!pixels) + return E_OUTOFMEMORY; + + 
D3D11_MAPPED_SUBRESOURCE mapped; + hr = pContext->Map(pStaging.Get(), 0, D3D11_MAP_READ, 0, &mapped); + if (FAILED(hr)) + return hr; + + auto sptr = static_cast(mapped.pData); + if (!sptr) + { + pContext->Unmap(pStaging.Get(), 0); + return E_POINTER; + } + + uint8_t* dptr = pixels.get(); + + size_t msize = std::min(rowPitch, mapped.RowPitch); + for (size_t h = 0; h < rowCount; ++h) + { + memcpy_s(dptr, rowPitch, sptr, msize); + sptr += mapped.RowPitch; + dptr += rowPitch; + } + + pContext->Unmap(pStaging.Get(), 0); + + // Write header & pixels + DWORD bytesWritten; + if (!WriteFile(hFile.get(), fileHeader, static_cast(headerSize), &bytesWritten, nullptr)) + return HRESULT_FROM_WIN32(GetLastError()); + + if (bytesWritten != headerSize) + return E_FAIL; + + if (!WriteFile(hFile.get(), pixels.get(), static_cast(slicePitch), &bytesWritten, nullptr)) + return HRESULT_FROM_WIN32(GetLastError()); + + if (bytesWritten != slicePitch) + return E_FAIL; + + delonfail.clear(); + + return S_OK; +} + +//-------------------------------------------------------------------------------------- +namespace DirectX +{ + extern bool _IsWIC2() noexcept; + extern IWICImagingFactory* _GetWIC() noexcept; +} + +_Use_decl_annotations_ +HRESULT DirectX::SaveWICTextureToFile( + ID3D11DeviceContext* pContext, + ID3D11Resource* pSource, + REFGUID guidContainerFormat, + const wchar_t* fileName, + const GUID* targetFormat, + std::function setCustomProps, + bool forceSRGB) +{ + if (!fileName) + return E_INVALIDARG; + + D3D11_TEXTURE2D_DESC desc = {}; + ComPtr pStaging; + HRESULT hr = CaptureTexture(pContext, pSource, desc, pStaging); + if (FAILED(hr)) + return hr; + + // Determine source format's WIC equivalent + WICPixelFormatGUID pfGuid = {}; + bool sRGB = forceSRGB; + switch (desc.Format) + { + case DXGI_FORMAT_R32G32B32A32_FLOAT: pfGuid = GUID_WICPixelFormat128bppRGBAFloat; break; + case DXGI_FORMAT_R16G16B16A16_FLOAT: pfGuid = GUID_WICPixelFormat64bppRGBAHalf; break; + case 
DXGI_FORMAT_R16G16B16A16_UNORM: pfGuid = GUID_WICPixelFormat64bppRGBA; break; + case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: pfGuid = GUID_WICPixelFormat32bppRGBA1010102XR; break; // DXGI 1.1 + case DXGI_FORMAT_R10G10B10A2_UNORM: pfGuid = GUID_WICPixelFormat32bppRGBA1010102; break; + case DXGI_FORMAT_B5G5R5A1_UNORM: pfGuid = GUID_WICPixelFormat16bppBGRA5551; break; + case DXGI_FORMAT_B5G6R5_UNORM: pfGuid = GUID_WICPixelFormat16bppBGR565; break; + case DXGI_FORMAT_R32_FLOAT: pfGuid = GUID_WICPixelFormat32bppGrayFloat; break; + case DXGI_FORMAT_R16_FLOAT: pfGuid = GUID_WICPixelFormat16bppGrayHalf; break; + case DXGI_FORMAT_R16_UNORM: pfGuid = GUID_WICPixelFormat16bppGray; break; + case DXGI_FORMAT_R8_UNORM: pfGuid = GUID_WICPixelFormat8bppGray; break; + case DXGI_FORMAT_A8_UNORM: pfGuid = GUID_WICPixelFormat8bppAlpha; break; + + case DXGI_FORMAT_R8G8B8A8_UNORM: + pfGuid = GUID_WICPixelFormat32bppRGBA; + break; + + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + pfGuid = GUID_WICPixelFormat32bppRGBA; + sRGB = true; + break; + + case DXGI_FORMAT_B8G8R8A8_UNORM: // DXGI 1.1 + pfGuid = GUID_WICPixelFormat32bppBGRA; + break; + + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: // DXGI 1.1 + pfGuid = GUID_WICPixelFormat32bppBGRA; + sRGB = true; + break; + + case DXGI_FORMAT_B8G8R8X8_UNORM: // DXGI 1.1 + pfGuid = GUID_WICPixelFormat32bppBGR; + break; + + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: // DXGI 1.1 + pfGuid = GUID_WICPixelFormat32bppBGR; + sRGB = true; + break; + + default: + DebugTrace("ERROR: ScreenGrab does not support all DXGI formats (%u). 
Consider using DirectXTex.\n", static_cast(desc.Format)); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + auto pWIC = _GetWIC(); + if (!pWIC) + return E_NOINTERFACE; + + ComPtr stream; + hr = pWIC->CreateStream(stream.GetAddressOf()); + if (FAILED(hr)) + return hr; + + hr = stream->InitializeFromFilename(fileName, GENERIC_WRITE); + if (FAILED(hr)) + return hr; + + auto_delete_file_wic delonfail(stream, fileName); + + ComPtr encoder; + hr = pWIC->CreateEncoder(guidContainerFormat, nullptr, encoder.GetAddressOf()); + if (FAILED(hr)) + return hr; + + hr = encoder->Initialize(stream.Get(), WICBitmapEncoderNoCache); + if (FAILED(hr)) + return hr; + + ComPtr frame; + ComPtr props; + hr = encoder->CreateNewFrame(frame.GetAddressOf(), props.GetAddressOf()); + if (FAILED(hr)) + return hr; + + if (targetFormat && memcmp(&guidContainerFormat, &GUID_ContainerFormatBmp, sizeof(WICPixelFormatGUID)) == 0 && _IsWIC2()) + { + // Opt-in to the WIC2 support for writing 32-bit Windows BMP files with an alpha channel + PROPBAG2 option = {}; + option.pstrName = const_cast(L"EnableV5Header32bppBGRA"); + + VARIANT varValue; + varValue.vt = VT_BOOL; + varValue.boolVal = VARIANT_TRUE; + (void)props->Write(1, &option, &varValue); + } + + if (setCustomProps) + { + setCustomProps(props.Get()); + } + + hr = frame->Initialize(props.Get()); + if (FAILED(hr)) + return hr; + + hr = frame->SetSize(desc.Width, desc.Height); + if (FAILED(hr)) + return hr; + + hr = frame->SetResolution(72, 72); + if (FAILED(hr)) + return hr; + + // Pick a target format + WICPixelFormatGUID targetGuid = {}; + if (targetFormat) + { + targetGuid = *targetFormat; + } + else + { + // Screenshots don't typically include the alpha channel of the render target + switch (desc.Format) + { + #if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) || defined(_WIN7_PLATFORM_UPDATE) + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + if (_IsWIC2()) + { + targetGuid = GUID_WICPixelFormat96bppRGBFloat; + } + else 
+ { + targetGuid = GUID_WICPixelFormat24bppBGR; + } + break; + #endif + + case DXGI_FORMAT_R16G16B16A16_UNORM: targetGuid = GUID_WICPixelFormat48bppBGR; break; + case DXGI_FORMAT_B5G5R5A1_UNORM: targetGuid = GUID_WICPixelFormat16bppBGR555; break; + case DXGI_FORMAT_B5G6R5_UNORM: targetGuid = GUID_WICPixelFormat16bppBGR565; break; + + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_R16_FLOAT: + case DXGI_FORMAT_R16_UNORM: + case DXGI_FORMAT_R8_UNORM: + case DXGI_FORMAT_A8_UNORM: + targetGuid = GUID_WICPixelFormat8bppGray; + break; + + default: + targetGuid = GUID_WICPixelFormat24bppBGR; + break; + } + } + + hr = frame->SetPixelFormat(&targetGuid); + if (FAILED(hr)) + return hr; + + if (targetFormat && memcmp(targetFormat, &targetGuid, sizeof(WICPixelFormatGUID)) != 0) + { + // Requested output pixel format is not supported by the WIC codec + return E_FAIL; + } + + // Encode WIC metadata + ComPtr metawriter; + if (SUCCEEDED(frame->GetMetadataQueryWriter(metawriter.GetAddressOf()))) + { + PROPVARIANT value; + PropVariantInit(&value); + + value.vt = VT_LPSTR; + value.pszVal = const_cast("DirectXTK"); + + if (memcmp(&guidContainerFormat, &GUID_ContainerFormatPng, sizeof(GUID)) == 0) + { + // Set Software name + (void)metawriter->SetMetadataByName(L"/tEXt/{str=Software}", &value); + + // Set sRGB chunk + if (sRGB) + { + value.vt = VT_UI1; + value.bVal = 0; + (void)metawriter->SetMetadataByName(L"/sRGB/RenderingIntent", &value); + } + else + { + // add gAMA chunk with gamma 1.0 + value.vt = VT_UI4; + value.uintVal = 100000; // gama value * 100,000 -- i.e. gamma 1.0 + (void)metawriter->SetMetadataByName(L"/gAMA/ImageGamma", &value); + + // remove sRGB chunk which is added by default. 
+ (void)metawriter->RemoveMetadataByName(L"/sRGB/RenderingIntent"); + } + } + #if defined(_XBOX_ONE) && defined(_TITLE) + else if (memcmp(&guidContainerFormat, &GUID_ContainerFormatJpeg, sizeof(GUID)) == 0) + { + // Set Software name + (void)metawriter->SetMetadataByName(L"/app1/ifd/{ushort=305}", &value); + + if (sRGB) + { + // Set EXIF Colorspace of sRGB + value.vt = VT_UI2; + value.uiVal = 1; + (void)metawriter->SetMetadataByName(L"/app1/ifd/exif/{ushort=40961}", &value); + } + } + else if (memcmp(&guidContainerFormat, &GUID_ContainerFormatTiff, sizeof(GUID)) == 0) + { + // Set Software name + (void)metawriter->SetMetadataByName(L"/ifd/{ushort=305}", &value); + + if (sRGB) + { + // Set EXIF Colorspace of sRGB + value.vt = VT_UI2; + value.uiVal = 1; + (void)metawriter->SetMetadataByName(L"/ifd/exif/{ushort=40961}", &value); + } + } + #else + else + { + // Set Software name + (void)metawriter->SetMetadataByName(L"System.ApplicationName", &value); + + if (sRGB) + { + // Set EXIF Colorspace of sRGB + value.vt = VT_UI2; + value.uiVal = 1; + (void)metawriter->SetMetadataByName(L"System.Image.ColorSpace", &value); + } + } + #endif + } + + D3D11_MAPPED_SUBRESOURCE mapped; + hr = pContext->Map(pStaging.Get(), 0, D3D11_MAP_READ, 0, &mapped); + if (FAILED(hr)) + return hr; + + uint64_t imageSize = uint64_t(mapped.RowPitch) * uint64_t(desc.Height); + if (imageSize > UINT32_MAX) + { + pContext->Unmap(pStaging.Get(), 0); + return HRESULT_FROM_WIN32(ERROR_ARITHMETIC_OVERFLOW); + } + + if (memcmp(&targetGuid, &pfGuid, sizeof(WICPixelFormatGUID)) != 0) + { + // Conversion required to write + ComPtr source; + hr = pWIC->CreateBitmapFromMemory(desc.Width, desc.Height, + pfGuid, + mapped.RowPitch, static_cast(imageSize), + static_cast(mapped.pData), source.GetAddressOf()); + if (FAILED(hr)) + { + pContext->Unmap(pStaging.Get(), 0); + return hr; + } + + ComPtr FC; + hr = pWIC->CreateFormatConverter(FC.GetAddressOf()); + if (FAILED(hr)) + { + pContext->Unmap(pStaging.Get(), 0); + 
return hr; + } + + BOOL canConvert = FALSE; + hr = FC->CanConvert(pfGuid, targetGuid, &canConvert); + if (FAILED(hr) || !canConvert) + { + pContext->Unmap(pStaging.Get(), 0); + return E_UNEXPECTED; + } + + hr = FC->Initialize(source.Get(), targetGuid, WICBitmapDitherTypeNone, nullptr, 0, WICBitmapPaletteTypeMedianCut); + if (FAILED(hr)) + { + pContext->Unmap(pStaging.Get(), 0); + return hr; + } + + WICRect rect = { 0, 0, static_cast(desc.Width), static_cast(desc.Height) }; + hr = frame->WriteSource(FC.Get(), &rect); + } + else + { + // No conversion required + hr = frame->WritePixels(desc.Height, + mapped.RowPitch, static_cast(imageSize), + static_cast(mapped.pData)); + } + + pContext->Unmap(pStaging.Get(), 0); + + if (FAILED(hr)) + return hr; + + hr = frame->Commit(); + if (FAILED(hr)) + return hr; + + hr = encoder->Commit(); + if (FAILED(hr)) + return hr; + + delonfail.clear(); + + return S_OK; +} diff --git a/Sdk/External/DirectXTK/Src/Shaders/AlphaTestEffect.fx b/Sdk/External/DirectXTK/Src/Shaders/AlphaTestEffect.fx new file mode 100644 index 0000000..4388f5e --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/AlphaTestEffect.fx @@ -0,0 +1,129 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://create.msdn.com/en-US/education/catalog/sample/stock_effects + + +Texture2D Texture : register(t0); +sampler Sampler : register(s0); + + +cbuffer Parameters : register(b0) +{ + float4 DiffuseColor : packoffset(c0); + float4 AlphaTest : packoffset(c1); + float3 FogColor : packoffset(c2); + float4 FogVector : packoffset(c3); + float4x4 WorldViewProj : packoffset(c4); +}; + + +#include "Structures.fxh" +#include "Common.fxh" + + +// Vertex shader: basic. 
+VSOutputTx VSAlphaTest(VSInputTx vin) +{ + VSOutputTx vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParams; + + vout.TexCoord = vin.TexCoord; + + return vout; +} + + +// Vertex shader: no fog. +VSOutputTxNoFog VSAlphaTestNoFog(VSInputTx vin) +{ + VSOutputTxNoFog vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParamsNoFog; + + vout.TexCoord = vin.TexCoord; + + return vout; +} + + +// Vertex shader: vertex color. +VSOutputTx VSAlphaTestVc(VSInputTxVc vin) +{ + VSOutputTx vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParams; + + vout.TexCoord = vin.TexCoord; + vout.Diffuse *= vin.Color; + + return vout; +} + + +// Vertex shader: vertex color, no fog. +VSOutputTxNoFog VSAlphaTestVcNoFog(VSInputTxVc vin) +{ + VSOutputTxNoFog vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParamsNoFog; + + vout.TexCoord = vin.TexCoord; + vout.Diffuse *= vin.Color; + + return vout; +} + + +// Pixel shader: less/greater compare function. +float4 PSAlphaTestLtGt(PSInputTx pin) : SV_Target0 +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse; + + clip((color.a < AlphaTest.x) ? AlphaTest.z : AlphaTest.w); + + ApplyFog(color, pin.Specular.w); + + return color; +} + + +// Pixel shader: less/greater compare function, no fog. +float4 PSAlphaTestLtGtNoFog(PSInputTxNoFog pin) : SV_Target0 +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse; + + clip((color.a < AlphaTest.x) ? AlphaTest.z : AlphaTest.w); + + return color; +} + + +// Pixel shader: equal/notequal compare function. +float4 PSAlphaTestEqNe(PSInputTx pin) : SV_Target0 +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse; + + clip((abs(color.a - AlphaTest.x) < AlphaTest.y) ? 
AlphaTest.z : AlphaTest.w); + + ApplyFog(color, pin.Specular.w); + + return color; +} + + +// Pixel shader: equal/notequal compare function, no fog. +float4 PSAlphaTestEqNeNoFog(PSInputTxNoFog pin) : SV_Target0 +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse; + + clip((abs(color.a - AlphaTest.x) < AlphaTest.y) ? AlphaTest.z : AlphaTest.w); + + return color; +} diff --git a/Sdk/External/DirectXTK/Src/Shaders/BasicEffect.fx b/Sdk/External/DirectXTK/Src/Shaders/BasicEffect.fx new file mode 100644 index 0000000..1a80f1b --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/BasicEffect.fx @@ -0,0 +1,607 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://create.msdn.com/en-US/education/catalog/sample/stock_effects + + +Texture2D Texture : register(t0); +sampler Sampler : register(s0); + + +cbuffer Parameters : register(b0) +{ + float4 DiffuseColor : packoffset(c0); + float3 EmissiveColor : packoffset(c1); + float3 SpecularColor : packoffset(c2); + float SpecularPower : packoffset(c2.w); + + float3 LightDirection[3] : packoffset(c3); + float3 LightDiffuseColor[3] : packoffset(c6); + float3 LightSpecularColor[3] : packoffset(c9); + + float3 EyePosition : packoffset(c12); + + float3 FogColor : packoffset(c13); + float4 FogVector : packoffset(c14); + + float4x4 World : packoffset(c15); + float3x3 WorldInverseTranspose : packoffset(c19); + float4x4 WorldViewProj : packoffset(c22); +}; + + +#include "Structures.fxh" +#include "Common.fxh" +#include "Lighting.fxh" +#include "Utilities.fxh" + + +// Vertex shader: basic. +VSOutput VSBasic(VSInput vin) +{ + VSOutput vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParams; + + return vout; +} + + +// Vertex shader: no fog. 
+VSOutputNoFog VSBasicNoFog(VSInput vin) +{ + VSOutputNoFog vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParamsNoFog; + + return vout; +} + + +// Vertex shader: vertex color. +VSOutput VSBasicVc(VSInputVc vin) +{ + VSOutput vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParams; + + vout.Diffuse *= vin.Color; + + return vout; +} + + +// Vertex shader: vertex color, no fog. +VSOutputNoFog VSBasicVcNoFog(VSInputVc vin) +{ + VSOutputNoFog vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParamsNoFog; + + vout.Diffuse *= vin.Color; + + return vout; +} + + +// Vertex shader: texture. +VSOutputTx VSBasicTx(VSInputTx vin) +{ + VSOutputTx vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParams; + + vout.TexCoord = vin.TexCoord; + + return vout; +} + + +// Vertex shader: texture, no fog. +VSOutputTxNoFog VSBasicTxNoFog(VSInputTx vin) +{ + VSOutputTxNoFog vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParamsNoFog; + + vout.TexCoord = vin.TexCoord; + + return vout; +} + + +// Vertex shader: texture + vertex color. +VSOutputTx VSBasicTxVc(VSInputTxVc vin) +{ + VSOutputTx vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParams; + + vout.TexCoord = vin.TexCoord; + vout.Diffuse *= vin.Color; + + return vout; +} + + +// Vertex shader: texture + vertex color, no fog. +VSOutputTxNoFog VSBasicTxVcNoFog(VSInputTxVc vin) +{ + VSOutputTxNoFog vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParamsNoFog; + + vout.TexCoord = vin.TexCoord; + vout.Diffuse *= vin.Color; + + return vout; +} + + +// Vertex shader: vertex lighting. 
+VSOutput VSBasicVertexLighting(VSInputNm vin) +{ + VSOutput vout; + + CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, vin.Normal, 3); + SetCommonVSOutputParams; + + return vout; +} + +VSOutput VSBasicVertexLightingBn(VSInputNm vin) +{ + VSOutput vout; + + float3 normal = BiasX2(vin.Normal); + + CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 3); + SetCommonVSOutputParams; + + return vout; +} + + +// Vertex shader: vertex lighting + vertex color. +VSOutput VSBasicVertexLightingVc(VSInputNmVc vin) +{ + VSOutput vout; + + CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, vin.Normal, 3); + SetCommonVSOutputParams; + + vout.Diffuse *= vin.Color; + + return vout; +} + +VSOutput VSBasicVertexLightingVcBn(VSInputNmVc vin) +{ + VSOutput vout; + + float3 normal = BiasX2(vin.Normal); + + CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 3); + SetCommonVSOutputParams; + + vout.Diffuse *= vin.Color; + + return vout; +} + + +// Vertex shader: vertex lighting + texture. +VSOutputTx VSBasicVertexLightingTx(VSInputNmTx vin) +{ + VSOutputTx vout; + + CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, vin.Normal, 3); + SetCommonVSOutputParams; + + vout.TexCoord = vin.TexCoord; + + return vout; +} + +VSOutputTx VSBasicVertexLightingTxBn(VSInputNmTx vin) +{ + VSOutputTx vout; + + float3 normal = BiasX2(vin.Normal); + + CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 3); + SetCommonVSOutputParams; + + vout.TexCoord = vin.TexCoord; + + return vout; +} + + +// Vertex shader: vertex lighting + texture + vertex color. 
+// Forwards the texture coordinate and modulates the lit diffuse color by the per-vertex color.
+// The last argument (3) to the lighting helper is presumably the light count -- confirm in the lighting header.
+VSOutputTx VSBasicVertexLightingTxVc(VSInputNmTxVc vin)
+{
+    VSOutputTx vout;
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, vin.Normal, 3);
+    SetCommonVSOutputParams;
+
+    vout.TexCoord = vin.TexCoord;
+    vout.Diffuse *= vin.Color;
+
+    return vout;
+}
+
+// "Bn" variant of VSBasicVertexLightingTxVc: the normal goes through BiasX2 (defined elsewhere) before lighting.
+VSOutputTx VSBasicVertexLightingTxVcBn(VSInputNmTxVc vin)
+{
+    VSOutputTx vout;
+
+    float3 normal = BiasX2(vin.Normal);
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 3);
+    SetCommonVSOutputParams;
+
+    vout.TexCoord = vin.TexCoord;
+    vout.Diffuse *= vin.Color;
+
+    return vout;
+}
+
+
+// Vertex shader: one light.
+// Same shape as the vertex-lighting shaders, but the lighting helper is called with 1 instead of 3.
+VSOutput VSBasicOneLight(VSInputNm vin)
+{
+    VSOutput vout;
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, vin.Normal, 1);
+    SetCommonVSOutputParams;
+
+    return vout;
+}
+
+// "Bn" variant of VSBasicOneLight: the normal goes through BiasX2 before lighting.
+VSOutput VSBasicOneLightBn(VSInputNm vin)
+{
+    VSOutput vout;
+
+    float3 normal = BiasX2(vin.Normal);
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 1);
+    SetCommonVSOutputParams;
+
+    return vout;
+}
+
+
+// Vertex shader: one light + vertex color.
+// The lit diffuse result is additionally modulated by the per-vertex color.
+VSOutput VSBasicOneLightVc(VSInputNmVc vin)
+{
+    VSOutput vout;
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, vin.Normal, 1);
+    SetCommonVSOutputParams;
+
+    vout.Diffuse *= vin.Color;
+
+    return vout;
+}
+
+// "Bn" variant of VSBasicOneLightVc: the normal goes through BiasX2 before lighting.
+VSOutput VSBasicOneLightVcBn(VSInputNmVc vin)
+{
+    VSOutput vout;
+
+    float3 normal = BiasX2(vin.Normal);
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 1);
+    SetCommonVSOutputParams;
+
+    vout.Diffuse *= vin.Color;
+
+    return vout;
+}
+
+
+// Vertex shader: one light + texture. 
+// Lighting helper is called with 1 (presumably the light count -- confirm); the texture coordinate is forwarded unchanged.
+VSOutputTx VSBasicOneLightTx(VSInputNmTx vin)
+{
+    VSOutputTx vout;
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, vin.Normal, 1);
+    SetCommonVSOutputParams;
+
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+// "Bn" variant of VSBasicOneLightTx: the normal goes through BiasX2 (defined elsewhere) before lighting.
+VSOutputTx VSBasicOneLightTxBn(VSInputNmTx vin)
+{
+    VSOutputTx vout;
+
+    float3 normal = BiasX2(vin.Normal);
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 1);
+    SetCommonVSOutputParams;
+
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+
+// Vertex shader: one light + texture + vertex color.
+// Forwards the texture coordinate and modulates the lit diffuse color by the per-vertex color.
+VSOutputTx VSBasicOneLightTxVc(VSInputNmTxVc vin)
+{
+    VSOutputTx vout;
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, vin.Normal, 1);
+    SetCommonVSOutputParams;
+
+    vout.TexCoord = vin.TexCoord;
+    vout.Diffuse *= vin.Color;
+
+    return vout;
+}
+
+// "Bn" variant of VSBasicOneLightTxVc: the normal goes through BiasX2 before lighting.
+VSOutputTx VSBasicOneLightTxVcBn(VSInputNmTxVc vin)
+{
+    VSOutputTx vout;
+
+    float3 normal = BiasX2(vin.Normal);
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 1);
+    SetCommonVSOutputParams;
+
+    vout.TexCoord = vin.TexCoord;
+    vout.Diffuse *= vin.Color;
+
+    return vout;
+}
+
+
+// Vertex shader: pixel lighting.
+// Uses the pixel-lighting helper and macro (both defined outside this view), then overwrites Diffuse
+// with white RGB and the material alpha (DiffuseColor.a).
+// NOTE(review): presumably the material RGB is applied later in the pixel shader's lighting -- confirm.
+VSOutputPixelLighting VSBasicPixelLighting(VSInputNm vin)
+{
+    VSOutputPixelLighting vout;
+
+    CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, vin.Normal);
+    SetCommonVSOutputParamsPixelLighting;
+
+    vout.Diffuse = float4(1, 1, 1, DiffuseColor.a);
+
+    return vout;
+}
+
+// "Bn" variant of VSBasicPixelLighting: the normal goes through BiasX2 before being handed to the helper.
+VSOutputPixelLighting VSBasicPixelLightingBn(VSInputNm vin)
+{
+    VSOutputPixelLighting vout;
+
+    float3 normal = BiasX2(vin.Normal);
+
+    CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, normal);
+    SetCommonVSOutputParamsPixelLighting;
+
+    vout.Diffuse = float4(1, 1, 1, DiffuseColor.a);
+
+    return vout;
+}
+
+
+// Vertex shader: pixel lighting + vertex color. 
+// Diffuse output: RGB is the per-vertex color as-is; alpha is vertex alpha times the material alpha (DiffuseColor.a).
+VSOutputPixelLighting VSBasicPixelLightingVc(VSInputNmVc vin)
+{
+    VSOutputPixelLighting vout;
+
+    CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, vin.Normal);
+    SetCommonVSOutputParamsPixelLighting;
+
+    vout.Diffuse.rgb = vin.Color.rgb;
+    vout.Diffuse.a = vin.Color.a * DiffuseColor.a;
+
+    return vout;
+}
+
+// "Bn" variant of VSBasicPixelLightingVc: the normal goes through BiasX2 (defined elsewhere) first.
+VSOutputPixelLighting VSBasicPixelLightingVcBn(VSInputNmVc vin)
+{
+    VSOutputPixelLighting vout;
+
+    float3 normal = BiasX2(vin.Normal);
+
+    CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, normal);
+    SetCommonVSOutputParamsPixelLighting;
+
+    vout.Diffuse.rgb = vin.Color.rgb;
+    vout.Diffuse.a = vin.Color.a * DiffuseColor.a;
+
+    return vout;
+}
+
+
+// Vertex shader: pixel lighting + texture.
+// Diffuse output is white RGB with the material alpha; the texture coordinate is forwarded unchanged.
+VSOutputPixelLightingTx VSBasicPixelLightingTx(VSInputNmTx vin)
+{
+    VSOutputPixelLightingTx vout;
+
+    CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, vin.Normal);
+    SetCommonVSOutputParamsPixelLighting;
+
+    vout.Diffuse = float4(1, 1, 1, DiffuseColor.a);
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+// "Bn" variant of VSBasicPixelLightingTx: the normal goes through BiasX2 first.
+VSOutputPixelLightingTx VSBasicPixelLightingTxBn(VSInputNmTx vin)
+{
+    VSOutputPixelLightingTx vout;
+
+    float3 normal = BiasX2(vin.Normal);
+
+    CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, normal);
+    SetCommonVSOutputParamsPixelLighting;
+
+    vout.Diffuse = float4(1, 1, 1, DiffuseColor.a);
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+
+// Vertex shader: pixel lighting + texture + vertex color. 
+// Diffuse output: RGB is the per-vertex color; alpha is vertex alpha times DiffuseColor.a.
+// The texture coordinate is forwarded unchanged.
+VSOutputPixelLightingTx VSBasicPixelLightingTxVc(VSInputNmTxVc vin)
+{
+    VSOutputPixelLightingTx vout;
+
+    CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, vin.Normal);
+    SetCommonVSOutputParamsPixelLighting;
+
+    vout.Diffuse.rgb = vin.Color.rgb;
+    vout.Diffuse.a = vin.Color.a * DiffuseColor.a;
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+// "Bn" variant of VSBasicPixelLightingTxVc: the normal goes through BiasX2 (defined elsewhere) first.
+VSOutputPixelLightingTx VSBasicPixelLightingTxVcBn(VSInputNmTxVc vin)
+{
+    VSOutputPixelLightingTx vout;
+
+    float3 normal = BiasX2(vin.Normal);
+
+    CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, normal);
+    SetCommonVSOutputParamsPixelLighting;
+
+    vout.Diffuse.rgb = vin.Color.rgb;
+    vout.Diffuse.a = vin.Color.a * DiffuseColor.a;
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+
+// Pixel shader: basic.
+// Outputs the interpolated diffuse color with fog applied; the fog factor is read from Specular.w.
+float4 PSBasic(PSInput pin) : SV_Target0
+{
+    float4 color = pin.Diffuse;
+
+    ApplyFog(color, pin.Specular.w);
+
+    return color;
+}
+
+
+// Pixel shader: no fog.
+// Returns the interpolated diffuse color unchanged.
+float4 PSBasicNoFog(PSInputNoFog pin) : SV_Target0
+{
+    return pin.Diffuse;
+}
+
+
+// Pixel shader: texture.
+// Texture sample modulated by the diffuse color, then fogged using Specular.w as the fog factor.
+float4 PSBasicTx(PSInputTx pin) : SV_Target0
+{
+    float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse;
+
+    ApplyFog(color, pin.Specular.w);
+
+    return color;
+}
+
+
+// Pixel shader: texture, no fog.
+// Texture sample modulated by the diffuse color.
+float4 PSBasicTxNoFog(PSInputTxNoFog pin) : SV_Target0
+{
+    return Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse;
+}
+
+
+// Pixel shader: vertex lighting.
+// Adds the interpolated specular color (Specular.rgb) to the diffuse color, then applies fog (Specular.w).
+float4 PSBasicVertexLighting(PSInput pin) : SV_Target0
+{
+    float4 color = pin.Diffuse;
+
+    AddSpecular(color, pin.Specular.rgb);
+    ApplyFog(color, pin.Specular.w);
+
+    return color;
+}
+
+
+// Pixel shader: vertex lighting, no fog.
+// Same as PSBasicVertexLighting without the ApplyFog step.
+float4 PSBasicVertexLightingNoFog(PSInput pin) : SV_Target0
+{
+    float4 color = pin.Diffuse;
+
+    AddSpecular(color, pin.Specular.rgb);
+
+    return color;
+}
+
+
+// Pixel shader: vertex lighting + texture. 
+// Texel modulated by the interpolated diffuse color, plus vertex specular, then fog (factor in Specular.w).
+float4 PSBasicVertexLightingTx(PSInputTx pin) : SV_Target0
+{
+    float4 texel = Texture.Sample(Sampler, pin.TexCoord);
+    float4 shaded = texel * pin.Diffuse;
+
+    AddSpecular(shaded, pin.Specular.rgb);
+    ApplyFog(shaded, pin.Specular.w);
+
+    return shaded;
+}
+
+
+// Pixel shader: vertex lighting + texture, no fog.
+// Same as PSBasicVertexLightingTx without the fog step.
+float4 PSBasicVertexLightingTxNoFog(PSInputTx pin) : SV_Target0
+{
+    float4 texel = Texture.Sample(Sampler, pin.TexCoord);
+    float4 shaded = texel * pin.Diffuse;
+
+    AddSpecular(shaded, pin.Specular.rgb);
+
+    return shaded;
+}
+
+
+// Pixel shader: pixel lighting.
+// Evaluates ComputeLights per pixel from the interpolated world-space position and normal;
+// the fog factor travels in PositionWS.w.
+float4 PSBasicPixelLighting(PSInputPixelLighting pin) : SV_Target0
+{
+    float3 toEye = normalize(EyePosition - pin.PositionWS.xyz);
+    float3 surfaceNormal = normalize(pin.NormalWS);
+    ColorPair lit = ComputeLights(toEye, surfaceNormal, 3);
+
+    float4 shaded = pin.Diffuse;
+    shaded.rgb = shaded.rgb * lit.Diffuse;
+
+    AddSpecular(shaded, lit.Specular);
+    ApplyFog(shaded, pin.PositionWS.w);
+
+    return shaded;
+}
+
+
+// Pixel shader: pixel lighting + texture.
+// As PSBasicPixelLighting, but the base color is the texel modulated by the interpolated diffuse color.
+float4 PSBasicPixelLightingTx(PSInputPixelLightingTx pin) : SV_Target0
+{
+    float3 toEye = normalize(EyePosition - pin.PositionWS.xyz);
+    float3 surfaceNormal = normalize(pin.NormalWS);
+    ColorPair lit = ComputeLights(toEye, surfaceNormal, 3);
+
+    float4 texel = Texture.Sample(Sampler, pin.TexCoord);
+    float4 shaded = texel * pin.Diffuse;
+    shaded.rgb = shaded.rgb * lit.Diffuse;
+
+    AddSpecular(shaded, lit.Specular);
+    ApplyFog(shaded, pin.PositionWS.w);
+
+    return shaded;
+}
diff --git a/Sdk/External/DirectXTK/Src/Shaders/Common.fxh b/Sdk/External/DirectXTK/Src/Shaders/Common.fxh
new file mode 100644
index 0000000..0e71229
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/Shaders/Common.fxh
@@ -0,0 +1,58 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License. 
+//
+// http://go.microsoft.com/fwlink/?LinkId=248926
+// http://go.microsoft.com/fwlink/?LinkId=248929
+// http://go.microsoft.com/fwlink/?LinkID=615561
+// http://create.msdn.com/en-US/education/catalog/sample/stock_effects
+
+
+// Returns dot(position, FogVector) clamped to [0, 1]. FogVector is declared outside this header.
+float ComputeFogFactor(float4 position)
+{
+    return saturate(dot(position, FogVector));
+}
+
+
+// Blends color.rgb toward FogColor * color.a by fogFactor; color.a itself is not modified.
+// NOTE(review): scaling FogColor by alpha suggests premultiplied-alpha colors -- confirm.
+void ApplyFog(inout float4 color, float fogFactor)
+{
+    color.rgb = lerp(color.rgb, FogColor * color.a, fogFactor);
+}
+
+
+// Adds the specular term, scaled by color.a, to color.rgb; color.a itself is not modified.
+void AddSpecular(inout float4 color, float3 specular)
+{
+    color.rgb += specular * color.a;
+}
+
+
+// Intermediate vertex-shader result; the SetCommonVSOutputParams* macros below copy it into the
+// shader-specific output struct.
+struct CommonVSOutput
+{
+    float4 Pos_ps;
+    float4 Diffuse;
+    float3 Specular;
+    float  FogFactor;
+};
+
+
+// Unlit common output: projects the position by WorldViewProj, uses DiffuseColor as the diffuse
+// color, zero specular, and a fog factor computed from the untransformed input position.
+CommonVSOutput ComputeCommonVSOutput(float4 position)
+{
+    CommonVSOutput vout;
+
+    vout.Pos_ps = mul(position, WorldViewProj);
+    vout.Diffuse = DiffuseColor;
+    vout.Specular = 0;
+    vout.FogFactor = ComputeFogFactor(position);
+
+    return vout;
+}
+
+
+// Both macros expect the expanding function to have locals named exactly `vout` (the output struct)
+// and `cout` (a CommonVSOutput). This variant packs the fog factor into Specular.w.
+#define SetCommonVSOutputParams \
+    vout.PositionPS = cout.Pos_ps; \
+    vout.Diffuse = cout.Diffuse; \
+    vout.Specular = float4(cout.Specular, cout.FogFactor);
+
+
+// No-fog variant: copies only the position and diffuse color.
+#define SetCommonVSOutputParamsNoFog \
+    vout.PositionPS = cout.Pos_ps; \
+    vout.Diffuse = cout.Diffuse;
diff --git a/Sdk/External/DirectXTK/Src/Shaders/CompileShaders.cmd b/Sdk/External/DirectXTK/Src/Shaders/CompileShaders.cmd
new file mode 100644
index 0000000..c2f4be0
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/Shaders/CompileShaders.cmd
@@ -0,0 +1,310 @@
+@echo off
+rem Copyright (c) Microsoft Corporation. All rights reserved.
+rem Licensed under the MIT License.
+
+rem Compiles every shader entry point listed below into Compiled\*.inc headers using fxc.
+rem Usage: CompileShaders          (PC build)
+rem        CompileShaders xbox     (Xbox One build; an optional second argument "noprecompile" is accepted)
+rem Source files are referenced by bare name (%1.fx / %1.hlsl), so run this from the shader directory.
+
+setlocal
+set error=0
+
+rem Options passed to every fxc invocation (see the FXC command-line documentation for each switch).
+set FXCOPTS=/nologo /WX /Ges /Zi /Zpc /Qstrip_reflect /Qstrip_debug
+
+if %1.==xbox. goto continuexbox
+if %1.==. goto continuepc
+echo usage: CompileShaders [xbox]
+exit /b
+
+:continuexbox
+set XBOXOPTS=/D__XBOX_DISABLE_SHADER_NAME_EMPLACEMENT
+if NOT %2.==noprecompile. goto skipnoprecompile
+set XBOXOPTS=%XBOXOPTS% /D__XBOX_DISABLE_PRECOMPILE=1
+:skipnoprecompile
+
+rem Probe the known XDK environment variables in turn for FXC.exe; bail out to :needxdk if none match.
+set XBOXFXC="%XboxOneXDKLatest%\xdk\FXC\amd64\FXC.exe"
+if exist %XBOXFXC% goto continue
+set XBOXFXC="%XboxOneXDKLatest%xdk\FXC\amd64\FXC.exe"
+if exist %XBOXFXC% goto continue
+set XBOXFXC="%XboxOneXDKBuild%xdk\FXC\amd64\FXC.exe"
+if exist %XBOXFXC% goto continue
+set XBOXFXC="%DurangoXDK%xdk\FXC\amd64\FXC.exe"
+if not exist %XBOXFXC% goto needxdk
+goto continue
+
+:continuepc
+
+rem Probe the Windows SDK environment variables for fxc.exe; fall back to whatever fxc.exe is on PATH.
+set PCFXC="%WindowsSdkVerBinPath%x86\fxc.exe"
+if exist %PCFXC% goto continue
+set PCFXC="%WindowsSdkBinPath%%WindowsSDKVersion%\x86\fxc.exe"
+if exist %PCFXC% goto continue
+set PCFXC="%WindowsSdkDir%bin\%WindowsSDKVersion%\x86\fxc.exe"
+if exist %PCFXC% goto continue
+
+set PCFXC=fxc.exe
+
+:continue
+@if not exist Compiled mkdir Compiled
+rem Each call below is "<source base name> <vs|ps> <entry point>". %1 is appended to the label name,
+rem so "xbox" routes to the :CompileShader*xbox subroutines and no argument routes to the PC ones.
+call :CompileShader%1 AlphaTestEffect vs VSAlphaTest
+call :CompileShader%1 AlphaTestEffect vs VSAlphaTestNoFog
+call :CompileShader%1 AlphaTestEffect vs VSAlphaTestVc
+call :CompileShader%1 AlphaTestEffect vs VSAlphaTestVcNoFog
+
+call :CompileShader%1 AlphaTestEffect ps PSAlphaTestLtGt
+call :CompileShader%1 AlphaTestEffect ps PSAlphaTestLtGtNoFog
+call :CompileShader%1 AlphaTestEffect ps PSAlphaTestEqNe
+call :CompileShader%1 AlphaTestEffect ps PSAlphaTestEqNeNoFog
+
+call :CompileShader%1 BasicEffect vs VSBasic
+call :CompileShader%1 BasicEffect vs VSBasicNoFog
+call :CompileShader%1 BasicEffect vs VSBasicVc
+call :CompileShader%1 BasicEffect vs VSBasicVcNoFog
+call :CompileShader%1 BasicEffect vs VSBasicTx
+call :CompileShader%1 BasicEffect vs VSBasicTxNoFog
+call :CompileShader%1 BasicEffect vs VSBasicTxVc
+call :CompileShader%1 BasicEffect vs VSBasicTxVcNoFog
+
+call :CompileShader%1 BasicEffect vs VSBasicVertexLighting
+call :CompileShader%1 BasicEffect vs VSBasicVertexLightingBn
+call :CompileShader%1 BasicEffect vs VSBasicVertexLightingVc
+call :CompileShader%1 BasicEffect vs VSBasicVertexLightingVcBn
+call :CompileShader%1 BasicEffect vs VSBasicVertexLightingTx
+call :CompileShader%1 BasicEffect vs VSBasicVertexLightingTxBn
+call :CompileShader%1 BasicEffect vs VSBasicVertexLightingTxVc
+call :CompileShader%1 BasicEffect vs VSBasicVertexLightingTxVcBn
+
+call :CompileShader%1 BasicEffect vs VSBasicOneLight
+call :CompileShader%1 BasicEffect vs VSBasicOneLightBn
+call :CompileShader%1 BasicEffect vs VSBasicOneLightVc
+call :CompileShader%1 BasicEffect vs VSBasicOneLightVcBn
+call :CompileShader%1 BasicEffect vs VSBasicOneLightTx
+call :CompileShader%1 BasicEffect vs VSBasicOneLightTxBn
+call :CompileShader%1 BasicEffect vs VSBasicOneLightTxVc
+call :CompileShader%1 BasicEffect vs VSBasicOneLightTxVcBn
+
+call :CompileShader%1 BasicEffect vs VSBasicPixelLighting
+call :CompileShader%1 BasicEffect vs VSBasicPixelLightingBn
+call :CompileShader%1 BasicEffect vs VSBasicPixelLightingVc
+call :CompileShader%1 BasicEffect vs VSBasicPixelLightingVcBn
+call :CompileShader%1 BasicEffect vs VSBasicPixelLightingTx
+call :CompileShader%1 BasicEffect vs VSBasicPixelLightingTxBn
+call :CompileShader%1 BasicEffect vs VSBasicPixelLightingTxVc
+call :CompileShader%1 BasicEffect vs VSBasicPixelLightingTxVcBn
+
+call :CompileShader%1 BasicEffect ps PSBasic
+call :CompileShader%1 BasicEffect ps PSBasicNoFog
+call :CompileShader%1 BasicEffect ps PSBasicTx
+call :CompileShader%1 BasicEffect ps PSBasicTxNoFog
+
+call :CompileShader%1 BasicEffect ps PSBasicVertexLighting
+call :CompileShader%1 BasicEffect ps PSBasicVertexLightingNoFog
+call :CompileShader%1 BasicEffect ps PSBasicVertexLightingTx
+call :CompileShader%1 BasicEffect ps PSBasicVertexLightingTxNoFog
+
+call :CompileShader%1 BasicEffect ps PSBasicPixelLighting
+call :CompileShader%1 BasicEffect ps PSBasicPixelLightingTx
+
+call :CompileShader%1 DualTextureEffect vs VSDualTexture
+call :CompileShader%1 DualTextureEffect vs VSDualTextureNoFog
+call :CompileShader%1 DualTextureEffect vs VSDualTextureVc
+call :CompileShader%1 DualTextureEffect vs VSDualTextureVcNoFog
+
+call :CompileShader%1 DualTextureEffect ps PSDualTexture
+call :CompileShader%1 DualTextureEffect ps PSDualTextureNoFog
+
+call :CompileShader%1 EnvironmentMapEffect vs VSEnvMap
+call :CompileShader%1 EnvironmentMapEffect vs VSEnvMapBn
+call :CompileShader%1 EnvironmentMapEffect vs VSEnvMapFresnel
+call :CompileShader%1 EnvironmentMapEffect vs VSEnvMapFresnelBn
+call :CompileShader%1 EnvironmentMapEffect vs VSEnvMapOneLight
+call :CompileShader%1 EnvironmentMapEffect vs VSEnvMapOneLightBn
+call :CompileShader%1 EnvironmentMapEffect vs VSEnvMapOneLightFresnel
+call :CompileShader%1 EnvironmentMapEffect vs VSEnvMapOneLightFresnelBn
+call :CompileShader%1 EnvironmentMapEffect vs VSEnvMapPixelLighting
+call :CompileShader%1 EnvironmentMapEffect vs VSEnvMapPixelLightingBn
+call :CompileShaderSM4%1 EnvironmentMapEffect vs VSEnvMapPixelLightingSM4
+call :CompileShaderSM4%1 EnvironmentMapEffect vs VSEnvMapPixelLightingBnSM4
+
+call :CompileShader%1 EnvironmentMapEffect ps PSEnvMap
+call :CompileShader%1 EnvironmentMapEffect ps PSEnvMapNoFog
+call :CompileShader%1 EnvironmentMapEffect ps PSEnvMapSpecular
+call :CompileShader%1 EnvironmentMapEffect ps PSEnvMapSpecularNoFog
+call :CompileShader%1 EnvironmentMapEffect ps PSEnvMapPixelLighting
+call :CompileShader%1 EnvironmentMapEffect ps PSEnvMapPixelLightingNoFog
+call :CompileShader%1 EnvironmentMapEffect ps PSEnvMapPixelLightingFresnel
+call :CompileShader%1 EnvironmentMapEffect ps PSEnvMapPixelLightingFresnelNoFog
+
+call :CompileShader%1 EnvironmentMapEffect ps PSEnvMapSpherePixelLighting
+call :CompileShader%1 EnvironmentMapEffect ps PSEnvMapSpherePixelLightingNoFog
+call :CompileShader%1 EnvironmentMapEffect ps PSEnvMapSpherePixelLightingFresnel
+call :CompileShader%1 EnvironmentMapEffect ps PSEnvMapSpherePixelLightingFresnelNoFog
+
+call :CompileShaderSM4%1 EnvironmentMapEffect ps PSEnvMapDualParabolaPixelLighting
+call :CompileShaderSM4%1 EnvironmentMapEffect ps PSEnvMapDualParabolaPixelLightingNoFog
+call :CompileShaderSM4%1 EnvironmentMapEffect ps PSEnvMapDualParabolaPixelLightingFresnel
+call :CompileShaderSM4%1 EnvironmentMapEffect ps PSEnvMapDualParabolaPixelLightingFresnelNoFog
+
+call :CompileShader%1 SkinnedEffect vs VSSkinnedVertexLightingOneBone
+call :CompileShader%1 SkinnedEffect vs VSSkinnedVertexLightingOneBoneBn
+call :CompileShader%1 SkinnedEffect vs VSSkinnedVertexLightingTwoBones
+call :CompileShader%1 SkinnedEffect vs VSSkinnedVertexLightingTwoBonesBn
+call :CompileShader%1 SkinnedEffect vs VSSkinnedVertexLightingFourBones
+call :CompileShader%1 SkinnedEffect vs VSSkinnedVertexLightingFourBonesBn
+
+call :CompileShader%1 SkinnedEffect vs VSSkinnedOneLightOneBone
+call :CompileShader%1 SkinnedEffect vs VSSkinnedOneLightOneBoneBn
+call :CompileShader%1 SkinnedEffect vs VSSkinnedOneLightTwoBones
+call :CompileShader%1 SkinnedEffect vs VSSkinnedOneLightTwoBonesBn
+call :CompileShader%1 SkinnedEffect vs VSSkinnedOneLightFourBones
+call :CompileShader%1 SkinnedEffect vs VSSkinnedOneLightFourBonesBn
+
+call :CompileShader%1 SkinnedEffect vs VSSkinnedPixelLightingOneBone
+call :CompileShader%1 SkinnedEffect vs VSSkinnedPixelLightingOneBoneBn
+call :CompileShader%1 SkinnedEffect vs VSSkinnedPixelLightingTwoBones
+call :CompileShader%1 SkinnedEffect vs VSSkinnedPixelLightingTwoBonesBn
+call :CompileShader%1 SkinnedEffect vs VSSkinnedPixelLightingFourBones
+call :CompileShader%1 SkinnedEffect vs VSSkinnedPixelLightingFourBonesBn
+
+call :CompileShader%1 SkinnedEffect ps PSSkinnedVertexLighting
+call :CompileShader%1 SkinnedEffect ps PSSkinnedVertexLightingNoFog
+call :CompileShader%1 SkinnedEffect ps PSSkinnedPixelLighting
+
+call :CompileShaderSM4%1 NormalMapEffect vs VSNormalPixelLightingTx
+call :CompileShaderSM4%1 NormalMapEffect vs VSNormalPixelLightingTxBn
+call :CompileShaderSM4%1 NormalMapEffect vs VSNormalPixelLightingTxVc
+call :CompileShaderSM4%1 NormalMapEffect vs VSNormalPixelLightingTxVcBn
+
+call :CompileShaderSM4%1 NormalMapEffect ps PSNormalPixelLightingTx
+call :CompileShaderSM4%1 NormalMapEffect ps PSNormalPixelLightingTxNoFog
+call :CompileShaderSM4%1 NormalMapEffect ps PSNormalPixelLightingTxNoSpec
+call :CompileShaderSM4%1 NormalMapEffect ps PSNormalPixelLightingTxNoFogSpec
+
+call :CompileShaderSM4%1 PBREffect vs VSConstant
+call :CompileShaderSM4%1 PBREffect vs VSConstantVelocity
+call :CompileShaderSM4%1 PBREffect vs VSConstantBn
+call :CompileShaderSM4%1 PBREffect vs VSConstantVelocityBn
+
+call :CompileShaderSM4%1 PBREffect ps PSConstant
+call :CompileShaderSM4%1 PBREffect ps PSTextured
+call :CompileShaderSM4%1 PBREffect ps PSTexturedEmissive
+call :CompileShaderSM4%1 PBREffect ps PSTexturedVelocity
+call :CompileShaderSM4%1 PBREffect ps PSTexturedEmissiveVelocity
+
+call :CompileShaderSM4%1 DebugEffect vs VSDebug
+call :CompileShaderSM4%1 DebugEffect vs VSDebugBn
+call :CompileShaderSM4%1 DebugEffect vs VSDebugVc
+call :CompileShaderSM4%1 DebugEffect vs VSDebugVcBn
+
+call :CompileShaderSM4%1 DebugEffect ps PSHemiAmbient
+call :CompileShaderSM4%1 DebugEffect ps PSRGBNormals
+call :CompileShaderSM4%1 DebugEffect ps PSRGBTangents
+call :CompileShaderSM4%1 DebugEffect ps PSRGBBiTangents
+
+call :CompileShader%1 SpriteEffect vs SpriteVertexShader
+call :CompileShader%1 SpriteEffect ps SpritePixelShader
+
+call :CompileShader%1 DGSLEffect vs main
+call :CompileShader%1 DGSLEffect vs mainVc
+call :CompileShader%1 DGSLEffect vs main1Bones
+call :CompileShader%1 DGSLEffect vs main1BonesVc
+call :CompileShader%1 DGSLEffect vs main2Bones
+call :CompileShader%1 DGSLEffect vs main2BonesVc
+call :CompileShader%1 DGSLEffect vs main4Bones
+call :CompileShader%1 DGSLEffect vs main4BonesVc
+
+call :CompileShaderHLSL%1 DGSLUnlit ps main
+call :CompileShaderHLSL%1 DGSLLambert ps main
+call :CompileShaderHLSL%1 DGSLPhong ps main
+
+call :CompileShaderHLSL%1 DGSLUnlit ps mainTk
+call :CompileShaderHLSL%1 DGSLLambert ps mainTk
+call :CompileShaderHLSL%1 DGSLPhong ps mainTk
+
+call :CompileShaderHLSL%1 DGSLUnlit ps mainTx
+call :CompileShaderHLSL%1 DGSLLambert ps mainTx
+call :CompileShaderHLSL%1 DGSLPhong ps mainTx
+
+call :CompileShaderHLSL%1 DGSLUnlit ps mainTxTk
+call :CompileShaderHLSL%1 DGSLLambert ps mainTxTk
+call :CompileShaderHLSL%1 DGSLPhong ps mainTxTk
+
+call :CompileShaderSM4%1 PostProcess vs VSQuad
+call :CompileShaderSM4%1 PostProcess ps PSCopy
+call :CompileShaderSM4%1 PostProcess ps PSMonochrome
+call :CompileShaderSM4%1 PostProcess ps PSSepia
+call :CompileShaderSM4%1 PostProcess ps PSDownScale2x2
+call :CompileShaderSM4%1 PostProcess ps PSDownScale4x4
+call :CompileShaderSM4%1 PostProcess ps PSGaussianBlur5x5
+call :CompileShaderSM4%1 PostProcess ps PSBloomExtract
+call :CompileShaderSM4%1 PostProcess ps PSBloomBlur
+call :CompileShaderSM4%1 PostProcess ps PSMerge
+call :CompileShaderSM4%1 PostProcess ps PSBloomCombine
+
+call :CompileShaderSM4%1 ToneMap vs VSQuad
+call :CompileShaderSM4%1 ToneMap ps PSCopy
+call :CompileShaderSM4%1 ToneMap ps PSSaturate
+call :CompileShaderSM4%1 ToneMap ps PSReinhard
+call :CompileShaderSM4%1 ToneMap ps PSACESFilmic
+call :CompileShaderSM4%1 ToneMap ps PS_SRGB
+call :CompileShaderSM4%1 ToneMap ps PSSaturate_SRGB
+call :CompileShaderSM4%1 ToneMap ps PSReinhard_SRGB
+call :CompileShaderSM4%1 ToneMap ps PSACESFilmic_SRGB
+call :CompileShaderSM4%1 ToneMap ps PSHDR10
+
+rem The remaining HDR10 tone-map variants are built for the xbox target only.
+if NOT %1.==xbox. goto skipxboxonly
+
+call :CompileShaderSM4xbox ToneMap ps PSHDR10_Saturate
+call :CompileShaderSM4xbox ToneMap ps PSHDR10_Reinhard
+call :CompileShaderSM4xbox ToneMap ps PSHDR10_ACESFilmic
+call :CompileShaderSM4xbox ToneMap ps PSHDR10_Saturate_SRGB
+call :CompileShaderSM4xbox ToneMap ps PSHDR10_Reinhard_SRGB
+call :CompileShaderSM4xbox ToneMap ps PSHDR10_ACESFilmic_SRGB
+
+:skipxboxonly
+
+echo.
+
+rem NOTE(review): %error% is only reported here; the final "exit /b" passes no explicit exit code,
+rem so a calling build script may not be able to detect compilation failures -- confirm.
+if %error% == 0 (
+    echo Shaders compiled ok
+) else (
+    echo There were shader compilation errors!
+)
+
+endlocal
+exit /b
+
+rem Subroutines. Arguments: %1 = source base name, %2 = stage (vs or ps), %3 = entry point.
+rem Each writes Compiled\<name>_<entry>.inc (and .pdb) and sets error=1 if fxc fails.
+
+rem PC build of a .fx source, target profile <stage>_4_0_level_9_1.
+:CompileShader
+set fxc=%PCFXC% %1.fx %FXCOPTS% /T%2_4_0_level_9_1 /E%3 /FhCompiled\%1_%3.inc /FdCompiled\%1_%3.pdb /Vn%1_%3
+echo.
+echo %fxc%
+%fxc% || set error=1
+exit /b
+
+rem PC build of a .fx source, target profile <stage>_4_0.
+:CompileShaderSM4
+set fxc=%PCFXC% %1.fx %FXCOPTS% /T%2_4_0 /E%3 /FhCompiled\%1_%3.inc /FdCompiled\%1_%3.pdb /Vn%1_%3
+echo.
+echo %fxc%
+%fxc% || set error=1
+exit /b
+
+rem PC build of a .hlsl source, target profile <stage>_4_0_level_9_1.
+:CompileShaderHLSL
+set fxc=%PCFXC% %1.hlsl %FXCOPTS% /T%2_4_0_level_9_1 /E%3 /FhCompiled\%1_%3.inc /FdCompiled\%1_%3.pdb /Vn%1_%3
+echo.
+echo %fxc%
+%fxc% || set error=1
+exit /b
+
+rem Xbox build of a .fx source: both labels share one body, target profile <stage>_5_0,
+rem output files are prefixed with "XboxOne".
+:CompileShaderxbox
+:CompileShaderSM4xbox
+set fxc=%XBOXFXC% %1.fx %FXCOPTS% /T%2_5_0 %XBOXOPTS% /E%3 /FhCompiled\XboxOne%1_%3.inc /FdCompiled\XboxOne%1_%3.pdb /Vn%1_%3
+echo.
+echo %fxc%
+%fxc% || set error=1
+exit /b
+
+rem Xbox build of a .hlsl source, target profile <stage>_5_0.
+:CompileShaderHLSLxbox
+set fxc=%XBOXFXC% %1.hlsl %FXCOPTS% /T%2_5_0 %XBOXOPTS% /E%3 /FhCompiled\XboxOne%1_%3.inc /FdCompiled\XboxOne%1_%3.pdb /Vn%1_%3
+echo.
+echo %fxc%
+%fxc% || set error=1
+exit /b
+
+:needxdk
+echo ERROR: CompileShaders xbox requires the Microsoft Xbox One XDK
+echo        (try re-running from the XDK Command Prompt)
diff --git a/Sdk/External/DirectXTK/Src/Shaders/DGSLEffect.fx b/Sdk/External/DirectXTK/Src/Shaders/DGSLEffect.fx
new file mode 100644
index 0000000..73c1f24
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/Shaders/DGSLEffect.fx
@@ -0,0 +1,290 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License. 
+
+//
+// Based on the Visual Studio 3D Starter Kit
+//
+// http://aka.ms/vs3dkit
+//
+
+// Constant buffer and vertex layouts below are part of the interface with the host code
+// and the DGSL pixel shaders; they are kept exactly as declared.
+cbuffer MaterialVars : register (b0)
+{
+    float4 MaterialAmbient;
+    float4 MaterialDiffuse;
+    float4 MaterialSpecular;
+    float4 MaterialEmissive;
+    float MaterialSpecularPower;
+};
+
+cbuffer ObjectVars : register(b2)
+{
+    float4x4 LocalToWorld4x4;
+    float4x4 LocalToProjected4x4;
+    float4x4 WorldToLocal4x4;
+    float4x4 WorldToView4x4;
+    float4x4 UVTransform4x4;
+    float3 EyePosition;
+};
+
+cbuffer BoneVars : register(b4)
+{
+    float4x3 Bones[72];
+};
+
+struct A2V
+{
+    float4 pos : SV_Position;
+    float3 normal : NORMAL0;
+    float4 tangent : TANGENT0;
+    float2 uv : TEXCOORD0;
+};
+
+struct A2V_Vc
+{
+    float4 pos : SV_Position;
+    float3 normal : NORMAL0;
+    float4 tangent : TANGENT0;
+    float4 color : COLOR0;
+    float2 uv : TEXCOORD0;
+};
+
+struct A2V_Weights
+{
+    float4 pos : SV_Position;
+    float3 normal : NORMAL0;
+    float4 tangent : TANGENT0;
+    float2 uv : TEXCOORD0;
+    uint4 boneIndices : BLENDINDICES0;
+    float4 blendWeights : BLENDWEIGHT0;
+};
+
+struct A2V_WeightsVc
+{
+    float4 pos : SV_Position;
+    float3 normal : NORMAL0;
+    float4 tangent : TANGENT0;
+    float4 color : COLOR0;
+    float2 uv : TEXCOORD0;
+    uint4 boneIndices : BLENDINDICES0;
+    float4 blendWeights : BLENDWEIGHT0;
+};
+
+struct V2P
+{
+    float4 pos : SV_POSITION;
+    float4 diffuse : COLOR;
+    float2 uv : TEXCOORD0;
+    float3 worldNorm : TEXCOORD1;
+    float3 worldPos : TEXCOORD2;
+    float3 toEye : TEXCOORD3;
+    float4 tangent : TEXCOORD4;
+    float3 normal : TEXCOORD5;
+};
+
+
+// Skinning helper functions
+
+// Blends the first boneCount bone matrices by the vertex weights and transforms the
+// vertex position, normal and tangent in place.
+void Skin(inout A2V_Weights vertex, uniform int boneCount)
+{
+    float4x3 blended = 0;
+
+    [unroll]
+    for (int bone = 0; bone < boneCount; bone++)
+    {
+        blended += Bones[vertex.boneIndices[bone]] * vertex.blendWeights[bone];
+    }
+
+    // Normal and tangent use only the upper 3x3 of the blended matrix.
+    float3x3 blended3x3 = (float3x3)blended;
+
+    vertex.pos.xyz = mul(vertex.pos, blended);
+    vertex.normal = mul(vertex.normal, blended3x3);
+    vertex.tangent.xyz = mul(vertex.tangent.xyz, blended3x3);
+}
+
+// Same as Skin, for the vertex layout that also carries a per-vertex color.
+void SkinVc(inout A2V_WeightsVc vertex, uniform int boneCount)
+{
+    float4x3 blended = 0;
+
+    [unroll]
+    for (int bone = 0; bone < boneCount; bone++)
+    {
+        blended += Bones[vertex.boneIndices[bone]] * vertex.blendWeights[bone];
+    }
+
+    // Normal and tangent use only the upper 3x3 of the blended matrix.
+    float3x3 blended3x3 = (float3x3)blended;
+
+    vertex.pos.xyz = mul(vertex.pos, blended);
+    vertex.normal = mul(vertex.normal, blended3x3);
+    vertex.tangent.xyz = mul(vertex.tangent.xyz, blended3x3);
+}
+
+
+// Vertex shader: no per-vertex-color, no skinning
+V2P main(A2V vertex)
+{
+    float3 worldPosition = mul(vertex.pos, LocalToWorld4x4).xyz;
+    float4 uvHomogeneous = float4(vertex.uv.x, vertex.uv.y, 0, 1);
+
+    V2P v2p;
+    v2p.pos = mul(vertex.pos, LocalToProjected4x4);
+    v2p.diffuse = MaterialDiffuse;
+    v2p.uv = mul(uvHomogeneous, UVTransform4x4).xy;
+    v2p.worldNorm = mul(vertex.normal, (float3x3)LocalToWorld4x4);
+    v2p.worldPos = worldPosition;
+    v2p.toEye = EyePosition - worldPosition;
+    v2p.tangent = vertex.tangent;
+    v2p.normal = vertex.normal;
+
+    return v2p;
+}
+
+
+// Vertex shader: per-vertex-color, no skinning
+V2P mainVc(A2V_Vc vertex)
+{
+    float3 worldPosition = mul(vertex.pos, LocalToWorld4x4).xyz;
+    float4 uvHomogeneous = float4(vertex.uv.x, vertex.uv.y, 0, 1);
+
+    V2P v2p;
+    v2p.pos = mul(vertex.pos, LocalToProjected4x4);
+    v2p.diffuse = vertex.color * MaterialDiffuse;
+    v2p.uv = mul(uvHomogeneous, UVTransform4x4).xy;
+    v2p.worldNorm = mul(vertex.normal, (float3x3)LocalToWorld4x4);
+    v2p.worldPos = worldPosition;
+    v2p.toEye = EyePosition - worldPosition;
+    v2p.tangent = vertex.tangent;
+    v2p.normal = vertex.normal;
+
+    return v2p;
+}
+
+
+// Vertex shader: no per-vertex-color, 1-bone skinning
+V2P main1Bones(A2V_Weights vertex)
+{
+    // Skinning rewrites vertex.pos/normal/tangent before anything below reads them.
+    Skin(vertex, 1);
+
+    float3 worldPosition = mul(vertex.pos, LocalToWorld4x4).xyz;
+    float4 uvHomogeneous = float4(vertex.uv.x, vertex.uv.y, 0, 1);
+
+    V2P v2p;
+    v2p.pos = mul(vertex.pos, LocalToProjected4x4);
+    v2p.diffuse = MaterialDiffuse;
+    v2p.uv = mul(uvHomogeneous, UVTransform4x4).xy;
+    v2p.worldNorm = mul(vertex.normal, (float3x3)LocalToWorld4x4);
+    v2p.worldPos = worldPosition;
+    v2p.toEye = EyePosition - worldPosition;
+    v2p.tangent = vertex.tangent;
+    v2p.normal = vertex.normal;
+
+    return v2p;
+}
+
+// Vertex shader: no per-vertex-color, 2-bone skinning
+V2P main2Bones(A2V_Weights vertex)
+{
+    // Skinning rewrites vertex.pos/normal/tangent before anything below reads them.
+    Skin(vertex, 2);
+
+    float3 worldPosition = mul(vertex.pos, LocalToWorld4x4).xyz;
+    float4 uvHomogeneous = float4(vertex.uv.x, vertex.uv.y, 0, 1);
+
+    V2P v2p;
+    v2p.pos = mul(vertex.pos, LocalToProjected4x4);
+    v2p.diffuse = MaterialDiffuse;
+    v2p.uv = mul(uvHomogeneous, UVTransform4x4).xy;
+    v2p.worldNorm = mul(vertex.normal, (float3x3)LocalToWorld4x4);
+    v2p.worldPos = worldPosition;
+    v2p.toEye = EyePosition - worldPosition;
+    v2p.tangent = vertex.tangent;
+    v2p.normal = vertex.normal;
+
+    return v2p;
+}
+
+// Vertex shader: no per-vertex-color, 4-bone skinning
+V2P main4Bones(A2V_Weights vertex)
+{
+    // Skinning rewrites vertex.pos/normal/tangent before anything below reads them.
+    Skin(vertex, 4);
+
+    float3 worldPosition = mul(vertex.pos, LocalToWorld4x4).xyz;
+    float4 uvHomogeneous = float4(vertex.uv.x, vertex.uv.y, 0, 1);
+
+    V2P v2p;
+    v2p.pos = mul(vertex.pos, LocalToProjected4x4);
+    v2p.diffuse = MaterialDiffuse;
+    v2p.uv = mul(uvHomogeneous, UVTransform4x4).xy;
+    v2p.worldNorm = mul(vertex.normal, (float3x3)LocalToWorld4x4);
+    v2p.worldPos = worldPosition;
+    v2p.toEye = EyePosition - worldPosition;
+    v2p.tangent = vertex.tangent;
+    v2p.normal = vertex.normal;
+
+    return v2p;
+}
+
+
+// Vertex shader: per-vertex-color, 1-bone skinning
+V2P main1BonesVc(A2V_WeightsVc vertex)
+{
+    // Skinning rewrites vertex.pos/normal/tangent before anything below reads them.
+    SkinVc(vertex, 1);
+
+    float3 worldPosition = mul(vertex.pos, LocalToWorld4x4).xyz;
+    float4 uvHomogeneous = float4(vertex.uv.x, vertex.uv.y, 0, 1);
+
+    V2P v2p;
+    v2p.pos = mul(vertex.pos, LocalToProjected4x4);
+    v2p.diffuse = vertex.color * MaterialDiffuse;
+    v2p.uv = mul(uvHomogeneous, UVTransform4x4).xy;
+    v2p.worldNorm = mul(vertex.normal, (float3x3)LocalToWorld4x4);
+    v2p.worldPos = worldPosition;
+    v2p.toEye = EyePosition - worldPosition;
+    v2p.tangent = vertex.tangent;
+    v2p.normal = vertex.normal;
+
+    return v2p;
+}
+
+// Vertex shader: per-vertex-color, 2-bone skinning
+V2P main2BonesVc(A2V_WeightsVc vertex)
+{
+    // Skinning rewrites vertex.pos/normal/tangent before anything below reads them.
+    SkinVc(vertex, 2);
+
+    float3 worldPosition = mul(vertex.pos, LocalToWorld4x4).xyz;
+    float4 uvHomogeneous = float4(vertex.uv.x, vertex.uv.y, 0, 1);
+
+    V2P v2p;
+    v2p.pos = mul(vertex.pos, LocalToProjected4x4);
+    v2p.diffuse = vertex.color * MaterialDiffuse;
+    v2p.uv = mul(uvHomogeneous, UVTransform4x4).xy;
+    v2p.worldNorm = mul(vertex.normal, (float3x3)LocalToWorld4x4);
+    v2p.worldPos = worldPosition;
+    v2p.toEye = EyePosition - worldPosition;
+    v2p.tangent = vertex.tangent;
+    v2p.normal = vertex.normal;
+
+    return v2p;
+}
+
+// Vertex shader: per-vertex-color, 4-bone skinning
+V2P main4BonesVc(A2V_WeightsVc vertex)
+{
+    // Skinning rewrites vertex.pos/normal/tangent before anything below reads them.
+    SkinVc(vertex, 4);
+
+    float3 worldPosition = mul(vertex.pos, LocalToWorld4x4).xyz;
+    float4 uvHomogeneous = float4(vertex.uv.x, vertex.uv.y, 0, 1);
+
+    V2P v2p;
+    v2p.pos = mul(vertex.pos, LocalToProjected4x4);
+    v2p.diffuse = vertex.color * MaterialDiffuse;
+    v2p.uv = mul(uvHomogeneous, UVTransform4x4).xy;
+    v2p.worldNorm = mul(vertex.normal, (float3x3)LocalToWorld4x4);
+    v2p.worldPos = worldPosition;
+    v2p.toEye = EyePosition - worldPosition;
+    v2p.tangent = vertex.tangent;
+    v2p.normal = vertex.normal;
+
+    return v2p;
+}
diff --git a/Sdk/External/DirectXTK/Src/Shaders/DGSLLambert.hlsl b/Sdk/External/DirectXTK/Src/Shaders/DGSLLambert.hlsl
new file mode 100644
index 0000000..363fed2
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/Shaders/DGSLLambert.hlsl
@@ -0,0 +1,174 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License. 
+
+//
+// This file was generated by exporting HLSL from Visual Studio's default "Lambert" material, and then modified to handle both texture scenarios, multiple lights, and work with FL 9.x
+// \Common7\IDE\Extensions\Microsoft\VsGraphics\Assets\Effects\Lambert.dgsl
+//
+
+Texture2D Texture1 : register( t0 );
+
+SamplerState TexSampler : register( s0 );
+
+// Constant buffer and interpolant layouts are part of the interface with the host code
+// and the DGSL vertex shaders; they are kept exactly as declared.
+cbuffer MaterialVars : register (b0)
+{
+    float4 MaterialAmbient;
+    float4 MaterialDiffuse;
+    float4 MaterialSpecular;
+    float4 MaterialEmissive;
+    float MaterialSpecularPower;
+};
+
+cbuffer LightVars : register (b1)
+{
+    float4 AmbientLight;
+    float4 LightColor[4];
+    float4 LightAttenuation[4];
+    float3 LightDirection[4];
+    float LightSpecularIntensity[4];
+    uint IsPointLight[4];
+    uint ActiveLights;
+}
+
+cbuffer ObjectVars : register(b2)
+{
+    float4x4 LocalToWorld4x4;
+    float4x4 LocalToProjected4x4;
+    float4x4 WorldToLocal4x4;
+    float4x4 WorldToView4x4;
+    float4x4 UVTransform4x4;
+    float3 EyePosition;
+};
+
+cbuffer MiscVars : register(b3)
+{
+    float ViewportWidth;
+    float ViewportHeight;
+    float Time;
+};
+
+struct V2P
+{
+    float4 pos : SV_POSITION;
+    float4 diffuse : COLOR;
+    float2 uv : TEXCOORD0;
+    float3 worldNorm : TEXCOORD1;
+    float3 worldPos : TEXCOORD2;
+    float3 toEye : TEXCOORD3;
+    float4 tangent : TEXCOORD4;
+    float3 normal : TEXCOORD5;
+};
+
+struct P2F
+{
+    float4 fragment : SV_Target;
+};
+
+//
+// lambert lighting function
+//
+// Returns saturate(dot(lightNormal, surfaceNormal)) * lightColor * pixelColor for one light.
+float3 LambertLighting(
+    float3 lightNormal,
+    float3 surfaceNormal,
+    float3 lightColor,
+    float3 pixelColor
+    )
+{
+    float nDotL = saturate(dot(lightNormal, surfaceNormal));
+    return nDotL * lightColor * pixelColor;
+}
+
+//
+// combines a float3 RGB value with an alpha value into a float4
+//
+float4 CombineRGBWithAlpha(float3 rgb, float a)
+{
+    return float4(rgb, a);
+}
+
+//
+// ambient term plus the Lambert contribution of all four light slots, clamped to [0, 1]
+//
+float3 AccumulateLambert(float3 interpolatedNormal, float3 diffuseRgb)
+{
+    float3 surfaceNormal = normalize(interpolatedNormal);
+
+    float3 lit = MaterialAmbient.rgb * AmbientLight.rgb;
+    [unroll]
+    for (int light = 0; light < 4; light++)
+    {
+        lit += LambertLighting(LightDirection[light], surfaceNormal, LightColor[light].rgb, diffuseRgb);
+    }
+
+    return saturate(lit);
+}
+
+// Untextured: lit color with the interpolated diffuse alpha.
+P2F main(V2P pixel)
+{
+    float3 lit = AccumulateLambert(pixel.worldNorm, pixel.diffuse.rgb);
+
+    P2F result;
+    result.fragment = CombineRGBWithAlpha(lit, pixel.diffuse.a);
+
+    return result;
+}
+
+// Untextured, with fully transparent pixels discarded.
+P2F mainTk(V2P pixel)
+{
+    float3 lit = AccumulateLambert(pixel.worldNorm, pixel.diffuse.rgb);
+
+    P2F result;
+    result.fragment = CombineRGBWithAlpha(lit, pixel.diffuse.a);
+
+    if (result.fragment.a == 0.0f) discard;
+
+    return result;
+}
+
+// Textured: texel RGB modulated by the lit color, texel alpha by the diffuse alpha.
+P2F mainTx(V2P pixel)
+{
+    float3 lit = AccumulateLambert(pixel.worldNorm, pixel.diffuse.rgb);
+    float4 texel = Texture1.Sample(TexSampler, pixel.uv);
+
+    P2F result;
+    result.fragment = CombineRGBWithAlpha(texel.rgb * lit, texel.a * pixel.diffuse.a);
+
+    return result;
+}
+
+// Textured, with fully transparent pixels discarded.
+P2F mainTxTk(V2P pixel)
+{
+    float3 lit = AccumulateLambert(pixel.worldNorm, pixel.diffuse.rgb);
+    float4 texel = Texture1.Sample(TexSampler, pixel.uv);
+
+    P2F result;
+    result.fragment = CombineRGBWithAlpha(texel.rgb * lit, texel.a * pixel.diffuse.a);
+
+    if (result.fragment.a == 0.0f) discard;
+
+    return result;
+}
+
diff --git 
a/Sdk/External/DirectXTK/Src/Shaders/DGSLPhong.hlsl b/Sdk/External/DirectXTK/Src/Shaders/DGSLPhong.hlsl new file mode 100644 index 0000000..c0010ea --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/DGSLPhong.hlsl @@ -0,0 +1,207 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// +// This file was generated by exporting HLSL from Visual Studio's default "Phong" material, and then modified to handle no texture scenarios, multiple lights, and work with FL 9.x +// \Common7\IDE\Extensions\Microsoft\VsGraphics\Assets\Effects\Phong.dgsl +// + +Texture2D Texture1 : register( t0 ); + +SamplerState TexSampler : register( s0 ); + +cbuffer MaterialVars : register (b0) +{ + float4 MaterialAmbient; + float4 MaterialDiffuse; + float4 MaterialSpecular; + float4 MaterialEmissive; + float MaterialSpecularPower; +}; + +cbuffer LightVars : register (b1) +{ + float4 AmbientLight; + float4 LightColor[4]; + float4 LightAttenuation[4]; + float3 LightDirection[4]; + float LightSpecularIntensity[4]; + uint IsPointLight[4]; + uint ActiveLights; +} + +cbuffer ObjectVars : register(b2) +{ + float4x4 LocalToWorld4x4; + float4x4 LocalToProjected4x4; + float4x4 WorldToLocal4x4; + float4x4 WorldToView4x4; + float4x4 UVTransform4x4; + float3 EyePosition; +}; + +cbuffer MiscVars : register(b3) +{ + float ViewportWidth; + float ViewportHeight; + float Time; +}; + +struct V2P +{ + float4 pos : SV_POSITION; + float4 diffuse : COLOR; + float2 uv : TEXCOORD0; + float3 worldNorm : TEXCOORD1; + float3 worldPos : TEXCOORD2; + float3 toEye : TEXCOORD3; + float4 tangent : TEXCOORD4; + float3 normal : TEXCOORD5; +}; + +struct P2F +{ + float4 fragment : SV_Target; +}; + +// +// lambert lighting function +// +float3 LambertLighting( + float3 lightNormal, + float3 surfaceNormal, + float3 lightColor, + float3 pixelColor + ) +{ + // compute amount of contribution per light + float diffuseAmount = saturate(dot(lightNormal, surfaceNormal)); + float3 diffuse 
= diffuseAmount * lightColor * pixelColor; + return diffuse; +} + +// +// specular contribution function +// +float3 SpecularContribution( + float3 toEye, + float3 lightNormal, + float3 surfaceNormal, + float3 materialSpecularColor, + float materialSpecularPower, + float lightSpecularIntensity, + float3 lightColor + ) +{ + // compute specular contribution + float3 vHalf = normalize(lightNormal + toEye); + float specularAmount = saturate(dot(surfaceNormal, vHalf)); + specularAmount = pow(specularAmount, max(materialSpecularPower,0.0001f)) * lightSpecularIntensity; + float3 specular = materialSpecularColor * lightColor * specularAmount; + + return specular; +} + +// +// combines a float3 RGB value with an alpha value into a float4 +// +float4 CombineRGBWithAlpha(float3 rgb, float a) +{ + return float4(rgb.r, rgb.g, rgb.b, a); +} + +P2F main(V2P pixel) +{ + P2F result; + + float3 worldNormal = normalize(pixel.worldNorm); + float3 toEyeVector = normalize(pixel.toEye); + + float3 local1 = MaterialAmbient.rgb * AmbientLight.rgb; + float3 local4 = 0; + [unroll] + for (int i = 0; i < 3; i++) + { + local1 += LambertLighting(LightDirection[i], worldNormal, LightColor[i].rgb, pixel.diffuse.rgb); + local4 += SpecularContribution(toEyeVector, LightDirection[i], worldNormal, MaterialSpecular.rgb, MaterialSpecularPower, LightSpecularIntensity[i], LightColor[i].rgb); + } + + local1 = saturate(local1); + result.fragment = CombineRGBWithAlpha(local1 + local4, pixel.diffuse.a); + + return result; +} + +P2F mainTk(V2P pixel) +{ + P2F result; + + float3 worldNormal = normalize(pixel.worldNorm); + float3 toEyeVector = normalize(pixel.toEye); + + float3 local1 = MaterialAmbient.rgb * AmbientLight.rgb; + float3 local4 = 0; + [unroll] + for (int i = 0; i < 3; i++) + { + local1 += LambertLighting(LightDirection[i], worldNormal, LightColor[i].rgb, pixel.diffuse.rgb); + local4 += SpecularContribution(toEyeVector, LightDirection[i], worldNormal, MaterialSpecular.rgb, MaterialSpecularPower, 
LightSpecularIntensity[i], LightColor[i].rgb); + } + + local1 = saturate(local1); + result.fragment = CombineRGBWithAlpha(local1 + local4, pixel.diffuse.a); + + if (result.fragment.a == 0.0f) discard; + + return result; +} + +P2F mainTx(V2P pixel) +{ + P2F result; + + float3 worldNormal = normalize(pixel.worldNorm); + float3 toEyeVector = normalize(pixel.toEye); + + float3 local1 = MaterialAmbient.rgb * AmbientLight.rgb; + float3 local4 = 0; + [unroll] + for (int i = 0; i < 3; i++) + { + local1 += LambertLighting(LightDirection[i], worldNormal, LightColor[i].rgb, pixel.diffuse.rgb); + local4 += SpecularContribution(toEyeVector, LightDirection[i], worldNormal, MaterialSpecular.rgb, MaterialSpecularPower, LightSpecularIntensity[i], LightColor[i].rgb); + } + + local1 = saturate(local1); + float3 local5 = mad(local1, Texture1.Sample(TexSampler, pixel.uv).rgb, local4); + float local6 = Texture1.Sample(TexSampler, pixel.uv).a * pixel.diffuse.a; + result.fragment = CombineRGBWithAlpha(local5, local6); + + return result; +} + +P2F mainTxTk(V2P pixel) +{ + P2F result; + + float3 worldNormal = normalize(pixel.worldNorm); + float3 toEyeVector = normalize(pixel.toEye); + + float3 local1 = MaterialAmbient.rgb * AmbientLight.rgb; + float3 local4 = 0; + [unroll] + for (int i = 0; i < 3; i++) + { + local1 += LambertLighting(LightDirection[i], worldNormal, LightColor[i].rgb, pixel.diffuse.rgb); + local4 += SpecularContribution(toEyeVector, LightDirection[i], worldNormal, MaterialSpecular.rgb, MaterialSpecularPower, LightSpecularIntensity[i], LightColor[i].rgb); + } + + local1 = saturate(local1); + float3 local5 = mad(local1, Texture1.Sample(TexSampler, pixel.uv).rgb, local4); + float local6 = Texture1.Sample(TexSampler, pixel.uv).a * pixel.diffuse.a; + result.fragment = CombineRGBWithAlpha(local5, local6); + + if (result.fragment.a == 0.0f) discard; + + return result; +} diff --git a/Sdk/External/DirectXTK/Src/Shaders/DGSLUnlit.hlsl 
b/Sdk/External/DirectXTK/Src/Shaders/DGSLUnlit.hlsl new file mode 100644 index 0000000..eec1664 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/DGSLUnlit.hlsl @@ -0,0 +1,149 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// +// This file was generated by exporting HLSL from Visual Studio's default "Unlit" material, and then modified to handle both texture scenarios +// \Common7\IDE\Extensions\Microsoft\VsGraphics\Assets\Effects\Unlit.dgsl +// + +Texture2D Texture1 : register( t0 ); + +SamplerState TexSampler : register( s0 ); + +cbuffer MaterialVars : register (b0) +{ + float4 MaterialAmbient; + float4 MaterialDiffuse; + float4 MaterialSpecular; + float4 MaterialEmissive; + float MaterialSpecularPower; +}; + +cbuffer LightVars : register (b1) +{ + float4 AmbientLight; + float4 LightColor[4]; + float4 LightAttenuation[4]; + float3 LightDirection[4]; + float LightSpecularIntensity[4]; + uint IsPointLight[4]; + uint ActiveLights; +} + +cbuffer ObjectVars : register(b2) +{ + float4x4 LocalToWorld4x4; + float4x4 LocalToProjected4x4; + float4x4 WorldToLocal4x4; + float4x4 WorldToView4x4; + float4x4 UVTransform4x4; + float3 EyePosition; +}; + +cbuffer MiscVars : register(b3) +{ + float ViewportWidth; + float ViewportHeight; + float Time; +}; + +struct V2P +{ + float4 pos : SV_POSITION; + float4 diffuse : COLOR; + float2 uv : TEXCOORD0; + float3 worldNorm : TEXCOORD1; + float3 worldPos : TEXCOORD2; + float3 toEye : TEXCOORD3; + float4 tangent : TEXCOORD4; + float3 normal : TEXCOORD5; +}; + +struct P2F +{ + float4 fragment : SV_Target; +}; + +// +// combines a float3 RGB value with an alpha value into a float4 +// +float4 CombineRGBWithAlpha(float3 rgb, float a) +{ + return float4(rgb.r, rgb.g, rgb.b, a); +} + +P2F main(V2P pixel) +{ + P2F result; + + result.fragment = pixel.diffuse; + + return result; +} + +P2F mainTk(V2P pixel) +{ + P2F result; + + result.fragment = pixel.diffuse; + if (result.fragment.a == 
0.0f) discard; + + return result; +} + +P2F mainTx(V2P pixel) +{ + P2F result; + + // we need to normalize incoming vectors + float3 surfaceNormal = normalize(pixel.normal); + float3 surfaceTangent = normalize(pixel.tangent.xyz); + float3 worldNormal = normalize(pixel.worldNorm); + float3 toEyeVector = normalize(pixel.toEye); + + // construct tangent matrix + float3x3 localToTangent = transpose(float3x3(surfaceTangent, cross(surfaceNormal, surfaceTangent) * pixel.tangent.w, surfaceNormal)); + float3x3 worldToTangent = mul((float3x3)WorldToLocal4x4, localToTangent); + + // transform some vectors into tangent space + float3 tangentLightDir = normalize(mul(LightDirection[0], worldToTangent)); + float3 tangentToEyeVec = normalize(mul(toEyeVector, worldToTangent)); + + // BEGIN GENERATED CODE + float3 local3 = pixel.diffuse.rgb * Texture1.Sample(TexSampler, pixel.uv).rgb; + float local4 = pixel.diffuse.a * Texture1.Sample(TexSampler, pixel.uv).a; + result.fragment = CombineRGBWithAlpha(local3, local4); + // END GENERATED CODE + + return result; +} + +P2F mainTxTk(V2P pixel) +{ + P2F result; + + // we need to normalize incoming vectors + float3 surfaceNormal = normalize(pixel.normal); + float3 surfaceTangent = normalize(pixel.tangent.xyz); + float3 worldNormal = normalize(pixel.worldNorm); + float3 toEyeVector = normalize(pixel.toEye); + + // construct tangent matrix + float3x3 localToTangent = transpose(float3x3(surfaceTangent, cross(surfaceNormal, surfaceTangent) * pixel.tangent.w, surfaceNormal)); + float3x3 worldToTangent = mul((float3x3)WorldToLocal4x4, localToTangent); + + // transform some vectors into tangent space + float3 tangentLightDir = normalize(mul(LightDirection[0], worldToTangent)); + float3 tangentToEyeVec = normalize(mul(toEyeVector, worldToTangent)); + + // BEGIN GENERATED CODE + float3 local3 = pixel.diffuse.rgb * Texture1.Sample(TexSampler, pixel.uv).rgb; + float local4 = pixel.diffuse.a * Texture1.Sample(TexSampler, pixel.uv).a; + result.fragment = 
CombineRGBWithAlpha(local3, local4); + // END GENERATED CODE + + if (result.fragment.a == 0.0f) discard; + + return result; +} + diff --git a/Sdk/External/DirectXTK/Src/Shaders/DebugEffect.fx b/Sdk/External/DirectXTK/Src/Shaders/DebugEffect.fx new file mode 100644 index 0000000..cfc5191 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/DebugEffect.fx @@ -0,0 +1,134 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 + + +cbuffer Parameters : register(b0) +{ + float3 AmbientDown : packoffset(c0); + float Alpha : packoffset(c0.w); + float3 AmbientRange : packoffset(c1); + + float4x4 World : packoffset(c2); + float3x3 WorldInverseTranspose : packoffset(c6); + float4x4 WorldViewProj : packoffset(c9); +}; + + +#include "Structures.fxh" +#include "Utilities.fxh" + + +// Vertex shader: basic +VSOutputPixelLightingTx VSDebug(VSInputNmTx vin) +{ + VSOutputPixelLightingTx vout; + + vout.PositionPS = mul(vin.Position, WorldViewProj); + vout.PositionWS = float4(mul(vin.Position, World).xyz, 1); + vout.NormalWS = normalize(mul(vin.Normal, WorldInverseTranspose)); + vout.Diffuse = float4(1, 1, 1, Alpha); + vout.TexCoord = vin.TexCoord; + + return vout; +} + +VSOutputPixelLightingTx VSDebugBn(VSInputNmTx vin) +{ + VSOutputPixelLightingTx vout; + + float3 normal = BiasX2(vin.Normal); + + vout.PositionPS = mul(vin.Position, WorldViewProj); + vout.PositionWS = float4(mul(vin.Position, World).xyz, 1); + vout.NormalWS = normalize(mul(normal, WorldInverseTranspose)); + vout.Diffuse = float4(1, 1, 1, Alpha); + vout.TexCoord = vin.TexCoord; + + return vout; +} + + +// Vertex shader: vertex color. 
+VSOutputPixelLightingTx VSDebugVc(VSInputNmTxVc vin) +{ + VSOutputPixelLightingTx vout; + + vout.PositionPS = mul(vin.Position, WorldViewProj); + vout.PositionWS = float4(mul(vin.Position, World).xyz, 1); + vout.NormalWS = normalize(mul(vin.Normal, WorldInverseTranspose)); + vout.Diffuse.rgb = vin.Color.rgb; + vout.Diffuse.a = vin.Color.a * Alpha; + vout.TexCoord = vin.TexCoord; + + return vout; +} + +VSOutputPixelLightingTx VSDebugVcBn(VSInputNmTxVc vin) +{ + VSOutputPixelLightingTx vout; + + float3 normal = BiasX2(vin.Normal); + + vout.PositionPS = mul(vin.Position, WorldViewProj); + vout.PositionWS = float4(mul(vin.Position, World).xyz, 1); + vout.NormalWS = normalize(mul(normal, WorldInverseTranspose)); + vout.Diffuse.rgb = vin.Color.rgb; + vout.Diffuse.a = vin.Color.a * Alpha; + vout.TexCoord = vin.TexCoord; + + return vout; +} + + +// Pixel shader: default +float3 CalcHemiAmbient(float3 normal, float3 color) +{ + float3 up = BiasD2(normal); + float3 ambient = AmbientDown + up.y * AmbientRange; + return ambient * color; +} + +float4 PSHemiAmbient(PSInputPixelLightingTx pin) : SV_Target0 +{ + float3 normal = normalize(pin.NormalWS); + + // Do lighting + float3 color = CalcHemiAmbient(normal, pin.Diffuse.rgb); + + return float4(color, pin.Diffuse.a); +} + + +// Pixel shader: RGB normals +float4 PSRGBNormals(PSInputPixelLightingTx pin) : SV_Target0 +{ + float3 normal = normalize(pin.NormalWS); + + float3 color = BiasD2(normal); + + return float4(color, pin.Diffuse.a); +} + +// Pixel shader: RGB tangents +float4 PSRGBTangents(PSInputPixelLightingTx pin) : SV_Target0 +{ + const float3x3 TBN = CalculateTBN(pin.PositionWS.xyz, pin.NormalWS, pin.TexCoord); + float3 tangent = normalize(TBN[0]); + + float3 color = BiasD2(tangent); + + return float4(color, pin.Diffuse.a); +} + +// Pixel shader: RGB bi-tangents +float4 PSRGBBiTangents(PSInputPixelLightingTx pin) : SV_Target0 +{ + const float3x3 TBN = CalculateTBN(pin.PositionWS.xyz, pin.NormalWS, pin.TexCoord); + float3 
bitangent = normalize(TBN[1]); + + float3 color = BiasD2(bitangent); + + return float4(color, pin.Diffuse.a); +} diff --git a/Sdk/External/DirectXTK/Src/Shaders/DualTextureEffect.fx b/Sdk/External/DirectXTK/Src/Shaders/DualTextureEffect.fx new file mode 100644 index 0000000..22a4ca8 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/DualTextureEffect.fx @@ -0,0 +1,115 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://create.msdn.com/en-US/education/catalog/sample/stock_effects + + +Texture2D Texture : register(t0); +Texture2D Texture2 : register(t1); + +sampler Sampler : register(s0); +sampler Sampler2 : register(s1); + + +cbuffer Parameters : register(b0) +{ + float4 DiffuseColor : packoffset(c0); + float3 FogColor : packoffset(c1); + float4 FogVector : packoffset(c2); + float4x4 WorldViewProj : packoffset(c3); +}; + + +#include "Structures.fxh" +#include "Common.fxh" + + +// Vertex shader: basic. +VSOutputTx2 VSDualTexture(VSInputTx2 vin) +{ + VSOutputTx2 vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParams; + + vout.TexCoord = vin.TexCoord; + vout.TexCoord2 = vin.TexCoord2; + + return vout; +} + + +// Vertex shader: no fog. +VSOutputTx2NoFog VSDualTextureNoFog(VSInputTx2 vin) +{ + VSOutputTx2NoFog vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParamsNoFog; + + vout.TexCoord = vin.TexCoord; + vout.TexCoord2 = vin.TexCoord2; + + return vout; +} + + +// Vertex shader: vertex color. +VSOutputTx2 VSDualTextureVc(VSInputTx2Vc vin) +{ + VSOutputTx2 vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParams; + + vout.TexCoord = vin.TexCoord; + vout.TexCoord2 = vin.TexCoord2; + vout.Diffuse *= vin.Color; + + return vout; +} + + +// Vertex shader: vertex color, no fog. 
+VSOutputTx2NoFog VSDualTextureVcNoFog(VSInputTx2Vc vin) +{ + VSOutputTx2NoFog vout; + + CommonVSOutput cout = ComputeCommonVSOutput(vin.Position); + SetCommonVSOutputParamsNoFog; + + vout.TexCoord = vin.TexCoord; + vout.TexCoord2 = vin.TexCoord2; + vout.Diffuse *= vin.Color; + + return vout; +} + + +// Pixel shader: basic. +float4 PSDualTexture(PSInputTx2 pin) : SV_Target0 +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord); + float4 overlay = Texture2.Sample(Sampler2, pin.TexCoord2); + + color.rgb *= 2; + color *= overlay * pin.Diffuse; + + ApplyFog(color, pin.Specular.w); + + return color; +} + + +// Pixel shader: no fog. +float4 PSDualTextureNoFog(PSInputTx2NoFog pin) : SV_Target0 +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord); + float4 overlay = Texture2.Sample(Sampler2, pin.TexCoord2); + + color.rgb *= 2; + color *= overlay * pin.Diffuse; + + return color; +} diff --git a/Sdk/External/DirectXTK/Src/Shaders/EnvironmentMapEffect.fx b/Sdk/External/DirectXTK/Src/Shaders/EnvironmentMapEffect.fx new file mode 100644 index 0000000..a9e5231 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/EnvironmentMapEffect.fx @@ -0,0 +1,447 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://create.msdn.com/en-US/education/catalog/sample/stock_effects + + +Texture2D Texture : register(t0); +TextureCube EnvironmentMap : register(t1); +Texture2D SphereMap : register(t1); +Texture2DArray DualParabolaMap : register(t1); + +sampler Sampler : register(s0); +sampler EnvMapSampler : register(s1); + + +cbuffer Parameters : register(b0) +{ + float3 EnvironmentMapSpecular : packoffset(c0); + float EnvironmentMapAmount : packoffset(c1.x); + float FresnelFactor : packoffset(c1.y); + + float4 DiffuseColor : packoffset(c2); + float3 EmissiveColor : packoffset(c3); + + float3 LightDirection[3] : packoffset(c4); + float3 LightDiffuseColor[3] : packoffset(c7); + + float3 EyePosition : packoffset(c10); + + float3 FogColor : packoffset(c11); + float4 FogVector : packoffset(c12); + + float4x4 World : packoffset(c13); + float3x3 WorldInverseTranspose : packoffset(c17); + float4x4 WorldViewProj : packoffset(c20); +}; + + +// We don't use these parameters, but Lighting.fxh won't compile without them. 
+#define SpecularPower 0 +#define SpecularColor 0 +#define LightSpecularColor float3(0, 0, 0) + + +#include "Structures.fxh" +#include "Common.fxh" +#include "Lighting.fxh" +#include "Utilities.fxh" + + +float ComputeFresnelFactor(float3 eyeVector, float3 worldNormal) +{ + float viewAngle = dot(eyeVector, worldNormal); + + return pow(max(1 - abs(viewAngle), 0), FresnelFactor) * EnvironmentMapAmount; +} + + +VSOutputTxEnvMap ComputeEnvMapVSOutput(VSInputNmTx vin, float3 normal, uniform bool useFresnel, uniform int numLights) +{ + VSOutputTxEnvMap vout; + + float4 pos_ws = mul(vin.Position, World); + float3 eyeVector = normalize(EyePosition - pos_ws.xyz); + float3 worldNormal = normalize(mul(normal, WorldInverseTranspose)); + + ColorPair lightResult = ComputeLights(eyeVector, worldNormal, numLights); + + vout.PositionPS = mul(vin.Position, WorldViewProj); + vout.Diffuse = float4(lightResult.Diffuse, DiffuseColor.a); + + if (useFresnel) + vout.Specular.rgb = ComputeFresnelFactor(eyeVector, worldNormal); + else + vout.Specular.rgb = EnvironmentMapAmount; + + vout.Specular.a = ComputeFogFactor(vin.Position); + vout.TexCoord = vin.TexCoord; + vout.EnvCoord = reflect(-eyeVector, worldNormal); + + return vout; +} + + +// Cubic environment mapping +// Greene, "Environment Mapping and Other Applications of World Projections", IEEE Computer Graphics and Applications. 1986. 
+float4 ComputeEnvMapPSOutput(PSInputPixelLightingTx pin, uniform bool useFresnel) +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse; + + float3 eyeVector = normalize(EyePosition - pin.PositionWS.xyz); + float3 worldNormal = normalize(pin.NormalWS); + + ColorPair lightResult = ComputeLights(eyeVector, worldNormal, 3); + + color.rgb *= lightResult.Diffuse; + + float3 envcoord = reflect(-eyeVector, worldNormal); + + float4 envmap = EnvironmentMap.Sample(EnvMapSampler, envcoord) * color.a; + + float3 amount; + if (useFresnel) + amount = ComputeFresnelFactor(eyeVector, worldNormal); + else + amount = EnvironmentMapAmount; + + color.rgb = lerp(color.rgb, envmap.rgb, amount.rgb); + color.rgb += EnvironmentMapSpecular * envmap.a; + + return color; +} + + +// Spherical environment mapping +// Blinn & Newell, "Texture and Reflection in Computer Generated Images", Communications of the ACM. 1976. +float4 ComputeEnvMapSpherePSOutput(PSInputPixelLightingTx pin, uniform bool useFresnel) +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse; + + float3 eyeVector = normalize(EyePosition - pin.PositionWS.xyz); + float3 worldNormal = normalize(pin.NormalWS); + + ColorPair lightResult = ComputeLights(eyeVector, worldNormal, 3); + + color.rgb *= lightResult.Diffuse; + + float3 r = reflect(-eyeVector, worldNormal); + float m = 2.0 * sqrt(r.x*r.x + r.y*r.y + (r.z + 1.0)*(r.z + 1.0)); + float2 envcoord = float2(r.x / m + 0.5, r.y / m + 0.5); + + float4 envmap = SphereMap.Sample(EnvMapSampler, envcoord) * color.a; + + float3 amount; + if (useFresnel) + amount = ComputeFresnelFactor(eyeVector, worldNormal); + else + amount = EnvironmentMapAmount; + + color.rgb = lerp(color.rgb, envmap.rgb, amount.rgb); + color.rgb += EnvironmentMapSpecular * envmap.a; + + return color; +} + + +// Dual-parabola environment mapping +// Heidrich & Seidel, "View-independent Environment Maps", Eurographics Workshop on Graphics Hardware, 1998. 
+float4 ComputeEnvMapDualParabolaPSOutput(PSInputPixelLightingTx pin, uniform bool useFresnel) +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse; + + float3 eyeVector = normalize(EyePosition - pin.PositionWS.xyz); + float3 worldNormal = normalize(pin.NormalWS); + + ColorPair lightResult = ComputeLights(eyeVector, worldNormal, 3); + + color.rgb *= lightResult.Diffuse; + + float3 r = reflect(-eyeVector, worldNormal); + float m = 2.0 * (1.0 + abs(r.z)); + float3 envcoord = float3(r.x / m + 0.5, r.y / m + 0.5, (r.z > 0) ? 0 : 1); + + float4 envmap = DualParabolaMap.Sample(EnvMapSampler, envcoord) * color.a; + + float3 amount; + if (useFresnel) + amount = ComputeFresnelFactor(eyeVector, worldNormal); + else + amount = EnvironmentMapAmount; + + color.rgb = lerp(color.rgb, envmap.rgb, amount.rgb); + color.rgb += EnvironmentMapSpecular * envmap.a; + + return color; +} + + +// Vertex shader: basic. +VSOutputTxEnvMap VSEnvMap(VSInputNmTx vin) +{ + return ComputeEnvMapVSOutput(vin, vin.Normal, false, 3); +} + +VSOutputTxEnvMap VSEnvMapBn(VSInputNmTx vin) +{ + float3 normal = BiasX2(vin.Normal); + + return ComputeEnvMapVSOutput(vin, normal, false, 3); +} + + +// Vertex shader: fresnel. +VSOutputTxEnvMap VSEnvMapFresnel(VSInputNmTx vin) +{ + return ComputeEnvMapVSOutput(vin, vin.Normal, true, 3); +} + +VSOutputTxEnvMap VSEnvMapFresnelBn(VSInputNmTx vin) +{ + float3 normal = BiasX2(vin.Normal); + + return ComputeEnvMapVSOutput(vin, normal, true, 3); +} + + +// Vertex shader: one light. +VSOutputTxEnvMap VSEnvMapOneLight(VSInputNmTx vin) +{ + return ComputeEnvMapVSOutput(vin, vin.Normal, false, 1); +} + +VSOutputTxEnvMap VSEnvMapOneLightBn(VSInputNmTx vin) +{ + float3 normal = BiasX2(vin.Normal); + + return ComputeEnvMapVSOutput(vin, normal, false, 1); +} + + +// Vertex shader: one light, fresnel. 
+VSOutputTxEnvMap VSEnvMapOneLightFresnel(VSInputNmTx vin) +{ + return ComputeEnvMapVSOutput(vin, vin.Normal, true, 1); +} + +VSOutputTxEnvMap VSEnvMapOneLightFresnelBn(VSInputNmTx vin) +{ + float3 normal = BiasX2(vin.Normal); + + return ComputeEnvMapVSOutput(vin, normal, true, 1); +} + + +// Vertex shader: pixel lighting. +VSOutputPixelLightingTx VSEnvMapPixelLighting(VSInputNmTx vin) +{ + VSOutputPixelLightingTx vout; + + CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, vin.Normal); + SetCommonVSOutputParamsPixelLighting; + + vout.Diffuse = float4(1, 1, 1, DiffuseColor.a); + vout.TexCoord = vin.TexCoord; + + return vout; +} + +VSOutputPixelLightingTx VSEnvMapPixelLightingSM4(VSInputNmTx vin) +{ + return VSEnvMapPixelLighting(vin); +} + +VSOutputPixelLightingTx VSEnvMapPixelLightingBn(VSInputNmTx vin) +{ + VSOutputPixelLightingTx vout; + + float3 normal = BiasX2(vin.Normal); + + CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, normal); + SetCommonVSOutputParamsPixelLighting; + + vout.Diffuse = float4(1, 1, 1, DiffuseColor.a); + vout.TexCoord = vin.TexCoord; + + return vout; +} + +VSOutputPixelLightingTx VSEnvMapPixelLightingBnSM4(VSInputNmTx vin) +{ + return VSEnvMapPixelLightingBn(vin); +} + + +// Pixel shader (cube mapping): basic. +float4 PSEnvMap(PSInputTxEnvMap pin) : SV_Target0 +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse; + float4 envmap = EnvironmentMap.Sample(EnvMapSampler, pin.EnvCoord) * color.a; + + color.rgb = lerp(color.rgb, envmap.rgb, pin.Specular.rgb); + + ApplyFog(color, pin.Specular.w); + + return color; +} + + +// Pixel shader (cube mapping): no fog. 
+float4 PSEnvMapNoFog(PSInputTxEnvMap pin) : SV_Target0 +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse; + float4 envmap = EnvironmentMap.Sample(EnvMapSampler, pin.EnvCoord) * color.a; + + color.rgb = lerp(color.rgb, envmap.rgb, pin.Specular.rgb); + + return color; +} + + +// Pixel shader (cube mapping): specular. +float4 PSEnvMapSpecular(PSInputTxEnvMap pin) : SV_Target0 +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse; + float4 envmap = EnvironmentMap.Sample(EnvMapSampler, pin.EnvCoord) * color.a; + + color.rgb = lerp(color.rgb, envmap.rgb, pin.Specular.rgb); + color.rgb += EnvironmentMapSpecular * envmap.a; + + ApplyFog(color, pin.Specular.w); + + return color; +} + + +// Pixel shader (cube mapping): specular, no fog. +float4 PSEnvMapSpecularNoFog(PSInputTxEnvMap pin) : SV_Target0 +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse; + float4 envmap = EnvironmentMap.Sample(EnvMapSampler, pin.EnvCoord) * color.a; + + color.rgb = lerp(color.rgb, envmap.rgb, pin.Specular.rgb); + color.rgb += EnvironmentMapSpecular * envmap.a; + + return color; +} + + +// Pixel shader (cube mapping): pixel lighting. +float4 PSEnvMapPixelLighting(PSInputPixelLightingTx pin) : SV_Target0 +{ + float4 color = ComputeEnvMapPSOutput(pin, false); + + ApplyFog(color, pin.PositionWS.w); + + return color; +} + + +// Pixel shader (cube mapping): pixel lighting + no fog. +float4 PSEnvMapPixelLightingNoFog(PSInputPixelLightingTx pin) : SV_Target0 +{ + float4 color = ComputeEnvMapPSOutput(pin, false); + + return color; +} + + +// Pixel shader (cube mapping): pixel lighting + fresnel +float4 PSEnvMapPixelLightingFresnel(PSInputPixelLightingTx pin) : SV_Target0 +{ + float4 color = ComputeEnvMapPSOutput(pin, true); + + ApplyFog(color, pin.PositionWS.w); + + return color; +} + + +// Pixel shader (cube mapping): pixel lighting + fresnel + no fog. 
+float4 PSEnvMapPixelLightingFresnelNoFog(PSInputPixelLightingTx pin) : SV_Target0 +{ + float4 color = ComputeEnvMapPSOutput(pin, true); + + return color; +} + + +// Pixel shader (sphere mapping): pixel lighting. +float4 PSEnvMapSpherePixelLighting(PSInputPixelLightingTx pin) : SV_Target0 +{ + float4 color = ComputeEnvMapSpherePSOutput(pin, false); + + ApplyFog(color, pin.PositionWS.w); + + return color; +} + + +// Pixel shader (sphere mapping): pixel lighting + no fog. +float4 PSEnvMapSpherePixelLightingNoFog(PSInputPixelLightingTx pin) : SV_Target0 +{ + float4 color = ComputeEnvMapSpherePSOutput(pin, false); + + return color; +} + + +// Pixel shader (sphere mapping): pixel lighting + fresnel +float4 PSEnvMapSpherePixelLightingFresnel(PSInputPixelLightingTx pin) : SV_Target0 +{ + float4 color = ComputeEnvMapSpherePSOutput(pin, true); + + ApplyFog(color, pin.PositionWS.w); + + return color; +} + + +// Pixel shader (sphere mapping): pixel lighting + fresnel + no fog. +float4 PSEnvMapSpherePixelLightingFresnelNoFog(PSInputPixelLightingTx pin) : SV_Target0 +{ + float4 color = ComputeEnvMapSpherePSOutput(pin, true); + + return color; +} + + +// Pixel shader (dual parabola mapping): pixel lighting. +float4 PSEnvMapDualParabolaPixelLighting(PSInputPixelLightingTx pin) : SV_Target0 +{ + float4 color = ComputeEnvMapDualParabolaPSOutput(pin, false); + + ApplyFog(color, pin.PositionWS.w); + + return color; +} + + +// Pixel shader (dual parabola mapping): pixel lighting + no fog. 
+float4 PSEnvMapDualParabolaPixelLightingNoFog(PSInputPixelLightingTx pin) : SV_Target0 +{ + float4 color = ComputeEnvMapDualParabolaPSOutput(pin, false); + + return color; +} + + +// Pixel shader (dual parabola mapping): pixel lighting + fresnel +float4 PSEnvMapDualParabolaPixelLightingFresnel(PSInputPixelLightingTx pin) : SV_Target0 +{ + float4 color = ComputeEnvMapDualParabolaPSOutput(pin, true); + + ApplyFog(color, pin.PositionWS.w); + + return color; +} + + +// Pixel shader (dual parabola mapping): pixel lighting + fresnel + no fog. +float4 PSEnvMapDualParabolaPixelLightingFresnelNoFog(PSInputPixelLightingTx pin) : SV_Target0 +{ + float4 color = ComputeEnvMapDualParabolaPSOutput(pin, true); + + return color; +} diff --git a/Sdk/External/DirectXTK/Src/Shaders/Lighting.fxh b/Sdk/External/DirectXTK/Src/Shaders/Lighting.fxh new file mode 100644 index 0000000..a17187d --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/Lighting.fxh @@ -0,0 +1,96 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248926 +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +// http://create.msdn.com/en-US/education/catalog/sample/stock_effects + + +struct ColorPair +{ + float3 Diffuse; + float3 Specular; +}; + + +ColorPair ComputeLights(float3 eyeVector, float3 worldNormal, uniform int numLights) +{ + float3x3 lightDirections = 0; + float3x3 lightDiffuse = 0; + float3x3 lightSpecular = 0; + float3x3 halfVectors = 0; + + [unroll] + for (int i = 0; i < numLights; i++) + { + lightDirections[i] = LightDirection[i]; + lightDiffuse[i] = LightDiffuseColor[i]; + lightSpecular[i] = LightSpecularColor[i]; + + halfVectors[i] = normalize(eyeVector - lightDirections[i]); + } + + float3 dotL = mul(-lightDirections, worldNormal); + float3 dotH = mul(halfVectors, worldNormal); + + float3 zeroL = step(0, dotL); + + float3 diffuse = zeroL * dotL; + float3 specular = pow(max(dotH, 0) * zeroL, SpecularPower) * dotL; + + ColorPair result; + + result.Diffuse = mul(diffuse, lightDiffuse) * DiffuseColor.rgb + EmissiveColor; + result.Specular = mul(specular, lightSpecular) * SpecularColor; + + return result; +} + + +CommonVSOutput ComputeCommonVSOutputWithLighting(float4 position, float3 normal, uniform int numLights) +{ + CommonVSOutput vout; + + float4 pos_ws = mul(position, World); + float3 eyeVector = normalize(EyePosition - pos_ws.xyz); + float3 worldNormal = normalize(mul(normal, WorldInverseTranspose)); + + ColorPair lightResult = ComputeLights(eyeVector, worldNormal, numLights); + + vout.Pos_ps = mul(position, WorldViewProj); + vout.Diffuse = float4(lightResult.Diffuse, DiffuseColor.a); + vout.Specular = lightResult.Specular; + vout.FogFactor = ComputeFogFactor(position); + + return vout; +} + + +struct CommonVSOutputPixelLighting +{ + float4 Pos_ps; + float3 Pos_ws; + float3 Normal_ws; + float FogFactor; +}; + + +CommonVSOutputPixelLighting ComputeCommonVSOutputPixelLighting(float4 position, float3 
normal) +{ + CommonVSOutputPixelLighting vout; + + vout.Pos_ps = mul(position, WorldViewProj); + vout.Pos_ws = mul(position, World).xyz; + vout.Normal_ws = normalize(mul(normal, WorldInverseTranspose)); + vout.FogFactor = ComputeFogFactor(position); + + return vout; +} + + +#define SetCommonVSOutputParamsPixelLighting \ + vout.PositionPS = cout.Pos_ps; \ + vout.PositionWS = float4(cout.Pos_ws, cout.FogFactor); \ + vout.NormalWS = cout.Normal_ws; + diff --git a/Sdk/External/DirectXTK/Src/Shaders/NormalMapEffect.fx b/Sdk/External/DirectXTK/Src/Shaders/NormalMapEffect.fx new file mode 100644 index 0000000..f7a2d75 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/NormalMapEffect.fx @@ -0,0 +1,193 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 + + +Texture2D Texture : register(t0); +Texture2D SpecularTexture : register(t1); +Texture2D NormalTexture : register(t2); + +sampler Sampler : register(s0); + +cbuffer Parameters : register(b0) +{ + float4 DiffuseColor : packoffset(c0); + float3 EmissiveColor : packoffset(c1); + float3 SpecularColor : packoffset(c2); + float SpecularPower : packoffset(c2.w); + + float3 LightDirection[3] : packoffset(c3); + float3 LightDiffuseColor[3] : packoffset(c6); + float3 LightSpecularColor[3] : packoffset(c9); + + float3 EyePosition : packoffset(c12); + + float3 FogColor : packoffset(c13); + float4 FogVector : packoffset(c14); + + float4x4 World : packoffset(c15); + float3x3 WorldInverseTranspose : packoffset(c19); + float4x4 WorldViewProj : packoffset(c22); +}; + + +#include "Structures.fxh" +#include "Common.fxh" +#include "Lighting.fxh" +#include "Utilities.fxh" + + +// Vertex shader: pixel lighting + texture. 
+VSOutputPixelLightingTx VSNormalPixelLightingTx(VSInputNmTx vin) +{ + VSOutputPixelLightingTx vout; + + CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, vin.Normal); + SetCommonVSOutputParamsPixelLighting; + + vout.Diffuse = float4(1, 1, 1, DiffuseColor.a); + vout.TexCoord = vin.TexCoord; + + return vout; +} + +VSOutputPixelLightingTx VSNormalPixelLightingTxBn(VSInputNmTx vin) +{ + VSOutputPixelLightingTx vout; + + float3 normal = BiasX2(vin.Normal); + + CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, normal); + SetCommonVSOutputParamsPixelLighting; + + vout.Diffuse = float4(1, 1, 1, DiffuseColor.a); + vout.TexCoord = vin.TexCoord; + + return vout; +} + + +// Vertex shader: pixel lighting + texture + vertex color. +VSOutputPixelLightingTx VSNormalPixelLightingTxVc(VSInputNmTxVc vin) +{ + VSOutputPixelLightingTx vout; + + CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, vin.Normal); + SetCommonVSOutputParamsPixelLighting; + + vout.Diffuse.rgb = vin.Color.rgb; + vout.Diffuse.a = vin.Color.a * DiffuseColor.a; + vout.TexCoord = vin.TexCoord; + + return vout; +} + +VSOutputPixelLightingTx VSNormalPixelLightingTxVcBn(VSInputNmTxVc vin) +{ + VSOutputPixelLightingTx vout; + + float3 normal = BiasX2(vin.Normal); + + CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, normal); + SetCommonVSOutputParamsPixelLighting; + + vout.Diffuse.rgb = vin.Color.rgb; + vout.Diffuse.a = vin.Color.a * DiffuseColor.a; + vout.TexCoord = vin.TexCoord; + + return vout; +} + +// Pixel shader: pixel lighting + texture + no fog +float4 PSNormalPixelLightingTxNoFog(PSInputPixelLightingTx pin) : SV_Target0 +{ + float3 eyeVector = normalize(EyePosition - pin.PositionWS.xyz); + + // Before lighting, perturb the surface's normal by the one given in normal map.
+ float3 localNormal = TwoChannelNormalX2(NormalTexture.Sample(Sampler, pin.TexCoord).xy); + float3 normal = PeturbNormal(localNormal, pin.PositionWS.xyz, pin.NormalWS, pin.TexCoord); + + // Do lighting + ColorPair lightResult = ComputeLights(eyeVector, normal, 3); + + // Get color from albedo texture + float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse; + color.rgb *= lightResult.Diffuse; + + // Apply specular, modulated by the intensity given in the specular map + float3 specIntensity = SpecularTexture.Sample(Sampler, pin.TexCoord); + AddSpecular(color, lightResult.Specular * specIntensity); + + return color; +} + +// Pixel shader: pixel lighting + texture +float4 PSNormalPixelLightingTx(PSInputPixelLightingTx pin) : SV_Target0 +{ + float3 eyeVector = normalize(EyePosition - pin.PositionWS.xyz); + + // Before lighting, perturb the surface's normal by the one given in normal map. + float3 localNormal = TwoChannelNormalX2(NormalTexture.Sample(Sampler, pin.TexCoord).xy); + float3 normal = PeturbNormal(localNormal, pin.PositionWS.xyz, pin.NormalWS, pin.TexCoord); + + // Do lighting + ColorPair lightResult = ComputeLights(eyeVector, normal, 3); + + // Get color from albedo texture + float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse; + color.rgb *= lightResult.Diffuse; + + // Apply specular, modulated by the intensity given in the specular map + float3 specIntensity = SpecularTexture.Sample(Sampler, pin.TexCoord); + AddSpecular(color, lightResult.Specular * specIntensity); + + ApplyFog(color, pin.PositionWS.w); + return color; +} + + +// Pixel shader: pixel lighting + texture + no fog + no specular map +float4 PSNormalPixelLightingTxNoFogSpec(PSInputPixelLightingTx pin) : SV_Target0 +{ + float3 eyeVector = normalize(EyePosition - pin.PositionWS.xyz); + + // Before lighting, perturb the surface's normal by the one given in normal map.
+ float3 localNormal = TwoChannelNormalX2(NormalTexture.Sample(Sampler, pin.TexCoord).xy); + float3 normal = PeturbNormal(localNormal, pin.PositionWS.xyz, pin.NormalWS, pin.TexCoord); + + // Do lighting + ColorPair lightResult = ComputeLights(eyeVector, normal, 3); + + // Get color from albedo texture + float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse; + color.rgb *= lightResult.Diffuse; + + // Apply specular + AddSpecular(color, lightResult.Specular); + + return color; +} + +// Pixel shader: pixel lighting + texture + no specular map +float4 PSNormalPixelLightingTxNoSpec(PSInputPixelLightingTx pin) : SV_Target0 +{ + float3 eyeVector = normalize(EyePosition - pin.PositionWS.xyz); + + // Before lighting, perturb the surface's normal by the one given in normal map. + float3 localNormal = TwoChannelNormalX2(NormalTexture.Sample(Sampler, pin.TexCoord).xy); + float3 normal = PeturbNormal(localNormal, pin.PositionWS.xyz, pin.NormalWS, pin.TexCoord); + + // Do lighting + ColorPair lightResult = ComputeLights(eyeVector, normal, 3); + + // Get color from albedo texture + float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse; + color.rgb *= lightResult.Diffuse; + + // Apply specular + AddSpecular(color, lightResult.Specular); + + ApplyFog(color, pin.PositionWS.w); + return color; +} diff --git a/Sdk/External/DirectXTK/Src/Shaders/PBRCommon.fxh b/Sdk/External/DirectXTK/Src/Shaders/PBRCommon.fxh new file mode 100644 index 0000000..d7aad5a --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/PBRCommon.fxh @@ -0,0 +1,170 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License.
+// +// http://go.microsoft.com/fwlink/?LinkId=248926 +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +// http://create.msdn.com/en-US/education/catalog/sample/stock_effects + + +struct CommonVSOutputPixelLighting +{ + float4 Pos_ps; + float3 Pos_ws; + float3 Normal_ws; +}; + +struct VSOut_Velocity +{ + VSOutputPixelLightingTx current; + float4 prevPosition : TEXCOORD4; +}; + +CommonVSOutputPixelLighting ComputeCommonVSOutputPixelLighting(float4 position, float3 normal) +{ + CommonVSOutputPixelLighting vout; + + vout.Pos_ps = mul(position, WorldViewProj); + vout.Pos_ws = mul(position, World).xyz; + vout.Normal_ws = normalize(mul(normal, WorldInverseTranspose)); + + return vout; +} + +static const float PI = 3.14159265f; +static const float EPSILON = 1e-6f; + +// Schlick's approximation of Fresnel +// https://en.wikipedia.org/wiki/Schlick%27s_approximation +float3 Fresnel_Shlick(in float3 f0, in float3 f90, in float x) +{ + return f0 + (f90 - f0) * pow(1.f - x, 5.f); +} + +// Burley B. "Physically Based Shading at Disney" +// SIGGRAPH 2012 Course: Practical Physically Based Shading in Film and Game Production, 2012.
+float Diffuse_Burley(in float NdotL, in float NdotV, in float LdotH, in float roughness) +{ + float fd90 = 0.5f + 2.f * roughness * LdotH * LdotH; + return Fresnel_Shlick(1, fd90, NdotL).x * Fresnel_Shlick(1, fd90, NdotV).x; +} + +// GGX specular D (normal distribution) +// https://www.cs.cornell.edu/~srm/publications/EGSR07-btdf.pdf +float Specular_D_GGX(in float alpha, in float NdotH) +{ + const float alpha2 = alpha * alpha; + const float lower = (NdotH * NdotH * (alpha2 - 1)) + 1; + return alpha2 / max(EPSILON, PI * lower * lower); +} + +// Schlick-Smith specular G (visibility) with Hable's LdotH optimization +// http://www.cs.virginia.edu/~jdl/bib/appearance/analytic%20models/schlick94b.pdf +// http://graphicrants.blogspot.se/2013/08/specular-brdf-reference.html +float G_Shlick_Smith_Hable(float alpha, float LdotH) +{ + return rcp(lerp(LdotH * LdotH, 1, alpha * alpha * 0.25f)); +} + +// A microfacet based BRDF. +// +// alpha: This is roughness * roughness as in the "Disney" PBR model by Burley et al. +// +// specularColor: The F0 reflectance value - 0.04 for non-metals, or RGB for metals. This follows model +// used by Unreal Engine 4. +// +// NdotV, NdotL, LdotH, NdotH: vector relationships between, +// N - surface normal +// V - eye normal +// L - light normal +// H - half vector between L & V. 
+float3 Specular_BRDF(in float alpha, in float3 specularColor, in float NdotV, in float NdotL, in float LdotH, in float NdotH) +{ + // Specular D (microfacet normal distribution) component + float specular_D = Specular_D_GGX(alpha, NdotH); + + // Specular Fresnel + float3 specular_F = Fresnel_Shlick(specularColor, 1, LdotH); + + // Specular G (visibility) component + float specular_G = G_Shlick_Smith_Hable(alpha, LdotH); + + return specular_D * specular_F * specular_G; +} + +// Diffuse irradiance +float3 Diffuse_IBL(in float3 N) +{ + return IrradianceTexture.Sample(IBLSampler, N); +} + +// Approximate specular image based lighting by sampling radiance map at lower mips +// according to roughness, then modulating by Fresnel term. +float3 Specular_IBL(in float3 N, in float3 V, in float lodBias) +{ + float mip = lodBias * NumRadianceMipLevels; + float3 dir = reflect(-V, N); + return RadianceTexture.SampleLevel(IBLSampler, dir, mip); +} + +// Apply Disney-style physically based rendering to a surface with: +// +// V, N: Eye and surface normals +// +// numLights: Number of directional lights. +// +// lightColor[]: Color and intensity of directional light. +// +// lightDirection[]: Light direction. 
+float3 LightSurface( + in float3 V, in float3 N, + in int numLights, in float3 lightColor[3], in float3 lightDirection[3], + in float3 albedo, in float roughness, in float metallic, in float ambientOcclusion) +{ + // Specular coefficient - fixed reflectance value for non-metals + static const float kSpecularCoefficient = 0.04; + + const float NdotV = saturate(dot(N, V)); + + // Burley roughness bias + const float alpha = roughness * roughness; + + // Blend base colors + const float3 c_diff = lerp(albedo, float3(0, 0, 0), metallic) * ambientOcclusion; + const float3 c_spec = lerp(kSpecularCoefficient, albedo, metallic) * ambientOcclusion; + + // Output color + float3 acc_color = 0; + + // Accumulate light values + for (int i = 0; i < numLights; i++) + { + // light vector (to light) + const float3 L = normalize(-lightDirection[i]); + + // Half vector + const float3 H = normalize(L + V); + + // products + const float NdotL = saturate(dot(N, L)); + const float LdotH = saturate(dot(L, H)); + const float NdotH = saturate(dot(N, H)); + + // Diffuse & specular factors + float diffuse_factor = Diffuse_Burley(NdotL, NdotV, LdotH, roughness); + float3 specular = Specular_BRDF(alpha, c_spec, NdotV, NdotL, LdotH, NdotH); + + // Directional light + acc_color += NdotL * lightColor[i] * (((c_diff * diffuse_factor) + specular)); + } + + // Add diffuse irradiance + float3 diffuse_env = Diffuse_IBL(N); + acc_color += c_diff * diffuse_env; + + // Add specular radiance + float3 specular_env = Specular_IBL(N, V, roughness); + acc_color += c_spec * specular_env; + + return acc_color; +} diff --git a/Sdk/External/DirectXTK/Src/Shaders/PBREffect.fx b/Sdk/External/DirectXTK/Src/Shaders/PBREffect.fx new file mode 100644 index 0000000..0f98b3f --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/PBREffect.fx @@ -0,0 +1,272 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License.
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 + + +Texture2D AlbedoTexture : register(t0); +Texture2D NormalTexture : register(t1); +Texture2D RMATexture : register(t2); + +Texture2D EmissiveTexture : register(t3); + +TextureCube RadianceTexture : register(t4); +TextureCube IrradianceTexture : register(t5); + +sampler SurfaceSampler : register(s0); +sampler IBLSampler : register(s1); + +cbuffer Constants : register(b0) +{ + float3 EyePosition : packoffset(c0); + float4x4 World : packoffset(c1); + float3x3 WorldInverseTranspose : packoffset(c5); + float4x4 WorldViewProj : packoffset(c8); + float4x4 PrevWorldViewProj : packoffset(c12); + + float3 LightDirection[3] : packoffset(c16); + float3 LightColor[3] : packoffset(c19); // "Specular and diffuse light" in PBR + + float3 ConstantAlbedo : packoffset(c22); // Constant values if not a textured effect + float Alpha : packoffset(c22.w); + float ConstantMetallic : packoffset(c23.x); + float ConstantRoughness : packoffset(c23.y); + + int NumRadianceMipLevels : packoffset(c23.z); + + // Size of render target + float TargetWidth : packoffset(c23.w); + float TargetHeight : packoffset(c24.x); +}; + + +#include "Structures.fxh" +#include "PBRCommon.fxh" +#include "Utilities.fxh" + + +// Vertex shader: pbr +VSOutputPixelLightingTx VSConstant(VSInputNmTx vin) +{ + VSOutputPixelLightingTx vout; + + CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, vin.Normal); + + vout.PositionPS = cout.Pos_ps; + vout.PositionWS = float4(cout.Pos_ws, 1); + vout.NormalWS = cout.Normal_ws; + vout.Diffuse = float4(ConstantAlbedo, Alpha); + vout.TexCoord = vin.TexCoord; + + return vout; +} + + +// Vertex shader: pbr + velocity +VSOut_Velocity VSConstantVelocity(VSInputNmTx vin) +{ + VSOut_Velocity vout; + + CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, vin.Normal); + + vout.current.PositionPS = cout.Pos_ps; + vout.current.PositionWS = float4(cout.Pos_ws, 1); + 
vout.current.NormalWS = cout.Normal_ws; + vout.current.Diffuse = float4(ConstantAlbedo, Alpha); + vout.current.TexCoord = vin.TexCoord; + vout.prevPosition = mul(vin.Position, PrevWorldViewProj); + + return vout; +} + + +// Vertex shader: pbr (biased normal) +VSOutputPixelLightingTx VSConstantBn(VSInputNmTx vin) +{ + VSOutputPixelLightingTx vout; + + float3 normal = BiasX2(vin.Normal); + + CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, normal); + + vout.PositionPS = cout.Pos_ps; + vout.PositionWS = float4(cout.Pos_ws, 1); + vout.NormalWS = cout.Normal_ws; + vout.Diffuse = float4(ConstantAlbedo, Alpha); + vout.TexCoord = vin.TexCoord; + + return vout; +} + + +// Vertex shader: pbr + velocity (biased normal) +VSOut_Velocity VSConstantVelocityBn(VSInputNmTx vin) +{ + VSOut_Velocity vout; + + float3 normal = BiasX2(vin.Normal); + + CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, normal); + + vout.current.PositionPS = cout.Pos_ps; + vout.current.PositionWS = float4(cout.Pos_ws, 1); + vout.current.NormalWS = cout.Normal_ws; + vout.current.Diffuse = float4(ConstantAlbedo, Alpha); + vout.current.TexCoord = vin.TexCoord; + + vout.prevPosition = mul(vin.Position, PrevWorldViewProj); + + return vout; +} + + +// Pixel shader: pbr (constants) + image-based lighting +float4 PSConstant(PSInputPixelLightingTx pin) : SV_Target0 +{ + // vectors + const float3 V = normalize(EyePosition - pin.PositionWS.xyz); // view vector + const float3 N = normalize(pin.NormalWS); // surface normal + const float AO = 1; // ambient term + + float3 color = LightSurface(V, N, 3, + LightColor, LightDirection, + ConstantAlbedo, ConstantRoughness, ConstantMetallic, AO); + + return float4(color, Alpha); +} + + +// Pixel shader: pbr (textures) + image-based lighting +float4 PSTextured(PSInputPixelLightingTx pin) : SV_Target0 +{ + const float3 V = normalize(EyePosition - pin.PositionWS.xyz); // view vector + const float3 L = 
normalize(-LightDirection[0]); // light vector ("to light" opposite of light's direction) + + // Before lighting, perturb the surface's normal by the one given in normal map. + float3 localNormal = TwoChannelNormalX2(NormalTexture.Sample(SurfaceSampler, pin.TexCoord).xy); + float3 N = PeturbNormal(localNormal, pin.PositionWS.xyz, pin.NormalWS, pin.TexCoord); + + // Get albedo + float4 albedo = AlbedoTexture.Sample(SurfaceSampler, pin.TexCoord); + + // Get roughness, metalness, and ambient occlusion + float3 RMA = RMATexture.Sample(SurfaceSampler, pin.TexCoord); + + // glTF2 defines metalness as B channel, roughness as G channel, and occlusion as R channel + + // Shade surface + float3 color = LightSurface(V, N, 3, LightColor, LightDirection, albedo.rgb, RMA.g, RMA.b, RMA.r); + + return float4(color, albedo.w * Alpha); +} + + +// Pixel shader: pbr (textures) + emissive + image-based lighting +float4 PSTexturedEmissive(PSInputPixelLightingTx pin) : SV_Target0 +{ + const float3 V = normalize(EyePosition - pin.PositionWS.xyz); // view vector + const float3 L = normalize(-LightDirection[0]); // light vector ("to light" opposite of light's direction) + + // Before lighting, perturb the surface's normal by the one given in normal map.
+ float3 localNormal = TwoChannelNormalX2(NormalTexture.Sample(SurfaceSampler, pin.TexCoord).xy); + float3 N = PeturbNormal(localNormal, pin.PositionWS.xyz, pin.NormalWS, pin.TexCoord); + + // Get albedo + float4 albedo = AlbedoTexture.Sample(SurfaceSampler, pin.TexCoord); + + // Get roughness, metalness, and ambient occlusion + float3 RMA = RMATexture.Sample(SurfaceSampler, pin.TexCoord); + + // glTF2 defines metalness as B channel, roughness as G channel, and occlusion as R channel + + // Shade surface + float3 color = LightSurface(V, N, 3, LightColor, LightDirection, albedo.rgb, RMA.g, RMA.b, RMA.r); + + color += EmissiveTexture.Sample(SurfaceSampler, pin.TexCoord).rgb; + + return float4(color, albedo.w * Alpha); +} + + +// Pixel shader: pbr (textures) + image-based lighting + velocity +#include "PixelPacking_Velocity.hlsli" + +struct PSOut_Velocity +{ + float4 color : SV_Target0; + packed_velocity_t velocity : SV_Target1; +}; + +PSOut_Velocity PSTexturedVelocity(VSOut_Velocity pin) +{ + PSOut_Velocity output; + + const float3 V = normalize(EyePosition - pin.current.PositionWS.xyz); // view vector + const float3 L = normalize(-LightDirection[0]); // light vector ("to light" opposite of light's direction) + + // Before lighting, perturb the surface's normal by the one given in normal map.
+ float3 localNormal = TwoChannelNormalX2(NormalTexture.Sample(SurfaceSampler, pin.current.TexCoord).xy); + float3 N = PeturbNormal(localNormal, pin.current.PositionWS.xyz, pin.current.NormalWS, pin.current.TexCoord); + + // Get albedo + float4 albedo = AlbedoTexture.Sample(SurfaceSampler, pin.current.TexCoord); + + // Get roughness, metalness, and ambient occlusion + float3 RMA = RMATexture.Sample(SurfaceSampler, pin.current.TexCoord); + + // glTF2 defines metalness as B channel, roughness as G channel, and occlusion as R channel + + // Shade surface + float3 color = LightSurface(V, N, 3, LightColor, LightDirection, albedo.rgb, RMA.g, RMA.b, RMA.r); + + output.color = float4(color, albedo.w * Alpha); + + // Calculate velocity of this point + float4 prevPos = pin.prevPosition; + prevPos.xyz /= prevPos.w; + prevPos.xy *= float2(0.5f, -0.5f); + prevPos.xy += 0.5f; + prevPos.xy *= float2(TargetWidth, TargetHeight); + + output.velocity = PackVelocity(prevPos.xyz - pin.current.PositionPS.xyz); + + return output; +} + +PSOut_Velocity PSTexturedEmissiveVelocity(VSOut_Velocity pin) +{ + PSOut_Velocity output; + + const float3 V = normalize(EyePosition - pin.current.PositionWS.xyz); // view vector + const float3 L = normalize(-LightDirection[0]); // light vector ("to light" opposite of light's direction) + + // Before lighting, perturb the surface's normal by the one given in normal map.
+ float3 localNormal = TwoChannelNormalX2(NormalTexture.Sample(SurfaceSampler, pin.current.TexCoord).xy); + float3 N = PeturbNormal(localNormal, pin.current.PositionWS.xyz, pin.current.NormalWS, pin.current.TexCoord); + + // Get albedo + float4 albedo = AlbedoTexture.Sample(SurfaceSampler, pin.current.TexCoord); + + // Get roughness, metalness, and ambient occlusion + float3 RMA = RMATexture.Sample(SurfaceSampler, pin.current.TexCoord); + + // glTF2 defines metalness as B channel, roughness as G channel, and occlusion as R channel + + // Shade surface + float3 color = LightSurface(V, N, 3, LightColor, LightDirection, albedo.rgb, RMA.g, RMA.b, RMA.r); + + color += EmissiveTexture.Sample(SurfaceSampler, pin.current.TexCoord).rgb; + + output.color = float4(color, albedo.w * Alpha); + + // Calculate velocity of this point + float4 prevPos = pin.prevPosition; + prevPos.xyz /= prevPos.w; + prevPos.xy *= float2(0.5f, -0.5f); + prevPos.xy += 0.5f; + prevPos.xy *= float2(TargetWidth, TargetHeight); + + output.velocity = PackVelocity(prevPos.xyz - pin.current.PositionPS.xyz); + + return output; +} diff --git a/Sdk/External/DirectXTK/Src/Shaders/PixelPacking_Velocity.hlsli b/Sdk/External/DirectXTK/Src/Shaders/PixelPacking_Velocity.hlsli new file mode 100644 index 0000000..d28c020 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/PixelPacking_Velocity.hlsli @@ -0,0 +1,95 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// This code is licensed under the MIT License (MIT). +// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY +// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR +// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. +// +// Developed by Minigraph +// +// Author: James Stanard +// + +#ifndef __PIXEL_PACKING_VELOCITY_HLSLI__ +#define __PIXEL_PACKING_VELOCITY_HLSLI__ + +#if 1 +// This is a custom packing that devotes 10 bits each to X and Y velocity but 12 bits to Z velocity. 
Floats +// are used instead of SNORM to increase precision around small deltas, which are the majority of deltas. +// With TAA and Motion Blur, velocities are clamped, giving little reason to express them precisely in terms +// of the size of the screen. +#define packed_velocity_t uint + +// Designed to compress (-256.0, +256.0) with a signed 6e3 float +uint PackXY( float x ) +{ + uint signbit = asuint(x) >> 31; + x = clamp(abs(x / 32768.0), 0, asfloat(0x3BFFE000)); + return (f32tof16(x) + 8) >> 4 | signbit << 9; +} + +float UnpackXY( uint x ) +{ + return f16tof32((x & 0x1FF) << 4 | (x >> 9) << 15) * 32768.0; +} + +// Designed to compress (-1.0, 1.0) with a signed 8e3 float +uint PackZ( float x ) +{ + uint signbit = asuint(x) >> 31; + x = clamp(abs(x / 128.0), 0, asfloat(0x3BFFE000)); + return (f32tof16(x) + 2) >> 2 | signbit << 11; +} + +float UnpackZ( uint x ) +{ + return f16tof32((x & 0x7FF) << 2 | (x >> 11) << 15) * 128.0; +} + +// Pack the velocity to write to R10G10B10A2_UNORM +packed_velocity_t PackVelocity( float3 Velocity ) +{ + return PackXY(Velocity.x) | PackXY(Velocity.y) << 10 | PackZ(Velocity.z) << 20; +} + +// Unpack the velocity from R10G10B10A2_UNORM +float3 UnpackVelocity( packed_velocity_t Velocity ) +{ + return float3(UnpackXY(Velocity & 0x3FF), UnpackXY((Velocity >> 10) & 0x3FF), UnpackZ(Velocity >> 20)); +} + +#elif 1 +#define packed_velocity_t float4 + +// Pack the velocity to write to R10G10B10A2_UNORM +packed_velocity_t PackVelocity( float3 Velocity ) +{ + // Stretch dx,dy from [-64, 63.875] to [-512, 511] to [-0.5, 0.5) to [0, 1) + // Velocity.xy = (0,0) must be representable. 
+ return float4(Velocity * float3(8, 8, 4096) / 1024.0 + 512 / 1023.0, 0); +} + +// Unpack the velocity from R10G10B10A2_UNORM +float3 UnpackVelocity( packed_velocity_t Velocity ) +{ + return (Velocity.xyz - 512.0 / 1023.0) * float3(1024, 1024, 2) / 8.0; +} +#else +#define packed_velocity_t float4 + +// Pack the velocity to write to R16G16B16A16_FLOAT +packed_velocity_t PackVelocity( float3 Velocity ) +{ + return float4(Velocity * float3(16, 16, 32*1024), 0); +} + +// Unpack the velocity from R16G16B16A16_FLOAT +float3 UnpackVelocity( packed_velocity_t Velocity ) +{ + return Velocity.xyz / float3(16, 16, 32*1024); +} + +#endif + +#endif // __PIXEL_PACKING_VELOCITY_HLSLI__ diff --git a/Sdk/External/DirectXTK/Src/Shaders/PostProcess.fx b/Sdk/External/DirectXTK/Src/Shaders/PostProcess.fx new file mode 100644 index 0000000..ee76824 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/PostProcess.fx @@ -0,0 +1,178 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 + +static const int MAX_SAMPLES = 16; + + +Texture2D Texture : register(t0); +sampler Sampler : register(s0); + + +cbuffer Parameters : register(b0) +{ + float4 sampleOffsets[MAX_SAMPLES]; + float4 sampleWeights[MAX_SAMPLES]; +}; + + +#include "Structures.fxh" + + +// Vertex shader: self-created quad. +VSInputTx VSQuad(uint vI : SV_VertexId) +{ + VSInputTx vout; + + // We use the 'big triangle' optimization so you only Draw 3 vertices instead of 4. + float2 texcoord = float2((vI << 1) & 2, vI & 2); + vout.TexCoord = texcoord; + + vout.Position = float4(texcoord.x * 2 - 1, -texcoord.y * 2 + 1, 0, 1); + return vout; +} + + +//-------------------------------------------------------------------------------------- +// Pixel shader: copy. +float4 PSCopy(VSInputTx pin) : SV_Target0 +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord); + return color; +} + + +// Pixel shader: monochrome.
+float4 PSMonochrome(VSInputTx pin) : SV_Target0 +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord); + float3 grayscale = float3(0.2125f, 0.7154f, 0.0721f); + float3 output = dot(color.rgb, grayscale); + return float4(output, color.a); +} + + +// Pixel shader: sepia. +float4 PSSepia(VSInputTx pin) : SV_Target0 +{ + float4 color = Texture.Sample(Sampler, pin.TexCoord); + + float3 red = float3(0.393f, 0.769f, 0.189f); + float3 green = float3(0.349f, 0.686f, 0.168f); + float3 blue = float3(0.272f, 0.534f, 0.131f); + + float3 output; + output.r = dot(color.rgb, red); + output.g = dot(color.rgb, green); + output.b = dot(color.rgb, blue); + return float4(output, color.a); +} + + +// Pixel shader: down-sample 2x2. +float4 PSDownScale2x2(VSInputTx pin) : SV_Target0 +{ + const int NUM_SAMPLES = 4; + float4 vColor = 0.0f; + + for( int i=0; i < NUM_SAMPLES; i++ ) + { + vColor += Texture.Sample(Sampler, pin.TexCoord + sampleOffsets[i].xy); + } + + return vColor / NUM_SAMPLES; +} + + +// Pixel shader: down-sample 4x4. +float4 PSDownScale4x4(VSInputTx pin) : SV_Target0 +{ + const int NUM_SAMPLES = 16; + float4 vColor = 0.0f; + + for (int i = 0; i < NUM_SAMPLES; i++) + { + vColor += Texture.Sample(Sampler, pin.TexCoord + sampleOffsets[i].xy); + } + + return vColor / NUM_SAMPLES; +} + + +// Pixel shader: gaussian blur 5x5. 
+float4 PSGaussianBlur5x5(VSInputTx pin) : SV_Target0 +{ + float4 vColor = 0.0f; + + for (int i = 0; i < 13; i++) + { + vColor += sampleWeights[i] * Texture.Sample(Sampler, pin.TexCoord + sampleOffsets[i].xy); + } + + return vColor; +} + + +// Pixel shader: bloom (extract) +float4 PSBloomExtract(VSInputTx pin) : SV_Target0 +{ + // Uses sampleWeights[0] as 'bloom threshold' + float4 c = Texture.Sample(Sampler, pin.TexCoord); + return saturate((c - sampleWeights[0]) / (1 - sampleWeights[0])); +} + + +// Pixel shader: bloom (blur) +float4 PSBloomBlur(VSInputTx pin) : SV_Target0 +{ + float4 vColor = 0.0f; + + // Perform a one-directional gaussian blur + for (int i = 0; i < 15; i++) + { + vColor += sampleWeights[i] * Texture.Sample(Sampler, pin.TexCoord + sampleOffsets[i].xy); + } + + return vColor; +} + + +//-------------------------------------------------------------------------------------- +Texture2D Texture2 : register(t1); + +// Pixel shader: merge +float4 PSMerge(VSInputTx pin) : SV_Target0 +{ + float4 vColor = sampleWeights[0] * Texture.Sample(Sampler, pin.TexCoord); + vColor += sampleWeights[1] * Texture2.Sample(Sampler, pin.TexCoord); + return vColor; +} + + +// Pixel shader: bloom (combine) +float4 AdjustSaturation(float4 color, float saturation) +{ + float3 grayscale = float3(0.2125f, 0.7154f, 0.0721f); + float gray = dot(color.rgb, grayscale); + return lerp(gray, color, saturation); +} + +float4 PSBloomCombine(VSInputTx pin) : SV_Target0 +{ + // Uses sampleWeights[0].x as base saturation, sampleWeights[0].y as bloom saturation + // Uses sampleWeights[1] as base intensity; sampleWeights[2] as bloom intensity + float4 base = Texture.Sample(Sampler, pin.TexCoord); + float4 bloom = Texture2.Sample(Sampler, pin.TexCoord); + + // Adjust color saturation and intensity. 
+ base = AdjustSaturation(base, sampleWeights[0].x) * sampleWeights[1]; + bloom = AdjustSaturation(bloom, sampleWeights[0].y) * sampleWeights[2]; + + // Darken down the base image in areas where there is a lot of bloom, + // to prevent things looking excessively burned-out. + base *= (1 - saturate(bloom)); + + // Combine the two images. + return base + bloom; +} diff --git a/Sdk/External/DirectXTK/Src/Shaders/SkinnedEffect.fx b/Sdk/External/DirectXTK/Src/Shaders/SkinnedEffect.fx new file mode 100644 index 0000000..2b482e1 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/SkinnedEffect.fx @@ -0,0 +1,389 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://create.msdn.com/en-US/education/catalog/sample/stock_effects + + +Texture2D Texture : register(t0); +sampler Sampler : register(s0); + + +cbuffer Parameters : register(b0) +{ + float4 DiffuseColor : packoffset(c0); + float3 EmissiveColor : packoffset(c1); + float3 SpecularColor : packoffset(c2); + float SpecularPower : packoffset(c2.w); + + float3 LightDirection[3] : packoffset(c3); + float3 LightDiffuseColor[3] : packoffset(c6); + float3 LightSpecularColor[3] : packoffset(c9); + + float3 EyePosition : packoffset(c12); + + float3 FogColor : packoffset(c13); + float4 FogVector : packoffset(c14); + + float4x4 World : packoffset(c15); + float3x3 WorldInverseTranspose : packoffset(c19); + float4x4 WorldViewProj : packoffset(c22); + + float4x3 Bones[72] : packoffset(c26); +}; + + +#include "Structures.fxh" +#include "Common.fxh" +#include "Lighting.fxh" +#include "Utilities.fxh" + + +float3 Skin(inout VSInputNmTxWeights vin, float3 normal, uniform int boneCount) +{ + float4x3 skinning = 0; + + [unroll] + for (int i = 0; i < boneCount; i++) + { + skinning += Bones[vin.Indices[i]] * vin.Weights[i]; + } + + vin.Position.xyz = mul(vin.Position, skinning); + return mul(normal, (float3x3)skinning); +} + + +// 
Vertex shader: vertex lighting, one bone. +VSOutputTx VSSkinnedVertexLightingOneBone(VSInputNmTxWeights vin) +{ + VSOutputTx vout; + + float3 normal = Skin(vin, vin.Normal, 1); + + CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 3); + SetCommonVSOutputParams; + + vout.TexCoord = vin.TexCoord; + + return vout; +} + +VSOutputTx VSSkinnedVertexLightingOneBoneBn(VSInputNmTxWeights vin) +{ + VSOutputTx vout; + + float3 normal = BiasX2(vin.Normal); + + normal = Skin(vin, normal, 1); + + CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 3); + SetCommonVSOutputParams; + + vout.TexCoord = vin.TexCoord; + + return vout; +} + + +// Vertex shader: vertex lighting, two bones. +VSOutputTx VSSkinnedVertexLightingTwoBones(VSInputNmTxWeights vin) +{ + VSOutputTx vout; + + float3 normal = Skin(vin, vin.Normal, 2); + + CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 3); + SetCommonVSOutputParams; + + vout.TexCoord = vin.TexCoord; + + return vout; +} + +VSOutputTx VSSkinnedVertexLightingTwoBonesBn(VSInputNmTxWeights vin) +{ + VSOutputTx vout; + + float3 normal = BiasX2(vin.Normal); + + normal = Skin(vin, normal, 2); + + CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 3); + SetCommonVSOutputParams; + + vout.TexCoord = vin.TexCoord; + + return vout; +} + + +// Vertex shader: vertex lighting, four bones. 
+VSOutputTx VSSkinnedVertexLightingFourBones(VSInputNmTxWeights vin)
+{
+    VSOutputTx vout;
+    // Skin() (defined above in SkinnedEffect.fx) sums 4 weighted bone matrices, overwrites vin.Position.xyz, returns the skinned normal.
+    float3 normal = Skin(vin, vin.Normal, 4);
+    // Last argument is 3 for "vertex lighting" and 1 for the "one light" shaders -- presumably the light count; TODO confirm.
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 3);
+    SetCommonVSOutputParams; // defined outside this chunk; takes no arguments, so presumably a macro that uses vout/cout by name
+
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+VSOutputTx VSSkinnedVertexLightingFourBonesBn(VSInputNmTxWeights vin)
+{
+    VSOutputTx vout;
+
+    float3 normal = BiasX2(vin.Normal);
+    // "Bn" variant: BiasX2 (Utilities.fxh) maps x to 2 * x - 1 before the normal is skinned.
+    normal = Skin(vin, normal, 4);
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 3);
+    SetCommonVSOutputParams;
+
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+
+// Vertex shader: one light, one bone. Skin() blends 1 bone matrix; the lighting helper is passed 1 instead of 3.
+VSOutputTx VSSkinnedOneLightOneBone(VSInputNmTxWeights vin)
+{
+    VSOutputTx vout;
+
+    float3 normal = Skin(vin, vin.Normal, 1);
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 1);
+    SetCommonVSOutputParams;
+
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+VSOutputTx VSSkinnedOneLightOneBoneBn(VSInputNmTxWeights vin)
+{
+    VSOutputTx vout;
+
+    float3 normal = BiasX2(vin.Normal);
+    // "Bn" variant: the normal goes through BiasX2 (2 * x - 1) before skinning.
+    normal = Skin(vin, normal, 1);
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 1);
+    SetCommonVSOutputParams;
+
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+
+// Vertex shader: one light, two bones.
+VSOutputTx VSSkinnedOneLightTwoBones(VSInputNmTxWeights vin)
+{
+    VSOutputTx vout;
+    // Skin() (defined above in SkinnedEffect.fx) sums 2 weighted bone matrices, overwrites vin.Position.xyz, returns the skinned normal.
+    float3 normal = Skin(vin, vin.Normal, 2);
+    // Last argument is 1 for "one light" and 3 for the "vertex lighting" shaders -- presumably the light count; TODO confirm.
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 1);
+    SetCommonVSOutputParams; // defined outside this chunk; takes no arguments, so presumably a macro that uses vout/cout by name
+
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+VSOutputTx VSSkinnedOneLightTwoBonesBn(VSInputNmTxWeights vin)
+{
+    VSOutputTx vout;
+
+    float3 normal = BiasX2(vin.Normal);
+    // "Bn" variant: BiasX2 (Utilities.fxh) maps x to 2 * x - 1 before the normal is skinned.
+    normal = Skin(vin, normal, 2);
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 1);
+    SetCommonVSOutputParams;
+
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+// Vertex shader: one light, four bones. Same as the two-bone pair except Skin() blends 4 bone matrices.
+VSOutputTx VSSkinnedOneLightFourBones(VSInputNmTxWeights vin)
+{
+    VSOutputTx vout;
+
+    float3 normal = Skin(vin, vin.Normal, 4);
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 1);
+    SetCommonVSOutputParams;
+
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+VSOutputTx VSSkinnedOneLightFourBonesBn(VSInputNmTxWeights vin)
+{
+    VSOutputTx vout;
+
+    float3 normal = BiasX2(vin.Normal);
+    // "Bn" variant: the normal goes through BiasX2 (2 * x - 1) before skinning.
+    normal = Skin(vin, normal, 4);
+
+    CommonVSOutput cout = ComputeCommonVSOutputWithLighting(vin.Position, normal, 1);
+    SetCommonVSOutputParams;
+
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+
+// Vertex shader: pixel lighting, one bone.
+VSOutputPixelLightingTx VSSkinnedPixelLightingOneBone(VSInputNmTxWeights vin)
+{
+    VSOutputPixelLightingTx vout;
+    // Skin() (defined above in SkinnedEffect.fx) sums 1 weighted bone matrix, overwrites vin.Position.xyz, returns the skinned normal.
+    float3 normal = Skin(vin, vin.Normal, 1);
+    // No light count is passed here; the matching pixel shader (PSSkinnedPixelLighting) calls ComputeLights itself.
+    CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, normal);
+    SetCommonVSOutputParamsPixelLighting; // defined outside this chunk; takes no arguments, so presumably a macro using vout/cout by name
+
+    vout.Diffuse = float4(1, 1, 1, DiffuseColor.a); // RGB fixed at 1; only the material alpha is forwarded
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+VSOutputPixelLightingTx VSSkinnedPixelLightingOneBoneBn(VSInputNmTxWeights vin)
+{
+    VSOutputPixelLightingTx vout;
+
+    float3 normal = BiasX2(vin.Normal);
+    // "Bn" variant: BiasX2 (Utilities.fxh) maps x to 2 * x - 1 before the normal is skinned.
+    normal = Skin(vin, normal, 1);
+
+    CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, normal);
+    SetCommonVSOutputParamsPixelLighting;
+
+    vout.Diffuse = float4(1, 1, 1, DiffuseColor.a);
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+
+// Vertex shader: pixel lighting, two bones. Same as the one-bone pair except Skin() blends 2 bone matrices.
+VSOutputPixelLightingTx VSSkinnedPixelLightingTwoBones(VSInputNmTxWeights vin)
+{
+    VSOutputPixelLightingTx vout;
+
+    float3 normal = Skin(vin, vin.Normal, 2);
+
+    CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, normal);
+    SetCommonVSOutputParamsPixelLighting;
+
+    vout.Diffuse = float4(1, 1, 1, DiffuseColor.a);
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+VSOutputPixelLightingTx VSSkinnedPixelLightingTwoBonesBn(VSInputNmTxWeights vin)
+{
+    VSOutputPixelLightingTx vout;
+
+    float3 normal = BiasX2(vin.Normal);
+    // "Bn" variant: the normal goes through BiasX2 (2 * x - 1) before skinning.
+    normal = Skin(vin, normal, 2);
+
+    CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, normal);
+    SetCommonVSOutputParamsPixelLighting;
+
+    vout.Diffuse = float4(1, 1, 1, DiffuseColor.a);
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+
+// Vertex shader: pixel lighting, four bones.
+VSOutputPixelLightingTx VSSkinnedPixelLightingFourBones(VSInputNmTxWeights vin)
+{
+    VSOutputPixelLightingTx vout;
+    // Skin() (defined above in SkinnedEffect.fx) sums 4 weighted bone matrices, overwrites vin.Position.xyz, returns the skinned normal.
+    float3 normal = Skin(vin, vin.Normal, 4);
+
+    CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, normal);
+    SetCommonVSOutputParamsPixelLighting; // defined outside this chunk; takes no arguments, so presumably a macro using vout/cout by name
+
+    vout.Diffuse = float4(1, 1, 1, DiffuseColor.a); // RGB fixed at 1; only the material alpha is forwarded
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+VSOutputPixelLightingTx VSSkinnedPixelLightingFourBonesBn(VSInputNmTxWeights vin)
+{
+    VSOutputPixelLightingTx vout;
+
+    float3 normal = BiasX2(vin.Normal);
+    // "Bn" variant: BiasX2 (Utilities.fxh) maps x to 2 * x - 1 before the normal is skinned.
+    normal = Skin(vin, normal, 4);
+
+    CommonVSOutputPixelLighting cout = ComputeCommonVSOutputPixelLighting(vin.Position, normal);
+    SetCommonVSOutputParamsPixelLighting;
+
+    vout.Diffuse = float4(1, 1, 1, DiffuseColor.a);
+    vout.TexCoord = vin.TexCoord;
+
+    return vout;
+}
+
+
+// Pixel shader: vertex lighting. Modulates the texture sample by the interpolated diffuse, then adds specular and fog.
+float4 PSSkinnedVertexLighting(PSInputTx pin) : SV_Target0
+{
+    float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse;
+    // AddSpecular/ApplyFog are defined outside this chunk; their results are not assigned, so they presumably update color in place.
+    AddSpecular(color, pin.Specular.rgb);
+    ApplyFog(color, pin.Specular.w); // the fog argument travels in the w channel of Specular
+
+    return color;
+}
+
+
+// Pixel shader: vertex lighting, no fog. Identical to PSSkinnedVertexLighting minus the ApplyFog call.
+float4 PSSkinnedVertexLightingNoFog(PSInputTx pin) : SV_Target0
+{
+    float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse;
+
+    AddSpecular(color, pin.Specular.rgb);
+
+    return color;
+}
+
+
+// Pixel shader: pixel lighting.
+float4 PSSkinnedPixelLighting(PSInputPixelLightingTx pin) : SV_Target0
+{
+    float4 color = Texture.Sample(Sampler, pin.TexCoord) * pin.Diffuse;
+    // Lighting is evaluated per pixel: the view vector and normal are rebuilt from the interpolated world-space inputs.
+    float3 eyeVector = normalize(EyePosition - pin.PositionWS.xyz);
+    float3 worldNormal = normalize(pin.NormalWS);
+
+    ColorPair lightResult = ComputeLights(eyeVector, worldNormal, 3);
+
+    color.rgb *= lightResult.Diffuse;
+    // AddSpecular/ApplyFog are defined outside this chunk; the fog argument travels in PositionWS.w.
+    AddSpecular(color, lightResult.Specular);
+    ApplyFog(color, pin.PositionWS.w);
+
+    return color;
+}
diff --git a/Sdk/External/DirectXTK/Src/Shaders/SpriteEffect.fx b/Sdk/External/DirectXTK/Src/Shaders/SpriteEffect.fx
new file mode 100644
index 0000000..f885771
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/Shaders/SpriteEffect.fx
@@ -0,0 +1,30 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248929
+// http://create.msdn.com/en-US/education/catalog/sample/stock_effects
+
+
+Texture2D Texture : register(t0);
+sampler TextureSampler : register(s0);
+
+// Single constant: the matrix applied to every sprite vertex position (declared row_major).
+cbuffer Parameters : register(b0)
+{
+    row_major float4x4 MatrixTransform;
+};
+
+// Vertex shader: multiplies position by MatrixTransform; color and texCoord are inout and pass through unchanged.
+void SpriteVertexShader(inout float4 color : COLOR0,
+                        inout float2 texCoord : TEXCOORD0,
+                        inout float4 position : SV_Position)
+{
+    position = mul(position, MatrixTransform);
+}
+
+// Pixel shader: samples Texture at texCoord and multiplies the sample by the interpolated vertex color.
+float4 SpritePixelShader(float4 color : COLOR0,
+                         float2 texCoord : TEXCOORD0) : SV_Target0
+{
+    return Texture.Sample(TextureSampler, texCoord) * color;
+}
diff --git a/Sdk/External/DirectXTK/Src/Shaders/Structures.fxh b/Sdk/External/DirectXTK/Src/Shaders/Structures.fxh
new file mode 100644
index 0000000..34e5e72
--- /dev/null
+++ b/Sdk/External/DirectXTK/Src/Shaders/Structures.fxh
@@ -0,0 +1,227 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248926
+// http://go.microsoft.com/fwlink/?LinkId=248929
+// http://go.microsoft.com/fwlink/?LinkID=615561
+// http://create.msdn.com/en-US/education/catalog/sample/stock_effects
+
+
+// Vertex shader input structures.
+// Name suffixes follow the extra fields: Nm = Normal, Tx = TexCoord, Tx2 = TexCoord + TexCoord2, Vc = Color, Weights = Indices + Weights.
+struct VSInput
+{
+    float4 Position : SV_Position;
+};
+
+struct VSInputVc
+{
+    float4 Position : SV_Position;
+    float4 Color : COLOR;
+};
+
+struct VSInputTx
+{
+    float4 Position : SV_Position;
+    float2 TexCoord : TEXCOORD0;
+};
+
+struct VSInputTxVc
+{
+    float4 Position : SV_Position;
+    float2 TexCoord : TEXCOORD0;
+    float4 Color : COLOR;
+};
+
+struct VSInputNm
+{
+    float4 Position : SV_Position;
+    float3 Normal : NORMAL;
+};
+
+struct VSInputNmVc
+{
+    float4 Position : SV_Position;
+    float3 Normal : NORMAL;
+    float4 Color : COLOR;
+};
+
+struct VSInputNmTx
+{
+    float4 Position : SV_Position;
+    float3 Normal : NORMAL;
+    float2 TexCoord : TEXCOORD0;
+};
+
+struct VSInputNmTxVc
+{
+    float4 Position : SV_Position;
+    float3 Normal : NORMAL;
+    float2 TexCoord : TEXCOORD0;
+    float4 Color : COLOR;
+};
+
+struct VSInputTx2
+{
+    float4 Position : SV_Position;
+    float2 TexCoord : TEXCOORD0;
+    float2 TexCoord2 : TEXCOORD1;
+};
+
+struct VSInputTx2Vc
+{
+    float4 Position : SV_Position;
+    float2 TexCoord : TEXCOORD0;
+    float2 TexCoord2 : TEXCOORD1;
+    float4 Color : COLOR;
+};
+
+struct VSInputNmTxWeights
+{
+    float4 Position : SV_Position;
+    float3 Normal : NORMAL;
+    float2 TexCoord : TEXCOORD0;
+    uint4 Indices : BLENDINDICES0; // used by Skin() in SkinnedEffect.fx to index Bones[]
+    float4 Weights : BLENDWEIGHT0; // per-bone scale applied to Bones[Indices[i]] in Skin()
+};
+
+
+
+// Vertex shader output structures.
+// Every struct here ends with PositionPS : SV_Position. "NoFog" variants drop Specular; "PixelLighting" variants carry PositionWS/NormalWS.
+struct VSOutput
+{
+    float4 Diffuse : COLOR0;
+    float4 Specular : COLOR1;
+    float4 PositionPS : SV_Position;
+};
+
+struct VSOutputNoFog
+{
+    float4 Diffuse : COLOR0;
+    float4 PositionPS : SV_Position;
+};
+
+struct VSOutputTx
+{
+    float4 Diffuse : COLOR0;
+    float4 Specular : COLOR1; // SkinnedEffect.fx pixel shaders read .rgb as specular and pass .w to ApplyFog
+    float2 TexCoord : TEXCOORD0;
+    float4 PositionPS : SV_Position;
+};
+
+struct VSOutputTxNoFog
+{
+    float4 Diffuse : COLOR0;
+    float2 TexCoord : TEXCOORD0;
+    float4 PositionPS : SV_Position;
+};
+
+struct VSOutputPixelLighting
+{
+    float4 PositionWS : TEXCOORD0;
+    float3 NormalWS : TEXCOORD1;
+    float4 Diffuse : COLOR0;
+    float4 PositionPS : SV_Position;
+};
+
+struct VSOutputPixelLightingTx
+{
+    float2 TexCoord : TEXCOORD0;
+    float4 PositionWS : TEXCOORD1; // PSSkinnedPixelLighting reads .xyz as position and passes .w to ApplyFog
+    float3 NormalWS : TEXCOORD2;
+    float4 Diffuse : COLOR0;
+    float4 PositionPS : SV_Position;
+};
+
+struct VSOutputTx2
+{
+    float4 Diffuse : COLOR0;
+    float4 Specular : COLOR1;
+    float2 TexCoord : TEXCOORD0;
+    float2 TexCoord2 : TEXCOORD1;
+    float4 PositionPS : SV_Position;
+};
+
+struct VSOutputTx2NoFog
+{
+    float4 Diffuse : COLOR0;
+    float2 TexCoord : TEXCOORD0;
+    float2 TexCoord2 : TEXCOORD1;
+    float4 PositionPS : SV_Position;
+};
+
+struct VSOutputTxEnvMap
+{
+    float4 Diffuse : COLOR0;
+    float4 Specular : COLOR1;
+    float2 TexCoord : TEXCOORD0;
+    float3 EnvCoord : TEXCOORD1;
+    float4 PositionPS : SV_Position;
+};
+
+
+
+// Pixel shader input structures.
+ +struct PSInput +{ + float4 Diffuse : COLOR0; + float4 Specular : COLOR1; +}; + +struct PSInputNoFog +{ + float4 Diffuse : COLOR0; +}; + +struct PSInputTx +{ + float4 Diffuse : COLOR0; + float4 Specular : COLOR1; + float2 TexCoord : TEXCOORD0; +}; + +struct PSInputTxNoFog +{ + float4 Diffuse : COLOR0; + float2 TexCoord : TEXCOORD0; +}; + +struct PSInputPixelLighting +{ + float4 PositionWS : TEXCOORD0; + float3 NormalWS : TEXCOORD1; + float4 Diffuse : COLOR0; +}; + +struct PSInputPixelLightingTx +{ + float2 TexCoord : TEXCOORD0; + float4 PositionWS : TEXCOORD1; + float3 NormalWS : TEXCOORD2; + float4 Diffuse : COLOR0; +}; + +struct PSInputTx2 +{ + float4 Diffuse : COLOR0; + float4 Specular : COLOR1; + float2 TexCoord : TEXCOORD0; + float2 TexCoord2 : TEXCOORD1; +}; + +struct PSInputTx2NoFog +{ + float4 Diffuse : COLOR0; + float2 TexCoord : TEXCOORD0; + float2 TexCoord2 : TEXCOORD1; +}; + +struct PSInputTxEnvMap +{ + float4 Diffuse : COLOR0; + float4 Specular : COLOR1; + float2 TexCoord : TEXCOORD0; + float3 EnvCoord : TEXCOORD1; +}; diff --git a/Sdk/External/DirectXTK/Src/Shaders/ToneMap.fx b/Sdk/External/DirectXTK/Src/Shaders/ToneMap.fx new file mode 100644 index 0000000..e055250 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/ToneMap.fx @@ -0,0 +1,228 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 + +Texture2D HDRTexture : register(t0); +sampler Sampler : register(s0); + + +cbuffer Parameters : register(b0) +{ + float linearExposure : packoffset(c0.x); + float paperWhiteNits : packoffset(c0.y); +}; + + + +#include "Structures.fxh" +#include "Utilities.fxh" + + +// Vertex shader: self-created quad. +VSInputTx VSQuad(uint vI : SV_VertexId) +{ + VSInputTx vout; + + // We use the 'big triangle' optimization so you only Draw 3 vertices instead of 4. 
+ float2 texcoord = float2((vI << 1) & 2, vI & 2); + vout.TexCoord = texcoord; + + vout.Position = float4(texcoord.x * 2 - 1, -texcoord.y * 2 + 1, 0, 1); + return vout; +} + + +//-------------------------------------------------------------------------------------- +// Pixel shader: pass-through +float4 PSCopy(VSInputTx pin) : SV_Target0 +{ + return HDRTexture.Sample(Sampler, pin.TexCoord); +} + + +// Pixel shader: saturate (clips above 1.0) +float4 PSSaturate(VSInputTx pin) : SV_Target0 +{ + float4 hdr = HDRTexture.Sample(Sampler, pin.TexCoord); + float3 sdr = saturate(hdr.xyz * linearExposure); + return float4(sdr, hdr.a); +} + + +// Pixel shader: reinhard operator +float4 PSReinhard(VSInputTx pin) : SV_Target0 +{ + float4 hdr = HDRTexture.Sample(Sampler, pin.TexCoord); + float3 sdr = ToneMapReinhard(hdr.xyz * linearExposure); + return float4(sdr, hdr.a); +} + + +// Pixel shader: ACES filmic operator +float4 PSACESFilmic(VSInputTx pin) : SV_Target0 +{ + float4 hdr = HDRTexture.Sample(Sampler, pin.TexCoord); + float3 sdr = ToneMapACESFilmic(hdr.xyz * linearExposure); + return float4(sdr, hdr.a); +} + + +//-------------------------------------------------------------------------------------- +// SRGB, using Rec.709 color primaries and a gamma 2.2 curve + +// Pixel shader: sRGB +float4 PS_SRGB(VSInputTx pin) : SV_Target0 +{ + float4 hdr = HDRTexture.Sample(Sampler, pin.TexCoord); + float3 srgb = LinearToSRGBEst(hdr.xyz); + return float4(srgb, hdr.a); +} + + +// Pixel shader: saturate (clips above 1.0) +float4 PSSaturate_SRGB(VSInputTx pin) : SV_Target0 +{ + float4 hdr = HDRTexture.Sample(Sampler, pin.TexCoord); + float3 sdr = saturate(hdr.xyz * linearExposure); + float3 srgb = LinearToSRGBEst(sdr); + return float4(srgb, hdr.a); +} + + +// Pixel shader: reinhard operator +float4 PSReinhard_SRGB(VSInputTx pin) : SV_Target0 +{ + float4 hdr = HDRTexture.Sample(Sampler, pin.TexCoord); + float3 sdr = ToneMapReinhard(hdr.xyz * linearExposure); + float3 srgb = 
LinearToSRGBEst(sdr); + return float4(srgb, hdr.a); +} + + +// Pixel shader: ACES filmic operator +float4 PSACESFilmic_SRGB(VSInputTx pin) : SV_Target0 +{ + float4 hdr = HDRTexture.Sample(Sampler, pin.TexCoord); + float3 sdr = ToneMapACESFilmic(hdr.xyz * linearExposure); + float3 srgb = LinearToSRGBEst(sdr); + return float4(srgb, hdr.a); +} + + +//-------------------------------------------------------------------------------------- +// HDR10, using Rec.2020 color primaries and ST.2084 curve + +float3 HDR10(float3 color) +{ + // Rotate from Rec.709 to Rec.2020 primaries + float3 rgb = mul(from709to2020, color); + + // ST.2084 spec defines max nits as 10,000 nits + float3 normalized = rgb * paperWhiteNits / 10000.f; + + // Apply ST.2084 curve + return LinearToST2084(normalized); +} + +float4 PSHDR10(VSInputTx pin) : SV_Target0 +{ + float4 hdr = HDRTexture.Sample(Sampler, pin.TexCoord); + float3 rgb = HDR10(hdr.xyz); + return float4(rgb, hdr.a); +} + + +//-------------------------------------------------------------------------------------- +struct MRTOut +{ + float4 hdr : SV_Target0; + float4 sdr : SV_Target1; +}; + +MRTOut PSHDR10_Saturate(VSInputTx pin) +{ + MRTOut output; + + float4 hdr = HDRTexture.Sample(Sampler, pin.TexCoord); + float3 rgb = HDR10(hdr.xyz); + output.hdr = float4(rgb, hdr.a); + + float3 sdr = saturate(hdr.xyz * linearExposure); + output.sdr = float4(sdr, hdr.a); + + return output; +} + +MRTOut PSHDR10_Reinhard(VSInputTx pin) +{ + MRTOut output; + + float4 hdr = HDRTexture.Sample(Sampler, pin.TexCoord); + float3 rgb = HDR10(hdr.xyz); + output.hdr = float4(rgb, hdr.a); + + float3 sdr = ToneMapReinhard(hdr.xyz * linearExposure); + output.sdr = float4(sdr, hdr.a); + + return output; +} + +MRTOut PSHDR10_ACESFilmic(VSInputTx pin) +{ + MRTOut output; + + float4 hdr = HDRTexture.Sample(Sampler, pin.TexCoord); + float3 rgb = HDR10(hdr.xyz); + output.hdr = float4(rgb, hdr.a); + + float3 sdr = ToneMapACESFilmic(hdr.xyz * linearExposure); + output.sdr = 
float4(sdr, hdr.a); + + return output; +} + +MRTOut PSHDR10_Saturate_SRGB(VSInputTx pin) +{ + MRTOut output; + + float4 hdr = HDRTexture.Sample(Sampler, pin.TexCoord); + float3 rgb = HDR10(hdr.xyz); + output.hdr = float4(rgb, hdr.a); + + float3 sdr = saturate(hdr.xyz * linearExposure); + float3 srgb = LinearToSRGBEst(sdr); + output.sdr = float4(srgb, hdr.a); + + return output; +} + +MRTOut PSHDR10_Reinhard_SRGB(VSInputTx pin) +{ + MRTOut output; + + float4 hdr = HDRTexture.Sample(Sampler, pin.TexCoord); + float3 rgb = HDR10(hdr.xyz); + output.hdr = float4(rgb, hdr.a); + + float3 sdr = ToneMapReinhard(hdr.xyz * linearExposure); + float3 srgb = LinearToSRGBEst(sdr); + output.sdr = float4(srgb, hdr.a); + + return output; +} + +MRTOut PSHDR10_ACESFilmic_SRGB(VSInputTx pin) +{ + MRTOut output; + + float4 hdr = HDRTexture.Sample(Sampler, pin.TexCoord); + float3 rgb = HDR10(hdr.xyz); + output.hdr = float4(rgb, hdr.a); + + float3 sdr = ToneMapACESFilmic(hdr.xyz * linearExposure); + float3 srgb = LinearToSRGBEst(sdr); + output.sdr = float4(srgb, hdr.a); + + return output; +} diff --git a/Sdk/External/DirectXTK/Src/Shaders/Utilities.fxh b/Sdk/External/DirectXTK/Src/Shaders/Utilities.fxh new file mode 100644 index 0000000..49ea06c --- /dev/null +++ b/Sdk/External/DirectXTK/Src/Shaders/Utilities.fxh @@ -0,0 +1,113 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248926 +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 + + +float3 BiasX2(float3 x) +{ + return 2.0f * x - 1.0f; +} + +float3 BiasD2(float3 x) +{ + return 0.5f * x + 0.5f; +} + + +// Christian Schuler, "Normal Mapping without Precomputed Tangents", ShaderX 5, Chapter 2.6, pp. 
131-140 +// See also follow-up blog post: http://www.thetenthplanet.de/archives/1180 +float3x3 CalculateTBN(float3 p, float3 n, float2 tex) +{ + float3 dp1 = ddx(p); + float3 dp2 = ddy(p); + float2 duv1 = ddx(tex); + float2 duv2 = ddy(tex); + + float3x3 M = float3x3(dp1, dp2, cross(dp1, dp2)); + float2x3 inverseM = float2x3(cross(M[1], M[2]), cross(M[2], M[0])); + float3 t = normalize(mul(float2(duv1.x, duv2.x), inverseM)); + float3 b = normalize(mul(float2(duv1.y, duv2.y), inverseM)); + return float3x3(t, b, n); +} + +float3 PeturbNormal(float3 localNormal, float3 position, float3 normal, float2 texCoord) +{ + const float3x3 TBN = CalculateTBN(position, normal, texCoord); + return normalize(mul(localNormal, TBN)); +} +// Expands a two-channel normal with 2 * n - 1 and rebuilds z; saturate() stops sqrt() returning NaN when dot(xy, xy) exceeds 1. +float3 TwoChannelNormalX2(float2 normal) +{ + float2 xy = 2.0f * normal - 1.0f; + float z = sqrt(saturate(1 - dot(xy, xy))); + return float3(xy.x, xy.y, z); +} + + +// sRGB +// https://en.wikipedia.org/wiki/SRGB + +// Apply the (approximate) sRGB curve to linear values +float3 LinearToSRGBEst(float3 color) +{ + return pow(abs(color), 1/2.2f); +} + + +// (Approximate) sRGB to linear +float3 SRGBToLinearEst(float3 srgb) +{ + return pow(abs(srgb), 2.2f); +} + + +// HDR10 Media Profile +// https://en.wikipedia.org/wiki/High-dynamic-range_video#HDR10 + + +// Color rotation matrix to rotate Rec.709 color primaries into Rec.2020 +static const float3x3 from709to2020 = +{ + { 0.6274040f, 0.3292820f, 0.0433136f }, + { 0.0690970f, 0.9195400f, 0.0113612f }, + { 0.0163916f, 0.0880132f, 0.8955950f } +}; + + +// Apply the ST.2084 curve to normalized linear values and outputs normalized non-linear values +float3 LinearToST2084(float3 normalizedLinearValue) +{ + return pow((0.8359375f + 18.8515625f * pow(abs(normalizedLinearValue), 0.1593017578f)) / (1.0f + 18.6875f * pow(abs(normalizedLinearValue), 0.1593017578f)), 78.84375f); +} + + +// ST.2084 to linear, resulting in a linear normalized value +float3 ST2084ToLinear(float3 ST2084) +{ + return pow(max(pow(abs(ST2084), 1.0f 
/ 78.84375f) - 0.8359375f, 0.0f) / (18.8515625f - 18.6875f * pow(abs(ST2084), 1.0f / 78.84375f)), 1.0f / 0.1593017578f); +} + + +// Reinhard tonemap operator +// Reinhard et al. "Photographic tone reproduction for digital images." ACM Transactions on Graphics. 21. 2002. +// http://www.cs.utah.edu/~reinhard/cdrom/tonemap.pdf +float3 ToneMapReinhard(float3 color) +{ + return color / (1.0f + color); +} + + +// ACES Filmic tonemap operator +// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/ +float3 ToneMapACESFilmic(float3 x) +{ + float a = 2.51f; + float b = 0.03f; + float c = 2.43f; + float d = 0.59f; + float e = 0.14f; + return saturate((x*(a*x+b))/(x*(c*x+d)+e)); +} diff --git a/Sdk/External/DirectXTK/Src/SharedResourcePool.h b/Sdk/External/DirectXTK/Src/SharedResourcePool.h new file mode 100644 index 0000000..4848c13 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/SharedResourcePool.h @@ -0,0 +1,109 @@ +//-------------------------------------------------------------------------------------- +// File: SharedResourcePool.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#pragma once + +#include +#include + +#include "PlatformHelpers.h" + + +namespace DirectX +{ + // Pool manager ensures that only a single TData instance is created for each unique TKey. + // This is used to avoid duplicate resource creation, so that for instance a caller can + // create any number of SpriteBatch instances, but these can internally share shaders and + // vertex buffer if more than one SpriteBatch uses the same underlying D3D device. 
+ template + class SharedResourcePool + { + public: + SharedResourcePool() noexcept(false) + : mResourceMap(std::make_shared()) + { } + + SharedResourcePool(SharedResourcePool const&) = delete; + SharedResourcePool& operator= (SharedResourcePool const&) = delete; + + // Allocates or looks up the shared TData instance for the specified key. + std::shared_ptr DemandCreate(TKey key, TConstructorArgs... args) + { + std::lock_guard lock(mResourceMap->mutex); + + // Return an existing instance? + auto pos = mResourceMap->find(key); + + if (pos != mResourceMap->end()) + { + auto existingValue = pos->second.lock(); + + if (existingValue) + return existingValue; + else + mResourceMap->erase(pos); + } + + // Allocate a new instance. + auto newValue = std::make_shared(key, mResourceMap, args...); + + auto entry = std::make_pair(key, newValue); + mResourceMap->insert(entry); + + return std::move(newValue); + } + + + private: + // Keep track of all allocated TData instances. + struct ResourceMap : public std::map> + { + std::mutex mutex; + }; + + std::shared_ptr mResourceMap; + + + // Wrap TData with our own subclass, so we can hook the destructor + // to remove instances from our pool before they are freed. + struct WrappedData : public TData + { + WrappedData(TKey key, std::shared_ptr const& resourceMap, TConstructorArgs... args) + : TData(key, args...), + mKey(key), + mResourceMap(resourceMap) + { } + + WrappedData(WrappedData&&) = default; + WrappedData& operator= (WrappedData&&) = default; + + WrappedData(WrappedData const&) = delete; + WrappedData& operator= (WrappedData const&) = delete; + + ~WrappedData() + { + std::lock_guard lock(mResourceMap->mutex); + + auto pos = mResourceMap->find(mKey); + + // Check for weak reference expiry before erasing, in case DemandCreate runs on + // a different thread at the same time as a previous instance is being destroyed. + // We mustn't erase replacement objects that have just been added! 
+ if (pos != mResourceMap->end() && pos->second.expired()) + { + mResourceMap->erase(pos); + } + } + + TKey mKey; + std::shared_ptr mResourceMap; + }; + }; +} diff --git a/Sdk/External/DirectXTK/Src/SimpleMath.cpp b/Sdk/External/DirectXTK/Src/SimpleMath.cpp new file mode 100644 index 0000000..76e03cb --- /dev/null +++ b/Sdk/External/DirectXTK/Src/SimpleMath.cpp @@ -0,0 +1,156 @@ +//------------------------------------------------------------------------------------- +// SimpleMath.cpp -- Simplified C++ Math wrapper for DirectXMath +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//------------------------------------------------------------------------------------- + +#include "pch.h" +#include "SimpleMath.h" + +/**************************************************************************** + * + * Constants + * + ****************************************************************************/ + +namespace DirectX +{ + namespace SimpleMath + { + const Vector2 Vector2::Zero = { 0.f, 0.f }; + const Vector2 Vector2::One = { 1.f, 1.f }; + const Vector2 Vector2::UnitX = { 1.f, 0.f }; + const Vector2 Vector2::UnitY = { 0.f, 1.f }; + + const Vector3 Vector3::Zero = { 0.f, 0.f, 0.f }; + const Vector3 Vector3::One = { 1.f, 1.f, 1.f }; + const Vector3 Vector3::UnitX = { 1.f, 0.f, 0.f }; + const Vector3 Vector3::UnitY = { 0.f, 1.f, 0.f }; + const Vector3 Vector3::UnitZ = { 0.f, 0.f, 1.f }; + const Vector3 Vector3::Up = { 0.f, 1.f, 0.f }; + const Vector3 Vector3::Down = { 0.f, -1.f, 0.f }; + const Vector3 Vector3::Right = { 1.f, 0.f, 0.f }; + const Vector3 Vector3::Left = { -1.f, 0.f, 0.f }; + const Vector3 Vector3::Forward = { 0.f, 0.f, -1.f }; + const Vector3 Vector3::Backward = { 0.f, 0.f, 1.f }; + + const Vector4 Vector4::Zero = { 0.f, 0.f, 0.f, 0.f }; + const Vector4 Vector4::One = { 1.f, 1.f, 1.f, 1.f }; + const Vector4 
Vector4::UnitX = { 1.f, 0.f, 0.f, 0.f }; + const Vector4 Vector4::UnitY = { 0.f, 1.f, 0.f, 0.f }; + const Vector4 Vector4::UnitZ = { 0.f, 0.f, 1.f, 0.f }; + const Vector4 Vector4::UnitW = { 0.f, 0.f, 0.f, 1.f }; + + const Matrix Matrix::Identity = { 1.f, 0.f, 0.f, 0.f, + 0.f, 1.f, 0.f, 0.f, + 0.f, 0.f, 1.f, 0.f, + 0.f, 0.f, 0.f, 1.f }; + + const Quaternion Quaternion::Identity = { 0.f, 0.f, 0.f, 1.f }; + } +} + + +/**************************************************************************** + * + * Viewport + * + ****************************************************************************/ + +#if defined(__d3d11_h__) || defined(__d3d11_x_h__) +static_assert(sizeof(DirectX::SimpleMath::Viewport) == sizeof(D3D11_VIEWPORT), "Size mismatch"); +static_assert(offsetof(DirectX::SimpleMath::Viewport, x) == offsetof(D3D11_VIEWPORT, TopLeftX), "Layout mismatch"); +static_assert(offsetof(DirectX::SimpleMath::Viewport, y) == offsetof(D3D11_VIEWPORT, TopLeftY), "Layout mismatch"); +static_assert(offsetof(DirectX::SimpleMath::Viewport, width) == offsetof(D3D11_VIEWPORT, Width), "Layout mismatch"); +static_assert(offsetof(DirectX::SimpleMath::Viewport, height) == offsetof(D3D11_VIEWPORT, Height), "Layout mismatch"); +static_assert(offsetof(DirectX::SimpleMath::Viewport, minDepth) == offsetof(D3D11_VIEWPORT, MinDepth), "Layout mismatch"); +static_assert(offsetof(DirectX::SimpleMath::Viewport, maxDepth) == offsetof(D3D11_VIEWPORT, MaxDepth), "Layout mismatch"); +#endif + +#if defined(__d3d12_h__) || defined(__d3d12_x_h__) || defined(__XBOX_D3D12_X__) +static_assert(sizeof(DirectX::SimpleMath::Viewport) == sizeof(D3D12_VIEWPORT), "Size mismatch"); +static_assert(offsetof(DirectX::SimpleMath::Viewport, x) == offsetof(D3D12_VIEWPORT, TopLeftX), "Layout mismatch"); +static_assert(offsetof(DirectX::SimpleMath::Viewport, y) == offsetof(D3D12_VIEWPORT, TopLeftY), "Layout mismatch"); +static_assert(offsetof(DirectX::SimpleMath::Viewport, width) == offsetof(D3D12_VIEWPORT, Width), "Layout 
mismatch"); +static_assert(offsetof(DirectX::SimpleMath::Viewport, height) == offsetof(D3D12_VIEWPORT, Height), "Layout mismatch"); +static_assert(offsetof(DirectX::SimpleMath::Viewport, minDepth) == offsetof(D3D12_VIEWPORT, MinDepth), "Layout mismatch"); +static_assert(offsetof(DirectX::SimpleMath::Viewport, maxDepth) == offsetof(D3D12_VIEWPORT, MaxDepth), "Layout mismatch"); +#endif + +RECT DirectX::SimpleMath::Viewport::ComputeDisplayArea(DXGI_SCALING scaling, UINT backBufferWidth, UINT backBufferHeight, int outputWidth, int outputHeight) noexcept +{ + RECT rct = {}; + + switch (int(scaling)) + { + case DXGI_SCALING_STRETCH: + // Output fills the entire window area + rct.top = 0; + rct.left = 0; + rct.right = outputWidth; + rct.bottom = outputHeight; + break; + + case 2 /*DXGI_SCALING_ASPECT_RATIO_STRETCH*/: + // Output fills the window area but respects the original aspect ratio, using pillar boxing or letter boxing as required + // Note: This scaling option is not supported for legacy Win32 windows swap chains + { + assert(backBufferHeight > 0); + float aspectRatio = float(backBufferWidth) / float(backBufferHeight); + + // Horizontal fill + float scaledWidth = float(outputWidth); + float scaledHeight = float(outputWidth) / aspectRatio; + if (scaledHeight >= float(outputHeight)) + { + // Do vertical fill + scaledWidth = float(outputHeight) * aspectRatio; + scaledHeight = float(outputHeight); + } + + float offsetX = (float(outputWidth) - scaledWidth) * 0.5f; + float offsetY = (float(outputHeight) - scaledHeight) * 0.5f; + + rct.left = static_cast(offsetX); + rct.top = static_cast(offsetY); + rct.right = static_cast(offsetX + scaledWidth); + rct.bottom = static_cast(offsetY + scaledHeight); + + // Clip to display window + rct.left = std::max(0, rct.left); + rct.top = std::max(0, rct.top); + rct.right = std::min(outputWidth, rct.right); + rct.bottom = std::min(outputHeight, rct.bottom); + } + break; + + case DXGI_SCALING_NONE: + default: + // Output is displayed 
in the upper left corner of the window area + rct.top = 0; + rct.left = 0; + rct.right = std::min(static_cast(backBufferWidth), outputWidth); + rct.bottom = std::min(static_cast(backBufferHeight), outputHeight); + break; + } + + return rct; +} + +RECT DirectX::SimpleMath::Viewport::ComputeTitleSafeArea(UINT backBufferWidth, UINT backBufferHeight) noexcept +{ + float safew = (float(backBufferWidth) + 19.f) / 20.f; + float safeh = (float(backBufferHeight) + 19.f) / 20.f; + + RECT rct; + rct.left = static_cast(safew); + rct.top = static_cast(safeh); + rct.right = static_cast(float(backBufferWidth) - safew + 0.5f); + rct.bottom = static_cast(float(backBufferHeight) - safeh + 0.5f); + + return rct; +} diff --git a/Sdk/External/DirectXTK/Src/SkinnedEffect.cpp b/Sdk/External/DirectXTK/Src/SkinnedEffect.cpp new file mode 100644 index 0000000..a31818b --- /dev/null +++ b/Sdk/External/DirectXTK/Src/SkinnedEffect.cpp @@ -0,0 +1,648 @@ +//-------------------------------------------------------------------------------------- +// File: SkinnedEffect.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "EffectCommon.h" + +using namespace DirectX; + + +// Constant buffer layout. Must match the shader! 
+struct SkinnedEffectConstants
+{
+    XMVECTOR diffuseColor;
+    XMVECTOR emissiveColor;
+    XMVECTOR specularColorAndPower;
+    // Shader side (SkinnedEffect.fx cbuffer): SpecularColor sits at c2 and SpecularPower at c2.w, i.e. both share the vector above.
+    XMVECTOR lightDirection[IEffectLights::MaxDirectionalLights];
+    XMVECTOR lightDiffuseColor[IEffectLights::MaxDirectionalLights];
+    XMVECTOR lightSpecularColor[IEffectLights::MaxDirectionalLights];
+    // The shader declares the three light arrays as [3] at c3/c6/c9 -- assumes MaxDirectionalLights == 3; TODO confirm.
+    XMVECTOR eyePosition;
+
+    XMVECTOR fogColor;
+    XMVECTOR fogVector;
+    // Shader offsets: World c15, WorldInverseTranspose (float3x3) c19, WorldViewProj c22, Bones c26.
+    XMMATRIX world;
+    XMVECTOR worldInverseTranspose[3];
+    XMMATRIX worldViewProj;
+    // Three vectors per bone against the shader's float4x3 Bones[72] -- assumes SkinnedEffect::MaxBones == 72; TODO confirm.
+    XMVECTOR bones[SkinnedEffect::MaxBones][3];
+};
+
+static_assert((sizeof(SkinnedEffectConstants) % 16) == 0, "CB size not padded correctly");
+
+
+// Traits type describes our characteristics to the EffectBase template.
+struct SkinnedEffectTraits
+{
+    using ConstantBufferType = SkinnedEffectConstants;
+    // SkinnedEffect.fx defines 18 vertex shader entry points (3 lighting modes x 3 bone counts x plain/Bn) and 3 pixel shaders.
+    static constexpr int VertexShaderCount = 18;
+    static constexpr int PixelShaderCount = 3;
+    static constexpr int ShaderPermutationCount = 36;
+};
+
+
+// Internal SkinnedEffect implementation class.
+class SkinnedEffect::Impl : public EffectBase
+{
+public:
+    Impl(_In_ ID3D11Device* device);
+    // NOTE(review): EffectBase has no template arguments here although the comment above calls it a template -- possibly lost in extraction; verify.
+    bool preferPerPixelLighting;
+    bool biasedVertexNormals;
+    int weightsPerVertex;
+    // NOTE(review): the three fields above presumably choose between the vertex/pixel-lighting, Bn and 1/2/4-bone shaders -- confirm in GetCurrentShaderPermutation.
+    EffectLights lights;
+
+    int GetCurrentShaderPermutation() const noexcept;
+
+    void Apply(_In_ ID3D11DeviceContext* deviceContext);
+};
+
+
+// Include the precompiled shader code.
+namespace +{ +#if defined(_XBOX_ONE) && defined(_TITLE) + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedVertexLightingOneBone.inc" + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedVertexLightingTwoBones.inc" + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedVertexLightingFourBones.inc" + + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedOneLightOneBone.inc" + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedOneLightTwoBones.inc" + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedOneLightFourBones.inc" + + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedPixelLightingOneBone.inc" + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedPixelLightingTwoBones.inc" + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedPixelLightingFourBones.inc" + + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedVertexLightingOneBoneBn.inc" + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedVertexLightingTwoBonesBn.inc" + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedVertexLightingFourBonesBn.inc" + + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedOneLightOneBoneBn.inc" + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedOneLightTwoBonesBn.inc" + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedOneLightFourBonesBn.inc" + + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedPixelLightingOneBoneBn.inc" + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedPixelLightingTwoBonesBn.inc" + #include "Shaders/Compiled/XboxOneSkinnedEffect_VSSkinnedPixelLightingFourBonesBn.inc" + + #include "Shaders/Compiled/XboxOneSkinnedEffect_PSSkinnedVertexLighting.inc" + #include "Shaders/Compiled/XboxOneSkinnedEffect_PSSkinnedVertexLightingNoFog.inc" + #include "Shaders/Compiled/XboxOneSkinnedEffect_PSSkinnedPixelLighting.inc" +#else + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedVertexLightingOneBone.inc" + #include 
"Shaders/Compiled/SkinnedEffect_VSSkinnedVertexLightingTwoBones.inc" + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedVertexLightingFourBones.inc" + + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedOneLightOneBone.inc" + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedOneLightTwoBones.inc" + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedOneLightFourBones.inc" + + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedPixelLightingOneBone.inc" + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedPixelLightingTwoBones.inc" + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedPixelLightingFourBones.inc" + + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedVertexLightingOneBoneBn.inc" + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedVertexLightingTwoBonesBn.inc" + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedVertexLightingFourBonesBn.inc" + + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedOneLightOneBoneBn.inc" + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedOneLightTwoBonesBn.inc" + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedOneLightFourBonesBn.inc" + + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedPixelLightingOneBoneBn.inc" + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedPixelLightingTwoBonesBn.inc" + #include "Shaders/Compiled/SkinnedEffect_VSSkinnedPixelLightingFourBonesBn.inc" + + #include "Shaders/Compiled/SkinnedEffect_PSSkinnedVertexLighting.inc" + #include "Shaders/Compiled/SkinnedEffect_PSSkinnedVertexLightingNoFog.inc" + #include "Shaders/Compiled/SkinnedEffect_PSSkinnedPixelLighting.inc" +#endif +} + + +template<> +const ShaderBytecode EffectBase::VertexShaderBytecode[] = +{ + { SkinnedEffect_VSSkinnedVertexLightingOneBone, sizeof(SkinnedEffect_VSSkinnedVertexLightingOneBone) }, + { SkinnedEffect_VSSkinnedVertexLightingTwoBones, sizeof(SkinnedEffect_VSSkinnedVertexLightingTwoBones) }, + { SkinnedEffect_VSSkinnedVertexLightingFourBones, sizeof(SkinnedEffect_VSSkinnedVertexLightingFourBones) }, + + { 
SkinnedEffect_VSSkinnedOneLightOneBone, sizeof(SkinnedEffect_VSSkinnedOneLightOneBone) }, + { SkinnedEffect_VSSkinnedOneLightTwoBones, sizeof(SkinnedEffect_VSSkinnedOneLightTwoBones) }, + { SkinnedEffect_VSSkinnedOneLightFourBones, sizeof(SkinnedEffect_VSSkinnedOneLightFourBones) }, + + { SkinnedEffect_VSSkinnedPixelLightingOneBone, sizeof(SkinnedEffect_VSSkinnedPixelLightingOneBone) }, + { SkinnedEffect_VSSkinnedPixelLightingTwoBones, sizeof(SkinnedEffect_VSSkinnedPixelLightingTwoBones) }, + { SkinnedEffect_VSSkinnedPixelLightingFourBones, sizeof(SkinnedEffect_VSSkinnedPixelLightingFourBones) }, + + { SkinnedEffect_VSSkinnedVertexLightingOneBoneBn, sizeof(SkinnedEffect_VSSkinnedVertexLightingOneBoneBn) }, + { SkinnedEffect_VSSkinnedVertexLightingTwoBonesBn, sizeof(SkinnedEffect_VSSkinnedVertexLightingTwoBonesBn) }, + { SkinnedEffect_VSSkinnedVertexLightingFourBonesBn, sizeof(SkinnedEffect_VSSkinnedVertexLightingFourBonesBn) }, + + { SkinnedEffect_VSSkinnedOneLightOneBoneBn, sizeof(SkinnedEffect_VSSkinnedOneLightOneBoneBn) }, + { SkinnedEffect_VSSkinnedOneLightTwoBonesBn, sizeof(SkinnedEffect_VSSkinnedOneLightTwoBonesBn) }, + { SkinnedEffect_VSSkinnedOneLightFourBonesBn, sizeof(SkinnedEffect_VSSkinnedOneLightFourBonesBn) }, + + { SkinnedEffect_VSSkinnedPixelLightingOneBoneBn, sizeof(SkinnedEffect_VSSkinnedPixelLightingOneBoneBn) }, + { SkinnedEffect_VSSkinnedPixelLightingTwoBonesBn, sizeof(SkinnedEffect_VSSkinnedPixelLightingTwoBonesBn) }, + { SkinnedEffect_VSSkinnedPixelLightingFourBonesBn, sizeof(SkinnedEffect_VSSkinnedPixelLightingFourBonesBn) }, + +}; + + +template<> +const int EffectBase::VertexShaderIndices[] = +{ + 0, // vertex lighting, one bone + 0, // vertex lighting, one bone, no fog + 1, // vertex lighting, two bones + 1, // vertex lighting, two bones, no fog + 2, // vertex lighting, four bones + 2, // vertex lighting, four bones, no fog + + 3, // one light, one bone + 3, // one light, one bone, no fog + 4, // one light, two bones + 4, // one light, two 
bones, no fog + 5, // one light, four bones + 5, // one light, four bones, no fog + + 6, // pixel lighting, one bone + 6, // pixel lighting, one bone, no fog + 7, // pixel lighting, two bones + 7, // pixel lighting, two bones, no fog + 8, // pixel lighting, four bones + 8, // pixel lighting, four bones, no fog + + 9, // vertex lighting (biased vertex normals), one bone + 9, // vertex lighting (biased vertex normals), one bone, no fog + 10, // vertex lighting (biased vertex normals), two bones + 10, // vertex lighting (biased vertex normals), two bones, no fog + 11, // vertex lighting (biased vertex normals), four bones + 11, // vertex lighting (biased vertex normals), four bones, no fog + + 12, // one light (biased vertex normals), one bone + 12, // one light (biased vertex normals), one bone, no fog + 13, // one light (biased vertex normals), two bones + 13, // one light (biased vertex normals), two bones, no fog + 14, // one light (biased vertex normals), four bones + 14, // one light (biased vertex normals), four bones, no fog + + 15, // pixel lighting (biased vertex normals), one bone + 15, // pixel lighting (biased vertex normals), one bone, no fog + 16, // pixel lighting (biased vertex normals), two bones + 16, // pixel lighting (biased vertex normals), two bones, no fog + 17, // pixel lighting (biased vertex normals), four bones + 17, // pixel lighting (biased vertex normals), four bones, no fog +}; + + +template<> +const ShaderBytecode EffectBase::PixelShaderBytecode[] = +{ + { SkinnedEffect_PSSkinnedVertexLighting, sizeof(SkinnedEffect_PSSkinnedVertexLighting) }, + { SkinnedEffect_PSSkinnedVertexLightingNoFog, sizeof(SkinnedEffect_PSSkinnedVertexLightingNoFog) }, + { SkinnedEffect_PSSkinnedPixelLighting, sizeof(SkinnedEffect_PSSkinnedPixelLighting) }, +}; + + +template<> +const int EffectBase::PixelShaderIndices[] = +{ + 0, // vertex lighting, one bone + 1, // vertex lighting, one bone, no fog + 0, // vertex lighting, two bones + 1, // vertex lighting, two 
bones, no fog + 0, // vertex lighting, four bones + 1, // vertex lighting, four bones, no fog + + 0, // one light, one bone + 1, // one light, one bone, no fog + 0, // one light, two bones + 1, // one light, two bones, no fog + 0, // one light, four bones + 1, // one light, four bones, no fog + + 2, // pixel lighting, one bone + 2, // pixel lighting, one bone, no fog + 2, // pixel lighting, two bones + 2, // pixel lighting, two bones, no fog + 2, // pixel lighting, four bones + 2, // pixel lighting, four bones, no fog + + 0, // vertex lighting (biased vertex normals), one bone + 1, // vertex lighting (biased vertex normals), one bone, no fog + 0, // vertex lighting (biased vertex normals), two bones + 1, // vertex lighting (biased vertex normals), two bones, no fog + 0, // vertex lighting (biased vertex normals), four bones + 1, // vertex lighting (biased vertex normals), four bones, no fog + + 0, // one light (biased vertex normals), one bone + 1, // one light (biased vertex normals), one bone, no fog + 0, // one light (biased vertex normals), two bones + 1, // one light (biased vertex normals), two bones, no fog + 0, // one light (biased vertex normals), four bones + 1, // one light (biased vertex normals), four bones, no fog + + 2, // pixel lighting (biased vertex normals), one bone + 2, // pixel lighting (biased vertex normals), one bone, no fog + 2, // pixel lighting (biased vertex normals), two bones + 2, // pixel lighting (biased vertex normals), two bones, no fog + 2, // pixel lighting (biased vertex normals), four bones + 2, // pixel lighting (biased vertex normals), four bones, no fog +}; + + +// Global pool of per-device SkinnedEffect resources. +template<> +SharedResourcePool::DeviceResources> EffectBase::deviceResourcesPool = {}; + + +// Constructor. 
+SkinnedEffect::Impl::Impl(_In_ ID3D11Device* device) + : EffectBase(device), + preferPerPixelLighting(false), + biasedVertexNormals(false), + weightsPerVertex(4) +{ + static_assert(_countof(EffectBase::VertexShaderIndices) == SkinnedEffectTraits::ShaderPermutationCount, "array/max mismatch"); + static_assert(_countof(EffectBase::VertexShaderBytecode) == SkinnedEffectTraits::VertexShaderCount, "array/max mismatch"); + static_assert(_countof(EffectBase::PixelShaderBytecode) == SkinnedEffectTraits::PixelShaderCount, "array/max mismatch"); + static_assert(_countof(EffectBase::PixelShaderIndices) == SkinnedEffectTraits::ShaderPermutationCount, "array/max mismatch"); + + lights.InitializeConstants(constants.specularColorAndPower, constants.lightDirection, constants.lightDiffuseColor, constants.lightSpecularColor); + + for (int i = 0; i < MaxBones; i++) + { + constants.bones[i][0] = g_XMIdentityR0; + constants.bones[i][1] = g_XMIdentityR1; + constants.bones[i][2] = g_XMIdentityR2; + } +} + + +int SkinnedEffect::Impl::GetCurrentShaderPermutation() const noexcept +{ + int permutation = 0; + + // Use optimized shaders if fog is disabled. + if (!fog.enabled) + { + permutation += 1; + } + + // Evaluate 1, 2, or 4 weights per vertex? + if (weightsPerVertex == 2) + { + permutation += 2; + } + else if (weightsPerVertex == 4) + { + permutation += 4; + } + + if (preferPerPixelLighting) + { + // Do lighting in the pixel shader. + permutation += 12; + } + else if (!lights.lightEnabled[1] && !lights.lightEnabled[2]) + { + // Use the only-bother-with-the-first-light shader optimization. + permutation += 6; + } + + if (biasedVertexNormals) + { + // Compressed normals need to be scaled and biased in the vertex shader. + permutation += 18; + } + + return permutation; +} + + +// Sets our state onto the D3D device. +void SkinnedEffect::Impl::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + // Compute derived parameter values. 
+ matrices.SetConstants(dirtyFlags, constants.worldViewProj); + + fog.SetConstants(dirtyFlags, matrices.worldView, constants.fogVector); + + lights.SetConstants(dirtyFlags, matrices, constants.world, constants.worldInverseTranspose, constants.eyePosition, constants.diffuseColor, constants.emissiveColor, true); + + // Set the texture. + auto textures = texture.Get(); + if (!textures) + textures = GetDefaultTexture(); + + deviceContext->PSSetShaderResources(0, 1, &textures); + + // Set shaders and constant buffers. + ApplyShaders(deviceContext, GetCurrentShaderPermutation()); +} + + +// Public constructor. +SkinnedEffect::SkinnedEffect(_In_ ID3D11Device* device) + : pImpl(std::make_unique(device)) +{ +} + + +// Move constructor. +SkinnedEffect::SkinnedEffect(SkinnedEffect&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +SkinnedEffect& SkinnedEffect::operator= (SkinnedEffect&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +SkinnedEffect::~SkinnedEffect() +{ +} + + +// IEffect methods. +void SkinnedEffect::Apply(_In_ ID3D11DeviceContext* deviceContext) +{ + pImpl->Apply(deviceContext); +} + + +void SkinnedEffect::GetVertexShaderBytecode(_Out_ void const** pShaderByteCode, _Out_ size_t* pByteCodeLength) +{ + pImpl->GetVertexShaderBytecode(pImpl->GetCurrentShaderPermutation(), pShaderByteCode, pByteCodeLength); +} + + +// Camera settings. 
+void XM_CALLCONV SkinnedEffect::SetWorld(FXMMATRIX value) +{ + pImpl->matrices.world = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose | EffectDirtyFlags::FogVector; +} + + +void XM_CALLCONV SkinnedEffect::SetView(FXMMATRIX value) +{ + pImpl->matrices.view = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::EyePosition | EffectDirtyFlags::FogVector; +} + + +void XM_CALLCONV SkinnedEffect::SetProjection(FXMMATRIX value) +{ + pImpl->matrices.projection = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj; +} + + +void XM_CALLCONV SkinnedEffect::SetMatrices(FXMMATRIX world, CXMMATRIX view, CXMMATRIX projection) +{ + pImpl->matrices.world = world; + pImpl->matrices.view = view; + pImpl->matrices.projection = projection; + + pImpl->dirtyFlags |= EffectDirtyFlags::WorldViewProj | EffectDirtyFlags::WorldInverseTranspose | EffectDirtyFlags::EyePosition | EffectDirtyFlags::FogVector; +} + + +// Material settings. +void XM_CALLCONV SkinnedEffect::SetDiffuseColor(FXMVECTOR value) +{ + pImpl->lights.diffuseColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void XM_CALLCONV SkinnedEffect::SetEmissiveColor(FXMVECTOR value) +{ + pImpl->lights.emissiveColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void XM_CALLCONV SkinnedEffect::SetSpecularColor(FXMVECTOR value) +{ + // Set xyz to new value, but preserve existing w (specular power). + pImpl->constants.specularColorAndPower = XMVectorSelect(pImpl->constants.specularColorAndPower, value, g_XMSelect1110); + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void SkinnedEffect::SetSpecularPower(float value) +{ + // Set w to new value, but preserve existing xyz (specular color). 
+ pImpl->constants.specularColorAndPower = XMVectorSetW(pImpl->constants.specularColorAndPower, value); + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void SkinnedEffect::DisableSpecular() +{ + // Set specular color to black, power to 1 + // Note: Don't use a power of 0 or the shader will generate strange highlights on non-specular materials + + pImpl->constants.specularColorAndPower = g_XMIdentityR3; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void SkinnedEffect::SetAlpha(float value) +{ + pImpl->lights.alpha = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void XM_CALLCONV SkinnedEffect::SetColorAndAlpha(FXMVECTOR value) +{ + pImpl->lights.diffuseColor = value; + pImpl->lights.alpha = XMVectorGetW(value); + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +// Light settings. +void SkinnedEffect::SetLightingEnabled(bool value) +{ + if (!value) + { + throw std::exception("SkinnedEffect does not support turning off lighting"); + } +} + + +void SkinnedEffect::SetPerPixelLighting(bool value) +{ + pImpl->preferPerPixelLighting = value; +} + + +void XM_CALLCONV SkinnedEffect::SetAmbientLightColor(FXMVECTOR value) +{ + pImpl->lights.ambientLightColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::MaterialColor; +} + + +void SkinnedEffect::SetLightEnabled(int whichLight, bool value) +{ + pImpl->dirtyFlags |= pImpl->lights.SetLightEnabled(whichLight, value, pImpl->constants.lightDiffuseColor, pImpl->constants.lightSpecularColor); +} + + +void XM_CALLCONV SkinnedEffect::SetLightDirection(int whichLight, FXMVECTOR value) +{ + EffectLights::ValidateLightIndex(whichLight); + + pImpl->constants.lightDirection[whichLight] = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void XM_CALLCONV SkinnedEffect::SetLightDiffuseColor(int whichLight, FXMVECTOR value) +{ + pImpl->dirtyFlags |= pImpl->lights.SetLightDiffuseColor(whichLight, value, 
pImpl->constants.lightDiffuseColor); +} + + +void XM_CALLCONV SkinnedEffect::SetLightSpecularColor(int whichLight, FXMVECTOR value) +{ + pImpl->dirtyFlags |= pImpl->lights.SetLightSpecularColor(whichLight, value, pImpl->constants.lightSpecularColor); +} + + +void SkinnedEffect::EnableDefaultLighting() +{ + EffectLights::EnableDefaultLighting(this); +} + + +// Fog settings. +void SkinnedEffect::SetFogEnabled(bool value) +{ + pImpl->fog.enabled = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::FogEnable; +} + + +void SkinnedEffect::SetFogStart(float value) +{ + pImpl->fog.start = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::FogVector; +} + + +void SkinnedEffect::SetFogEnd(float value) +{ + pImpl->fog.end = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::FogVector; +} + + +void XM_CALLCONV SkinnedEffect::SetFogColor(FXMVECTOR value) +{ + pImpl->constants.fogColor = value; + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +// Texture settings. +void SkinnedEffect::SetTexture(_In_opt_ ID3D11ShaderResourceView* value) +{ + pImpl->texture = value; +} + + +// Animation settings. 
+void SkinnedEffect::SetWeightsPerVertex(int value) +{ + if ((value != 1) && + (value != 2) && + (value != 4)) + { + throw std::out_of_range("WeightsPerVertex must be 1, 2, or 4"); + } + + pImpl->weightsPerVertex = value; +} + + +void SkinnedEffect::SetBoneTransforms(_In_reads_(count) XMMATRIX const* value, size_t count) +{ + if (count > MaxBones) + throw std::out_of_range("count parameter out of range"); + + auto boneConstant = pImpl->constants.bones; + + for (size_t i = 0; i < count; i++) + { + XMMATRIX boneMatrix = XMMatrixTranspose(value[i]); + + boneConstant[i][0] = boneMatrix.r[0]; + boneConstant[i][1] = boneMatrix.r[1]; + boneConstant[i][2] = boneMatrix.r[2]; + } + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +void SkinnedEffect::ResetBoneTransforms() +{ + auto boneConstant = pImpl->constants.bones; + + for(size_t i = 0; i < MaxBones; ++i) + { + boneConstant[i][0] = g_XMIdentityR0; + boneConstant[i][1] = g_XMIdentityR1; + boneConstant[i][2] = g_XMIdentityR2; + } + + pImpl->dirtyFlags |= EffectDirtyFlags::ConstantBuffer; +} + + +// Normal compression settings. +void SkinnedEffect::SetBiasedVertexNormals(bool value) +{ + pImpl->biasedVertexNormals = value; +} diff --git a/Sdk/External/DirectXTK/Src/SpriteBatch.cpp b/Sdk/External/DirectXTK/Src/SpriteBatch.cpp new file mode 100644 index 0000000..15ab1f2 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/SpriteBatch.cpp @@ -0,0 +1,1200 @@ +//-------------------------------------------------------------------------------------- +// File: SpriteBatch.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" + +#include "SpriteBatch.h" +#include "BufferHelpers.h" +#include "CommonStates.h" +#include "DirectXHelpers.h" +#include "VertexTypes.h" +#include "AlignedNew.h" +#include "SharedResourcePool.h" + +using namespace DirectX; +using Microsoft::WRL::ComPtr; + +namespace +{ + // Include the precompiled shader code. + #if defined(_XBOX_ONE) && defined(_TITLE) + #include "Shaders/Compiled/XboxOneSpriteEffect_SpriteVertexShader.inc" + #include "Shaders/Compiled/XboxOneSpriteEffect_SpritePixelShader.inc" + #else + #include "Shaders/Compiled/SpriteEffect_SpriteVertexShader.inc" + #include "Shaders/Compiled/SpriteEffect_SpritePixelShader.inc" + #endif + + + // Helper looks up the D3D device corresponding to a context interface. + inline ComPtr GetDevice(_In_ ID3D11DeviceContext* deviceContext) + { + ComPtr device; + + deviceContext->GetDevice(&device); + + return device; + } + + + // Helper converts a RECT to XMVECTOR. + inline XMVECTOR LoadRect(_In_ RECT const* rect) + { + XMVECTOR v = XMLoadInt4(reinterpret_cast(rect)); + + v = XMConvertVectorIntToFloat(v, 0); + + // Convert right/bottom to width/height. + v = XMVectorSubtract(v, XMVectorPermute<0, 1, 4, 5>(g_XMZero, v)); + + return v; + } +} + + +// Internal SpriteBatch implementation class. 
+__declspec(align(16)) class SpriteBatch::Impl : public AlignedNew +{ +public: + Impl(_In_ ID3D11DeviceContext* deviceContext); + + void XM_CALLCONV Begin(SpriteSortMode sortMode, + _In_opt_ ID3D11BlendState* blendState, + _In_opt_ ID3D11SamplerState* samplerState, + _In_opt_ ID3D11DepthStencilState* depthStencilState, + _In_opt_ ID3D11RasterizerState* rasterizerState, + std::function& setCustomShaders, + FXMMATRIX transformMatrix); + void End(); + + void XM_CALLCONV Draw(_In_ ID3D11ShaderResourceView* texture, + FXMVECTOR destination, + _In_opt_ RECT const* sourceRectangle, + FXMVECTOR color, + FXMVECTOR originRotationDepth, + unsigned int flags); + + + // Info about a single sprite that is waiting to be drawn. + __declspec(align(16)) struct SpriteInfo : public AlignedNew + { + XMFLOAT4A source; + XMFLOAT4A destination; + XMFLOAT4A color; + XMFLOAT4A originRotationDepth; + ID3D11ShaderResourceView* texture; + unsigned int flags; + + + // Combine values from the public SpriteEffects enum with these internal-only flags. + static const unsigned int SourceInTexels = 4; + static const unsigned int DestSizeInPixels = 8; + + static_assert((SpriteEffects_FlipBoth & (SourceInTexels | DestSizeInPixels)) == 0, "Flag bits must not overlap"); + }; + + DXGI_MODE_ROTATION mRotation; + + bool mSetViewport; + D3D11_VIEWPORT mViewPort; + +private: + // Implementation helper methods. 
+ void GrowSpriteQueue(); + void PrepareForRendering(); + void FlushBatch(); + void SortSprites(); + void GrowSortedSprites(); + + void RenderBatch(_In_ ID3D11ShaderResourceView* texture, _In_reads_(count) SpriteInfo const* const* sprites, size_t count); + + static void XM_CALLCONV RenderSprite(_In_ SpriteInfo const* sprite, + _Out_writes_(VerticesPerSprite) VertexPositionColorTexture* vertices, + FXMVECTOR textureSize, + FXMVECTOR inverseTextureSize); + + static XMVECTOR GetTextureSize(_In_ ID3D11ShaderResourceView* texture); + XMMATRIX GetViewportTransform(_In_ ID3D11DeviceContext* deviceContext, DXGI_MODE_ROTATION rotation ); + + + // Constants. + static const size_t MaxBatchSize = 2048; + static const size_t MinBatchSize = 128; + static const size_t InitialQueueSize = 64; + static const size_t VerticesPerSprite = 4; + static const size_t IndicesPerSprite = 6; + + + // Queue of sprites waiting to be drawn. + std::unique_ptr mSpriteQueue; + + size_t mSpriteQueueCount; + size_t mSpriteQueueArraySize; + + + // To avoid needlessly copying around bulky SpriteInfo structures, we leave that + // actual data alone and just sort this array of pointers instead. But we want contiguous + // memory for cache efficiency, so these pointers are just shortcuts into the single + // mSpriteQueue array, and we take care to keep them in order when sorting is disabled. + std::vector mSortedSprites; + + + // If each SpriteInfo instance held a refcount on its texture, could end up with + // many redundant AddRef/Release calls on the same object, so instead we use + // this separate list to hold just a single refcount each time we change texture. + std::vector> mSpriteTextureReferences; + + + // Mode settings from the last Begin call. 
+ bool mInBeginEndPair; + + SpriteSortMode mSortMode; + ComPtr mBlendState; + ComPtr mSamplerState; + ComPtr mDepthStencilState; + ComPtr mRasterizerState; + std::function mSetCustomShaders; + XMMATRIX mTransformMatrix; + + + // Only one of these helpers is allocated per D3D device, even if there are multiple SpriteBatch instances. + struct DeviceResources + { + DeviceResources(_In_ ID3D11Device* device); + + ComPtr vertexShader; + ComPtr pixelShader; + ComPtr inputLayout; + ComPtr indexBuffer; + + CommonStates stateObjects; + + private: + void CreateShaders(_In_ ID3D11Device* device); + void CreateIndexBuffer(_In_ ID3D11Device* device); + + static std::vector CreateIndexValues(); + }; + + + // Only one of these helpers is allocated per D3D device context, even if there are multiple SpriteBatch instances. + struct ContextResources + { + ContextResources(_In_ ID3D11DeviceContext* deviceContext); + +#if defined(_XBOX_ONE) && defined(_TITLE) + ComPtr deviceContext; +#else + ComPtr deviceContext; +#endif + + ComPtr vertexBuffer; + + ConstantBuffer constantBuffer; + + size_t vertexBufferPosition; + + bool inImmediateMode; + + private: + void CreateVertexBuffer(); + }; + + + // Per-device and per-context data. + std::shared_ptr mDeviceResources; + std::shared_ptr mContextResources; + + static SharedResourcePool deviceResourcesPool; + static SharedResourcePool contextResourcesPool; +}; + + +// Global pools of per-device and per-context SpriteBatch resources. +SharedResourcePool SpriteBatch::Impl::deviceResourcesPool; +SharedResourcePool SpriteBatch::Impl::contextResourcesPool; + + +// Constants. +const XMMATRIX SpriteBatch::MatrixIdentity = XMMatrixIdentity(); +const XMFLOAT2 SpriteBatch::Float2Zero(0, 0); + +// Per-device constructor. +SpriteBatch::Impl::DeviceResources::DeviceResources(_In_ ID3D11Device* device) + : stateObjects(device) +{ + CreateShaders(device); + CreateIndexBuffer(device); +} + + +// Creates the SpriteBatch shaders and input layout. 
+void SpriteBatch::Impl::DeviceResources::CreateShaders(_In_ ID3D11Device* device) +{ + ThrowIfFailed( + device->CreateVertexShader(SpriteEffect_SpriteVertexShader, + sizeof(SpriteEffect_SpriteVertexShader), + nullptr, + &vertexShader) + ); + + ThrowIfFailed( + device->CreatePixelShader(SpriteEffect_SpritePixelShader, + sizeof(SpriteEffect_SpritePixelShader), + nullptr, + &pixelShader) + ); + + ThrowIfFailed( + device->CreateInputLayout(VertexPositionColorTexture::InputElements, + VertexPositionColorTexture::InputElementCount, + SpriteEffect_SpriteVertexShader, + sizeof(SpriteEffect_SpriteVertexShader), + &inputLayout) + ); + + SetDebugObjectName(vertexShader.Get(), "DirectXTK:SpriteBatch"); + SetDebugObjectName(pixelShader.Get(), "DirectXTK:SpriteBatch"); + SetDebugObjectName(inputLayout.Get(), "DirectXTK:SpriteBatch"); +} + + +// Creates the SpriteBatch index buffer. +void SpriteBatch::Impl::DeviceResources::CreateIndexBuffer(_In_ ID3D11Device* device) +{ + D3D11_BUFFER_DESC indexBufferDesc = {}; + + static_assert((MaxBatchSize * VerticesPerSprite) < USHRT_MAX, "MaxBatchSize too large for 16-bit indices"); + + indexBufferDesc.ByteWidth = sizeof(short) * MaxBatchSize * IndicesPerSprite; + indexBufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER; + indexBufferDesc.Usage = D3D11_USAGE_DEFAULT; + + auto indexValues = CreateIndexValues(); + + D3D11_SUBRESOURCE_DATA indexDataDesc = { indexValues.data(), 0, 0 }; + + ThrowIfFailed( + device->CreateBuffer(&indexBufferDesc, &indexDataDesc, &indexBuffer) + ); + + SetDebugObjectName(indexBuffer.Get(), "DirectXTK:SpriteBatch"); +} + + +// Helper for populating the SpriteBatch index buffer. 
+std::vector SpriteBatch::Impl::DeviceResources::CreateIndexValues() +{ + std::vector indices; + + indices.reserve(MaxBatchSize * IndicesPerSprite); + + for (size_t j = 0; j < MaxBatchSize * VerticesPerSprite; j += VerticesPerSprite) + { + short i = static_cast(j); + + indices.push_back(i); + indices.push_back(i + 1); + indices.push_back(i + 2); + + indices.push_back(i + 1); + indices.push_back(i + 3); + indices.push_back(i + 2); + } + + return indices; +} + + +// Per-context constructor. +SpriteBatch::Impl::ContextResources::ContextResources(_In_ ID3D11DeviceContext* context) + :constantBuffer(GetDevice(context).Get()), + vertexBufferPosition(0), + inImmediateMode(false) +{ +#if defined(_XBOX_ONE) && defined(_TITLE) + ThrowIfFailed(context->QueryInterface(IID_GRAPHICS_PPV_ARGS(deviceContext.GetAddressOf()))); +#else + deviceContext = context; +#endif + + CreateVertexBuffer(); +} + + +// Creates the SpriteBatch vertex buffer. +void SpriteBatch::Impl::ContextResources::CreateVertexBuffer() +{ +#if defined(_XBOX_ONE) && defined(_TITLE) + D3D11_BUFFER_DESC vertexBufferDesc = {}; + + vertexBufferDesc.ByteWidth = sizeof(VertexPositionColorTexture) * MaxBatchSize * VerticesPerSprite; + vertexBufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + vertexBufferDesc.Usage = D3D11_USAGE_DEFAULT; + vertexBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + + auto device = GetDevice(deviceContext.Get()); + + ComPtr deviceX; + ThrowIfFailed(device.As(&deviceX)); + + ThrowIfFailed( + deviceX->CreatePlacementBuffer(&vertexBufferDesc, nullptr, &vertexBuffer) + ); + + SetDebugObjectName(vertexBuffer.Get(), "DirectXTK:SpriteBatch"); +#else + D3D11_BUFFER_DESC vertexBufferDesc = {}; + + vertexBufferDesc.ByteWidth = sizeof(VertexPositionColorTexture) * MaxBatchSize * VerticesPerSprite; + vertexBufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + vertexBufferDesc.Usage = D3D11_USAGE_DYNAMIC; + vertexBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + + ThrowIfFailed( + 
GetDevice(deviceContext.Get())->CreateBuffer(&vertexBufferDesc, nullptr, &vertexBuffer) + ); + + SetDebugObjectName(vertexBuffer.Get(), "DirectXTK:SpriteBatch"); +#endif +} + + +// Per-SpriteBatch constructor. +SpriteBatch::Impl::Impl(_In_ ID3D11DeviceContext* deviceContext) + : mRotation(DXGI_MODE_ROTATION_IDENTITY), + mSetViewport(false), + mViewPort{}, + mSpriteQueueCount(0), + mSpriteQueueArraySize(0), + mInBeginEndPair(false), + mSortMode(SpriteSortMode_Deferred), + mTransformMatrix(MatrixIdentity), + mDeviceResources(deviceResourcesPool.DemandCreate(GetDevice(deviceContext).Get())), + mContextResources(contextResourcesPool.DemandCreate(deviceContext)) +{ +} + + +// Begins a batch of sprite drawing operations. +_Use_decl_annotations_ +void XM_CALLCONV SpriteBatch::Impl::Begin(SpriteSortMode sortMode, + ID3D11BlendState* blendState, + ID3D11SamplerState* samplerState, + ID3D11DepthStencilState* depthStencilState, + ID3D11RasterizerState* rasterizerState, + std::function& setCustomShaders, + FXMMATRIX transformMatrix) +{ + if (mInBeginEndPair) + throw std::exception("Cannot nest Begin calls on a single SpriteBatch"); + + mSortMode = sortMode; + mBlendState = blendState; + mSamplerState = samplerState; + mDepthStencilState = depthStencilState; + mRasterizerState = rasterizerState; + mSetCustomShaders = setCustomShaders; + mTransformMatrix = transformMatrix; + + if (sortMode == SpriteSortMode_Immediate) + { + // If we are in immediate mode, set device state ready for drawing. + if (mContextResources->inImmediateMode) + throw std::exception("Only one SpriteBatch at a time can use SpriteSortMode_Immediate"); + + PrepareForRendering(); + + mContextResources->inImmediateMode = true; + } + + mInBeginEndPair = true; +} + + +// Ends a batch of sprite drawing operations. 
+void SpriteBatch::Impl::End() +{ + if (!mInBeginEndPair) + throw std::exception("Begin must be called before End"); + + if (mSortMode == SpriteSortMode_Immediate) + { + // If we are in immediate mode, sprites have already been drawn. + mContextResources->inImmediateMode = false; + } + else + { + // Draw the queued sprites now. + if (mContextResources->inImmediateMode) + throw std::exception("Cannot end one SpriteBatch while another is using SpriteSortMode_Immediate"); + + PrepareForRendering(); + FlushBatch(); + } + + // Break circular reference chains, in case the state lambda closed + // over an object that holds a reference to this SpriteBatch. + mSetCustomShaders = nullptr; + + mInBeginEndPair = false; +} + + +// Adds a single sprite to the queue. +_Use_decl_annotations_ +void XM_CALLCONV SpriteBatch::Impl::Draw(ID3D11ShaderResourceView* texture, + FXMVECTOR destination, + RECT const* sourceRectangle, + FXMVECTOR color, + FXMVECTOR originRotationDepth, + unsigned int flags) +{ + if (!texture) + throw std::exception("Texture cannot be null"); + + if (!mInBeginEndPair) + throw std::exception("Begin must be called before Draw"); + + // Get a pointer to the output sprite. + if (mSpriteQueueCount >= mSpriteQueueArraySize) + { + GrowSpriteQueue(); + } + + SpriteInfo* sprite = &mSpriteQueue[mSpriteQueueCount]; + + XMVECTOR dest = destination; + + if (sourceRectangle) + { + // User specified an explicit source region. + XMVECTOR source = LoadRect(sourceRectangle); + + XMStoreFloat4A(&sprite->source, source); + + // If the destination size is relative to the source region, convert it to pixels. + if (!(flags & SpriteInfo::DestSizeInPixels)) + { + dest = XMVectorPermute<0, 1, 6, 7>(dest, XMVectorMultiply(dest, source)); // dest.zw *= source.zw + } + + flags |= SpriteInfo::SourceInTexels | SpriteInfo::DestSizeInPixels; + } + else + { + // No explicit source region, so use the entire texture. 
+ static const XMVECTORF32 wholeTexture = { { { 0, 0, 1, 1 } } }; + + XMStoreFloat4A(&sprite->source, wholeTexture); + } + + // Store sprite parameters. + XMStoreFloat4A(&sprite->destination, dest); + XMStoreFloat4A(&sprite->color, color); + XMStoreFloat4A(&sprite->originRotationDepth, originRotationDepth); + + sprite->texture = texture; + sprite->flags = flags; + + if (mSortMode == SpriteSortMode_Immediate) + { + // If we are in immediate mode, draw this sprite straight away. + RenderBatch(texture, &sprite, 1); + } + else + { + // Queue this sprite for later sorting and batched rendering. + mSpriteQueueCount++; + + // Make sure we hold a refcount on this texture until the sprite has been drawn. Only checking the + // back of the vector means we will add duplicate references if the caller switches back and forth + // between multiple repeated textures, but calling AddRef more times than strictly necessary hurts + // nothing, and is faster than scanning the whole list or using a map to detect all duplicates. + if (mSpriteTextureReferences.empty() || texture != mSpriteTextureReferences.back().Get()) + { + mSpriteTextureReferences.emplace_back(texture); + } + } +} + + +// Dynamically expands the array used to store pending sprite information. +void SpriteBatch::Impl::GrowSpriteQueue() +{ + // Grow by a factor of 2. + size_t newSize = std::max(InitialQueueSize, mSpriteQueueArraySize * 2); + + // Allocate the new array. + auto newArray = std::make_unique(newSize); + + // Copy over any existing sprites. + for (size_t i = 0; i < mSpriteQueueCount; i++) + { + newArray[i] = mSpriteQueue[i]; + } + + // Replace the previous array with the new one. + mSpriteQueue = std::move(newArray); + mSpriteQueueArraySize = newSize; + + // Clear any dangling SpriteInfo pointers left over from previous rendering. + mSortedSprites.clear(); +} + + +// Sets up D3D device state ready for drawing sprites. 
+void SpriteBatch::Impl::PrepareForRendering() +{ + auto deviceContext = mContextResources->deviceContext.Get(); + + // Set state objects. + auto blendState = mBlendState ? mBlendState.Get() : mDeviceResources->stateObjects.AlphaBlend(); + auto depthStencilState = mDepthStencilState ? mDepthStencilState.Get() : mDeviceResources->stateObjects.DepthNone(); + auto rasterizerState = mRasterizerState ? mRasterizerState.Get() : mDeviceResources->stateObjects.CullCounterClockwise(); + auto samplerState = mSamplerState ? mSamplerState.Get() : mDeviceResources->stateObjects.LinearClamp(); + + deviceContext->OMSetBlendState(blendState, nullptr, 0xFFFFFFFF); + deviceContext->OMSetDepthStencilState(depthStencilState, 0); + deviceContext->RSSetState(rasterizerState); + deviceContext->PSSetSamplers(0, 1, &samplerState); + + // Set shaders. + deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + deviceContext->IASetInputLayout(mDeviceResources->inputLayout.Get()); + deviceContext->VSSetShader(mDeviceResources->vertexShader.Get(), nullptr, 0); + deviceContext->PSSetShader(mDeviceResources->pixelShader.Get(), nullptr, 0); + + // Set the vertex and index buffer. +#if !defined(_XBOX_ONE) || !defined(_TITLE) + auto vertexBuffer = mContextResources->vertexBuffer.Get(); + UINT vertexStride = sizeof(VertexPositionColorTexture); + UINT vertexOffset = 0; + + deviceContext->IASetVertexBuffers(0, 1, &vertexBuffer, &vertexStride, &vertexOffset); +#endif + + deviceContext->IASetIndexBuffer(mDeviceResources->indexBuffer.Get(), DXGI_FORMAT_R16_UINT, 0); + + // Set the transform matrix. + XMMATRIX transformMatrix = (mRotation == DXGI_MODE_ROTATION_UNSPECIFIED) + ? 
mTransformMatrix + : (mTransformMatrix * GetViewportTransform(deviceContext, mRotation)); + +#if defined(_XBOX_ONE) && defined(_TITLE) + void* grfxMemory; + mContextResources->constantBuffer.SetData(deviceContext, transformMatrix, &grfxMemory); + + deviceContext->VSSetPlacementConstantBuffer(0, mContextResources->constantBuffer.GetBuffer(), grfxMemory); +#else + mContextResources->constantBuffer.SetData(deviceContext, transformMatrix); + + ID3D11Buffer* constantBuffer = mContextResources->constantBuffer.GetBuffer(); + + deviceContext->VSSetConstantBuffers(0, 1, &constantBuffer); +#endif + + // If this is a deferred D3D context, reset position so the first Map call will use D3D11_MAP_WRITE_DISCARD. + if (deviceContext->GetType() == D3D11_DEVICE_CONTEXT_DEFERRED) + { + mContextResources->vertexBufferPosition = 0; + } + + // Hook lets the caller replace our settings with their own custom shaders. + if (mSetCustomShaders) + { + mSetCustomShaders(); + } +} + + +// Sends queued sprites to the graphics device. +void SpriteBatch::Impl::FlushBatch() +{ + if (!mSpriteQueueCount) + return; + + SortSprites(); + + // Walk through the sorted sprite list, looking for adjacent entries that share a texture. + ID3D11ShaderResourceView* batchTexture = nullptr; + size_t batchStart = 0; + + for (size_t pos = 0; pos < mSpriteQueueCount; pos++) + { + ID3D11ShaderResourceView* texture = mSortedSprites[pos]->texture; + + _Analysis_assume_(texture != nullptr); + + // Flush whenever the texture changes. + if (texture != batchTexture) + { + if (pos > batchStart) + { + RenderBatch(batchTexture, &mSortedSprites[batchStart], pos - batchStart); + } + + batchTexture = texture; + batchStart = pos; + } + } + + // Flush the final batch. + RenderBatch(batchTexture, &mSortedSprites[batchStart], mSpriteQueueCount - batchStart); + + // Reset the queue. 
+ mSpriteQueueCount = 0; + mSpriteTextureReferences.clear(); + + // When sorting is disabled, we persist mSortedSprites data from one batch to the next, to avoid + // uneccessary work in GrowSortedSprites. But we never reuse these when sorting, because re-sorting + // previously sorted items gives unstable ordering if some sprites have identical sort keys. + if (mSortMode != SpriteSortMode_Deferred) + { + mSortedSprites.clear(); + } +} + + +// Sorts the array of queued sprites. +void SpriteBatch::Impl::SortSprites() +{ + // Fill the mSortedSprites vector. + if (mSortedSprites.size() < mSpriteQueueCount) + { + GrowSortedSprites(); + } + + switch (mSortMode) + { + case SpriteSortMode_Texture: + // Sort by texture. + std::sort(mSortedSprites.begin(), mSortedSprites.begin() + static_cast(mSpriteQueueCount), + [](SpriteInfo const* x, SpriteInfo const* y) noexcept -> bool + { + return x->texture < y->texture; + }); + break; + + case SpriteSortMode_BackToFront: + // Sort back to front. + std::sort(mSortedSprites.begin(), mSortedSprites.begin() + static_cast(mSpriteQueueCount), + [](SpriteInfo const* x, SpriteInfo const* y) noexcept -> bool + { + return x->originRotationDepth.w > y->originRotationDepth.w; + }); + break; + + case SpriteSortMode_FrontToBack: + // Sort front to back. + std::sort(mSortedSprites.begin(), mSortedSprites.begin() + static_cast(mSpriteQueueCount), + [](SpriteInfo const* x, SpriteInfo const* y) noexcept -> bool + { + return x->originRotationDepth.w < y->originRotationDepth.w; + }); + break; + + default: + break; + } +} + + +// Populates the mSortedSprites vector with pointers to individual elements of the mSpriteQueue array. +void SpriteBatch::Impl::GrowSortedSprites() +{ + size_t previousSize = mSortedSprites.size(); + + mSortedSprites.resize(mSpriteQueueCount); + + for (size_t i = previousSize; i < mSpriteQueueCount; i++) + { + mSortedSprites[i] = &mSpriteQueue[i]; + } +} + + +// Submits a batch of sprites to the GPU. 
+_Use_decl_annotations_ +void SpriteBatch::Impl::RenderBatch(ID3D11ShaderResourceView* texture, SpriteInfo const* const* sprites, size_t count) +{ + auto deviceContext = mContextResources->deviceContext.Get(); + + // Draw using the specified texture. + deviceContext->PSSetShaderResources(0, 1, &texture); + + XMVECTOR textureSize = GetTextureSize(texture); + XMVECTOR inverseTextureSize = XMVectorReciprocal(textureSize); + + while (count > 0) + { + // How many sprites do we want to draw? + size_t batchSize = count; + + // How many sprites does the D3D vertex buffer have room for? + size_t remainingSpace = MaxBatchSize - mContextResources->vertexBufferPosition; + + if (batchSize > remainingSpace) + { + if (remainingSpace < MinBatchSize) + { + // If we are out of room, or about to submit an excessively small batch, wrap back to the start of the vertex buffer. + mContextResources->vertexBufferPosition = 0; + + batchSize = std::min(count, MaxBatchSize); + } + else + { + // Take however many sprites fit in what's left of the vertex buffer. + batchSize = remainingSpace; + } + } + +#if defined(_XBOX_ONE) && defined(_TITLE) + void *grfxMemory = GraphicsMemory::Get().Allocate(deviceContext, sizeof(VertexPositionColorTexture) * batchSize * VerticesPerSprite, 64); + + auto vertices = static_cast(grfxMemory); +#else + // Lock the vertex buffer. + D3D11_MAP mapType = (mContextResources->vertexBufferPosition == 0) ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE; + + D3D11_MAPPED_SUBRESOURCE mappedBuffer; + + ThrowIfFailed( + deviceContext->Map(mContextResources->vertexBuffer.Get(), 0, mapType, 0, &mappedBuffer) + ); + + auto vertices = static_cast(mappedBuffer.pData) + mContextResources->vertexBufferPosition * VerticesPerSprite; +#endif + + // Generate sprite vertex data. 
+ for (size_t i = 0; i < batchSize; i++) + { + assert(i < count); + _Analysis_assume_(i < count); + RenderSprite(sprites[i], vertices, textureSize, inverseTextureSize); + + vertices += VerticesPerSprite; + } + +#if defined(_XBOX_ONE) && defined(_TITLE) + deviceContext->IASetPlacementVertexBuffer(0, mContextResources->vertexBuffer.Get(), grfxMemory, sizeof(VertexPositionColorTexture)); +#else + deviceContext->Unmap(mContextResources->vertexBuffer.Get(), 0); +#endif + + // Ok lads, the time has come for us draw ourselves some sprites! + auto startIndex = static_cast(mContextResources->vertexBufferPosition * IndicesPerSprite); + auto indexCount = static_cast(batchSize * IndicesPerSprite); + + deviceContext->DrawIndexed(indexCount, startIndex, 0); + + // Advance the buffer position. +#if !defined(_XBOX_ONE) || !defined(_TITLE) + mContextResources->vertexBufferPosition += batchSize; +#endif + + sprites += batchSize; + count -= batchSize; + } +} + + +// Generates vertex data for drawing a single sprite. +_Use_decl_annotations_ +void XM_CALLCONV SpriteBatch::Impl::RenderSprite(SpriteInfo const* sprite, + VertexPositionColorTexture* vertices, + FXMVECTOR textureSize, + FXMVECTOR inverseTextureSize) +{ + // Load sprite parameters into SIMD registers. + XMVECTOR source = XMLoadFloat4A(&sprite->source); + XMVECTOR destination = XMLoadFloat4A(&sprite->destination); + XMVECTOR color = XMLoadFloat4A(&sprite->color); + XMVECTOR originRotationDepth = XMLoadFloat4A(&sprite->originRotationDepth); + + float rotation = sprite->originRotationDepth.z; + unsigned int flags = sprite->flags; + + // Extract the source and destination sizes into separate vectors. + XMVECTOR sourceSize = XMVectorSwizzle<2, 3, 2, 3>(source); + XMVECTOR destinationSize = XMVectorSwizzle<2, 3, 2, 3>(destination); + + // Scale the origin offset by source size, taking care to avoid overflow if the source region is zero. 
+ XMVECTOR isZeroMask = XMVectorEqual(sourceSize, XMVectorZero()); + XMVECTOR nonZeroSourceSize = XMVectorSelect(sourceSize, g_XMEpsilon, isZeroMask); + + XMVECTOR origin = XMVectorDivide(originRotationDepth, nonZeroSourceSize); + + // Convert the source region from texels to mod-1 texture coordinate format. + if (flags & SpriteInfo::SourceInTexels) + { + source = XMVectorMultiply(source, inverseTextureSize); + sourceSize = XMVectorMultiply(sourceSize, inverseTextureSize); + } + else + { + origin = XMVectorMultiply(origin, inverseTextureSize); + } + + // If the destination size is relative to the source region, convert it to pixels. + if (!(flags & SpriteInfo::DestSizeInPixels)) + { + destinationSize = XMVectorMultiply(destinationSize, textureSize); + } + + // Compute a 2x2 rotation matrix. + XMVECTOR rotationMatrix1; + XMVECTOR rotationMatrix2; + + if (rotation != 0) + { + float sin, cos; + + XMScalarSinCos(&sin, &cos, rotation); + + XMVECTOR sinV = XMLoadFloat(&sin); + XMVECTOR cosV = XMLoadFloat(&cos); + + rotationMatrix1 = XMVectorMergeXY(cosV, sinV); + rotationMatrix2 = XMVectorMergeXY(XMVectorNegate(sinV), cosV); + } + else + { + rotationMatrix1 = g_XMIdentityR0; + rotationMatrix2 = g_XMIdentityR1; + } + + // The four corner vertices are computed by transforming these unit-square positions. + static XMVECTORF32 cornerOffsets[VerticesPerSprite] = + { + { { { 0, 0, 0, 0 } } }, + { { { 1, 0, 0, 0 } } }, + { { { 0, 1, 0, 0 } } }, + { { { 1, 1, 0, 0 } } }, + }; + + // Tricksy alert! Texture coordinates are computed from the same cornerOffsets + // table as vertex positions, but if the sprite is mirrored, this table + // must be indexed in a different order. 
This is done as follows: + // + // position = cornerOffsets[i] + // texcoord = cornerOffsets[i ^ SpriteEffects] + + static_assert(SpriteEffects_FlipHorizontally == 1 && + SpriteEffects_FlipVertically == 2, "If you change these enum values, the mirroring implementation must be updated to match"); + + const unsigned int mirrorBits = flags & 3u; + + // Generate the four output vertices. + for (size_t i = 0; i < VerticesPerSprite; i++) + { + // Calculate position. + XMVECTOR cornerOffset = XMVectorMultiply(XMVectorSubtract(cornerOffsets[i], origin), destinationSize); + + // Apply 2x2 rotation matrix. + XMVECTOR position1 = XMVectorMultiplyAdd(XMVectorSplatX(cornerOffset), rotationMatrix1, destination); + XMVECTOR position2 = XMVectorMultiplyAdd(XMVectorSplatY(cornerOffset), rotationMatrix2, position1); + + // Set z = depth. + XMVECTOR position = XMVectorPermute<0, 1, 7, 6>(position2, originRotationDepth); + + // Write position as a Float4, even though VertexPositionColor::position is an XMFLOAT3. + // This is faster, and harmless as we are just clobbering the first element of the + // following color field, which will immediately be overwritten with its correct value. + XMStoreFloat4(reinterpret_cast(&vertices[i].position), position); + + // Write the color. + XMStoreFloat4(&vertices[i].color, color); + + // Compute and write the texture coordinate. + XMVECTOR textureCoordinate = XMVectorMultiplyAdd(cornerOffsets[static_cast(i) ^ mirrorBits], sourceSize, source); + + XMStoreFloat2(&vertices[i].textureCoordinate, textureCoordinate); + } +} + + +// Helper looks up the size of the specified texture. +XMVECTOR SpriteBatch::Impl::GetTextureSize(_In_ ID3D11ShaderResourceView* texture) +{ + // Convert resource view to underlying resource. + ComPtr resource; + + texture->GetResource(&resource); + + // Cast to texture. 
+ ComPtr texture2D; + + if (FAILED(resource.As(&texture2D))) + { + throw std::exception("SpriteBatch can only draw Texture2D resources"); + } + + // Query the texture size. + D3D11_TEXTURE2D_DESC desc; + + texture2D->GetDesc(&desc); + + // Convert to vector format. + XMVECTOR size = XMVectorMergeXY(XMLoadInt(&desc.Width), + XMLoadInt(&desc.Height)); + + return XMConvertVectorUIntToFloat(size, 0); +} + + +// Generates a viewport transform matrix for rendering sprites using x-right y-down screen pixel coordinates. +XMMATRIX SpriteBatch::Impl::GetViewportTransform(_In_ ID3D11DeviceContext* deviceContext, DXGI_MODE_ROTATION rotation) +{ + // Look up the current viewport. + if (!mSetViewport) + { + UINT viewportCount = 1; + + deviceContext->RSGetViewports(&viewportCount, &mViewPort); + + if (viewportCount != 1) + throw std::exception("No viewport is set"); + } + + // Compute the matrix. + float xScale = (mViewPort.Width > 0) ? 2.0f / mViewPort.Width : 0.0f; + float yScale = (mViewPort.Height > 0) ? 2.0f / mViewPort.Height : 0.0f; + + switch (rotation) + { + case DXGI_MODE_ROTATION_ROTATE90: + return XMMATRIX + ( + 0, -yScale, 0, 0, + -xScale, 0, 0, 0, + 0, 0, 1, 0, + 1, 1, 0, 1 + ); + + case DXGI_MODE_ROTATION_ROTATE270: + return XMMATRIX + ( + 0, yScale, 0, 0, + xScale, 0, 0, 0, + 0, 0, 1, 0, + -1, -1, 0, 1 + ); + + case DXGI_MODE_ROTATION_ROTATE180: + return XMMATRIX + ( + -xScale, 0, 0, 0, + 0, yScale, 0, 0, + 0, 0, 1, 0, + 1, -1, 0, 1 + ); + + default: + return XMMATRIX + ( + xScale, 0, 0, 0, + 0, -yScale, 0, 0, + 0, 0, 1, 0, + -1, 1, 0, 1 + ); + } +} + + +// Public constructor. +SpriteBatch::SpriteBatch(_In_ ID3D11DeviceContext* deviceContext) + : pImpl(std::make_unique(deviceContext)) +{ +} + + +// Move constructor. +SpriteBatch::SpriteBatch(SpriteBatch&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. 
+SpriteBatch& SpriteBatch::operator= (SpriteBatch&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +SpriteBatch::~SpriteBatch() +{ +} + + +_Use_decl_annotations_ +void XM_CALLCONV SpriteBatch::Begin(SpriteSortMode sortMode, + ID3D11BlendState* blendState, + ID3D11SamplerState* samplerState, + ID3D11DepthStencilState* depthStencilState, + ID3D11RasterizerState* rasterizerState, + std::function setCustomShaders, + FXMMATRIX transformMatrix) +{ + pImpl->Begin(sortMode, blendState, samplerState, depthStencilState, rasterizerState, setCustomShaders, transformMatrix); +} + + +void SpriteBatch::End() +{ + pImpl->End(); +} + + +_Use_decl_annotations_ +void XM_CALLCONV SpriteBatch::Draw(ID3D11ShaderResourceView* texture, XMFLOAT2 const& position, FXMVECTOR color) +{ + XMVECTOR destination = XMVectorPermute<0, 1, 4, 5>(XMLoadFloat2(&position), g_XMOne); // x, y, 1, 1 + + pImpl->Draw(texture, destination, nullptr, color, g_XMZero, 0); +} + + +_Use_decl_annotations_ +void XM_CALLCONV SpriteBatch::Draw(ID3D11ShaderResourceView* texture, + XMFLOAT2 const& position, + RECT const* sourceRectangle, + FXMVECTOR color, + float rotation, + XMFLOAT2 const& origin, + float scale, + SpriteEffects effects, + float layerDepth) +{ + XMVECTOR destination = XMVectorPermute<0, 1, 4, 4>(XMLoadFloat2(&position), XMLoadFloat(&scale)); // x, y, scale, scale + + XMVECTOR originRotationDepth = XMVectorSet(origin.x, origin.y, rotation, layerDepth); + + pImpl->Draw(texture, destination, sourceRectangle, color, originRotationDepth, static_cast(effects)); +} + + +_Use_decl_annotations_ +void XM_CALLCONV SpriteBatch::Draw(ID3D11ShaderResourceView* texture, + XMFLOAT2 const& position, + RECT const* sourceRectangle, + FXMVECTOR color, + float rotation, + XMFLOAT2 const& origin, + XMFLOAT2 const& scale, + SpriteEffects effects, + float layerDepth) +{ + XMVECTOR destination = XMVectorPermute<0, 1, 4, 5>(XMLoadFloat2(&position), XMLoadFloat2(&scale)); 
// x, y, scale.x, scale.y + + XMVECTOR originRotationDepth = XMVectorSet(origin.x, origin.y, rotation, layerDepth); + + pImpl->Draw(texture, destination, sourceRectangle, color, originRotationDepth, static_cast(effects)); +} + + +_Use_decl_annotations_ +void XM_CALLCONV SpriteBatch::Draw(ID3D11ShaderResourceView* texture, FXMVECTOR position, FXMVECTOR color) +{ + XMVECTOR destination = XMVectorPermute<0, 1, 4, 5>(position, g_XMOne); // x, y, 1, 1 + + pImpl->Draw(texture, destination, nullptr, color, g_XMZero, 0); +} + + +_Use_decl_annotations_ +void XM_CALLCONV SpriteBatch::Draw(ID3D11ShaderResourceView* texture, + FXMVECTOR position, + RECT const* sourceRectangle, + FXMVECTOR color, + float rotation, + FXMVECTOR origin, + float scale, + SpriteEffects effects, + float layerDepth) +{ + XMVECTOR destination = XMVectorPermute<0, 1, 4, 4>(position, XMLoadFloat(&scale)); // x, y, scale, scale + + XMVECTOR rotationDepth = XMVectorMergeXY(XMVectorReplicate(rotation), XMVectorReplicate(layerDepth)); + + XMVECTOR originRotationDepth = XMVectorPermute<0, 1, 4, 5>(origin, rotationDepth); + + pImpl->Draw(texture, destination, sourceRectangle, color, originRotationDepth, static_cast(effects)); +} + + +_Use_decl_annotations_ +void XM_CALLCONV SpriteBatch::Draw(ID3D11ShaderResourceView* texture, + FXMVECTOR position, + RECT const* sourceRectangle, + FXMVECTOR color, + float rotation, + FXMVECTOR origin, + GXMVECTOR scale, + SpriteEffects effects, + float layerDepth) +{ + XMVECTOR destination = XMVectorPermute<0, 1, 4, 5>(position, scale); // x, y, scale.x, scale.y + + XMVECTOR rotationDepth = XMVectorMergeXY(XMVectorReplicate(rotation), XMVectorReplicate(layerDepth)); + + XMVECTOR originRotationDepth = XMVectorPermute<0, 1, 4, 5>(origin, rotationDepth); + + pImpl->Draw(texture, destination, sourceRectangle, color, originRotationDepth, static_cast(effects)); +} + + +_Use_decl_annotations_ +void XM_CALLCONV SpriteBatch::Draw(ID3D11ShaderResourceView* texture, RECT const& 
destinationRectangle, FXMVECTOR color) +{ + XMVECTOR destination = LoadRect(&destinationRectangle); // x, y, w, h + + pImpl->Draw(texture, destination, nullptr, color, g_XMZero, Impl::SpriteInfo::DestSizeInPixels); +} + + +_Use_decl_annotations_ +void XM_CALLCONV SpriteBatch::Draw(ID3D11ShaderResourceView* texture, + RECT const& destinationRectangle, + RECT const* sourceRectangle, + FXMVECTOR color, + float rotation, + XMFLOAT2 const& origin, + SpriteEffects effects, + float layerDepth) +{ + XMVECTOR destination = LoadRect(&destinationRectangle); // x, y, w, h + + XMVECTOR originRotationDepth = XMVectorSet(origin.x, origin.y, rotation, layerDepth); + + pImpl->Draw(texture, destination, sourceRectangle, color, originRotationDepth, static_cast(effects) | Impl::SpriteInfo::DestSizeInPixels); +} + + +void SpriteBatch::SetRotation(DXGI_MODE_ROTATION mode) +{ + pImpl->mRotation = mode; +} + + +DXGI_MODE_ROTATION SpriteBatch::GetRotation() const noexcept +{ + return pImpl->mRotation; +} + + +void SpriteBatch::SetViewport(const D3D11_VIEWPORT& viewPort) +{ + pImpl->mSetViewport = true; + pImpl->mViewPort = viewPort; +} diff --git a/Sdk/External/DirectXTK/Src/SpriteFont.cpp b/Sdk/External/DirectXTK/Src/SpriteFont.cpp new file mode 100644 index 0000000..1c9db5e --- /dev/null +++ b/Sdk/External/DirectXTK/Src/SpriteFont.cpp @@ -0,0 +1,654 @@ +//-------------------------------------------------------------------------------------- +// File: SpriteFont.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" + +#include +#include + +#include "SpriteFont.h" +#include "DirectXHelpers.h" +#include "BinaryReader.h" +#include "LoaderHelpers.h" + +using namespace DirectX; +using Microsoft::WRL::ComPtr; + + +// Internal SpriteFont implementation class. 
+class SpriteFont::Impl +{ +public: + Impl(_In_ ID3D11Device* device, + _In_ BinaryReader* reader, + bool forceSRGB) noexcept(false); + Impl(_In_ ID3D11ShaderResourceView* texture, + _In_reads_(glyphCount) Glyph const* glyphs, + size_t glyphCount, + float lineSpacing) noexcept(false); + + Glyph const* FindGlyph(wchar_t character) const; + + void SetDefaultCharacter(wchar_t character); + + template + void ForEachGlyph(_In_z_ wchar_t const* text, TAction action, bool ignoreWhitespace) const; + + void CreateTextureResource(_In_ ID3D11Device* device, + uint32_t width, uint32_t height, + DXGI_FORMAT format, + uint32_t stride, uint32_t rows, + _In_reads_(stride * rows) const uint8_t* data) noexcept(false); + + const wchar_t* ConvertUTF8(_In_z_ const char *text) noexcept(false); + + // Fields. + ComPtr texture; + std::vector glyphs; + std::vector glyphsIndex; + Glyph const* defaultGlyph; + float lineSpacing; + +private: + size_t utfBufferSize; + std::unique_ptr utfBuffer; +}; + + +// Constants. +const XMFLOAT2 SpriteFont::Float2Zero(0, 0); + +static const char spriteFontMagic[] = "DXTKfont"; + + +// Comparison operators make our sorted glyph vector work with std::binary_search and lower_bound. +namespace DirectX +{ + static inline bool operator< (SpriteFont::Glyph const& left, SpriteFont::Glyph const& right) noexcept + { + return left.Character < right.Character; + } + + static inline bool operator< (wchar_t left, SpriteFont::Glyph const& right) noexcept + { + return left < right.Character; + } + + static inline bool operator< (SpriteFont::Glyph const& left, wchar_t right) noexcept + { + return left.Character < right; + } +} + + +// Reads a SpriteFont from the binary format created by the MakeSpriteFont utility. +_Use_decl_annotations_ +SpriteFont::Impl::Impl( + ID3D11Device* device, + BinaryReader* reader, + bool forceSRGB) noexcept(false) : + defaultGlyph(nullptr), + lineSpacing(0), + utfBufferSize(0) +{ + // Validate the header. 
+ for (char const* magic = spriteFontMagic; *magic; magic++) + { + if (reader->Read() != *magic) + { + DebugTrace("ERROR: SpriteFont provided with an invalid .spritefont file\n"); + throw std::exception("Not a MakeSpriteFont output binary"); + } + } + + // Read the glyph data. + auto glyphCount = reader->Read(); + auto glyphData = reader->ReadArray(glyphCount); + + glyphs.assign(glyphData, glyphData + glyphCount); + glyphsIndex.reserve(glyphs.size()); + + for (auto& glyph : glyphs) + { + glyphsIndex.emplace_back(glyph.Character); + } + + // Read font properties. + lineSpacing = reader->Read(); + + SetDefaultCharacter(static_cast(reader->Read())); + + // Read the texture data. + auto textureWidth = reader->Read(); + auto textureHeight = reader->Read(); + auto textureFormat = reader->Read(); + auto textureStride = reader->Read(); + auto textureRows = reader->Read(); + + uint64_t dataSize = uint64_t(textureStride) * uint64_t(textureRows); + if (dataSize > UINT32_MAX) + { + DebugTrace("ERROR: SpriteFont provided with an invalid .spritefont file\n"); + throw std::overflow_error("Invalid .spritefont file"); + } + + auto textureData = reader->ReadArray(static_cast(dataSize)); + + if (forceSRGB) + { + textureFormat = LoaderHelpers::MakeSRGB(textureFormat); + } + + // Create the D3D texture. + CreateTextureResource( + device, + textureWidth, textureHeight, + textureFormat, + textureStride, textureRows, + textureData); +} + + +// Constructs a SpriteFont from arbitrary user specified glyph data. 
+_Use_decl_annotations_ +SpriteFont::Impl::Impl( + ID3D11ShaderResourceView* itexture, + Glyph const* iglyphs, + size_t glyphCount, + float ilineSpacing) noexcept(false) : + texture(itexture), + glyphs(iglyphs, iglyphs + glyphCount), + defaultGlyph(nullptr), + lineSpacing(ilineSpacing), + utfBufferSize(0) +{ + if (!std::is_sorted(iglyphs, iglyphs + glyphCount)) + { + throw std::exception("Glyphs must be in ascending codepoint order"); + } + + glyphsIndex.reserve(glyphs.size()); + + for (auto& glyph : glyphs) + { + glyphsIndex.emplace_back(glyph.Character); + } +} + + +// Looks up the requested glyph, falling back to the default character if it is not in the font. +SpriteFont::Glyph const* SpriteFont::Impl::FindGlyph(wchar_t character) const +{ + // Rather than use std::lower_bound (which includes a slow debug path when built for _DEBUG), + // we implement a binary search inline to ensure sufficient Debug build performance to be useful + // for text-heavy applications. + + size_t lower = 0; + size_t higher = glyphs.size() - 1; + size_t index = higher / 2; + const size_t size = glyphs.size(); + + while (index < size) + { + const auto curChar = glyphsIndex[index]; + if (curChar == character) { return &glyphs[index]; } + if (curChar < character) + { + lower = index + 1; + } + else + { + higher = index - 1; + } + if (higher < lower) { break; } + else if (higher - lower <= 4) + { + for (index = lower; index <= higher; index++) + { + if (glyphsIndex[index] == character) + { + return &glyphs[index]; + } + } + } + index = lower + ((higher - lower) / 2); + } + + if (defaultGlyph) + { + return defaultGlyph; + } + + DebugTrace("ERROR: SpriteFont encountered a character not in the font (%u, %C), and no default glyph was provided\n", character, character); + throw std::exception("Character not in font"); +} + + +// Sets the missing-character fallback glyph. 
+void SpriteFont::Impl::SetDefaultCharacter(wchar_t character) +{ + defaultGlyph = nullptr; + + if (character) + { + defaultGlyph = FindGlyph(character); + } +} + + +// The core glyph layout algorithm, shared between DrawString and MeasureString. +template +void SpriteFont::Impl::ForEachGlyph(_In_z_ wchar_t const* text, TAction action, bool ignoreWhitespace) const +{ + float x = 0; + float y = 0; + + for (; *text; text++) + { + wchar_t character = *text; + + switch (character) + { + case '\r': + // Skip carriage returns. + continue; + + case '\n': + // New line. + x = 0; + y += lineSpacing; + break; + + default: + // Output this character. + auto glyph = FindGlyph(character); + + x += glyph->XOffset; + + if (x < 0) + x = 0; + + float advance = float(glyph->Subrect.right) - float(glyph->Subrect.left) + glyph->XAdvance; + + if (!ignoreWhitespace + || !iswspace(character) + || ((glyph->Subrect.right - glyph->Subrect.left) > 1) + || ((glyph->Subrect.bottom - glyph->Subrect.top) > 1)) + { + action(glyph, x, y, advance); + } + + x += advance; + break; + } + } +} + + +_Use_decl_annotations_ +void SpriteFont::Impl::CreateTextureResource( + ID3D11Device* device, + uint32_t width, uint32_t height, + DXGI_FORMAT format, + uint32_t stride, uint32_t rows, + const uint8_t* data) noexcept(false) +{ + uint64_t sliceBytes = uint64_t(stride) * uint64_t(rows); + if (sliceBytes > UINT32_MAX) + { + DebugTrace("ERROR: SpriteFont provided with an invalid .spritefont file\n"); + throw std::overflow_error("Invalid .spritefont file"); + } + + D3D11_TEXTURE2D_DESC desc = {}; + desc.Width = width; + desc.Height = height; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = format; + desc.SampleDesc.Count = 1; + desc.Usage = D3D11_USAGE_IMMUTABLE; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + D3D11_SUBRESOURCE_DATA initData = { data, stride, static_cast(sliceBytes) }; + + ComPtr texture2D; + ThrowIfFailed( + device->CreateTexture2D(&desc, &initData, &texture2D) + ); + + 
CD3D11_SHADER_RESOURCE_VIEW_DESC viewDesc(D3D11_SRV_DIMENSION_TEXTURE2D, format); + ThrowIfFailed( + device->CreateShaderResourceView(texture2D.Get(), &viewDesc, texture.ReleaseAndGetAddressOf()) + ); + + SetDebugObjectName(texture.Get(), "DirectXTK:SpriteFont"); + SetDebugObjectName(texture2D.Get(), "DirectXTK:SpriteFont"); +} + + +const wchar_t* SpriteFont::Impl::ConvertUTF8(_In_z_ const char *text) noexcept(false) +{ + if (!utfBuffer) + { + utfBufferSize = 1024; + utfBuffer.reset(new wchar_t[1024]); + } + + int result = MultiByteToWideChar(CP_UTF8, 0, text, -1, utfBuffer.get(), static_cast(utfBufferSize)); + if (!result && (GetLastError() == ERROR_INSUFFICIENT_BUFFER)) + { + // Compute required buffer size + result = MultiByteToWideChar(CP_UTF8, 0, text, -1, nullptr, 0); + utfBufferSize = AlignUp(static_cast(result), 1024u); + utfBuffer.reset(new wchar_t[utfBufferSize]); + + // Retry conversion + result = MultiByteToWideChar(CP_UTF8, 0, text, -1, utfBuffer.get(), static_cast(utfBufferSize)); + } + + if (!result) + { + DebugTrace("ERROR: MultiByteToWideChar failed with error %u.\n", GetLastError()); + throw std::exception("MultiByteToWideChar"); + } + + return utfBuffer.get(); +} + + +// Construct from a binary file created by the MakeSpriteFont utility. +_Use_decl_annotations_ +SpriteFont::SpriteFont(ID3D11Device* device, wchar_t const* fileName, bool forceSRGB) +{ + BinaryReader reader(fileName); + + pImpl = std::make_unique(device, &reader, forceSRGB); +} + + +// Construct from a binary blob created by the MakeSpriteFont utility and already loaded into memory. +_Use_decl_annotations_ +SpriteFont::SpriteFont(ID3D11Device* device, uint8_t const* dataBlob, size_t dataSize, bool forceSRGB) +{ + BinaryReader reader(dataBlob, dataSize); + + pImpl = std::make_unique(device, &reader, forceSRGB); +} + + +// Construct from arbitrary user specified glyph data (for those not using the MakeSpriteFont utility). 
+_Use_decl_annotations_ +SpriteFont::SpriteFont(ID3D11ShaderResourceView* texture, Glyph const* glyphs, size_t glyphCount, float lineSpacing) + : pImpl(std::make_unique(texture, glyphs, glyphCount, lineSpacing)) +{ +} + + +// Move constructor. +SpriteFont::SpriteFont(SpriteFont&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +SpriteFont& SpriteFont::operator= (SpriteFont&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. +SpriteFont::~SpriteFont() +{ +} + + +// Wide-character / UTF-16LE +void XM_CALLCONV SpriteFont::DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ wchar_t const* text, XMFLOAT2 const& position, FXMVECTOR color, float rotation, XMFLOAT2 const& origin, float scale, SpriteEffects effects, float layerDepth) const +{ + DrawString(spriteBatch, text, XMLoadFloat2(&position), color, rotation, XMLoadFloat2(&origin), XMVectorReplicate(scale), effects, layerDepth); +} + + +void XM_CALLCONV SpriteFont::DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ wchar_t const* text, XMFLOAT2 const& position, FXMVECTOR color, float rotation, XMFLOAT2 const& origin, XMFLOAT2 const& scale, SpriteEffects effects, float layerDepth) const +{ + DrawString(spriteBatch, text, XMLoadFloat2(&position), color, rotation, XMLoadFloat2(&origin), XMLoadFloat2(&scale), effects, layerDepth); +} + + +void XM_CALLCONV SpriteFont::DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ wchar_t const* text, FXMVECTOR position, FXMVECTOR color, float rotation, FXMVECTOR origin, float scale, SpriteEffects effects, float layerDepth) const +{ + DrawString(spriteBatch, text, position, color, rotation, origin, XMVectorReplicate(scale), effects, layerDepth); +} + + +void XM_CALLCONV SpriteFont::DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ wchar_t const* text, FXMVECTOR position, FXMVECTOR color, float rotation, FXMVECTOR origin, GXMVECTOR scale, SpriteEffects effects, float layerDepth) const +{ + 
static_assert(SpriteEffects_FlipHorizontally == 1 && + SpriteEffects_FlipVertically == 2, "If you change these enum values, the following tables must be updated to match"); + + // Lookup table indicates which way to move along each axis per SpriteEffects enum value. + static XMVECTORF32 axisDirectionTable[4] = + { + { { { -1, -1, 0, 0 } } }, + { { { 1, -1, 0, 0 } } }, + { { { -1, 1, 0, 0 } } }, + { { { 1, 1, 0, 0 } } }, + }; + + // Lookup table indicates which axes are mirrored for each SpriteEffects enum value. + static XMVECTORF32 axisIsMirroredTable[4] = + { + { { { 0, 0, 0, 0 } } }, + { { { 1, 0, 0, 0 } } }, + { { { 0, 1, 0, 0 } } }, + { { { 1, 1, 0, 0 } } }, + }; + + XMVECTOR baseOffset = origin; + + // If the text is mirrored, offset the start position accordingly. + if (effects) + { + baseOffset = XMVectorNegativeMultiplySubtract( + MeasureString(text), + axisIsMirroredTable[effects & 3], + baseOffset); + } + + // Draw each character in turn. + pImpl->ForEachGlyph(text, [&](Glyph const* glyph, float x, float y, float advance) + { + UNREFERENCED_PARAMETER(advance); + + XMVECTOR offset = XMVectorMultiplyAdd(XMVectorSet(x, y + glyph->YOffset, 0, 0), axisDirectionTable[effects & 3], baseOffset); + + if (effects) + { + // For mirrored characters, specify bottom and/or right instead of top left. + XMVECTOR glyphRect = XMConvertVectorIntToFloat(XMLoadInt4(reinterpret_cast(&glyph->Subrect)), 0); + + // xy = glyph width/height. 
+ glyphRect = XMVectorSubtract(XMVectorSwizzle<2, 3, 0, 1>(glyphRect), glyphRect); + + offset = XMVectorMultiplyAdd(glyphRect, axisIsMirroredTable[effects & 3], offset); + } + + spriteBatch->Draw(pImpl->texture.Get(), position, &glyph->Subrect, color, rotation, offset, scale, effects, layerDepth); + }, true); +} + + +XMVECTOR XM_CALLCONV SpriteFont::MeasureString(_In_z_ wchar_t const* text, bool ignoreWhitespace) const +{ + XMVECTOR result = XMVectorZero(); + + pImpl->ForEachGlyph(text, [&](Glyph const* glyph, float x, float y, float advance) + { + UNREFERENCED_PARAMETER(advance); + + auto w = static_cast(glyph->Subrect.right - glyph->Subrect.left); + auto h = static_cast(glyph->Subrect.bottom - glyph->Subrect.top) + glyph->YOffset; + + h = iswspace(wchar_t(glyph->Character)) ? + pImpl->lineSpacing : + std::max(h, pImpl->lineSpacing); + + result = XMVectorMax(result, XMVectorSet(x + w, y + h, 0, 0)); + }, ignoreWhitespace); + + return result; +} + + +RECT SpriteFont::MeasureDrawBounds(_In_z_ wchar_t const* text, XMFLOAT2 const& position, bool ignoreWhitespace) const +{ + RECT result = { LONG_MAX, LONG_MAX, 0, 0 }; + + pImpl->ForEachGlyph(text, [&](Glyph const* glyph, float x, float y, float advance) noexcept + { + auto isWhitespace = iswspace(wchar_t(glyph->Character)); + auto w = static_cast(glyph->Subrect.right - glyph->Subrect.left); + auto h = isWhitespace ? + pImpl->lineSpacing : + static_cast(glyph->Subrect.bottom - glyph->Subrect.top); + + float minX = position.x + x; + float minY = position.y + y + (isWhitespace ? 
0.0f : glyph->YOffset); + + float maxX = std::max(minX + advance, minX + w); + float maxY = minY + h; + + if (minX < float(result.left)) + result.left = long(minX); + + if (minY < float(result.top)) + result.top = long(minY); + + if (float(result.right) < maxX) + result.right = long(maxX); + + if (float(result.bottom) < maxY) + result.bottom = long(maxY); + }, ignoreWhitespace); + + if (result.left == LONG_MAX) + { + result.left = 0; + result.top = 0; + } + + return result; +} + + +RECT XM_CALLCONV SpriteFont::MeasureDrawBounds(_In_z_ wchar_t const* text, FXMVECTOR position, bool ignoreWhitespace) const +{ + XMFLOAT2 pos; + XMStoreFloat2(&pos, position); + + return MeasureDrawBounds(text, pos, ignoreWhitespace); +} + + +// UTF-8 +void XM_CALLCONV SpriteFont::DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ char const* text, XMFLOAT2 const& position, FXMVECTOR color, float rotation, XMFLOAT2 const& origin, float scale, SpriteEffects effects, float layerDepth) const +{ + DrawString(spriteBatch, pImpl->ConvertUTF8(text), XMLoadFloat2(&position), color, rotation, XMLoadFloat2(&origin), XMVectorReplicate(scale), effects, layerDepth); +} + + +void XM_CALLCONV SpriteFont::DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ char const* text, XMFLOAT2 const& position, FXMVECTOR color, float rotation, XMFLOAT2 const& origin, XMFLOAT2 const& scale, SpriteEffects effects, float layerDepth) const +{ + DrawString(spriteBatch, pImpl->ConvertUTF8(text), XMLoadFloat2(&position), color, rotation, XMLoadFloat2(&origin), XMLoadFloat2(&scale), effects, layerDepth); +} + + +void XM_CALLCONV SpriteFont::DrawString(_In_ SpriteBatch* spriteBatch, _In_z_ char const* text, FXMVECTOR position, FXMVECTOR color, float rotation, FXMVECTOR origin, float scale, SpriteEffects effects, float layerDepth) const +{ + DrawString(spriteBatch, pImpl->ConvertUTF8(text), position, color, rotation, origin, XMVectorReplicate(scale), effects, layerDepth); +} + + +void XM_CALLCONV SpriteFont::DrawString(_In_ 
SpriteBatch* spriteBatch, _In_z_ char const* text, FXMVECTOR position, FXMVECTOR color, float rotation, FXMVECTOR origin, GXMVECTOR scale, SpriteEffects effects, float layerDepth) const +{ + DrawString(spriteBatch, pImpl->ConvertUTF8(text), position, color, rotation, origin, scale, effects, layerDepth); +} + + +XMVECTOR XM_CALLCONV SpriteFont::MeasureString(_In_z_ char const* text, bool ignoreWhitespace) const +{ + return MeasureString(pImpl->ConvertUTF8(text), ignoreWhitespace); +} + + +RECT SpriteFont::MeasureDrawBounds(_In_z_ char const* text, XMFLOAT2 const& position, bool ignoreWhitespace) const +{ + return MeasureDrawBounds(pImpl->ConvertUTF8(text), position, ignoreWhitespace); +} + + +RECT XM_CALLCONV SpriteFont::MeasureDrawBounds(_In_z_ char const* text, FXMVECTOR position, bool ignoreWhitespace) const +{ + XMFLOAT2 pos; + XMStoreFloat2(&pos, position); + + return MeasureDrawBounds(pImpl->ConvertUTF8(text), pos, ignoreWhitespace); +} + + +// Spacing properties +float SpriteFont::GetLineSpacing() const noexcept +{ + return pImpl->lineSpacing; +} + + +void SpriteFont::SetLineSpacing(float spacing) +{ + pImpl->lineSpacing = spacing; +} + + +// Font properties +wchar_t SpriteFont::GetDefaultCharacter() const noexcept +{ + return static_cast(pImpl->defaultGlyph ? 
pImpl->defaultGlyph->Character : 0); +} + + +void SpriteFont::SetDefaultCharacter(wchar_t character) +{ + pImpl->SetDefaultCharacter(character); +} + + +bool SpriteFont::ContainsCharacter(wchar_t character) const +{ + return std::binary_search(pImpl->glyphs.begin(), pImpl->glyphs.end(), character); +} + + +// Custom layout/rendering +SpriteFont::Glyph const* SpriteFont::FindGlyph(wchar_t character) const +{ + return pImpl->FindGlyph(character); +} + + +void SpriteFont::GetSpriteSheet(ID3D11ShaderResourceView** texture) const +{ + if (!texture) + return; + + ThrowIfFailed(pImpl->texture.CopyTo(texture)); +} diff --git a/Sdk/External/DirectXTK/Src/TeapotData.inc b/Sdk/External/DirectXTK/Src/TeapotData.inc new file mode 100644 index 0000000..39e9a82 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/TeapotData.inc @@ -0,0 +1,182 @@ +//-------------------------------------------------------------------------------------- +// File: TeapotData.inc +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + + +// The teapot model consists of 10 bezier patches. Each patch has 16 control +// points, plus a flag indicating whether it should be mirrored in the Z axis +// as well as in X (all of the teapot is symmetrical from left to right, but +// only some parts are symmetrical from front to back). The control points +// are stored as integer indices into the TeapotControlPoints array. + +struct TeapotPatch +{ + bool mirrorZ; + int indices[16]; +}; + + +// Static data array defines the bezier patches that make up the teapot. +const TeapotPatch TeapotPatches[] = +{ + // Rim. + { true, { 102, 103, 104, 105, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } }, + + // Body. 
+ { true, { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 } }, + { true, { 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40 } }, + + // Lid. + { true, { 96, 96, 96, 96, 97, 98, 99, 100, 101, 101, 101, 101, 0, 1, 2, 3 } }, + { true, { 0, 1, 2, 3, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117 } }, + + // Handle. + { false, { 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56 } }, + { false, { 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 28, 65, 66, 67 } }, + + // Spout. + { false, { 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83 } }, + { false, { 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95 } }, + + // Bottom. + { true, { 118, 118, 118, 118, 124, 122, 119, 121, 123, 126, 125, 120, 40, 39, 38, 37 } }, +}; + + +// Static array defines the control point positions that make up the teapot. +const DirectX::XMVECTORF32 TeapotControlPoints[] = +{ + { { { 0, 0.345f, -0.05f, 0 } } }, + { { { -0.028f, 0.345f, -0.05f, 0 } } }, + { { { -0.05f, 0.345f, -0.028f, 0 } } }, + { { { -0.05f, 0.345f, -0, 0 } } }, + { { { 0, 0.3028125f, -0.334375f, 0 } } }, + { { { -0.18725f, 0.3028125f, -0.334375f, 0 } } }, + { { { -0.334375f, 0.3028125f, -0.18725f, 0 } } }, + { { { -0.334375f, 0.3028125f, -0, 0 } } }, + { { { 0, 0.3028125f, -0.359375f, 0 } } }, + { { { -0.20125f, 0.3028125f, -0.359375f, 0 } } }, + { { { -0.359375f, 0.3028125f, -0.20125f, 0 } } }, + { { { -0.359375f, 0.3028125f, -0, 0 } } }, + { { { 0, 0.27f, -0.375f, 0 } } }, + { { { -0.21f, 0.27f, -0.375f, 0 } } }, + { { { -0.375f, 0.27f, -0.21f, 0 } } }, + { { { -0.375f, 0.27f, -0, 0 } } }, + { { { 0, 0.13875f, -0.4375f, 0 } } }, + { { { -0.245f, 0.13875f, -0.4375f, 0 } } }, + { { { -0.4375f, 0.13875f, -0.245f, 0 } } }, + { { { -0.4375f, 0.13875f, -0, 0 } } }, + { { { 0, 0.007499993f, -0.5f, 0 } } }, + { { { -0.28f, 0.007499993f, -0.5f, 0 } } }, + { { { -0.5f, 0.007499993f, -0.28f, 0 } } }, + { { { -0.5f, 0.007499993f, -0, 0 } } }, + { { { 0, 
-0.105f, -0.5f, 0 } } }, + { { { -0.28f, -0.105f, -0.5f, 0 } } }, + { { { -0.5f, -0.105f, -0.28f, 0 } } }, + { { { -0.5f, -0.105f, -0, 0 } } }, + { { { 0, -0.105f, 0.5f, 0 } } }, + { { { 0, -0.2175f, -0.5f, 0 } } }, + { { { -0.28f, -0.2175f, -0.5f, 0 } } }, + { { { -0.5f, -0.2175f, -0.28f, 0 } } }, + { { { -0.5f, -0.2175f, -0, 0 } } }, + { { { 0, -0.27375f, -0.375f, 0 } } }, + { { { -0.21f, -0.27375f, -0.375f, 0 } } }, + { { { -0.375f, -0.27375f, -0.21f, 0 } } }, + { { { -0.375f, -0.27375f, -0, 0 } } }, + { { { 0, -0.2925f, -0.375f, 0 } } }, + { { { -0.21f, -0.2925f, -0.375f, 0 } } }, + { { { -0.375f, -0.2925f, -0.21f, 0 } } }, + { { { -0.375f, -0.2925f, -0, 0 } } }, + { { { 0, 0.17625f, 0.4f, 0 } } }, + { { { -0.075f, 0.17625f, 0.4f, 0 } } }, + { { { -0.075f, 0.2325f, 0.375f, 0 } } }, + { { { 0, 0.2325f, 0.375f, 0 } } }, + { { { 0, 0.17625f, 0.575f, 0 } } }, + { { { -0.075f, 0.17625f, 0.575f, 0 } } }, + { { { -0.075f, 0.2325f, 0.625f, 0 } } }, + { { { 0, 0.2325f, 0.625f, 0 } } }, + { { { 0, 0.17625f, 0.675f, 0 } } }, + { { { -0.075f, 0.17625f, 0.675f, 0 } } }, + { { { -0.075f, 0.2325f, 0.75f, 0 } } }, + { { { 0, 0.2325f, 0.75f, 0 } } }, + { { { 0, 0.12f, 0.675f, 0 } } }, + { { { -0.075f, 0.12f, 0.675f, 0 } } }, + { { { -0.075f, 0.12f, 0.75f, 0 } } }, + { { { 0, 0.12f, 0.75f, 0 } } }, + { { { 0, 0.06375f, 0.675f, 0 } } }, + { { { -0.075f, 0.06375f, 0.675f, 0 } } }, + { { { -0.075f, 0.007499993f, 0.75f, 0 } } }, + { { { 0, 0.007499993f, 0.75f, 0 } } }, + { { { 0, -0.04875001f, 0.625f, 0 } } }, + { { { -0.075f, -0.04875001f, 0.625f, 0 } } }, + { { { -0.075f, -0.09562501f, 0.6625f, 0 } } }, + { { { 0, -0.09562501f, 0.6625f, 0 } } }, + { { { -0.075f, -0.105f, 0.5f, 0 } } }, + { { { -0.075f, -0.18f, 0.475f, 0 } } }, + { { { 0, -0.18f, 0.475f, 0 } } }, + { { { 0, 0.02624997f, -0.425f, 0 } } }, + { { { -0.165f, 0.02624997f, -0.425f, 0 } } }, + { { { -0.165f, -0.18f, -0.425f, 0 } } }, + { { { 0, -0.18f, -0.425f, 0 } } }, + { { { 0, 0.02624997f, -0.65f, 0 } } }, + { { { 
-0.165f, 0.02624997f, -0.65f, 0 } } }, + { { { -0.165f, -0.12375f, -0.775f, 0 } } }, + { { { 0, -0.12375f, -0.775f, 0 } } }, + { { { 0, 0.195f, -0.575f, 0 } } }, + { { { -0.0625f, 0.195f, -0.575f, 0 } } }, + { { { -0.0625f, 0.17625f, -0.6f, 0 } } }, + { { { 0, 0.17625f, -0.6f, 0 } } }, + { { { 0, 0.27f, -0.675f, 0 } } }, + { { { -0.0625f, 0.27f, -0.675f, 0 } } }, + { { { -0.0625f, 0.27f, -0.825f, 0 } } }, + { { { 0, 0.27f, -0.825f, 0 } } }, + { { { 0, 0.28875f, -0.7f, 0 } } }, + { { { -0.0625f, 0.28875f, -0.7f, 0 } } }, + { { { -0.0625f, 0.2934375f, -0.88125f, 0 } } }, + { { { 0, 0.2934375f, -0.88125f, 0 } } }, + { { { 0, 0.28875f, -0.725f, 0 } } }, + { { { -0.0375f, 0.28875f, -0.725f, 0 } } }, + { { { -0.0375f, 0.298125f, -0.8625f, 0 } } }, + { { { 0, 0.298125f, -0.8625f, 0 } } }, + { { { 0, 0.27f, -0.7f, 0 } } }, + { { { -0.0375f, 0.27f, -0.7f, 0 } } }, + { { { -0.0375f, 0.27f, -0.8f, 0 } } }, + { { { 0, 0.27f, -0.8f, 0 } } }, + { { { 0, 0.4575f, -0, 0 } } }, + { { { 0, 0.4575f, -0.2f, 0 } } }, + { { { -0.1125f, 0.4575f, -0.2f, 0 } } }, + { { { -0.2f, 0.4575f, -0.1125f, 0 } } }, + { { { -0.2f, 0.4575f, -0, 0 } } }, + { { { 0, 0.3825f, -0, 0 } } }, + { { { 0, 0.27f, -0.35f, 0 } } }, + { { { -0.196f, 0.27f, -0.35f, 0 } } }, + { { { -0.35f, 0.27f, -0.196f, 0 } } }, + { { { -0.35f, 0.27f, -0, 0 } } }, + { { { 0, 0.3075f, -0.1f, 0 } } }, + { { { -0.056f, 0.3075f, -0.1f, 0 } } }, + { { { -0.1f, 0.3075f, -0.056f, 0 } } }, + { { { -0.1f, 0.3075f, -0, 0 } } }, + { { { 0, 0.3075f, -0.325f, 0 } } }, + { { { -0.182f, 0.3075f, -0.325f, 0 } } }, + { { { -0.325f, 0.3075f, -0.182f, 0 } } }, + { { { -0.325f, 0.3075f, -0, 0 } } }, + { { { 0, 0.27f, -0.325f, 0 } } }, + { { { -0.182f, 0.27f, -0.325f, 0 } } }, + { { { -0.325f, 0.27f, -0.182f, 0 } } }, + { { { -0.325f, 0.27f, -0, 0 } } }, + { { { 0, -0.33f, -0, 0 } } }, + { { { -0.1995f, -0.33f, -0.35625f, 0 } } }, + { { { 0, -0.31125f, -0.375f, 0 } } }, + { { { 0, -0.33f, -0.35625f, 0 } } }, + { { { -0.35625f, -0.33f, -0.1995f, 0 } } 
}, + { { { -0.375f, -0.31125f, -0, 0 } } }, + { { { -0.35625f, -0.33f, -0, 0 } } }, + { { { -0.21f, -0.31125f, -0.375f, 0 } } }, + { { { -0.375f, -0.31125f, -0.21f, 0 } } }, +}; diff --git a/Sdk/External/DirectXTK/Src/ToneMapPostProcess.cpp b/Sdk/External/DirectXTK/Src/ToneMapPostProcess.cpp new file mode 100644 index 0000000..d5da723 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/ToneMapPostProcess.cpp @@ -0,0 +1,435 @@ +//-------------------------------------------------------------------------------------- +// File: ToneMapPostProcess.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "PostProcess.h" +#include "BufferHelpers.h" +#include "CommonStates.h" +#include "DirectXHelpers.h" +#include "AlignedNew.h" +#include "DemandCreate.h" +#include "SharedResourcePool.h" + +using namespace DirectX; + +using Microsoft::WRL::ComPtr; + +namespace +{ + const constexpr int Dirty_ConstantBuffer = 0x01; + const constexpr int Dirty_Parameters = 0x02; + +#if defined(_XBOX_ONE) && defined(_TITLE) + const constexpr int PixelShaderCount = 15; + const constexpr int ShaderPermutationCount = 24; +#else + const constexpr int PixelShaderCount = 9; + const constexpr int ShaderPermutationCount = 12; +#endif + + // Constant buffer layout. Must match the shader! + __declspec(align(16)) struct ToneMapConstants + { + // linearExposure is .x + // paperWhiteNits is .y + XMVECTOR parameters; + }; + + static_assert((sizeof(ToneMapConstants) % 16) == 0, "CB size not padded correctly"); +} + +// Include the precompiled shader code. 
+namespace +{ +#if defined(_XBOX_ONE) && defined(_TITLE) + #include "Shaders/Compiled/XboxOneToneMap_VSQuad.inc" + + #include "Shaders/Compiled/XboxOneToneMap_PSCopy.inc" + #include "Shaders/Compiled/XboxOneToneMap_PSSaturate.inc" + #include "Shaders/Compiled/XboxOneToneMap_PSReinhard.inc" + #include "Shaders/Compiled/XboxOneToneMap_PSACESFilmic.inc" + #include "Shaders/Compiled/XboxOneToneMap_PS_SRGB.inc" + #include "Shaders/Compiled/XboxOneToneMap_PSSaturate_SRGB.inc" + #include "Shaders/Compiled/XboxOneToneMap_PSReinhard_SRGB.inc" + #include "Shaders/Compiled/XboxOneToneMap_PSACESFilmic_SRGB.inc" + #include "Shaders/Compiled/XboxOneToneMap_PSHDR10.inc" + #include "Shaders/Compiled/XboxOneToneMap_PSHDR10_Saturate.inc" + #include "Shaders/Compiled/XboxOneToneMap_PSHDR10_Reinhard.inc" + #include "Shaders/Compiled/XboxOneToneMap_PSHDR10_ACESFilmic.inc" + #include "Shaders/Compiled/XboxOneToneMap_PSHDR10_Saturate_SRGB.inc" + #include "Shaders/Compiled/XboxOneToneMap_PSHDR10_Reinhard_SRGB.inc" + #include "Shaders/Compiled/XboxOneToneMap_PSHDR10_ACESFilmic_SRGB.inc" +#else + #include "Shaders/Compiled/ToneMap_VSQuad.inc" + + #include "Shaders/Compiled/ToneMap_PSCopy.inc" + #include "Shaders/Compiled/ToneMap_PSSaturate.inc" + #include "Shaders/Compiled/ToneMap_PSReinhard.inc" + #include "Shaders/Compiled/ToneMap_PSACESFilmic.inc" + #include "Shaders/Compiled/ToneMap_PS_SRGB.inc" + #include "Shaders/Compiled/ToneMap_PSSaturate_SRGB.inc" + #include "Shaders/Compiled/ToneMap_PSReinhard_SRGB.inc" + #include "Shaders/Compiled/ToneMap_PSACESFilmic_SRGB.inc" + #include "Shaders/Compiled/ToneMap_PSHDR10.inc" +#endif +} + +namespace +{ + struct ShaderBytecode + { + void const* code; + size_t length; + }; + + const ShaderBytecode pixelShaders[] = + { + { ToneMap_PSCopy, sizeof(ToneMap_PSCopy) }, + { ToneMap_PSSaturate, sizeof(ToneMap_PSSaturate) }, + { ToneMap_PSReinhard, sizeof(ToneMap_PSReinhard) }, + { ToneMap_PSACESFilmic, sizeof(ToneMap_PSACESFilmic) }, + { ToneMap_PS_SRGB, 
sizeof(ToneMap_PS_SRGB) }, + { ToneMap_PSSaturate_SRGB, sizeof(ToneMap_PSSaturate_SRGB) }, + { ToneMap_PSReinhard_SRGB, sizeof(ToneMap_PSReinhard_SRGB) }, + { ToneMap_PSACESFilmic_SRGB, sizeof(ToneMap_PSACESFilmic_SRGB) }, + { ToneMap_PSHDR10, sizeof(ToneMap_PSHDR10) }, + +#if defined(_XBOX_ONE) && defined(_TITLE) + // Shaders that generate both HDR10 and GameDVR SDR signals via Multiple Render Targets. + { ToneMap_PSHDR10_Saturate, sizeof(ToneMap_PSHDR10_Saturate) }, + { ToneMap_PSHDR10_Reinhard, sizeof(ToneMap_PSHDR10_Reinhard) }, + { ToneMap_PSHDR10_ACESFilmic, sizeof(ToneMap_PSHDR10_ACESFilmic) }, + { ToneMap_PSHDR10_Saturate_SRGB, sizeof(ToneMap_PSHDR10_Saturate_SRGB) }, + { ToneMap_PSHDR10_Reinhard_SRGB, sizeof(ToneMap_PSHDR10_Reinhard_SRGB) }, + { ToneMap_PSHDR10_ACESFilmic_SRGB, sizeof(ToneMap_PSHDR10_ACESFilmic_SRGB) }, +#endif + }; + + static_assert(_countof(pixelShaders) == PixelShaderCount, "array/max mismatch"); + + const int pixelShaderIndices[] = + { + // Linear EOTF + 0, // Copy + 1, // Saturate + 2, // Reinhard + 3, // ACES Filmic + + // Gamam22 EOTF + 4, // SRGB + 5, // Saturate_SRGB + 6, // Reinhard_SRGB + 7, // ACES Filmic + + // ST.2084 EOTF + 8, // HDR10 + 8, // HDR10 + 8, // HDR10 + 8, // HDR10 + +#if defined(_XBOX_ONE) && defined(_TITLE) + // MRT Linear EOTF + 9, // HDR10+Saturate + 9, // HDR10+Saturate + 10, // HDR10+Reinhard + 11, // HDR10+ACESFilmic + + // MRT Gamma22 EOTF + 12, // HDR10+Saturate_SRGB + 12, // HDR10+Saturate_SRGB + 13, // HDR10+Reinhard_SRGB + 14, // HDR10+ACESFilmic + + // MRT ST.2084 EOTF + 9, // HDR10+Saturate + 9, // HDR10+Saturate + 10, // HDR10+Reinhard + 11, // HDR10+ACESFilmic +#endif + }; + + static_assert(_countof(pixelShaderIndices) == ShaderPermutationCount, "array/max mismatch"); + + // Factory for lazily instantiating shaders. 
+ class DeviceResources + { + public: + DeviceResources(_In_ ID3D11Device* device) + : stateObjects(device), + mDevice(device), + mVertexShader{}, + mPixelShaders{}, + mMutex{} + { } + + // Gets or lazily creates the vertex shader. + ID3D11VertexShader* GetVertexShader() + { + return DemandCreate(mVertexShader, mMutex, [&](ID3D11VertexShader** pResult) -> HRESULT + { + HRESULT hr = mDevice->CreateVertexShader(ToneMap_VSQuad, sizeof(ToneMap_VSQuad), nullptr, pResult); + + if (SUCCEEDED(hr)) + SetDebugObjectName(*pResult, "ToneMapPostProcess"); + + return hr; + }); + } + + // Gets or lazily creates the specified pixel shader. + ID3D11PixelShader* GetPixelShader(int permutation) + { + assert(permutation >= 0 && permutation < ShaderPermutationCount); + _Analysis_assume_(permutation >= 0 && permutation < ShaderPermutationCount); + int shaderIndex = pixelShaderIndices[permutation]; + assert(shaderIndex >= 0 && shaderIndex < PixelShaderCount); + _Analysis_assume_(shaderIndex >= 0 && shaderIndex < PixelShaderCount); + + return DemandCreate(mPixelShaders[shaderIndex], mMutex, [&](ID3D11PixelShader** pResult) -> HRESULT + { + HRESULT hr = mDevice->CreatePixelShader(pixelShaders[shaderIndex].code, pixelShaders[shaderIndex].length, nullptr, pResult); + + if (SUCCEEDED(hr)) + SetDebugObjectName(*pResult, "ToneMapPostProcess"); + + return hr; + }); + } + + CommonStates stateObjects; + + protected: + ComPtr mDevice; + ComPtr mVertexShader; + ComPtr mPixelShaders[PixelShaderCount]; + std::mutex mMutex; + }; +} + +class ToneMapPostProcess::Impl : public AlignedNew +{ +public: + Impl(_In_ ID3D11Device* device); + + void Process(_In_ ID3D11DeviceContext* deviceContext, std::function& setCustomState); + + void SetDirtyFlag() noexcept { mDirtyFlags = INT_MAX; } + + int GetCurrentShaderPermutation() const noexcept; + + // Fields. 
+ ToneMapConstants constants; + ComPtr hdrTexture; + float linearExposure; + float paperWhiteNits; + + Operator op; + TransferFunction func; + bool mrt; + +private: + int mDirtyFlags; + + ConstantBuffer mConstantBuffer; + + // Per-device resources. + std::shared_ptr mDeviceResources; + + static SharedResourcePool deviceResourcesPool; +}; + + +// Global pool of per-device ToneMapPostProcess resources. +SharedResourcePool ToneMapPostProcess::Impl::deviceResourcesPool; + + +// Constructor. +ToneMapPostProcess::Impl::Impl(_In_ ID3D11Device* device) + : constants{}, + linearExposure(1.f), + paperWhiteNits(200.f), + op(None), + func(Linear), + mrt(false), + mDirtyFlags(INT_MAX), + mConstantBuffer(device), + mDeviceResources(deviceResourcesPool.DemandCreate(device)) +{ + if (device->GetFeatureLevel() < D3D_FEATURE_LEVEL_10_0) + { + throw std::exception("ToneMapPostProcess requires Feature Level 10.0 or later"); + } + + SetDebugObjectName(mConstantBuffer.GetBuffer(), "ToneMapPostProcess"); +} + + +// Sets our state onto the D3D device. +void ToneMapPostProcess::Impl::Process( + _In_ ID3D11DeviceContext* deviceContext, + std::function& setCustomState) +{ + // Set the texture. + ID3D11ShaderResourceView* textures[1] = { hdrTexture.Get() }; + deviceContext->PSSetShaderResources(0, 1, textures); + + auto sampler = mDeviceResources->stateObjects.PointClamp(); + deviceContext->PSSetSamplers(0, 1, &sampler); + + // Set state objects. + deviceContext->OMSetBlendState(mDeviceResources->stateObjects.Opaque(), nullptr, 0xffffffff); + deviceContext->OMSetDepthStencilState(mDeviceResources->stateObjects.DepthNone(), 0); + deviceContext->RSSetState(mDeviceResources->stateObjects.CullNone()); + + // Set shaders. + auto vertexShader = mDeviceResources->GetVertexShader(); + auto pixelShader = mDeviceResources->GetPixelShader(GetCurrentShaderPermutation()); + + deviceContext->VSSetShader(vertexShader, nullptr, 0); + deviceContext->PSSetShader(pixelShader, nullptr, 0); + + // Set constants. 
+ if (mDirtyFlags & Dirty_Parameters) + { + mDirtyFlags &= ~Dirty_Parameters; + mDirtyFlags |= Dirty_ConstantBuffer; + + constants.parameters = XMVectorSet(linearExposure, paperWhiteNits, 0.f, 0.f); + } + +#if defined(_XBOX_ONE) && defined(_TITLE) + void *grfxMemory; + mConstantBuffer.SetData(deviceContext, constants, &grfxMemory); + + Microsoft::WRL::ComPtr deviceContextX; + ThrowIfFailed(deviceContext->QueryInterface(IID_GRAPHICS_PPV_ARGS(deviceContextX.GetAddressOf()))); + + auto buffer = mConstantBuffer.GetBuffer(); + + deviceContextX->PSSetPlacementConstantBuffer(0, buffer, grfxMemory); +#else + if (mDirtyFlags & Dirty_ConstantBuffer) + { + mDirtyFlags &= ~Dirty_ConstantBuffer; + mConstantBuffer.SetData(deviceContext, constants); + } + + // Set the constant buffer. + auto buffer = mConstantBuffer.GetBuffer(); + + deviceContext->PSSetConstantBuffers(0, 1, &buffer); +#endif + + if (setCustomState) + { + setCustomState(); + } + + // Draw quad. + deviceContext->IASetInputLayout(nullptr); + deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + deviceContext->Draw(3, 0); +} + + +int ToneMapPostProcess::Impl::GetCurrentShaderPermutation() const noexcept +{ +#if defined(_XBOX_ONE) && defined(_TITLE) + int permutation = (mrt) ? 12 : 0; + return permutation + (static_cast(func) * static_cast(Operator_Max)) + static_cast(op); +#else + return (static_cast(func) * static_cast(Operator_Max)) + static_cast(op); +#endif +} + + +// Public constructor. +ToneMapPostProcess::ToneMapPostProcess(_In_ ID3D11Device* device) + : pImpl(std::make_unique(device)) +{ +} + + +// Move constructor. +ToneMapPostProcess::ToneMapPostProcess(ToneMapPostProcess&& moveFrom) noexcept + : pImpl(std::move(moveFrom.pImpl)) +{ +} + + +// Move assignment. +ToneMapPostProcess& ToneMapPostProcess::operator= (ToneMapPostProcess&& moveFrom) noexcept +{ + pImpl = std::move(moveFrom.pImpl); + return *this; +} + + +// Public destructor. 
+ToneMapPostProcess::~ToneMapPostProcess() +{ +} + + +// IPostProcess methods. +void ToneMapPostProcess::Process( + _In_ ID3D11DeviceContext* deviceContext, + _In_opt_ std::function setCustomState) +{ + pImpl->Process(deviceContext, setCustomState); +} + + +// Shader control. +void ToneMapPostProcess::SetOperator(Operator op) +{ + if (op >= Operator_Max) + throw std::out_of_range("Tonemap operator not defined"); + + pImpl->op = op; +} + + +void ToneMapPostProcess::SetTransferFunction(TransferFunction func) +{ + if (func >= TransferFunction_Max) + throw std::out_of_range("Electro-optical transfer function not defined"); + + pImpl->func = func; +} + + +#if defined(_XBOX_ONE) && defined(_TITLE) +void ToneMapPostProcess::SetMRTOutput(bool value) +{ + pImpl->mrt = value; +} +#endif + + +// Properties +void ToneMapPostProcess::SetHDRSourceTexture(_In_opt_ ID3D11ShaderResourceView* value) +{ + pImpl->hdrTexture = value; +} + + +void ToneMapPostProcess::SetExposure(float exposureValue) +{ + pImpl->linearExposure = powf(2.f, exposureValue); + pImpl->SetDirtyFlag(); +} + + +void ToneMapPostProcess::SetST2084Parameter(float paperWhiteNits) +{ + pImpl->paperWhiteNits = paperWhiteNits; + pImpl->SetDirtyFlag(); +} diff --git a/Sdk/External/DirectXTK/Src/VertexTypes.cpp b/Sdk/External/DirectXTK/Src/VertexTypes.cpp new file mode 100644 index 0000000..f349c94 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/VertexTypes.cpp @@ -0,0 +1,173 @@ +//-------------------------------------------------------------------------------------- +// File: VertexTypes.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" +#include "VertexTypes.h" + +#include + +using namespace DirectX; +using namespace DirectX::PackedVector; + + +//-------------------------------------------------------------------------------------- +// Vertex struct holding position . +const D3D11_INPUT_ELEMENT_DESC VertexPosition::InputElements[] = +{ + { "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, +}; + +static_assert(sizeof(VertexPosition) == 12, "Vertex struct/layout mismatch"); + + +//-------------------------------------------------------------------------------------- +// Vertex struct holding position and color information. +const D3D11_INPUT_ELEMENT_DESC VertexPositionColor::InputElements[] = +{ + { "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, +}; + +static_assert(sizeof(VertexPositionColor) == 28, "Vertex struct/layout mismatch"); + + +//-------------------------------------------------------------------------------------- +// Vertex struct holding position and texture mapping information. +const D3D11_INPUT_ELEMENT_DESC VertexPositionTexture::InputElements[] = +{ + { "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, +}; + +static_assert(sizeof(VertexPositionTexture) == 20, "Vertex struct/layout mismatch"); + + +//-------------------------------------------------------------------------------------- +// Vertex struct holding position and dual texture mapping information. 
+const D3D11_INPUT_ELEMENT_DESC VertexPositionDualTexture::InputElements[] = +{ + { "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 1, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, +}; + +static_assert(sizeof(VertexPositionDualTexture) == 28, "Vertex struct/layout mismatch"); + + +//-------------------------------------------------------------------------------------- +// Vertex struct holding position and normal vector. +const D3D11_INPUT_ELEMENT_DESC VertexPositionNormal::InputElements[] = +{ + { "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, +}; + +static_assert(sizeof(VertexPositionNormal) == 24, "Vertex struct/layout mismatch"); + + +//-------------------------------------------------------------------------------------- +// Vertex struct holding position, color, and texture mapping information. +const D3D11_INPUT_ELEMENT_DESC VertexPositionColorTexture::InputElements[] = +{ + { "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, +}; + +static_assert(sizeof(VertexPositionColorTexture) == 36, "Vertex struct/layout mismatch"); + + +//-------------------------------------------------------------------------------------- +// Vertex struct holding position, normal vector, and color information. 
+const D3D11_INPUT_ELEMENT_DESC VertexPositionNormalColor::InputElements[] = +{ + { "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, +}; + +static_assert(sizeof(VertexPositionNormalColor) == 40, "Vertex struct/layout mismatch"); + + +//-------------------------------------------------------------------------------------- +// Vertex struct holding position, normal vector, and texture mapping information. +const D3D11_INPUT_ELEMENT_DESC VertexPositionNormalTexture::InputElements[] = +{ + { "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, +}; + +static_assert(sizeof(VertexPositionNormalTexture) == 32, "Vertex struct/layout mismatch"); + + +//-------------------------------------------------------------------------------------- +// Vertex struct holding position, normal vector, color, and texture mapping information. 
+const D3D11_INPUT_ELEMENT_DESC VertexPositionNormalColorTexture::InputElements[] = +{ + { "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, +}; + +static_assert(sizeof(VertexPositionNormalColorTexture) == 48, "Vertex struct/layout mismatch"); + + +//-------------------------------------------------------------------------------------- +// Vertex struct for Visual Studio Shader Designer (DGSL) holding position, normal, +// tangent, color (RGBA), and texture mapping information +const D3D11_INPUT_ELEMENT_DESC VertexPositionNormalTangentColorTexture::InputElements[] = +{ + { "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TANGENT", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, +}; + +static_assert(sizeof(VertexPositionNormalTangentColorTexture) == 52, "Vertex struct/layout mismatch"); + +void XM_CALLCONV VertexPositionNormalTangentColorTexture::SetColor(FXMVECTOR icolor) noexcept +{ + XMUBYTEN4 rgba; + XMStoreUByteN4(&rgba, icolor); + this->color = rgba.v; +} + + +//-------------------------------------------------------------------------------------- +// Vertex struct for Visual Studio Shader Designer (DGSL) holding 
position, normal, +// tangent, color (RGBA), texture mapping information, and skinning weights +const D3D11_INPUT_ELEMENT_DESC VertexPositionNormalTangentColorTextureSkinning::InputElements[] = +{ + { "SV_Position", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TANGENT", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "BLENDINDICES",0, DXGI_FORMAT_R8G8B8A8_UINT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "BLENDWEIGHT", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, +}; + +static_assert(VertexPositionNormalTangentColorTextureSkinning::InputElementCount == VertexPositionNormalTangentColorTexture::InputElementCount + 2, "layout mismatch"); + +static_assert(sizeof(VertexPositionNormalTangentColorTextureSkinning) == 60, "Vertex struct/layout mismatch"); + +void VertexPositionNormalTangentColorTextureSkinning::SetBlendIndices(XMUINT4 const& iindices) noexcept +{ + this->indices = ((iindices.w & 0xff) << 24) | ((iindices.z & 0xff) << 16) | ((iindices.y & 0xff) << 8) | (iindices.x & 0xff); +} + +void XM_CALLCONV VertexPositionNormalTangentColorTextureSkinning::SetBlendWeights(FXMVECTOR iweights) noexcept +{ + XMUBYTEN4 packed; + XMStoreUByteN4(&packed, iweights); + this->weights = packed.v; +} diff --git a/Sdk/External/DirectXTK/Src/WICTextureLoader.cpp b/Sdk/External/DirectXTK/Src/WICTextureLoader.cpp new file mode 100644 index 0000000..f764fa5 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/WICTextureLoader.cpp @@ -0,0 +1,1218 @@ 
+//-------------------------------------------------------------------------------------- +// File: WICTextureLoader.cpp +// +// Function for loading a WIC image and creating a Direct3D runtime texture for it +// (auto-generating mipmaps if possible) +// +// Note: Assumes application has already called CoInitializeEx +// +// Warning: CreateWICTexture* functions are not thread-safe if given a d3dContext instance for +// auto-gen mipmap support. +// +// Note these functions are useful for images created as simple 2D textures. For +// more complex resources, DDSTextureLoader is an excellent light-weight runtime loader. +// For a full-featured DDS file reader, writer, and texture processing pipeline see +// the 'Texconv' sample and the 'DirectXTex' library. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248926 +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +// We could load multi-frame images (TIFF/GIF) into a texture array. 
+// For now, we just load the first frame (note: DirectXTex supports multi-frame images) + +#include "pch.h" + +#include "WICTextureLoader.h" + +#include "DirectXHelpers.h" +#include "PlatformHelpers.h" +#include "LoaderHelpers.h" + +using namespace DirectX; +using Microsoft::WRL::ComPtr; + +namespace +{ + //------------------------------------------------------------------------------------- + // WIC Pixel Format Translation Data + //------------------------------------------------------------------------------------- + struct WICTranslate + { + const GUID& wic; + DXGI_FORMAT format; + }; + + constexpr WICTranslate g_WICFormats[] = + { + { GUID_WICPixelFormat128bppRGBAFloat, DXGI_FORMAT_R32G32B32A32_FLOAT }, + + { GUID_WICPixelFormat64bppRGBAHalf, DXGI_FORMAT_R16G16B16A16_FLOAT }, + { GUID_WICPixelFormat64bppRGBA, DXGI_FORMAT_R16G16B16A16_UNORM }, + + { GUID_WICPixelFormat32bppRGBA, DXGI_FORMAT_R8G8B8A8_UNORM }, + { GUID_WICPixelFormat32bppBGRA, DXGI_FORMAT_B8G8R8A8_UNORM }, // DXGI 1.1 + { GUID_WICPixelFormat32bppBGR, DXGI_FORMAT_B8G8R8X8_UNORM }, // DXGI 1.1 + + { GUID_WICPixelFormat32bppRGBA1010102XR, DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM }, // DXGI 1.1 + { GUID_WICPixelFormat32bppRGBA1010102, DXGI_FORMAT_R10G10B10A2_UNORM }, + + { GUID_WICPixelFormat16bppBGRA5551, DXGI_FORMAT_B5G5R5A1_UNORM }, + { GUID_WICPixelFormat16bppBGR565, DXGI_FORMAT_B5G6R5_UNORM }, + + { GUID_WICPixelFormat32bppGrayFloat, DXGI_FORMAT_R32_FLOAT }, + { GUID_WICPixelFormat16bppGrayHalf, DXGI_FORMAT_R16_FLOAT }, + { GUID_WICPixelFormat16bppGray, DXGI_FORMAT_R16_UNORM }, + { GUID_WICPixelFormat8bppGray, DXGI_FORMAT_R8_UNORM }, + + { GUID_WICPixelFormat8bppAlpha, DXGI_FORMAT_A8_UNORM }, + }; + + //------------------------------------------------------------------------------------- + // WIC Pixel Format nearest conversion table + //------------------------------------------------------------------------------------- + struct WICConvert + { + const GUID& source; + const GUID& target; + }; + 
+ constexpr WICConvert g_WICConvert [] = + { + // Note target GUID in this conversion table must be one of those directly supported formats (above). + + { GUID_WICPixelFormatBlackWhite, GUID_WICPixelFormat8bppGray }, // DXGI_FORMAT_R8_UNORM + + { GUID_WICPixelFormat1bppIndexed, GUID_WICPixelFormat32bppRGBA }, // DXGI_FORMAT_R8G8B8A8_UNORM + { GUID_WICPixelFormat2bppIndexed, GUID_WICPixelFormat32bppRGBA }, // DXGI_FORMAT_R8G8B8A8_UNORM + { GUID_WICPixelFormat4bppIndexed, GUID_WICPixelFormat32bppRGBA }, // DXGI_FORMAT_R8G8B8A8_UNORM + { GUID_WICPixelFormat8bppIndexed, GUID_WICPixelFormat32bppRGBA }, // DXGI_FORMAT_R8G8B8A8_UNORM + + { GUID_WICPixelFormat2bppGray, GUID_WICPixelFormat8bppGray }, // DXGI_FORMAT_R8_UNORM + { GUID_WICPixelFormat4bppGray, GUID_WICPixelFormat8bppGray }, // DXGI_FORMAT_R8_UNORM + + { GUID_WICPixelFormat16bppGrayFixedPoint, GUID_WICPixelFormat16bppGrayHalf }, // DXGI_FORMAT_R16_FLOAT + { GUID_WICPixelFormat32bppGrayFixedPoint, GUID_WICPixelFormat32bppGrayFloat }, // DXGI_FORMAT_R32_FLOAT + + { GUID_WICPixelFormat16bppBGR555, GUID_WICPixelFormat16bppBGRA5551 }, // DXGI_FORMAT_B5G5R5A1_UNORM + + { GUID_WICPixelFormat32bppBGR101010, GUID_WICPixelFormat32bppRGBA1010102 }, // DXGI_FORMAT_R10G10B10A2_UNORM + + { GUID_WICPixelFormat24bppBGR, GUID_WICPixelFormat32bppRGBA }, // DXGI_FORMAT_R8G8B8A8_UNORM + { GUID_WICPixelFormat24bppRGB, GUID_WICPixelFormat32bppRGBA }, // DXGI_FORMAT_R8G8B8A8_UNORM + { GUID_WICPixelFormat32bppPBGRA, GUID_WICPixelFormat32bppRGBA }, // DXGI_FORMAT_R8G8B8A8_UNORM + { GUID_WICPixelFormat32bppPRGBA, GUID_WICPixelFormat32bppRGBA }, // DXGI_FORMAT_R8G8B8A8_UNORM + + { GUID_WICPixelFormat48bppRGB, GUID_WICPixelFormat64bppRGBA }, // DXGI_FORMAT_R16G16B16A16_UNORM + { GUID_WICPixelFormat48bppBGR, GUID_WICPixelFormat64bppRGBA }, // DXGI_FORMAT_R16G16B16A16_UNORM + { GUID_WICPixelFormat64bppBGRA, GUID_WICPixelFormat64bppRGBA }, // DXGI_FORMAT_R16G16B16A16_UNORM + { GUID_WICPixelFormat64bppPRGBA, GUID_WICPixelFormat64bppRGBA }, // 
DXGI_FORMAT_R16G16B16A16_UNORM + { GUID_WICPixelFormat64bppPBGRA, GUID_WICPixelFormat64bppRGBA }, // DXGI_FORMAT_R16G16B16A16_UNORM + + { GUID_WICPixelFormat48bppRGBFixedPoint, GUID_WICPixelFormat64bppRGBAHalf }, // DXGI_FORMAT_R16G16B16A16_FLOAT + { GUID_WICPixelFormat48bppBGRFixedPoint, GUID_WICPixelFormat64bppRGBAHalf }, // DXGI_FORMAT_R16G16B16A16_FLOAT + { GUID_WICPixelFormat64bppRGBAFixedPoint, GUID_WICPixelFormat64bppRGBAHalf }, // DXGI_FORMAT_R16G16B16A16_FLOAT + { GUID_WICPixelFormat64bppBGRAFixedPoint, GUID_WICPixelFormat64bppRGBAHalf }, // DXGI_FORMAT_R16G16B16A16_FLOAT + { GUID_WICPixelFormat64bppRGBFixedPoint, GUID_WICPixelFormat64bppRGBAHalf }, // DXGI_FORMAT_R16G16B16A16_FLOAT + { GUID_WICPixelFormat64bppRGBHalf, GUID_WICPixelFormat64bppRGBAHalf }, // DXGI_FORMAT_R16G16B16A16_FLOAT + { GUID_WICPixelFormat48bppRGBHalf, GUID_WICPixelFormat64bppRGBAHalf }, // DXGI_FORMAT_R16G16B16A16_FLOAT + + { GUID_WICPixelFormat128bppPRGBAFloat, GUID_WICPixelFormat128bppRGBAFloat }, // DXGI_FORMAT_R32G32B32A32_FLOAT + { GUID_WICPixelFormat128bppRGBFloat, GUID_WICPixelFormat128bppRGBAFloat }, // DXGI_FORMAT_R32G32B32A32_FLOAT + { GUID_WICPixelFormat128bppRGBAFixedPoint, GUID_WICPixelFormat128bppRGBAFloat }, // DXGI_FORMAT_R32G32B32A32_FLOAT + { GUID_WICPixelFormat128bppRGBFixedPoint, GUID_WICPixelFormat128bppRGBAFloat }, // DXGI_FORMAT_R32G32B32A32_FLOAT + { GUID_WICPixelFormat32bppRGBE, GUID_WICPixelFormat128bppRGBAFloat }, // DXGI_FORMAT_R32G32B32A32_FLOAT + + { GUID_WICPixelFormat32bppCMYK, GUID_WICPixelFormat32bppRGBA }, // DXGI_FORMAT_R8G8B8A8_UNORM + { GUID_WICPixelFormat64bppCMYK, GUID_WICPixelFormat64bppRGBA }, // DXGI_FORMAT_R16G16B16A16_UNORM + { GUID_WICPixelFormat40bppCMYKAlpha, GUID_WICPixelFormat32bppRGBA }, // DXGI_FORMAT_R8G8B8A8_UNORM + { GUID_WICPixelFormat80bppCMYKAlpha, GUID_WICPixelFormat64bppRGBA }, // DXGI_FORMAT_R16G16B16A16_UNORM + + #if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) || defined(_WIN7_PLATFORM_UPDATE) + { GUID_WICPixelFormat32bppRGB, 
GUID_WICPixelFormat32bppRGBA }, // DXGI_FORMAT_R8G8B8A8_UNORM + { GUID_WICPixelFormat64bppRGB, GUID_WICPixelFormat64bppRGBA }, // DXGI_FORMAT_R16G16B16A16_UNORM + { GUID_WICPixelFormat64bppPRGBAHalf, GUID_WICPixelFormat64bppRGBAHalf }, // DXGI_FORMAT_R16G16B16A16_FLOAT + #endif + + // We don't support n-channel formats + }; + + bool g_WIC2 = false; + + BOOL WINAPI InitializeWICFactory(PINIT_ONCE, PVOID, PVOID *ifactory) noexcept + { + #if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) || defined(_WIN7_PLATFORM_UPDATE) + HRESULT hr = CoCreateInstance( + CLSID_WICImagingFactory2, + nullptr, + CLSCTX_INPROC_SERVER, + __uuidof(IWICImagingFactory2), + ifactory + ); + + if (SUCCEEDED(hr)) + { + // WIC2 is available on Windows 10, Windows 8.x, and Windows 7 SP1 with KB 2670838 installed + g_WIC2 = true; + return TRUE; + } + else + { + hr = CoCreateInstance( + CLSID_WICImagingFactory1, + nullptr, + CLSCTX_INPROC_SERVER, + __uuidof(IWICImagingFactory), + ifactory + ); + return SUCCEEDED(hr) ? TRUE : FALSE; + } + #else + return SUCCEEDED(CoCreateInstance( + CLSID_WICImagingFactory, + nullptr, + CLSCTX_INPROC_SERVER, + __uuidof(IWICImagingFactory), + ifactory)) ? 
TRUE : FALSE; + #endif + } +} + +//-------------------------------------------------------------------------------------- +namespace DirectX +{ + bool _IsWIC2() noexcept; + IWICImagingFactory* _GetWIC() noexcept; + // Also used by ScreenGrab + + bool _IsWIC2() noexcept + { + return g_WIC2; + } + + IWICImagingFactory* _GetWIC() noexcept + { + static INIT_ONCE s_initOnce = INIT_ONCE_STATIC_INIT; + + IWICImagingFactory* factory = nullptr; + if (!InitOnceExecuteOnce( + &s_initOnce, + InitializeWICFactory, + nullptr, + reinterpret_cast(&factory))) + { + return nullptr; + } + + return factory; + } + +} // namespace DirectX + + +namespace +{ + //--------------------------------------------------------------------------------- + DXGI_FORMAT _WICToDXGI(const GUID& guid) noexcept + { + for (size_t i = 0; i < _countof(g_WICFormats); ++i) + { + if (memcmp(&g_WICFormats[i].wic, &guid, sizeof(GUID)) == 0) + return g_WICFormats[i].format; + } + +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) || defined(_WIN7_PLATFORM_UPDATE) + if (g_WIC2) + { + if (memcmp(&GUID_WICPixelFormat96bppRGBFloat, &guid, sizeof(GUID)) == 0) + return DXGI_FORMAT_R32G32B32_FLOAT; + } +#endif + + return DXGI_FORMAT_UNKNOWN; + } + + //--------------------------------------------------------------------------------- + size_t _WICBitsPerPixel(REFGUID targetGuid) noexcept + { + auto pWIC = _GetWIC(); + if (!pWIC) + return 0; + + ComPtr cinfo; + if (FAILED(pWIC->CreateComponentInfo(targetGuid, cinfo.GetAddressOf()))) + return 0; + + WICComponentType type; + if (FAILED(cinfo->GetComponentType(&type))) + return 0; + + if (type != WICPixelFormat) + return 0; + + ComPtr pfinfo; + if (FAILED(cinfo.As(&pfinfo))) + return 0; + + UINT bpp; + if (FAILED(pfinfo->GetBitsPerPixel(&bpp))) + return 0; + + return bpp; + } + + //--------------------------------------------------------------------------------- + HRESULT CreateTextureFromWIC( + _In_ ID3D11Device* d3dDevice, + _In_opt_ ID3D11DeviceContext* d3dContext, +#if 
defined(_XBOX_ONE) && defined(_TITLE) + _In_opt_ ID3D11DeviceX* d3dDeviceX, + _In_opt_ ID3D11DeviceContextX* d3dContextX, +#endif + _In_ IWICBitmapFrameDecode *frame, + _In_ size_t maxsize, + _In_ D3D11_USAGE usage, + _In_ unsigned int bindFlags, + _In_ unsigned int cpuAccessFlags, + _In_ unsigned int miscFlags, + _In_ unsigned int loadFlags, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView) noexcept + { + UINT width, height; + HRESULT hr = frame->GetSize(&width, &height); + if (FAILED(hr)) + return hr; + + if (maxsize > UINT32_MAX) + return E_INVALIDARG; + + assert(width > 0 && height > 0); + + if (!maxsize) + { + // This is a bit conservative because the hardware could support larger textures than + // the Feature Level defined minimums, but doing it this way is much easier and more + // performant for WIC than the 'fail and retry' model used by DDSTextureLoader + + switch (d3dDevice->GetFeatureLevel()) + { + case D3D_FEATURE_LEVEL_9_1: + case D3D_FEATURE_LEVEL_9_2: + maxsize = 2048u /*D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION*/; + break; + + case D3D_FEATURE_LEVEL_9_3: + maxsize = 4096u /*D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION*/; + break; + + case D3D_FEATURE_LEVEL_10_0: + case D3D_FEATURE_LEVEL_10_1: + maxsize = 8192u /*D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION*/; + break; + + default: + maxsize = size_t(D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION); + break; + } + } + + assert(maxsize > 0); + + UINT twidth = width; + UINT theight = height; + if (loadFlags & WIC_LOADER_FIT_POW2) + { + LoaderHelpers::FitPowerOf2(width, height, twidth, theight, maxsize); + } + else if (width > maxsize || height > maxsize) + { + float ar = static_cast(height) / static_cast(width); + if (width > height) + { + twidth = static_cast(maxsize); + theight = std::max(1, static_cast(static_cast(maxsize) * ar)); + } + else + { + theight = static_cast(maxsize); + twidth = std::max(1, static_cast(static_cast(maxsize) / ar)); + } + assert(twidth <= maxsize && 
theight <= maxsize); + } + + if (loadFlags & WIC_LOADER_MAKE_SQUARE) + { + twidth = std::max(twidth, theight); + theight = twidth; + } + + // Determine format + WICPixelFormatGUID pixelFormat; + hr = frame->GetPixelFormat(&pixelFormat); + if (FAILED(hr)) + return hr; + + WICPixelFormatGUID convertGUID; + memcpy_s(&convertGUID, sizeof(WICPixelFormatGUID), &pixelFormat, sizeof(GUID)); + + size_t bpp = 0; + + DXGI_FORMAT format = _WICToDXGI(pixelFormat); + if (format == DXGI_FORMAT_UNKNOWN) + { + if (memcmp(&GUID_WICPixelFormat96bppRGBFixedPoint, &pixelFormat, sizeof(WICPixelFormatGUID)) == 0) + { +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) || defined(_WIN7_PLATFORM_UPDATE) + if (g_WIC2) + { + memcpy_s(&convertGUID, sizeof(WICPixelFormatGUID), &GUID_WICPixelFormat96bppRGBFloat, sizeof(GUID)); + format = DXGI_FORMAT_R32G32B32_FLOAT; + bpp = 96; + } + else +#endif + { + memcpy_s(&convertGUID, sizeof(WICPixelFormatGUID), &GUID_WICPixelFormat128bppRGBAFloat, sizeof(GUID)); + format = DXGI_FORMAT_R32G32B32A32_FLOAT; + bpp = 128; + } + } + else + { + for (size_t i = 0; i < _countof(g_WICConvert); ++i) + { + if (memcmp(&g_WICConvert[i].source, &pixelFormat, sizeof(WICPixelFormatGUID)) == 0) + { + memcpy_s(&convertGUID, sizeof(WICPixelFormatGUID), &g_WICConvert[i].target, sizeof(GUID)); + + format = _WICToDXGI(g_WICConvert[i].target); + assert(format != DXGI_FORMAT_UNKNOWN); + bpp = _WICBitsPerPixel(convertGUID); + break; + } + } + } + + if (format == DXGI_FORMAT_UNKNOWN) + { + DebugTrace("ERROR: WICTextureLoader does not support all DXGI formats (WIC GUID {%8.8lX-%4.4X-%4.4X-%2.2X%2.2X-%2.2X%2.2X%2.2X%2.2X%2.2X%2.2X}). 
Consider using DirectXTex.\n", + pixelFormat.Data1, pixelFormat.Data2, pixelFormat.Data3, + pixelFormat.Data4[0], pixelFormat.Data4[1], pixelFormat.Data4[2], pixelFormat.Data4[3], + pixelFormat.Data4[4], pixelFormat.Data4[5], pixelFormat.Data4[6], pixelFormat.Data4[7]); + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + } + else + { + bpp = _WICBitsPerPixel(pixelFormat); + } + +#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8) || defined(_WIN7_PLATFORM_UPDATE) + if ((format == DXGI_FORMAT_R32G32B32_FLOAT) && d3dContext && textureView) + { + // Special case test for optional device support for autogen mipchains for R32G32B32_FLOAT + UINT fmtSupport = 0; + hr = d3dDevice->CheckFormatSupport(DXGI_FORMAT_R32G32B32_FLOAT, &fmtSupport); + if (FAILED(hr) || !(fmtSupport & D3D11_FORMAT_SUPPORT_MIP_AUTOGEN)) + { + // Use R32G32B32A32_FLOAT instead which is required for Feature Level 10.0 and up + memcpy_s(&convertGUID, sizeof(WICPixelFormatGUID), &GUID_WICPixelFormat128bppRGBAFloat, sizeof(GUID)); + format = DXGI_FORMAT_R32G32B32A32_FLOAT; + bpp = 128; + } + } +#endif + + if (loadFlags & WIC_LOADER_FORCE_RGBA32) + { + memcpy_s(&convertGUID, sizeof(WICPixelFormatGUID), &GUID_WICPixelFormat32bppRGBA, sizeof(GUID)); + format = DXGI_FORMAT_R8G8B8A8_UNORM; + bpp = 32; + } + + if (!bpp) + return E_FAIL; + + // Handle sRGB formats + if (loadFlags & WIC_LOADER_FORCE_SRGB) + { + format = LoaderHelpers::MakeSRGB(format); + } + else if (!(loadFlags & WIC_LOADER_IGNORE_SRGB)) + { + ComPtr metareader; + if (SUCCEEDED(frame->GetMetadataQueryReader(metareader.GetAddressOf()))) + { + GUID containerFormat; + if (SUCCEEDED(metareader->GetContainerFormat(&containerFormat))) + { + bool sRGB = false; + + PROPVARIANT value; + PropVariantInit(&value); + + // Check for colorspace chunks + if (memcmp(&containerFormat, &GUID_ContainerFormatPng, sizeof(GUID)) == 0) + { + // Check for sRGB chunk + if (SUCCEEDED(metareader->GetMetadataByName(L"/sRGB/RenderingIntent", &value)) && value.vt == VT_UI1) + { + 
sRGB = true; + } + else if (SUCCEEDED(metareader->GetMetadataByName(L"/gAMA/ImageGamma", &value)) && value.vt == VT_UI4) + { + sRGB = (value.uintVal == 45455); + } + else + { + sRGB = (loadFlags & WIC_LOADER_SRGB_DEFAULT) != 0; + } + } +#if defined(_XBOX_ONE) && defined(_TITLE) + else if (memcmp(&containerFormat, &GUID_ContainerFormatJpeg, sizeof(GUID)) == 0) + { + if (SUCCEEDED(metareader->GetMetadataByName(L"/app1/ifd/exif/{ushort=40961}", &value)) && value.vt == VT_UI2) + { + sRGB = (value.uiVal == 1); + } + else + { + sRGB = (loadFlags & WIC_LOADER_SRGB_DEFAULT) != 0; + } + } + else if (memcmp(&containerFormat, &GUID_ContainerFormatTiff, sizeof(GUID)) == 0) + { + if (SUCCEEDED(metareader->GetMetadataByName(L"/ifd/exif/{ushort=40961}", &value)) && value.vt == VT_UI2) + { + sRGB = (value.uiVal == 1); + } + else + { + sRGB = (loadFlags & WIC_LOADER_SRGB_DEFAULT) != 0; + } + } +#else + else if (SUCCEEDED(metareader->GetMetadataByName(L"System.Image.ColorSpace", &value)) && value.vt == VT_UI2) + { + sRGB = (value.uiVal == 1); + } + else + { + sRGB = (loadFlags & WIC_LOADER_SRGB_DEFAULT) != 0; + } +#endif + + (void)PropVariantClear(&value); + + if (sRGB) + format = LoaderHelpers::MakeSRGB(format); + } + } + } + + // Verify our target format is supported by the current device + // (handles WDDM 1.0 or WDDM 1.1 device driver cases as well as DirectX 11.0 Runtime without 16bpp format support) + UINT support = 0; + hr = d3dDevice->CheckFormatSupport(format, &support); + if (FAILED(hr) || !(support & D3D11_FORMAT_SUPPORT_TEXTURE2D)) + { + // Fallback to RGBA 32-bit format which is supported by all devices + memcpy_s(&convertGUID, sizeof(WICPixelFormatGUID), &GUID_WICPixelFormat32bppRGBA, sizeof(GUID)); + format = DXGI_FORMAT_R8G8B8A8_UNORM; + bpp = 32; + } + + // Allocate temporary memory for image + uint64_t rowBytes = (uint64_t(twidth) * uint64_t(bpp) + 7u) / 8u; + uint64_t numBytes = rowBytes * uint64_t(theight); + + if (rowBytes > UINT32_MAX || numBytes > UINT32_MAX) 
+ return HRESULT_FROM_WIN32(ERROR_ARITHMETIC_OVERFLOW); + + auto rowPitch = static_cast(rowBytes); + auto imageSize = static_cast(numBytes); + + std::unique_ptr temp(new (std::nothrow) uint8_t[imageSize]); + if (!temp) + return E_OUTOFMEMORY; + + // Load image data + if (memcmp(&convertGUID, &pixelFormat, sizeof(GUID)) == 0 + && twidth == width + && theight == height) + { + // No format conversion or resize needed + hr = frame->CopyPixels(nullptr, static_cast(rowPitch), static_cast(imageSize), temp.get()); + if (FAILED(hr)) + return hr; + } + else if (twidth != width || theight != height) + { + // Resize + auto pWIC = _GetWIC(); + if (!pWIC) + return E_NOINTERFACE; + + ComPtr scaler; + hr = pWIC->CreateBitmapScaler(scaler.GetAddressOf()); + if (FAILED(hr)) + return hr; + + hr = scaler->Initialize(frame, twidth, theight, WICBitmapInterpolationModeFant); + if (FAILED(hr)) + return hr; + + WICPixelFormatGUID pfScaler; + hr = scaler->GetPixelFormat(&pfScaler); + if (FAILED(hr)) + return hr; + + if (memcmp(&convertGUID, &pfScaler, sizeof(GUID)) == 0) + { + // No format conversion needed + hr = scaler->CopyPixels(nullptr, static_cast(rowPitch), static_cast(imageSize), temp.get()); + if (FAILED(hr)) + return hr; + } + else + { + ComPtr FC; + hr = pWIC->CreateFormatConverter(FC.GetAddressOf()); + if (FAILED(hr)) + return hr; + + BOOL canConvert = FALSE; + hr = FC->CanConvert(pfScaler, convertGUID, &canConvert); + if (FAILED(hr) || !canConvert) + { + return E_UNEXPECTED; + } + + hr = FC->Initialize(scaler.Get(), convertGUID, WICBitmapDitherTypeErrorDiffusion, nullptr, 0, WICBitmapPaletteTypeMedianCut); + if (FAILED(hr)) + return hr; + + hr = FC->CopyPixels(nullptr, static_cast(rowPitch), static_cast(imageSize), temp.get()); + if (FAILED(hr)) + return hr; + } + } + else + { + // Format conversion but no resize + auto pWIC = _GetWIC(); + if (!pWIC) + return E_NOINTERFACE; + + ComPtr FC; + hr = pWIC->CreateFormatConverter(FC.GetAddressOf()); + if (FAILED(hr)) + return hr; + + 
BOOL canConvert = FALSE; + hr = FC->CanConvert(pixelFormat, convertGUID, &canConvert); + if (FAILED(hr) || !canConvert) + { + return E_UNEXPECTED; + } + + hr = FC->Initialize(frame, convertGUID, WICBitmapDitherTypeErrorDiffusion, nullptr, 0, WICBitmapPaletteTypeMedianCut); + if (FAILED(hr)) + return hr; + + hr = FC->CopyPixels(nullptr, static_cast(rowPitch), static_cast(imageSize), temp.get()); + if (FAILED(hr)) + return hr; + } + + // See if format is supported for auto-gen mipmaps (varies by feature level) + bool autogen = false; + if (d3dContext && textureView) // Must have context and shader-view to auto generate mipmaps + { + UINT fmtSupport = 0; + hr = d3dDevice->CheckFormatSupport(format, &fmtSupport); + if (SUCCEEDED(hr) && (fmtSupport & D3D11_FORMAT_SUPPORT_MIP_AUTOGEN)) + { + autogen = true; +#if defined(_XBOX_ONE) && defined(_TITLE) + if (!d3dDeviceX || !d3dContextX) + return E_INVALIDARG; +#endif + } + } + + // Create texture + D3D11_TEXTURE2D_DESC desc = {}; + desc.Width = twidth; + desc.Height = theight; + desc.MipLevels = (autogen) ? 0u : 1u; + desc.ArraySize = 1; + desc.Format = format; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Usage = usage; + desc.CPUAccessFlags = cpuAccessFlags; + + if (autogen) + { + desc.BindFlags = bindFlags | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + desc.MiscFlags = miscFlags | D3D11_RESOURCE_MISC_GENERATE_MIPS; + } + else + { + desc.BindFlags = bindFlags; + desc.MiscFlags = miscFlags; + } + + D3D11_SUBRESOURCE_DATA initData = { temp.get(), static_cast(rowPitch), static_cast(imageSize) }; + + ID3D11Texture2D* tex = nullptr; + hr = d3dDevice->CreateTexture2D(&desc, (autogen) ? nullptr : &initData, &tex); + if (SUCCEEDED(hr) && tex) + { + if (textureView) + { + D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; + SRVDesc.Format = desc.Format; + + SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + SRVDesc.Texture2D.MipLevels = (autogen) ? 
unsigned(-1) : 1u; + + hr = d3dDevice->CreateShaderResourceView(tex, &SRVDesc, textureView); + if (FAILED(hr)) + { + tex->Release(); + return hr; + } + + if (autogen) + { + assert(d3dContext != nullptr); + +#if defined(_XBOX_ONE) && defined(_TITLE) + ID3D11Texture2D *pStaging = nullptr; + CD3D11_TEXTURE2D_DESC stagingDesc(format, twidth, theight, 1, 1, 0, D3D11_USAGE_STAGING, D3D11_CPU_ACCESS_READ, 1, 0, 0); + initData.pSysMem = temp.get(); + initData.SysMemPitch = static_cast(rowPitch); + initData.SysMemSlicePitch = static_cast(imageSize); + + hr = d3dDevice->CreateTexture2D(&stagingDesc, &initData, &pStaging); + if (SUCCEEDED(hr)) + { + d3dContext->CopySubresourceRegion(tex, 0, 0, 0, 0, pStaging, 0, nullptr); + + UINT64 copyFence = d3dContextX->InsertFence(0); + while (d3dDeviceX->IsFencePending(copyFence)) { SwitchToThread(); } + pStaging->Release(); + } +#else + d3dContext->UpdateSubresource(tex, 0, nullptr, temp.get(), static_cast(rowPitch), static_cast(imageSize)); +#endif + d3dContext->GenerateMips(*textureView); + } + } + + if (texture) + { + *texture = tex; + } + else + { + SetDebugObjectName(tex, "WICTextureLoader"); + tex->Release(); + } + } + + return hr; + } + + //-------------------------------------------------------------------------------------- + void SetDebugTextureInfo( + _In_z_ const wchar_t* fileName, + _In_opt_ ID3D11Resource** texture, + _In_opt_ ID3D11ShaderResourceView** textureView) noexcept + { +#if !defined(NO_D3D11_DEBUG_NAME) && ( defined(_DEBUG) || defined(PROFILE) ) + if (texture || textureView) + { +#if defined(_XBOX_ONE) && defined(_TITLE) + const wchar_t* pstrName = wcsrchr(fileName, '\\'); + if (!pstrName) + { + pstrName = fileName; + } + else + { + pstrName++; + } + if (texture && *texture) + { + (*texture)->SetName(pstrName); + } + if (textureView && *textureView) + { + (*textureView)->SetName(pstrName); + } +#else + CHAR strFileA[MAX_PATH]; + int result = WideCharToMultiByte(CP_UTF8, + WC_NO_BEST_FIT_CHARS, + fileName, + -1, 
+ strFileA, + MAX_PATH, + nullptr, + nullptr + ); + if (result > 0) + { + const char* pstrName = strrchr(strFileA, '\\'); + if (!pstrName) + { + pstrName = strFileA; + } + else + { + pstrName++; + } + + if (texture && *texture) + { + (*texture)->SetPrivateData(WKPDID_D3DDebugObjectName, + static_cast(strnlen_s(pstrName, MAX_PATH)), + pstrName + ); + } + + if (textureView && *textureView) + { + (*textureView)->SetPrivateData(WKPDID_D3DDebugObjectName, + static_cast(strnlen_s(pstrName, MAX_PATH)), + pstrName + ); + } + } +#endif + } +#else + UNREFERENCED_PARAMETER(fileName); + UNREFERENCED_PARAMETER(texture); + UNREFERENCED_PARAMETER(textureView); +#endif + } +} // anonymous namespace + +//-------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT DirectX::CreateWICTextureFromMemory( + ID3D11Device* d3dDevice, + const uint8_t* wicData, + size_t wicDataSize, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView, + size_t maxsize) noexcept +{ + return CreateWICTextureFromMemoryEx(d3dDevice, + wicData, wicDataSize, + maxsize, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + WIC_LOADER_DEFAULT, + texture, textureView); +} + +_Use_decl_annotations_ +#if defined(_XBOX_ONE) && defined(_TITLE) + HRESULT DirectX::CreateWICTextureFromMemory( + ID3D11DeviceX* d3dDevice, + ID3D11DeviceContextX* d3dContext, +#else + HRESULT DirectX::CreateWICTextureFromMemory( + ID3D11Device* d3dDevice, + ID3D11DeviceContext* d3dContext, +#endif + const uint8_t* wicData, + size_t wicDataSize, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView, + size_t maxsize) noexcept +{ + return CreateWICTextureFromMemoryEx(d3dDevice, d3dContext, + wicData, wicDataSize, + maxsize, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + WIC_LOADER_DEFAULT, + texture, textureView); +} + +_Use_decl_annotations_ +HRESULT DirectX::CreateWICTextureFromMemoryEx( + ID3D11Device* d3dDevice, + const uint8_t* 
wicData, + size_t wicDataSize, + size_t maxsize, + D3D11_USAGE usage, + unsigned int bindFlags, + unsigned int cpuAccessFlags, + unsigned int miscFlags, + WIC_LOADER_FLAGS loadFlags, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView) noexcept +{ + if (texture) + { + *texture = nullptr; + } + if (textureView) + { + *textureView = nullptr; + } + + if (!d3dDevice || !wicData || (!texture && !textureView)) + { + return E_INVALIDARG; + } + + if (textureView && !(bindFlags & D3D11_BIND_SHADER_RESOURCE)) + { + return E_INVALIDARG; + } + + if (!wicDataSize) + return E_FAIL; + + if (wicDataSize > UINT32_MAX) + return HRESULT_FROM_WIN32(ERROR_FILE_TOO_LARGE); + + auto pWIC = _GetWIC(); + if (!pWIC) + return E_NOINTERFACE; + + // Create input stream for memory + ComPtr stream; + HRESULT hr = pWIC->CreateStream(stream.GetAddressOf()); + if (FAILED(hr)) + return hr; + + hr = stream->InitializeFromMemory(const_cast(wicData), static_cast(wicDataSize)); + if (FAILED(hr)) + return hr; + + // Initialize WIC + ComPtr decoder; + hr = pWIC->CreateDecoderFromStream(stream.Get(), nullptr, WICDecodeMetadataCacheOnDemand, decoder.GetAddressOf()); + if (FAILED(hr)) + return hr; + + ComPtr frame; + hr = decoder->GetFrame(0, frame.GetAddressOf()); + if (FAILED(hr)) + return hr; + + hr = CreateTextureFromWIC(d3dDevice, nullptr, +#if defined(_XBOX_ONE) && defined(_TITLE) + nullptr, nullptr, +#endif + frame.Get(), maxsize, + usage, bindFlags, cpuAccessFlags, miscFlags, + loadFlags, + texture, textureView); + if (FAILED(hr)) + return hr; + + if (texture && *texture) + { + SetDebugObjectName(*texture, "WICTextureLoader"); + } + + if (textureView && *textureView) + { + SetDebugObjectName(*textureView, "WICTextureLoader"); + } + + return hr; +} + +_Use_decl_annotations_ +#if defined(_XBOX_ONE) && defined(_TITLE) + HRESULT DirectX::CreateWICTextureFromMemoryEx( + ID3D11DeviceX* d3dDevice, + ID3D11DeviceContextX* d3dContext, +#else + HRESULT DirectX::CreateWICTextureFromMemoryEx( + 
ID3D11Device* d3dDevice, + ID3D11DeviceContext* d3dContext, +#endif + const uint8_t* wicData, + size_t wicDataSize, + size_t maxsize, + D3D11_USAGE usage, + unsigned int bindFlags, + unsigned int cpuAccessFlags, + unsigned int miscFlags, + WIC_LOADER_FLAGS loadFlags, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView) noexcept +{ + if (texture) + { + *texture = nullptr; + } + if (textureView) + { + *textureView = nullptr; + } + + if (!d3dDevice || !wicData || (!texture && !textureView)) + { + return E_INVALIDARG; + } + + if (textureView && !(bindFlags & D3D11_BIND_SHADER_RESOURCE)) + { + return E_INVALIDARG; + } + + if (!wicDataSize) + return E_FAIL; + + if (wicDataSize > UINT32_MAX) + return HRESULT_FROM_WIN32(ERROR_FILE_TOO_LARGE); + + auto pWIC = _GetWIC(); + if (!pWIC) + return E_NOINTERFACE; + + // Create input stream for memory + ComPtr stream; + HRESULT hr = pWIC->CreateStream(stream.GetAddressOf()); + if (FAILED(hr)) + return hr; + + hr = stream->InitializeFromMemory(const_cast(wicData), static_cast(wicDataSize)); + if (FAILED(hr)) + return hr; + + // Initialize WIC + ComPtr decoder; + hr = pWIC->CreateDecoderFromStream(stream.Get(), nullptr, WICDecodeMetadataCacheOnDemand, decoder.GetAddressOf()); + if (FAILED(hr)) + return hr; + + ComPtr frame; + hr = decoder->GetFrame(0, frame.GetAddressOf()); + if (FAILED(hr)) + return hr; + + hr = CreateTextureFromWIC(d3dDevice, d3dContext, +#if defined(_XBOX_ONE) && defined(_TITLE) + d3dDevice, d3dContext, +#endif + frame.Get(), + maxsize, + usage, bindFlags, cpuAccessFlags, miscFlags, + loadFlags, + texture, textureView); + if (FAILED(hr)) + return hr; + + if (texture && *texture) + { + SetDebugObjectName(*texture, "WICTextureLoader"); + } + + if (textureView && *textureView) + { + SetDebugObjectName(*textureView, "WICTextureLoader"); + } + + return hr; +} + +//-------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT 
DirectX::CreateWICTextureFromFile( + ID3D11Device* d3dDevice, + const wchar_t* fileName, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView, + size_t maxsize) noexcept +{ + return CreateWICTextureFromFileEx(d3dDevice, + fileName, + maxsize, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + WIC_LOADER_DEFAULT, + texture, textureView); +} + +_Use_decl_annotations_ +#if defined(_XBOX_ONE) && defined(_TITLE) + HRESULT DirectX::CreateWICTextureFromFile( + ID3D11DeviceX* d3dDevice, + ID3D11DeviceContextX* d3dContext, +#else + HRESULT DirectX::CreateWICTextureFromFile( + ID3D11Device* d3dDevice, + ID3D11DeviceContext* d3dContext, +#endif + const wchar_t* fileName, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView, + size_t maxsize) noexcept +{ + return CreateWICTextureFromFileEx(d3dDevice, d3dContext, + fileName, + maxsize, + D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE, 0, 0, + WIC_LOADER_DEFAULT, + texture, textureView); +} + +_Use_decl_annotations_ +HRESULT DirectX::CreateWICTextureFromFileEx( + ID3D11Device* d3dDevice, + const wchar_t* fileName, + size_t maxsize, + D3D11_USAGE usage, + unsigned int bindFlags, + unsigned int cpuAccessFlags, + unsigned int miscFlags, + WIC_LOADER_FLAGS loadFlags, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView) noexcept +{ + if (texture) + { + *texture = nullptr; + } + if (textureView) + { + *textureView = nullptr; + } + + if (!d3dDevice || !fileName || (!texture && !textureView)) + { + return E_INVALIDARG; + } + + if (textureView && !(bindFlags & D3D11_BIND_SHADER_RESOURCE)) + { + return E_INVALIDARG; + } + + auto pWIC = _GetWIC(); + if (!pWIC) + return E_NOINTERFACE; + + // Initialize WIC + ComPtr decoder; + HRESULT hr = pWIC->CreateDecoderFromFilename(fileName, + nullptr, + GENERIC_READ, + WICDecodeMetadataCacheOnDemand, + decoder.GetAddressOf()); + if (FAILED(hr)) + return hr; + + ComPtr frame; + hr = decoder->GetFrame(0, frame.GetAddressOf()); + if (FAILED(hr)) + 
return hr; + + hr = CreateTextureFromWIC(d3dDevice, nullptr, +#if defined(_XBOX_ONE) && defined(_TITLE) + nullptr, nullptr, +#endif + frame.Get(), + maxsize, + usage, bindFlags, cpuAccessFlags, miscFlags, + loadFlags, + texture, textureView); + + if (SUCCEEDED(hr)) + { + SetDebugTextureInfo(fileName, texture, textureView); + } + + return hr; +} + +_Use_decl_annotations_ +#if defined(_XBOX_ONE) && defined(_TITLE) + HRESULT DirectX::CreateWICTextureFromFileEx( + ID3D11DeviceX* d3dDevice, + ID3D11DeviceContextX* d3dContext, +#else + HRESULT DirectX::CreateWICTextureFromFileEx( + ID3D11Device* d3dDevice, + ID3D11DeviceContext* d3dContext, +#endif + const wchar_t* fileName, + size_t maxsize, + D3D11_USAGE usage, + unsigned int bindFlags, + unsigned int cpuAccessFlags, + unsigned int miscFlags, + WIC_LOADER_FLAGS loadFlags, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView) noexcept +{ + if (texture) + { + *texture = nullptr; + } + if (textureView) + { + *textureView = nullptr; + } + + if (!d3dDevice || !fileName || (!texture && !textureView)) + { + return E_INVALIDARG; + } + + if (textureView && !(bindFlags & D3D11_BIND_SHADER_RESOURCE)) + { + return E_INVALIDARG; + } + + auto pWIC = _GetWIC(); + if (!pWIC) + return E_NOINTERFACE; + + // Initialize WIC + ComPtr decoder; + HRESULT hr = pWIC->CreateDecoderFromFilename(fileName, + nullptr, + GENERIC_READ, + WICDecodeMetadataCacheOnDemand, + decoder.GetAddressOf()); + if (FAILED(hr)) + return hr; + + ComPtr frame; + hr = decoder->GetFrame(0, frame.GetAddressOf()); + if (FAILED(hr)) + return hr; + + hr = CreateTextureFromWIC(d3dDevice, d3dContext, +#if defined(_XBOX_ONE) && defined(_TITLE) + d3dDevice, d3dContext, +#endif + frame.Get(), + maxsize, + usage, bindFlags, cpuAccessFlags, miscFlags, + loadFlags, + texture, textureView); + + if (SUCCEEDED(hr)) + { + SetDebugTextureInfo(fileName, texture, textureView); + } + + return hr; +} diff --git a/Sdk/External/DirectXTK/Src/XboxDDSTextureLoader.cpp 
b/Sdk/External/DirectXTK/Src/XboxDDSTextureLoader.cpp new file mode 100644 index 0000000..c84993e --- /dev/null +++ b/Sdk/External/DirectXTK/Src/XboxDDSTextureLoader.cpp @@ -0,0 +1,788 @@ +//-------------------------------------------------------------------------------------- +// File: XboxDDSTextureLoader.cpp +// +// Functions for loading a DDS texture using the XBOX extended header and creating a +// Direct3D11.X runtime resource for it via the CreatePlacement APIs +// +// Note these functions will not load standard DDS files. Use the DDSTextureLoader +// module in the DirectXTex package or as part of the DirectXTK library to load +// these files which use standard Direct3D resource creation APIs. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248926 +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" + +#include "XboxDDSTextureLoader.h" + +#include "PlatformHelpers.h" +#include "DDS.h" +#include "DirectXHelpers.h" + +#include + +using namespace DirectX; +using namespace Xbox; + +namespace +{ + //-------------------------------------------------------------------------------------- + // Default XMemAlloc attributes for texture loading + //-------------------------------------------------------------------------------------- + const uint64_t c_XMemAllocAttributes = MAKE_XALLOC_ATTRIBUTES( + eXALLOCAllocatorId_MiddlewareReservedMin, + 0, + XALLOC_MEMTYPE_GRAPHICS_WRITECOMBINE_GPU_READONLY, + XALLOC_PAGESIZE_64KB, + XALLOC_ALIGNMENT_64K); + + //-------------------------------------------------------------------------------------- + // DDS file structure definitions + // + // See DDS.h in the 'Texconv' sample and the 'DirectXTex' library + //-------------------------------------------------------------------------------------- + #pragma pack(push,1) + + struct 
DDS_HEADER_XBOX + // Must match structure defined in xtexconv tool + { + DXGI_FORMAT dxgiFormat; + uint32_t resourceDimension; + uint32_t miscFlag; // see DDS_RESOURCE_MISC_FLAG + uint32_t arraySize; + uint32_t miscFlags2; // see DDS_MISC_FLAGS2 + uint32_t tileMode; // see XG_TILE_MODE + uint32_t baseAlignment; + uint32_t dataSize; + uint32_t xdkVer; // matching _XDK_VER + }; + + static_assert(sizeof(DDS_HEADER_XBOX) == 36, "DDS XBOX Header size mismatch"); + + #pragma pack(pop) + + //-------------------------------------------------------------------------------------- + HRESULT LoadTextureDataFromFile(_In_z_ const wchar_t* fileName, + std::unique_ptr& ddsData, + DDS_HEADER** header, + uint8_t** bitData, + size_t* bitSize) noexcept + { + if (!header || !bitData || !bitSize) + { + return E_POINTER; + } + + // open the file + ScopedHandle hFile(safe_handle(CreateFile2(fileName, + GENERIC_READ, + FILE_SHARE_READ, + OPEN_EXISTING, + nullptr))); + + if (!hFile) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + // Get the file size + LARGE_INTEGER FileSize = {}; + + FILE_STANDARD_INFO fileInfo; + if (!GetFileInformationByHandleEx(hFile.get(), FileStandardInfo, &fileInfo, sizeof(fileInfo))) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + FileSize = fileInfo.EndOfFile; + + // File is too big for 32-bit allocation, so reject read + if (FileSize.HighPart > 0) + { + return E_FAIL; + } + + // Need at least enough data to fill the header and magic number to be a valid DDS + if (FileSize.LowPart < (sizeof(DDS_HEADER) + sizeof(uint32_t))) + { + return E_FAIL; + } + + // create enough space for the file data + ddsData.reset(new (std::nothrow) uint8_t[FileSize.LowPart]); + if (!ddsData) + { + return E_OUTOFMEMORY; + } + + // read the data in + DWORD BytesRead = 0; + if (!ReadFile(hFile.get(), + ddsData.get(), + FileSize.LowPart, + &BytesRead, + nullptr + )) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + if (BytesRead < FileSize.LowPart) + { + return E_FAIL; 
+ } + + // DDS files always start with the same magic number ("DDS ") + auto dwMagicNumber = *reinterpret_cast(ddsData.get()); + if (dwMagicNumber != DDS_MAGIC) + { + return E_FAIL; + } + + auto hdr = reinterpret_cast(ddsData.get() + sizeof(uint32_t)); + + // Verify header to validate DDS file + if (hdr->size != sizeof(DDS_HEADER) || + hdr->ddspf.size != sizeof(DDS_PIXELFORMAT)) + { + return E_FAIL; + } + + // Check for XBOX extension + if (!(hdr->ddspf.flags & DDS_FOURCC) + || (MAKEFOURCC('X', 'B', 'O', 'X') != hdr->ddspf.fourCC)) + { + // Use standard DDSTextureLoader instead + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + // Must be long enough for both headers and magic value + if (FileSize.LowPart < (sizeof(DDS_HEADER) + sizeof(uint32_t) + sizeof(DDS_HEADER_XBOX))) + { + return E_FAIL; + } + + // setup the pointers in the process request + *header = hdr; + auto offset = sizeof(uint32_t) + sizeof(DDS_HEADER) + sizeof(DDS_HEADER_XBOX); + *bitData = ddsData.get() + offset; + *bitSize = FileSize.LowPart - offset; + + return S_OK; + } + + //-------------------------------------------------------------------------------------- + DXGI_FORMAT MakeSRGB(_In_ DXGI_FORMAT format) noexcept + { + switch (format) + { + case DXGI_FORMAT_R8G8B8A8_UNORM: + return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB; + + case DXGI_FORMAT_BC1_UNORM: + return DXGI_FORMAT_BC1_UNORM_SRGB; + + case DXGI_FORMAT_BC2_UNORM: + return DXGI_FORMAT_BC2_UNORM_SRGB; + + case DXGI_FORMAT_BC3_UNORM: + return DXGI_FORMAT_BC3_UNORM_SRGB; + + case DXGI_FORMAT_B8G8R8A8_UNORM: + return DXGI_FORMAT_B8G8R8A8_UNORM_SRGB; + + case DXGI_FORMAT_B8G8R8X8_UNORM: + return DXGI_FORMAT_B8G8R8X8_UNORM_SRGB; + + case DXGI_FORMAT_BC7_UNORM: + return DXGI_FORMAT_BC7_UNORM_SRGB; + + default: + return format; + } + } + + //-------------------------------------------------------------------------------------- + HRESULT CreateD3DResources(_In_ ID3D11DeviceX* d3dDevice, + _In_ const DDS_HEADER_XBOX* xboxext, + _In_ uint32_t 
width, + _In_ uint32_t height, + _In_ uint32_t depth, + _In_ uint32_t mipCount, + _In_ uint32_t arraySize, + _In_ bool forceSRGB, + _In_ bool isCubeMap, + _In_ void* grfxMemory, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView) noexcept + { + if (!d3dDevice || !grfxMemory) + return E_POINTER; + + HRESULT hr = E_FAIL; + + DXGI_FORMAT format = xboxext->dxgiFormat; + if (forceSRGB) + { + format = MakeSRGB(format); + } + + switch (xboxext->resourceDimension) + { + case D3D11_RESOURCE_DIMENSION_TEXTURE1D: + { + D3D11_TEXTURE1D_DESC desc = {}; + desc.Width = static_cast(width); + desc.MipLevels = static_cast(mipCount); + desc.ArraySize = static_cast(arraySize); + desc.Format = format; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + ID3D11Texture1D* tex = nullptr; + hr = d3dDevice->CreatePlacementTexture1D(&desc, xboxext->tileMode, 0, grfxMemory, &tex); + if (SUCCEEDED(hr) && tex != 0) + { + if (textureView != 0) + { + D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; + SRVDesc.Format = format; + + if (arraySize > 1) + { + SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY; + SRVDesc.Texture1DArray.MipLevels = desc.MipLevels; + SRVDesc.Texture1DArray.ArraySize = static_cast(arraySize); + } + else + { + SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; + SRVDesc.Texture1D.MipLevels = desc.MipLevels; + } + + hr = d3dDevice->CreateShaderResourceView(tex, + &SRVDesc, + textureView + ); + if (FAILED(hr)) + { + tex->Release(); + return hr; + } + } + + if (texture != 0) + { + *texture = tex; + } + else + { + SetDebugObjectName(tex, "XboxDDSTextureLoader"); + tex->Release(); + } + } + } + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: + { + D3D11_TEXTURE2D_DESC desc = {}; + desc.Width = static_cast(width); + desc.Height = static_cast(height); + desc.MipLevels = static_cast(mipCount); + desc.ArraySize = static_cast(arraySize); + desc.Format = format; + desc.SampleDesc.Count = 1; + 
desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.MiscFlags = (isCubeMap) ? D3D11_RESOURCE_MISC_TEXTURECUBE : 0; + + ID3D11Texture2D* tex = nullptr; + hr = d3dDevice->CreatePlacementTexture2D(&desc, xboxext->tileMode, 0, grfxMemory, &tex); + if (SUCCEEDED(hr) && tex != 0) + { + if (textureView != 0) + { + D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; + SRVDesc.Format = format; + + if (isCubeMap) + { + if (arraySize > 6) + { + SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBEARRAY; + SRVDesc.TextureCubeArray.MipLevels = desc.MipLevels; + + // Earlier we set arraySize to (NumCubes * 6) + SRVDesc.TextureCubeArray.NumCubes = static_cast(arraySize / 6); + } + else + { + SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE; + SRVDesc.TextureCube.MipLevels = desc.MipLevels; + } + } + else if (arraySize > 1) + { + SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; + SRVDesc.Texture2DArray.MipLevels = desc.MipLevels; + SRVDesc.Texture2DArray.ArraySize = static_cast(arraySize); + } + else + { + SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + SRVDesc.Texture2D.MipLevels = desc.MipLevels; + } + + hr = d3dDevice->CreateShaderResourceView(tex, + &SRVDesc, + textureView + ); + if (FAILED(hr)) + { + tex->Release(); + return hr; + } + } + + if (texture != 0) + { + *texture = tex; + } + else + { + SetDebugObjectName(tex, "XboxDDSTextureLoader"); + tex->Release(); + } + } + } + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE3D: + { + D3D11_TEXTURE3D_DESC desc = {}; + desc.Width = static_cast(width); + desc.Height = static_cast(height); + desc.Depth = static_cast(depth); + desc.MipLevels = static_cast(mipCount); + desc.Format = format; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + ID3D11Texture3D* tex = nullptr; + hr = d3dDevice->CreatePlacementTexture3D(&desc, xboxext->tileMode, 0, grfxMemory, &tex); + if (SUCCEEDED(hr) && tex != 0) + { + if (textureView != 0) + { + 
D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; + SRVDesc.Format = format; + + SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; + SRVDesc.Texture3D.MipLevels = desc.MipLevels; + + hr = d3dDevice->CreateShaderResourceView(tex, + &SRVDesc, + textureView + ); + if (FAILED(hr)) + { + tex->Release(); + return hr; + } + } + + if (texture != 0) + { + *texture = tex; + } + else + { + SetDebugObjectName(tex, "XboxDDSTextureLoader"); + tex->Release(); + } + } + } + break; + } + + return hr; + } + + //-------------------------------------------------------------------------------------- + HRESULT CreateTextureFromDDS(_In_ ID3D11DeviceX* d3dDevice, + _In_ const DDS_HEADER* header, + _In_reads_bytes_(bitSize) const uint8_t* bitData, + _In_ size_t bitSize, + _In_ bool forceSRGB, + _Outptr_opt_ ID3D11Resource** texture, + _Outptr_opt_ ID3D11ShaderResourceView** textureView, + _Outptr_ void** grfxMemory) noexcept + { + HRESULT hr = S_OK; + + uint32_t width = header->width; + uint32_t height = header->height; + uint32_t depth = header->depth; + + uint32_t mipCount = header->mipMapCount; + if (0 == mipCount) + { + mipCount = 1; + } + + if (!(header->ddspf.flags & DDS_FOURCC) + || (MAKEFOURCC('X', 'B', 'O', 'X') != header->ddspf.fourCC)) + { + // Use standard DDSTextureLoader instead + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + auto xboxext = reinterpret_cast(reinterpret_cast(header) + sizeof(DDS_HEADER)); + +#ifndef NDEBUG + if (xboxext->xdkVer < _XDK_VER) + { + OutputDebugStringA("WARNING: DDS XBOX file may be outdated and need regeneration\n"); + } +#endif + + uint32_t arraySize = xboxext->arraySize; + if (arraySize == 0) + { + return HRESULT_FROM_WIN32(ERROR_INVALID_DATA); + } + + bool isCubeMap = false; + + switch (xboxext->resourceDimension) + { + case D3D11_RESOURCE_DIMENSION_TEXTURE1D: + if ((header->flags & DDS_HEIGHT) && height != 1) + { + return HRESULT_FROM_WIN32(ERROR_INVALID_DATA); + } + height = depth = 1; + break; + + case 
D3D11_RESOURCE_DIMENSION_TEXTURE2D: + if (xboxext->miscFlag & D3D11_RESOURCE_MISC_TEXTURECUBE) + { + arraySize *= 6; + isCubeMap = true; + } + depth = 1; + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE3D: + if (!(header->flags & DDS_HEADER_FLAGS_VOLUME)) + { + return HRESULT_FROM_WIN32(ERROR_INVALID_DATA); + } + + if (arraySize > 1) + { + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + break; + + default: + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + if (xboxext->tileMode == uint32_t(-1)) + { + return HRESULT_FROM_WIN32(ERROR_INVALID_DATA); + } + + // Bound sizes + if (mipCount > D3D11_REQ_MIP_LEVELS) + { + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + switch (xboxext->resourceDimension) + { + case D3D11_RESOURCE_DIMENSION_TEXTURE1D: + if ((arraySize > D3D11_REQ_TEXTURE1D_ARRAY_AXIS_DIMENSION) || + (width > D3D11_REQ_TEXTURE1D_U_DIMENSION)) + { + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: + if (isCubeMap) + { + // This is the right bound because we set arraySize to (NumCubes*6) above + if ((arraySize > D3D11_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION) || + (width > D3D11_REQ_TEXTURECUBE_DIMENSION) || + (height > D3D11_REQ_TEXTURECUBE_DIMENSION)) + { + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + } + else if ((arraySize > D3D11_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION) || + (width > D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION) || + (height > D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION)) + { + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE3D: + if ((arraySize > 1) || + (width > D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION) || + (height > D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION) || + (depth > D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION)) + { + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + break; + } + + if (xboxext->dxgiFormat == DXGI_FORMAT_UNKNOWN) + { + return E_FAIL; + } + + if (!xboxext->dataSize || !xboxext->baseAlignment) + { + return 
E_FAIL; + } + + if (xboxext->dataSize > bitSize) + { + return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); + } + + // Allocate graphics memory. Depending on the data size it uses 4MB or 64K pages. + *grfxMemory = XMemAlloc(xboxext->dataSize, c_XMemAllocAttributes); + if (!*grfxMemory) + return E_OUTOFMEMORY; + + // Copy tiled data into graphics memory + memcpy(*grfxMemory, bitData, xboxext->dataSize); + + // Create the texture + hr = CreateD3DResources(d3dDevice, xboxext, + width, height, depth, mipCount, arraySize, + forceSRGB, isCubeMap, *grfxMemory, + texture, textureView); + if (FAILED(hr)) + { + XMemFree(grfxMemory, c_XMemAllocAttributes); + *grfxMemory = nullptr; + } + + return hr; + } + + //-------------------------------------------------------------------------------------- + DDS_ALPHA_MODE GetAlphaMode(_In_ const DDS_HEADER* header) noexcept + { + if (header->ddspf.flags & DDS_FOURCC) + { + if (MAKEFOURCC('X', 'B', 'O', 'X') == header->ddspf.fourCC) + { + auto xboxext = reinterpret_cast(reinterpret_cast(header) + sizeof(DDS_HEADER)); + auto mode = static_cast(xboxext->miscFlags2 & DDS_MISC_FLAGS2_ALPHA_MODE_MASK); + switch (mode) + { + case DDS_ALPHA_MODE_STRAIGHT: + case DDS_ALPHA_MODE_PREMULTIPLIED: + case DDS_ALPHA_MODE_OPAQUE: + case DDS_ALPHA_MODE_CUSTOM: + return mode; + + default: + break; + } + } + } + + return DDS_ALPHA_MODE_UNKNOWN; + } +} // anonymous namespace + + +//-------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT Xbox::CreateDDSTextureFromMemory( + ID3D11DeviceX* d3dDevice, + const uint8_t* ddsData, + size_t ddsDataSize, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView, + void** grfxMemory, + DDS_ALPHA_MODE* alphaMode, + bool forceSRGB ) noexcept +{ + if ( texture ) + { + *texture = nullptr; + } + if ( textureView ) + { + *textureView = nullptr; + } + if ( grfxMemory ) + { + *grfxMemory = nullptr; + } + if ( alphaMode ) + { + *alphaMode = 
DDS_ALPHA_MODE_UNKNOWN; + } + + if ( !d3dDevice || !ddsData || (!texture && !textureView) || !grfxMemory ) + { + return E_INVALIDARG; + } + + // Validate DDS file in memory + if (ddsDataSize < (sizeof(uint32_t) + sizeof(DDS_HEADER))) + { + return E_FAIL; + } + + auto dwMagicNumber = *reinterpret_cast(ddsData); + if (dwMagicNumber != DDS_MAGIC) + { + return E_FAIL; + } + + auto header = reinterpret_cast( ddsData + sizeof( uint32_t ) ); + + // Verify header to validate DDS file + if (header->size != sizeof(DDS_HEADER) || + header->ddspf.size != sizeof(DDS_PIXELFORMAT)) + { + return E_FAIL; + } + + // Check for XBOX extension + if ( !( header->ddspf.flags & DDS_FOURCC ) + || ( MAKEFOURCC( 'X', 'B', 'O', 'X' ) != header->ddspf.fourCC ) ) + { + // Use standard DDSTextureLoader instead + return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED ); + } + + // Must be long enough for both headers and magic value + if (ddsDataSize < (sizeof(DDS_HEADER) + sizeof(uint32_t) + sizeof(DDS_HEADER_XBOX))) + { + return E_FAIL; + } + + auto offset = sizeof( uint32_t ) + sizeof( DDS_HEADER ) + sizeof( DDS_HEADER_XBOX ); + + HRESULT hr = CreateTextureFromDDS( d3dDevice, header, + ddsData + offset, ddsDataSize - offset, forceSRGB, + texture, textureView, + grfxMemory ); + if ( SUCCEEDED(hr) ) + { + if (texture != 0 && *texture != 0) + { + SetDebugObjectName(*texture, "XboxDDSTextureLoader"); + } + + if (textureView != 0 && *textureView != 0) + { + SetDebugObjectName(*textureView, "XboxDDSTextureLoader"); + } + + if ( alphaMode ) + *alphaMode = GetAlphaMode( header ); + } + + return hr; +} + +//-------------------------------------------------------------------------------------- +_Use_decl_annotations_ +HRESULT Xbox::CreateDDSTextureFromFile( + ID3D11DeviceX* d3dDevice, + const wchar_t* fileName, + ID3D11Resource** texture, + ID3D11ShaderResourceView** textureView, + void** grfxMemory, + DDS_ALPHA_MODE* alphaMode, + bool forceSRGB ) noexcept +{ + if ( texture ) + { + *texture = nullptr; + } + if 
( textureView ) + { + *textureView = nullptr; + } + if ( grfxMemory ) + { + *grfxMemory = nullptr; + } + if ( alphaMode ) + { + *alphaMode = DDS_ALPHA_MODE_UNKNOWN; + } + + if ( !d3dDevice || !fileName || (!texture && !textureView) || !grfxMemory ) + { + return E_INVALIDARG; + } + + DDS_HEADER* header = nullptr; + uint8_t* bitData = nullptr; + size_t bitSize = 0; + + std::unique_ptr ddsData; + HRESULT hr = LoadTextureDataFromFile( fileName, + ddsData, + &header, + &bitData, + &bitSize + ); + if (FAILED(hr)) + { + return hr; + } + + hr = CreateTextureFromDDS( d3dDevice, header, + bitData, bitSize, forceSRGB, + texture, textureView, + grfxMemory ); + + if ( SUCCEEDED(hr) ) + { +#if !defined(NO_D3D11_DEBUG_NAME) && ( defined(_DEBUG) || defined(PROFILE) ) + if (texture != 0 && *texture != 0) + { + (*texture)->SetName( fileName ); + } + if (textureView != 0 && *textureView != 0 ) + { + (*textureView)->SetName( fileName ); + } +#endif + + if ( alphaMode ) + *alphaMode = GetAlphaMode( header ); + } + + return hr; +} + +//-------------------------------------------------------------------------------------- +_Use_decl_annotations_ +void Xbox::FreeDDSTextureMemory(void* grfxMemory) noexcept +{ + if (grfxMemory) + { + XMemFree(grfxMemory, c_XMemAllocAttributes); + } +} diff --git a/Sdk/External/DirectXTK/Src/pch.cpp b/Sdk/External/DirectXTK/Src/pch.cpp new file mode 100644 index 0000000..09b51b5 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/pch.cpp @@ -0,0 +1,10 @@ +//-------------------------------------------------------------------------------------- +// File: pch.cpp +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#include "pch.h" diff --git a/Sdk/External/DirectXTK/Src/pch.h b/Sdk/External/DirectXTK/Src/pch.h new file mode 100644 index 0000000..d6fbe48 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/pch.h @@ -0,0 +1,152 @@ +//-------------------------------------------------------------------------------------- +// File: pch.h +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +//-------------------------------------------------------------------------------------- + +#pragma once + +// Off by default warnings +#pragma warning(disable : 4619 4616 4061 4265 4365 4571 4623 4625 4626 4628 4668 4710 4711 4746 4774 4820 4987 5026 5027 5031 5032 5039 5045 5219 26812) +// C4619/4616 #pragma warning warnings +// C4061 enumerator 'X' in switch of enum 'X' is not explicitly handled by a case label +// C4265 class has virtual functions, but destructor is not virtual +// C4365 signed/unsigned mismatch +// C4571 behavior change +// C4623 default constructor was implicitly defined as deleted +// C4625 copy constructor was implicitly defined as deleted +// C4626 assignment operator was implicitly defined as deleted +// C4628 digraphs not supported +// C4668 not defined as a preprocessor macro +// C4710 function not inlined +// C4711 selected for automatic inline expansion +// C4746 volatile access of '' is subject to /volatile: setting +// C4774 format string expected in argument 3 is not a string literal +// C4820 padding added after data member +// C4987 nonstandard extension used +// C5026 move constructor was implicitly defined as deleted +// C5027 move assignment operator was implicitly defined as deleted +// C5031/5032 push/pop mismatches in windows headers +// C5039 pointer or reference to potentially throwing function passed to extern 
C function under - EHc +// C5045 Spectre mitigation warning +// C5219 implicit conversion from 'int' to 'float', possible loss of data +// 26812: The enum type 'x' is unscoped. Prefer 'enum class' over 'enum' (Enum.3). + +// Windows 8.1 SDK related Off by default warnings +#pragma warning(disable : 4471 4917 4986 5029) +// C4471 forward declaration of an unscoped enumeration must have an underlying type +// C4917 a GUID can only be associated with a class, interface or namespace +// C4986 exception specification does not match previous declaration +// C5029 nonstandard extension used + +// Xbox One XDK related Off by default warnings +#pragma warning(disable : 4643 5043) +// C4643 Forward declaring in namespace std is not permitted by the C++ Standard +// C5043 exception specification does not match previous declaration + +#ifdef __INTEL_COMPILER +#pragma warning(disable : 161 2960 3280) +// warning #161: unrecognized #pragma +// message #2960: allocation may not satisfy the type's alignment; consider using header +// message #3280: declaration hides member +#endif + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wc++98-compat" +#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#pragma clang diagnostic ignored "-Wc++98-compat-local-type-template-args" +#pragma clang diagnostic ignored "-Wcovered-switch-default" +#pragma clang diagnostic ignored "-Wexit-time-destructors" +#pragma clang diagnostic ignored "-Wfloat-equal" +#pragma clang diagnostic ignored "-Wglobal-constructors" +#pragma clang diagnostic ignored "-Wgnu-anonymous-struct" +#pragma clang diagnostic ignored "-Wlanguage-extension-token" +#pragma clang diagnostic ignored "-Wmissing-variable-declarations" +#pragma clang diagnostic ignored "-Wmicrosoft-include" +#pragma clang diagnostic ignored "-Wnested-anon-types" +#pragma clang diagnostic ignored "-Wreserved-id-macro" +#pragma clang diagnostic ignored "-Wswitch-enum" +#pragma clang diagnostic ignored "-Wunknown-pragmas" +#pragma clang 
diagnostic ignored "-Wunused-const-variable" +#pragma clang diagnostic ignored "-Wunused-member-function" +#endif + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +#pragma warning(push) +#pragma warning(disable : 4005) +#define NOMINMAX +#define NODRAWTEXT +#define NOGDI +#define NOBITMAP +#define NOMCX +#define NOSERVICE +#define NOHELP +#pragma warning(pop) + +#include + +#ifndef _WIN32_WINNT_WIN10 +#define _WIN32_WINNT_WIN10 0x0A00 +#endif + +#ifndef WINAPI_FAMILY_GAMES +#define WINAPI_FAMILY_GAMES 6 +#endif + +#ifdef _GAMING_XBOX +#error This version of DirectX Tool Kit not supported for GDK +#elif defined(_XBOX_ONE) && defined(_TITLE) +#include +#else +#include +#endif + +#define _XM_NO_XMVECTOR_OVERLOADS_ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#pragma warning(push) +#pragma warning(disable : 4702) +#include +#pragma warning(pop) + +#include + +#pragma warning(push) +#pragma warning(disable : 4467 5038 5204 5220) +#include +#pragma warning(pop) + +#include + +#if (defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP)) || (defined(_XBOX_ONE) && defined(_TITLE)) +#pragma warning(push) +#pragma warning(disable: 4471 5204) +#include +#pragma warning(pop) +#endif + +#include diff --git a/Sdk/External/DirectXTK/Src/vbo.h b/Sdk/External/DirectXTK/Src/vbo.h new file mode 100644 index 0000000..ff999d1 --- /dev/null +++ b/Sdk/External/DirectXTK/Src/vbo.h @@ -0,0 +1,36 @@ +//-------------------------------------------------------------------------------------- +// File: vbo.h +// +// The VBO file format was introduced in the Windows 8.0 ResourceLoading sample. It's +// a simple binary file containing a 16-bit index buffer and a fixed-format vertex buffer. 
+// +// The meshconvert sample tool for DirectXMesh can produce this file type +// http://go.microsoft.com/fwlink/?LinkID=324981 +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// http://go.microsoft.com/fwlink/?LinkId=248929 +// http://go.microsoft.com/fwlink/?LinkID=615561 +//-------------------------------------------------------------------------------------- + +#pragma once + +#include + +namespace VBO +{ +#pragma pack(push,1) + + struct header_t + { + uint32_t numVertices; + uint32_t numIndices; + }; + +#pragma pack(pop) + +} // namespace + +static_assert(sizeof(VBO::header_t) == 8, "VBO header size mismatch"); + diff --git a/Sdk/External/DirectXTK/XWBTool/directx.ico b/Sdk/External/DirectXTK/XWBTool/directx.ico new file mode 100644 index 0000000000000000000000000000000000000000..bc43c1b2085df668dffff26d80adcb0ef73f23a9 GIT binary patch literal 25214 zcmeI5cVLg#_xSHS39q)lv}=A(BYsjRYY` zH7TWNZ8b`*#-_&kz0UJaROjc@&v*UvdpUTaG(n_2(TGm{_PcFOh|~~Ux5GdBRM&l zyS@?YKO>^Qtc#yK@=u5{>Hq1KU{6RG^@qxdybhWe6!5z*iT1>iBLf2i0)G1;F)?^# zV8BFP|K>v)#c5rObN$+f-L4fz{=Pz5qICr&I2w9ZA=o!DF>ZWtf_-8F&HR|vwHA+S zP{nE~FzDAF;&|;H84$z(0_?9pt3dI%xLlFpBZEgyw2$=+&#I6b7nfMqGcq#Jo?s8G z>p4OzD5~12M`UDVKydKL!F9QFDK%(Ci${(KPDs!KRM7WnalYOj`aV1;F?cLL!o&HH zo}QYR=;fsCL!B+pF9>D>DqtX+7fufKQRxlhe0_a0Thhkeyn_+~CQ{y; z@%qw|zKo3Ya1I_BkPwiRKHirPap3Byuh&#)kYKk51iqc_cQV6y=gx#&M~CO@L-E!DDMq{D4+R$L?E-5kdTxfq^}{V z_R1Ab1$!d>rj4h4Uq|aorEn_P6QL)?y&bKD>Y%L(3D6A!c{^J1u&ALu1T$YCvv07zrvnFZ+6(= z!6P&>)SF>b!Oc5So1q~)?>!?cSoAO+&WTntdR!`nNv zb#2Msz{^zdR)W)mg`~cvLSh3i$ZcKETGtOj<9V&ceSNL5>O1SQ2iYm_>+2vv)?4oO zVC(vAY5ksR*Xy~GfAJGJWQn3*@5$vY`SjS>tb2wrQ&nCi8&h8I$gKQ&N0Pbn&r|Ns z+#hqdaeu&FKT?f~#+=o=|F8FCeJ+oD%)E!_J-;Oi|Mae(jjQ$Vf6F~YoBEUflJ)&_ zc%9Yf=kU%NPuBRd#+x;Mp}5lH8GVvzPM@>rmqkwr$#2osBDh1(xKXzm@PDpn(hw@6 zeq|K<#EB}>vtFboCR*Y;vjmT-B^+XuGRWN^>Z^9p8fl4fb;E-bf`WL~8fkG}p5Y@$ zP8^Fw*5^c(l4}y|bt4hD`dOu8Kw?lXmB6Erv$|-*gU0Jy>)DbHBfTQIkcZ>rkhY$Y 
zYgBZ2ObkdzXj%n$40YxiS;=bE!lBM8h{vOP=SUTm z$VuN!v4jLmNU+3_Cm%yCkv3Arik1$wunc}XJ(c0{{Dt-ziX=%H@1lQ$VViZO@cg#p!Lh#i_pB~&XL-tMg_ozop8jWHRfS|$MhI-21%{>l zcaT+1@5$6Xo7s-GscmZ?+L!jJ_EZ7D4fx;T^R!Gpw8Ns%AkH-?5WY#P8yP${JRm(K zS7cz_$bfV-*cw=Fl%=vlCyw%tgTtyMjIxwcVB#oCQHBR6r8qIyk?Yb^>qf5f3iVA- zAMcsls{#|3T9+A88mBf;%8SH#J@6kn@@)RdfMnP7^blunj}YfI3C0!~Zfxo4##YzU z*a8yuImCGs-<>>?UBiPYANjz4RQhl2&Kywd&#$ArJ_zgwz6R2OFM++l9$*)+6W9(U z0b2n(unC9*Vt^=MEf5ZT28;&+0Y4zrW-|+bQ%+7M8Q2C)=D8QJ-`Uxu06TyfAl}8r zyvO}g_Uxt--@oO)2MEiS&3xzTYPJB$IdYhGl&MQQ@%)YgR`NbIS1$7~Up})vcW(0o z_w+n@%%{9Nls~`OQ?Q_k;{HqV;^rqHjOUXjN|=2mOPZ(d?#9mVI~6LJ!{y4E3mzV3 zFlDb-s$@!m&l68ivxIVa=x=q|vgU4$8pb6Bbz|?*!+1lFd*jC& znKa2%D^kSB=Qvw{)q= z;8~V0Hxe3Z9<5wyRt+C+rh~tPhnut+GmNZXZ!-8T8#kIe>(&`bNHCA$;*4zBVjgbV zWMunxb9U`oBfEAP*|W#UzJ2C0-(>%OGoL;lfBm(&#C#3p2K5%ZzR2Oyfi!PV`}W;|-NU#+iPz-MwqF^E*3z zI6=pp{LTa9OingAKKQ_7A2!T5*QsL)+`erJf^RNpnw_~X#&4FWDR%X$$unh&ae;q| zef5eQ)Svvx1^={H2Xn73fbvX!gC!opTj<&PEGv}x0#1xnhlWzQG-zCsr>W`C#^ zRKVknl{zawuv?;$xzYUAT7V>6n+`q4B|!LsGZI zT)2KuE397mc~%E4{e8+iLX!rtVJO z92b3n4j#>Uv3s8Z18?r4fv9iK{Rj=#g|ArZVAgK_HA?3#S+4EXwA4L26F0?tt0M^P z^kVNpM-$^?Hf-1sv;XwvhfhPlSh~P!ziXo!rE?XmNQI-@_ogHz#2#Y=le=|oosqIB zHYR3cyghN}srwP3%N^yrb!}d~OwL^SOO~tL;lvJbvd5gea^vanff?H<9vesL)T1}& ze(>qCr3>cKftJscD~D_DB1MaLID2&ap5%nDFI>6(^UptQ+z=hJ(Vo2L%bOu{=gj|j z>5>n1g;Ksk-rPA|v$+{&? 
zzC5{HU2Mjdw`{ErX(?MaMjg`9-yZwn!LZ>229F#)cKoF2vp<|Y%~8H!!Tfo1(}B&H zg2gI-w|5Iv$6`8g?A)dM!w0<7|CJ#DBSy1;W=^4eVDH9-3Kh)n=)ky?EPi&cJ$6GB z7v+Dty?8*MzJ2@k?>}Vhw3#y|QU0};n^Y@Y5R7tWceWYllC_Q|!$wh2RAFXs=u~&_ z)vH(EfJrl^jkn5otW%f@7Di6_%AMG`DK zZ#n=YXq2;1@iVj@4G+X6Y_`Y8z?^p<4QR&Iu2;bLv07eU>)oVI39CZBl7sfxH%5a= zOx)IOyS61J#99?z)5t*g{v!gkxV$#BW0N9Up+vEaU2)OTiu=aIy+_khQ<67Z6@FQS z=xEpLl_8Gu+Ch;bMatGZnq)N|6%}jWd$YsUgJ161rWLMT|0SZOceCz&9pxoxXvYR6 z%a(ih=uVJPLPp1JyIH&@ykS*1cHzp6hr^q;>)y+1USz^+z1zb&2UB;!{fbTO)+05G z6e(KEQDNi0@6X@w(X>;qewpRzpm&dQmCx?jYgO0~vuV$uLN3_~Dl^f;R(taCnvMNB zWi?L+K||ZOuGJy4LQMR&nci z@o2GJE>2X)M}>piQ;&A8*wC+2pI?iMyy>X$ouZVuw|HTegNrj2iq^V%l;S>qZC-f3 zxX9FJ6;k%xESoPk3yvDu3Ky%@wsYCKO6G>DMsYHzl zLx=Y6-L*Ma*RI{T{-(6{GU3gsZ@v~782IwbFAuOTN7sM%?`yB?HD=710R#T7*gyN0 zWe4d!xjd$qk*qiiPUuv9PR@+Ogy5j+Gq%(Jk6Zm3x67i(e=TnH*M2JGH`{jGq>?G7vcl9rvA5; zE1Qe>KM55pnkV>~8TgrdvHKr1Zfw@$M+iRU5u(}Bw5j=tcY<$_fe-m5K3lT-lpQ)4 zd;R*RGyQyrPbyuzT7Ku(l<@@DAKSGvmFeduKH_$KiuKf6gb$;>=uXOSZQk6((zd+x zlKB!}W;;G@EboVd4#v^g&>tk&z=!96o9i{;m3+AJNvE z_`zZ7qfVS?meF?t{p=q+*a&{hWqhrUj8~>lH}~+JBJf{}ft$>oYnJutV*>H7pW=5$ zQvQ(ote<}R%m*<4j0G%?Z0wr%E?-EL;!7fVWt z+5XW-M)1YH9$m z78$c(fid{@X2=j@!zZ^*n`WGnl8iGxq4TwC#u-1>>9fy_%j3t!1z*+$-_MD;a9Og% zxZ=mU;wNXP?`-($E{xZi_&^SDcftS9NgMeXTXxzn@XIeI7wr_`Irr?@rYQBw;Cq(C zH?6=uC;U}|d#xKcj0b&q>o@#fKl=5I*K5gFq@=NpK#fxSNc^XDi&j0WaMR2?*FgAGFtoaK*Ui7iUWPetwbmM=L)6bWvsx4Xc$ddnO_B*u{+d@4Ynm zwYMf8i8_D#$%@ZEaUj{eo?DrtN%rXQAFj~Az)=%I?dy(Tp}=PgtYJ2(TcPlQok{i$ z;pea4f4A?D$=}38hW&8m)}!T%=W7p5Jc}1Bma+?zFydmy!&e7{#;;!+v3B3NYfqQW z<#U_Ho;mXtF1CBmj;(QrF5mcQ@>dZNYop>4zW!;+!nw2bdnK1#`3e_1us3;2%*l&C zo;$W`_1c)Nd%u`I^Mg4v`0nfB?2;=_!J>D*PmEuC{D;%WkEL%o96EWzTT^GL;or@# zqKlKWYw`Sri|yI8F6`LxBO(s|aOK{_E*(4f96JR=erS(IrQEV- z&t3LZ{JJ&aYu3kX+;He(#xEhgTelxPezXu~YgEP6)xFUE^=rachp&sV-W?3DFT$ z-~3LIVn>rVM@O7Jce7=yj!NHG#|?O)*3q35Si3g*Oo6aIX?by{f4bN>vtJ9 zF|bdu6lQ2+?BTLGa^x?1VAuAtwk?`u@|X`T65 zUUd}MmHd4^4l}ssDO~G9jVcXWcggxL0TdYd&VdsJb3;j|TzSgWuG6qZr)Qsq0u$eu 
zJhGX4sq!8rOO&f#$FKQsK2vgB$fOAa+W9qTP`7d8CT%)&c&@VJ=gZ^Xe0|K&LH)b- z?%n%OK5L==Lx&C-GNgCcKh(8eum|vezwhwZc1jYmN^KO4lWzpnKFNxaui^I!U?DIC zP#Z;K{oX=w;d`4_}BX+_8exF5$-$wu6MxSQ@b!m4o@$GbMh|$;>n;JGW zt!OjV-`_~DUS=n{S%@{ys$JWhL}%a8SS~t0347u??=}AXgt%e?`ahJi5xk#=KG=*s zv61pG(r5Pnci?U9S553~^z3>n-zm>Qp0h=9m!G{hxns>Fqnn`48x$ui7o> z>k`E1e?foAjveL+dRpVcmw7&luGxyNpNqaaMLhE$`nw@^gr$$sJytweb+YPXVxd+% z7@cL%B~~n0PXY!t&7bhpVy8#daop_6Rrf7`5C#)%lR)5(*@ z>6>qiGjU&M^p`7r=Dc;wj}{h1eCpC7%K9le{4F&3qsE8{PT4lGOgY}jn& zu^C*Mll<6170`bk)TxBMkeir(UHWc>j`XLkGGoS=meljbPQc+YJ&9cx!qyu_eET)} z91W0%frN@p|9x#tXh zJ7ngHdX3w4SX05(#T0yZ@SxY<|D;(Juhwg7=F3^;jX?v43=96~(|%Qb)~xZUHahT? z0dsqdnKXNGCHiYpYwU=CH;d<*HEKeQ%8F3S7E_0RZga`=VRLtVT(hQ4x3F?Wik9`P z=y>E`zXo+`S9d%iqZ>Z(cW2-7^6g~To+Cn>-(4Bv{Ah)-En-pW!fIH}*v_zzc`+lz z`K7@j&XbQMyWW0sHvcCJ9>DNbnIZMj1_3tX#=rkFSF>i#T#6Sjo}1PS0VM%9pc0@c zsEep~eRc!%yOzrZI0OGzmlGZ5%AGrR@$A{NS95W3Y3l6k+#ML?ZeO#mHNF+j(n?fs8l@PkchU8PEuyyeQ3^C(fGgnzZ-#Rp9+S8mCQYSp&y zXwcxh?_0IHbftUuJNE|;eDrkWNVz|D?4zH?jk|N?wb!m}89MaDqF%lB4Qkvtx^UjS zvl-J^5bgyu2Fd|ChyNpU5)u;P#Kuf+cX#*lWy+LkHl=v++41Gd?KxAm>J@3&P^2}s zRkv<}Zp1bs{HhV#NkoySP8Ie}1z%OhnSp>W zP#VYq*nt1?r8MU_O|~XYniM5=SATA~a_{dhRqDWG$YljXU%1@|PZNS#@<+opTcfb3na%Fi^tCmRp`XVh`3b6||8(tO} zjdA|gTMpbm_&{U{+ce9UJ8+JS6xpyr@clVX5%&kUv1ZM^mE*=8u2;G8a`mf!NE7P*>zEVQMxPs8#jvB?IKA@!alhJwrvyQ7sCFz^)2nAk1h>u-`>s`-b5y} z1^-fj3-E7UnfN<%?kH!ME?p|FuT*LANe_=}KZE-%PfxiE&)nz!sA^St%DX33s>p*n zb>x1dM)E}0K=Q?7pf`gz1utoZ2}clX=Z!1G4Mijv{sA@IH2D^*G! 
z7A-0d3Ko>Bg$l`0H#a%Syxf7GIG-V$!EmhOz(B!X70y5iXC6eBv9GASuk*kdo~ES8 zlP|xNr)gF|A=EoPxtQ^&RdA=-YwYL))?LyFhJ+w zP4L(CplkY115J&eaGfBS7~T|fEoL*a~xaF#?q9x*~b zWNs4StskNJBgOx~0TG=K9Ru&quU>sCutNu354xsRE~#w&&6P=emmNEHen`%P_mA1K$pvR;`3Ah-1w7#u&UXmsGo<5{D>9DXtA-4bAK;(ISu#o2 zGw+14vF9Q(ZJKa?Mh+mOrXsT%^zJSG@cUb=gDtG@pLXt)$5~@geuBS)a48D0&rXxdA;C;>}$#@$>s4^J#-QV1Z3uKB&MGTkfBi6ait0JlajK`EU7+B?rfsfu z)vCSnb-8jkkAVA8=>9#p9|!M~;G70v$Ve&0owY}NRXF=2#i)!H5HOY3fA~Qn;F%wh zr;n9~R3B%}0lI;+O|lQ&JjBOG3bt)4PK=kcJkpqUMsi)Xtr)IUq(a^1=&5LHf&MbTenRWEpF_c8L<=j4ZM-UTvom zbU!(|ibq;$2Z!)jTjvkeL%!A6M$0}1<7n~E5JK^CE zU-tKp&Rhp-w_2CVdwYcnE50gIM)pDbeaMwP(4Ao~R9LEc)&GupN-xy{}E z_}-EwWe>F9jjY)U-t&OwKpxh!4VoI{q=60w8E4L)7Xz-Gdz0++TN<$D0DhPV-PXc0 z7kqu?2|D3E<4;FF3`3U`LcZGIhv#XahE`sn6ziZibJ(6XhTXU!A0R_hR;`lj*c^`) zf9$6dbLU)}a?*e6R{0cNaSFO~wousr5m~W9hWPu(LjUgIuYR1_ zSgF#iUC?HGks`7k-eLbwwu5sNFazicAauo<@fmCagDuZKj2PyYGlXK$0V+!iGX`(Q z&;xu%Gq&Z(($pF?BmvsIjGa*!xnxrwWPI!sIo1$36Q6R7NBQ#jadE~b;#{CKzz&~> z{yvS(%^69Hf7Fip{B{9f$ST*h-eq zgZ=c~#EDn)=g&V0)HNnxw$!L`X)AM_z#MaKSrWisaZU#I0Q)lU#D6V4fD#uLzp!{5 za{-;Lb^ZPKmTb)f?iHAWTHI?uXE&fAzq9eqWPy$WyMtsuaZ$Xn0qY=}hCr9)%x^k# za20*WnO%nu!8#TN*5B3O;Jcji3CWZ}Cb3SP`*!HRu|NTdMefIe|7LL82J8a91XLF& z=zOpPDQ^IcfINT=o8RIkbd5z%WV*_e5(*H7e zW*>GDXO@LC%kntOhn_X-S05B#;=$W*KPp3Nsg_u#+oe zyLLck&%zhli;TU5-sW7kJXT*+?IyMT)F)McT77BtN0A**(I3P<-pQi>)`ku5$Kj)_ zN4~9NeX88ib*nNo33z@Eh*%4|#c~+f01N}l1J2l^20pd$SKS7VD%YJe!6kDHYHyU~ z8|R6o9eB+^wtkI{xQ)JkqPkjrX|=u8_UqHfVT0>DC?8>aYb@izm@$t_mMj^J9;>YM z-`uq6N&;V9pD_aynv1TK6b?cX#Y_C25dts9-y|L#x&G^QX5eHAdSVm|Gr2_h+KR1 z)w`ujmwr7*jvVfae?s%--^M|QweaC;*6$kle;qhI&qv8T?*-BU<)80>uYpB?@=!L_ z&#cAgZ6mc$tug3W_-zA}pUNu!Ko9iyczl$##EdR$97f{-%!L(`LGE&P1LGWjROdqd zX>iwZob28EC*o&;=m1o!G4U;1#>XSCBC&tMa^#TJ%mMpLvK~;GqrComKGL-u4|E6e zW82tNAFEzcnXa-$aaX+cx4u(-r7|D+$r*O>1b@z-%MfJh^c_278+`K%aRE_)7ewPT z&{VLs1$ku6f%?+u?>(J59j{QKLLX2^l(R3^y!q?|=59SYK8!W63i~Gvydr?-*Fu&& z+L`&yvW@UTY%0H14=H|%ui~ws&-z~N%nKJBHV<}<%KjpZp$=nEyLl92n9JJV4-Sur 
z4+!>_V6O-=MOfRMrE$!G#`Rd&ku6%JRI664HK<$mPi&JWUG0o93R`6rJhYNISP2j5 z7}N&UG3Z**^{jGfKH#Oc40eenFI6ugGcDbubXQQjK<#X`w^etk-=Ol9b(>{x6l08V zjIdf|3~kK8Hsl0>(JhR52>md{v0hC5oaJIyOpW>axFIl*&j#U|ion|A}U_%BPbdkY! zG1!vqxjE(oz9I+0vi-X=hT%LKfe*3J9lQH zEU{qIF~qcJu`HhW(^~j%C3f8B@X%-A`Z@3k5CXIW^0Q8D>I10m&*Bxuo5n2A-^%aE zA*aw#$>r}a1!&6^JZ#7kGjpb8E3jwyn=wFPL3+vn=41?WHV0q%AnWlL=Go#Sja6dL z?(p|dCyv~H`0(Kj>{t9ZG-=W?p<~C}QLL9R=74-9`G`4~0?s{v%E;E7ue~PZr~Jw@ zS#*CMcYO!X+t5#i=}+yY4uB_+i}q~DMS~nP*fSPC>AGNT7Xa@njJ*-{Ij1ANv6nwY z9-YRY{iRwp%a6F*u;If+&6_Xggl3H$J9hjbjuP9ZP4q^1V-2>*GUi|!w%kkfSqXXM zij83VSaJsXkvAiqzw=*{|3HzZnDkMsALf zVP1072~pUi?@-yj38uzoY} z-wroycy@k|9wQemT2%4!<;$wVHP-p3`0F^L{QdjJckOzf{5uI_46|uxF!N9sIg*bt zIHMaa|4V&HT@NbL!QBPk()GF;*a@iJs=TA)P~3GKSz}OMssm(aojDC2EZMQ6y>jQ4 z>Covb?iYv=U1Oi*RGm6E*0yRj51y}e=+GgRIqF9ICGJ`C5Z$WPg!pdVe%5(d#a;zD zjq(a(@S^>^IuGb}gYGr>riM70jo3gb^rXrw)p2TXsO_z`>+i>*e5Lk@&QndG6#B9* z{4y1~??vvNCH{8_zWJec?K|;)e#__1o$Gz_0f97S5e8FQO5D8A|7ic0K$n$5!XukPrEYOx(Ue8yb-9K&27i)AMI zF&N$WK7D_|Sk|b_M{aQDTgXM07+@o?3E09r&VyS#l7e1Ig~!vN_cze{2y^&N&6+op zYS&)>S*uoUHgDcsG9x2H*XlFe{|c{v`aY&rs}~r@#*N*&-HpQTUyHtug0EtbQ_;{b znt6#P-V=-7iDS(t09$}0 zQJ3vc_0&Jb`}gJ7`T2EXZG0Y&FR>NdVk=|Wg6+G7dD)8Jl*pL2Vb5+y?{nT=cJq5L z@4iB}rvoWIJ|`2syh5XD)#7N>@Pe#SrL}^tQ+53<+`0ctS^0b1{>gK!+TP8Yb;E9& zy{S!`O&3&A-2AamoMo9rQvnQMB&h zm8{Rt-Opw|XWcXOnQ>Y4z%YEQ&)k(xro2TW$328+=p<*k3;$^ay;D%>$($Ki?p1m1 z1~dcy8_sDd1zm_w=H*@l&=^N~z#|j%x4zSFn4#9$hAgV{_@7;RzQ@hYEr+|idvRhb zo_aQ^Sg~T=3KuTizfhq<0R;;d9GNd)zL9zJ<{iqJtA6Z3bz%?1m)i}KgI(Z z+UPrb%a$#R`;o^zs$RXJYZ^BWO>NU=-`U>1&)p0Ry!CkE#K(`PPJNs)efsSaAt4u% z#*g1WuV25ICZ3)%vSrKm3V5`o?`l9nAd55k`PWxwzfSlQxqJ5PS>?lq4ae-MRcq_D zMvbnD#sS$sA~(ko_tSGr#P#``4%V3;>nsZXA4t<<>?s)Y;$Yx8K1(6pR#wOduY$j zz_9-!#BDBwgt5)JI^w~Z1 z@B{SRL+rPU=)ObX@iY3uItzkrZJo)%_qWbM=~*25wf5(Ae^SpHfQ$XT_YU|}tHz%# zaqgChgVNzQm-fA7%a+{izm%n`? 
z+1L{B;z_-(YK5F=^)DDf~z-m@HvGnfxmucS#!Ie@|f_JdJ&x``W+a z!CWXmWb&l03rmXY*duG#Zj)Z7%;~QgH@SVn!jatg;Wj-FLEOZZzCA%y7Q(~`Ic#Fsr`0_;YhRc>7|z>EE!yoy`$Jh@ zF&dL#U66Apv}6N)5Ot{k=-n>z_x z{0sUYM&B@put%n3#&_7S*S%@-5u6mK>U_Jx(s^icplAc`y+xKYDijZAV1t$4Z%xR!;c zUGNJQA}bC;$J^lYlz58j3)NYwJL%tp^Sux4_-f?KT74(d-)2B#%rk)gz%yDv$7g3O z7;|p=)V(+DzYtIy`jbm2o$yN*;@hXOM#(dk2hjeh?lb6q8#?Q8NXX-7{Wop;O9DPj zH2ufYSJuAB4t^^=RszZsC=YQ$2U&3$<#COv(kFS9k^|6ve)1lr5i(;kaW?X2g}>P( zPgPE;tX6%&9O!=8QhANMu=a~MBX@K7@LOubDF1C~-TDAI zzY>X`s{3?VIKZ=%4@`C_3VhQ4PBqzp#yoE5`m2P z20CPDoLl#FMhq?L(SI*s6!R2}9*m;>4C1BM-XF4>++>agm0@TmPi+1A z!^uIG4fxw%pl4?y1C-w6Ig2y<`qnzuzLeI=-wH}|#%<%=f)iu2;&$je3m5cGcJL$j zRa#TF8#EcjIgLHEFRHuQ6C-A`ioMcm{DGcq{0*@3_V(@1#^9fl$1UV`Ng?>hN#npO ze|7&*b%4s3tg*A^b#JaD`bzoIk`wT~6?fA;J1q~6g;}Q_%!@BJ(q`-#-FLh0=XZ6C zk57O*HZ9|~ba0HH-^UtD`Uo0zLFX2P53I9AdM-zCP(7$^s%%mD#_wEn=15=q*0rJQ z(~=SJ7Jur{A+MERkQJ`%-;&2J$I%gwp;c2s$EV0*0ru?O>g3Hr&bXg9nD&( z4(+ou50(!2_+tkSstZq_7ILITLJBJ`4C>?Z-7a4 z=)Qpdb^$7{R8IK-&-51kx?q?2Q9qJ)E}-9T5m(&as8N{O)oR;6-?#oo=Gvm0HvM7~ zI$d!HV7@$wt>%OuY-yM+{EY*dijAalY9nB$uSD*8HjBT*Ad$#W{)U3s(LINt-w%vCwN9P5&pLH#rnbHM z7k@SO=kd_FP=1K*-1&4Qd!vhqBh5$lFJlfP;f)yDkH=oKLz^V-JBjn{;oX<$ku+jp zhw&4>@$&j%yO-BIwX1h}k{H>h7jZzu8`=&Ki65>SNWORGT)lUl;oR%{qI&UHvT~ zHVqw2eL$GzED?uf?D7x?D7}MA7T?z38i^zu+a$x05 zM8r=oHETAH`jxWUP#%bF)TrBuk|i_98y0e6h1^gfC)Ya9Lo7kauNCq_g`8QrwRP*= zzW)B&hQ?J)O8@>PlZzBNMGmr%t19HI$^aTAZ&b+t6>@il98)1zRmd+Da&P5)WaO19 zrAw=w;SyKA{9EKP3%RM*UO)MMN&<8rMP(iNxz@Q0 za(pct)bi1gljKAT`Ont8=-hHHuuW8j3YXSUN9{XpL-joQzt7GFkxy*-OyuthInqKd zwskHeW6YS_%3~XRd{W5)mxa_Nr(4K5cGy8`kEnfu%}2hmkQ*%I{tEfeLN2$Q?9$~7 z=PJv`HElYcoNpnw+o8);pQ}y79FSuy + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "WAVFileReader.h" + +#ifdef __INTEL_COMPILER +#pragma warning(disable : 161) +// warning #161: unrecognized #pragma +#endif + +////////////////////////////////////////////////////////////////////////////// 
+////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// + +#ifndef MAKEFOURCC +#define MAKEFOURCC(ch0, ch1, ch2, ch3) \ + (static_cast(static_cast(ch0)) \ + | (static_cast(static_cast(ch1)) << 8) \ + | (static_cast(static_cast(ch2)) << 16) \ + | (static_cast(static_cast(ch3)) << 24)) +#endif /* defined(MAKEFOURCC) */ + +#ifndef WAVE_FORMAT_XMA2 +#define WAVE_FORMAT_XMA2 0x166 + +#pragma pack(push,1) +struct XMA2WAVEFORMATEX +{ + WAVEFORMATEX wfx; + // Meaning of the WAVEFORMATEX fields here: + // wFormatTag; // Audio format type; always WAVE_FORMAT_XMA2 + // nChannels; // Channel count of the decoded audio + // nSamplesPerSec; // Sample rate of the decoded audio + // nAvgBytesPerSec; // Used internally by the XMA encoder + // nBlockAlign; // Decoded sample size; channels * wBitsPerSample / 8 + // wBitsPerSample; // Bits per decoded mono sample; always 16 for XMA + // cbSize; // Size in bytes of the rest of this structure (34) + + WORD NumStreams; // Number of audio streams (1 or 2 channels each) + DWORD ChannelMask; // Spatial positions of the channels in this file, + // stored as SPEAKER_xxx values (see audiodefs.h) + DWORD SamplesEncoded; // Total number of PCM samples per channel the file decodes to + DWORD BytesPerBlock; // XMA block size (but the last one may be shorter) + DWORD PlayBegin; // First valid sample in the decoded audio + DWORD PlayLength; // Length of the valid part of the decoded audio + DWORD LoopBegin; // Beginning of the loop region in decoded sample terms + DWORD LoopLength; // Length of the loop region in decoded sample terms + BYTE LoopCount; // Number of loop repetitions; 255 = infinite + BYTE EncoderVersion; // Version of XMA encoder that generated the file + WORD BlockCount; // XMA blocks in file (and entries in its seek table) +}; +#pragma pack(pop) +#endif + +static_assert(sizeof(XMA2WAVEFORMATEX) == 52, "Mismatch of XMA2 type"); + 
+////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// + +namespace +{ + struct handle_closer { void operator()(HANDLE h) { if (h) CloseHandle(h); } }; + + using ScopedHandle = std::unique_ptr; + + inline HANDLE safe_handle(HANDLE h) { return (h == INVALID_HANDLE_VALUE) ? nullptr : h; } + + struct find_closer { void operator()(HANDLE h) { assert(h != INVALID_HANDLE_VALUE); if (h) FindClose(h); } }; + + using ScopedFindHandle = std::unique_ptr; + +#define BLOCKALIGNPAD(a, b) \ + ((((a) + ((b) - 1)) / (b)) * (b)) + +#define XACT_CONTENT_VERSION 46 // DirectX SDK (June 2010) + +#pragma pack(push, 1) + + static const size_t DVD_SECTOR_SIZE = 2048; + + static const size_t ALIGNMENT_MIN = 4; + static const size_t ALIGNMENT_DVD = DVD_SECTOR_SIZE; + + static const size_t MAX_COMPACT_DATA_SEGMENT_SIZE = 0x001FFFFF; + + static const size_t ENTRYNAME_LENGTH = 64; + + struct REGION + { + uint32_t dwOffset; // Region offset, in bytes. + uint32_t dwLength; // Region length, in bytes. + }; + + struct SAMPLEREGION + { + uint32_t dwStartSample; // Start sample for the region. + uint32_t dwTotalSamples; // Region length in samples. + }; + + struct HEADER + { + static const uint32_t SIGNATURE = MAKEFOURCC('W', 'B', 'N', 'D'); + static const uint32_t VERSION = 44; + + enum SEGIDX + { + SEGIDX_BANKDATA = 0, // Bank data + SEGIDX_ENTRYMETADATA, // Entry meta-data + SEGIDX_SEEKTABLES, // Storage for seek tables for the encoded waves. 
+ SEGIDX_ENTRYNAMES, // Entry friendly names + SEGIDX_ENTRYWAVEDATA, // Entry wave data + SEGIDX_COUNT + }; + + uint32_t dwSignature; // File signature + uint32_t dwVersion; // Version of the tool that created the file + uint32_t dwHeaderVersion; // Version of the file format + REGION Segments[SEGIDX_COUNT]; // Segment lookup table + }; + +#pragma warning( disable : 4201 4203 ) + + union MINIWAVEFORMAT + { + static const uint32_t TAG_PCM = 0x0; + static const uint32_t TAG_XMA = 0x1; + static const uint32_t TAG_ADPCM = 0x2; + static const uint32_t TAG_WMA = 0x3; + + static const uint32_t BITDEPTH_8 = 0x0; // PCM only + static const uint32_t BITDEPTH_16 = 0x1; // PCM only + + static const size_t ADPCM_BLOCKALIGN_CONVERSION_OFFSET = 22; + + struct + { + uint32_t wFormatTag : 2; // Format tag + uint32_t nChannels : 3; // Channel count (1 - 6) + uint32_t nSamplesPerSec : 18; // Sampling rate + uint32_t wBlockAlign : 8; // Block alignment. For WMA, lower 6 bits block alignment index, upper 2 bits bytes-per-second index. + uint32_t wBitsPerSample : 1; // Bits per sample (8 vs. 16, PCM only); WMAudio2/WMAudio3 (for WMA) + }; + + uint32_t dwValue; + }; + + struct ENTRY + { + static const uint32_t FLAGS_READAHEAD = 0x00000001; // Enable stream read-ahead + static const uint32_t FLAGS_LOOPCACHE = 0x00000002; // One or more looping sounds use this wave + static const uint32_t FLAGS_REMOVELOOPTAIL = 0x00000004;// Remove data after the end of the loop region + static const uint32_t FLAGS_IGNORELOOP = 0x00000008; // Used internally when the loop region can't be used + static const uint32_t FLAGS_MASK = 0x00000008; + + union + { + struct + { + // Entry flags + uint32_t dwFlags : 4; + + // Duration of the wave, in units of one sample. + // For instance, a ten second long wave sampled + // at 48KHz would have a duration of 480,000. + // This value is not affected by the number of + // channels, the number of bits per sample, or the + // compression format of the wave. 
+ uint32_t Duration : 28; + }; + uint32_t dwFlagsAndDuration; + }; + + MINIWAVEFORMAT Format; // Entry format. + REGION PlayRegion; // Region within the wave data segment that contains this entry. + SAMPLEREGION LoopRegion; // Region within the wave data (in samples) that should loop. + }; + + struct ENTRYCOMPACT + { + uint32_t dwOffset : 21; // Data offset, in multiplies of the bank alignment + uint32_t dwLengthDeviation : 11; // Data length deviation, in bytes + }; + + struct BANKDATA + { + static const size_t BANKNAME_LENGTH = 64; + + static const uint32_t TYPE_BUFFER = 0x00000000; + static const uint32_t TYPE_STREAMING = 0x00000001; + static const uint32_t TYPE_MASK = 0x00000001; + + static const uint32_t FLAGS_ENTRYNAMES = 0x00010000; + static const uint32_t FLAGS_COMPACT = 0x00020000; + static const uint32_t FLAGS_SYNC_DISABLED = 0x00040000; + static const uint32_t FLAGS_SEEKTABLES = 0x00080000; + static const uint32_t FLAGS_MASK = 0x000F0000; + + uint32_t dwFlags; // Bank flags + uint32_t dwEntryCount; // Number of entries in the bank + char szBankName[BANKNAME_LENGTH]; // Bank friendly name + uint32_t dwEntryMetaDataElementSize; // Size of each entry meta-data element, in bytes + uint32_t dwEntryNameElementSize; // Size of each entry name element, in bytes + uint32_t dwAlignment; // Entry alignment, in bytes + MINIWAVEFORMAT CompactFormat; // Format data for compact bank + FILETIME BuildTime; // Build timestamp + }; + +#pragma pack(pop) + + static_assert(sizeof(REGION) == 8, "Mismatch with xact3wb.h"); + static_assert(sizeof(SAMPLEREGION) == 8, "Mismatch with xact3wb.h"); + static_assert(sizeof(HEADER) == 52, "Mismatch with xact3wb.h"); + static_assert(sizeof(ENTRY) == 24, "Mismatch with xact3wb.h"); + static_assert(sizeof(MINIWAVEFORMAT) == 4, "Mismatch with xact3wb.h"); + static_assert(sizeof(ENTRY) == 24, "Mismatch with xact3wb.h"); + static_assert(sizeof(ENTRYCOMPACT) == 4, "Mismatch with xact3wb.h"); + static_assert(sizeof(BANKDATA) == 96, "Mismatch 
with xact3wb.h"); + + template WORD ChannelsSpecifiedInMask(T x) + { + WORD bitCount = 0; + while (x) { ++bitCount; x &= (x - 1); } + return bitCount; + } + + WORD AdpcmBlockSizeFromPcmFrames(WORD nPcmFrames, WORD nChannels) + { + // The full calculation is as follows: + // UINT uHeaderBytes = MSADPCM_HEADER_LENGTH * nChannels; + // UINT uBitsPerSample = MSADPCM_BITS_PER_SAMPLE * nChannels; + // UINT uBlockAlign = uHeaderBytes + (nPcmFrames - 2) * uBitsPerSample / 8; + // return WORD(uBlockAlign); + + assert(nChannels == 1 || nChannels == 2); + + if (nPcmFrames) + { + if (nChannels == 1) + { + assert(nPcmFrames % 2 == 0); // Mono data needs even nPcmFrames + return WORD(nPcmFrames / 2 + 6); + } + else + { + return WORD(nPcmFrames + 12); + } + } + else + { + return 0; + } + } + + DWORD EncodeWMABlockAlign(DWORD dwBlockAlign, DWORD dwAvgBytesPerSec) + { + static const uint32_t aWMABlockAlign[] = + { + 929, + 1487, + 1280, + 2230, + 8917, + 8192, + 4459, + 5945, + 2304, + 1536, + 1485, + 1008, + 2731, + 4096, + 6827, + 5462, + 1280 + }; + + static const uint32_t aWMAAvgBytesPerSec[] = + { + 12000, + 24000, + 4000, + 6000, + 8000, + 20000, + 2500 + }; + + auto bit = std::find(std::begin(aWMABlockAlign), std::end(aWMABlockAlign), dwBlockAlign); + if (bit == std::end(aWMABlockAlign)) + return DWORD(-1); + + DWORD blockAlignIndex = DWORD(bit - std::begin(aWMABlockAlign)); + + auto ait = std::find(std::begin(aWMAAvgBytesPerSec), std::end(aWMAAvgBytesPerSec), dwAvgBytesPerSec); + if (ait == std::end(aWMAAvgBytesPerSec)) + return DWORD(-1); + + DWORD bytesPerSecIndex = DWORD(ait - std::begin(aWMAAvgBytesPerSec)); + + return DWORD(blockAlignIndex | (bytesPerSecIndex << 5)); + } + + bool ConvertToMiniFormat(const WAVEFORMATEX* wfx, bool hasSeek, MINIWAVEFORMAT& miniFmt) + { + if (!wfx) + return false; + + if (!wfx->nChannels) + { + wprintf(L"ERROR: Wave bank entry must have at least 1 channel\n"); + return false; + } + + if (wfx->nChannels > 7) + { + wprintf(L"ERROR: Wave 
banks only support up to 7 channels\n"); + return false; + } + + if (!wfx->nSamplesPerSec) + { + wprintf(L"ERROR: Wave banks entry sample rate must be non-zero\n"); + return false; + } + + if (wfx->nSamplesPerSec > 262143) + { + wprintf(L"ERROR: Wave banks only support sample rates up to 2^18 (262143)\n"); + return false; + } + + miniFmt.dwValue = 0; + miniFmt.nSamplesPerSec = wfx->nSamplesPerSec; + miniFmt.nChannels = wfx->nChannels; + + switch (wfx->wFormatTag) + { + case WAVE_FORMAT_PCM: + if ((wfx->wBitsPerSample != 8) && (wfx->wBitsPerSample != 16)) + { + wprintf(L"ERROR: Wave banks only support 8-bit or 16-bit integer PCM data\n"); + return false; + } + + if (wfx->nBlockAlign > 255) + { + wprintf(L"ERROR: Wave banks only support block alignments up to 255 (%u)\n", wfx->nBlockAlign); + return false; + } + + if (wfx->nBlockAlign != (wfx->nChannels * wfx->wBitsPerSample / 8)) + { + wprintf(L"ERROR: nBlockAlign (%u) != nChannels (%u) * wBitsPerSample (%u) / 8\n", + wfx->nBlockAlign, wfx->nChannels, wfx->wBitsPerSample); + return false; + } + + if (wfx->nAvgBytesPerSec != (wfx->nSamplesPerSec * wfx->nBlockAlign)) + { + wprintf(L"ERROR: nAvgBytesPerSec (%lu) != nSamplesPerSec (%lu) * nBlockAlign (%u)\n", + wfx->nAvgBytesPerSec, wfx->nSamplesPerSec, wfx->nBlockAlign); + return false; + } + + miniFmt.wFormatTag = MINIWAVEFORMAT::TAG_PCM; + miniFmt.wBitsPerSample = (wfx->wBitsPerSample == 16) ? 
MINIWAVEFORMAT::BITDEPTH_16 : MINIWAVEFORMAT::BITDEPTH_8; + miniFmt.wBlockAlign = wfx->nBlockAlign; + return true; + + case WAVE_FORMAT_IEEE_FLOAT: + wprintf(L"ERROR: Wave banks do not support IEEE float PCM data\n"); + return false; + + case WAVE_FORMAT_ADPCM: + if ((wfx->nChannels != 1) && (wfx->nChannels != 2)) + { + wprintf(L"ERROR: ADPCM wave format must have 1 or 2 channels (not %u)\n", wfx->nChannels); + return false; + } + + if (wfx->wBitsPerSample != 4 /*MSADPCM_BITS_PER_SAMPLE*/) + { + wprintf(L"ERROR: ADPCM wave format must have 4 bits per sample (not %u)\n", wfx->wBitsPerSample); + return false; + } + + if (wfx->cbSize != 32 /*MSADPCM_FORMAT_EXTRA_BYTES*/) + { + wprintf(L"ERROR: ADPCM wave format must have cbSize = 32 (not %u)\n", wfx->cbSize); + return false; + } + else + { + auto wfadpcm = reinterpret_cast(wfx); + + if (wfadpcm->wNumCoef != 7 /*MSADPCM_NUM_COEFFICIENTS*/) + { + wprintf(L"ERROR: ADPCM wave format must have 7 coefficients (not %u)\n", wfadpcm->wNumCoef); + return false; + } + + bool valid = true; + for (int j = 0; j < 7 /*MSADPCM_NUM_COEFFICIENTS*/; ++j) + { + // Microsoft ADPCM standard encoding coefficients + static const short g_pAdpcmCoefficients1[] = { 256, 512, 0, 192, 240, 460, 392 }; + static const short g_pAdpcmCoefficients2[] = { 0, -256, 0, 64, 0, -208, -232 }; + + if (wfadpcm->aCoef[j].iCoef1 != g_pAdpcmCoefficients1[j] + || wfadpcm->aCoef[j].iCoef2 != g_pAdpcmCoefficients2[j]) + { + valid = false; + } + } + + if (!valid) + { + wprintf(L"ERROR: Non-standard coefficients for ADPCM found\n"); + return false; + } + + if ((wfadpcm->wSamplesPerBlock < 4 /*MSADPCM_MIN_SAMPLES_PER_BLOCK*/) + || (wfadpcm->wSamplesPerBlock > 64000 /*MSADPCM_MAX_SAMPLES_PER_BLOCK*/)) + { + wprintf(L"ERROR: ADPCM wave format wSamplesPerBlock must be 4..64000\n"); + return false; + } + + if (wfadpcm->wfx.nChannels == 1 && (wfadpcm->wSamplesPerBlock % 2)) + { + wprintf(L"ERROR: ADPCM wave format mono files must have even wSamplesPerBlock\n"); + return 
false; + } + + unsigned int nHeaderBytes = 7 /*MSADPCM_HEADER_LENGTH*/ * wfx->nChannels; + unsigned int nBitsPerFrame = 4 /*MSADPCM_BITS_PER_SAMPLE*/ * wfx->nChannels; + unsigned int nPcmFramesPerBlock = (wfx->nBlockAlign - nHeaderBytes) * 8 / nBitsPerFrame + 2; + + if (wfadpcm->wSamplesPerBlock != nPcmFramesPerBlock) + { + wprintf(L"ERROR: ADPCM %u-channel format with nBlockAlign = %u must have wSamplesPerBlock = %u (not %u)\n", + wfx->nChannels, wfx->nBlockAlign, nPcmFramesPerBlock, wfadpcm->wSamplesPerBlock); + return false; + } + + miniFmt.wFormatTag = MINIWAVEFORMAT::TAG_ADPCM; + miniFmt.wBitsPerSample = 0; + miniFmt.wBlockAlign = AdpcmBlockSizeFromPcmFrames(wfadpcm->wSamplesPerBlock, 1) - MINIWAVEFORMAT::ADPCM_BLOCKALIGN_CONVERSION_OFFSET; + } + return true; + + case WAVE_FORMAT_WMAUDIO2: + case WAVE_FORMAT_WMAUDIO3: + if (!hasSeek) + { + wprintf(L"ERROR: xWMA requires seek tables ('dpds' chunk)\n"); + return false; + } + + if (wfx->wBitsPerSample != 16) + { + wprintf(L"ERROR: Wave banks only support 16-bit xWMA data\n"); + return false; + } + + if (!wfx->nBlockAlign) + { + wprintf(L"ERROR: Wave bank xWMA must have a non-zero nBlockAlign\n"); + return false; + } + + if (!wfx->nAvgBytesPerSec) + { + wprintf(L"ERROR: Wave bank xWMA must have a non-zero nAvgBytesPerSec\n"); + return false; + } + + if (wfx->cbSize != 0) + { + wprintf(L"ERROR: Unexpected data found in xWMA header\n"); + return false; + } + + miniFmt.wFormatTag = MINIWAVEFORMAT::TAG_WMA; + miniFmt.wBitsPerSample = (wfx->wFormatTag == WAVE_FORMAT_WMAUDIO3) ? 
MINIWAVEFORMAT::BITDEPTH_16 : MINIWAVEFORMAT::BITDEPTH_8; + { + DWORD blockAlign = EncodeWMABlockAlign(wfx->nBlockAlign, wfx->nAvgBytesPerSec); + if (blockAlign == DWORD(-1)) + { + wprintf(L"ERROR: Failed encoding nBlockAlign and nAvgBytesPerSec for xWMA\n"); + return false; + } + miniFmt.wBlockAlign = blockAlign; + } + return true; + + case WAVE_FORMAT_XMA2: + if (!hasSeek) + { + wprintf(L"ERROR: XMA2 requires seek tables ('seek' chunk)\n"); + return false; + } + + if (wfx->nBlockAlign != wfx->nChannels * 2 /*XMA_OUTPUT_SAMPLE_BYTES*/) + { + wprintf(L"ERROR: XMA2 nBlockAlign (%u) != nChannels(%u) * 2\n", wfx->nBlockAlign, wfx->nChannels); + return false; + } + + if (wfx->wBitsPerSample != 16 /*XMA_OUTPUT_SAMPLE_BITS*/) + { + wprintf(L"ERROR: XMA2 wBitsPerSample (%u) should be 16\n", wfx->wBitsPerSample); + return false; + } + + if (wfx->cbSize != (sizeof(XMA2WAVEFORMATEX) - sizeof(WAVEFORMATEX))) + { + wprintf(L"ERROR: XMA2 cbSize must be %zu (%u)", (sizeof(XMA2WAVEFORMATEX) - sizeof(WAVEFORMATEX)), wfx->cbSize); + return false; + } + else + { + auto xmaFmt = reinterpret_cast(wfx); + + if (xmaFmt->EncoderVersion < 3) + { + wprintf(L"ERROR: XMA2 encoder version (%u) - 3 or higher is required", xmaFmt->EncoderVersion); + return false; + } + + if (!xmaFmt->BlockCount) + { + wprintf(L"ERROR: XMA2 BlockCount must be non-zero\n"); + return false; + } + + if (!xmaFmt->BytesPerBlock || (xmaFmt->BytesPerBlock > 8386560 /*XMA_READBUFFER_MAX_BYTES*/)) + { + wprintf(L"ERROR: XMA2 BytesPerBlock (%lu) is invalid\n", xmaFmt->BytesPerBlock); + return false; + } + + if (xmaFmt->ChannelMask) + { + auto channelBits = ChannelsSpecifiedInMask(xmaFmt->ChannelMask); + if (channelBits != wfx->nChannels) + { + wprintf(L"ERROR: XMA2 nChannels=%lu but ChannelMask (%08X) has %u bits set\n", + xmaFmt->ChannelMask, wfx->nChannels, channelBits); + return false; + } + } + + if (xmaFmt->NumStreams != ((wfx->nChannels + 1) / 2)) + { + wprintf(L"ERROR: XMA2 NumStreams (%u) != ( nChannels(%u) + 1 ) 
/ 2\n", xmaFmt->NumStreams, wfx->nChannels); + return false; + } + + if (!xmaFmt->SamplesEncoded) + { + wprintf(L"ERROR: XMA2 SamplesEncoded must be non-zero\n"); + return false; + } + + if ((xmaFmt->PlayBegin + xmaFmt->PlayLength) > xmaFmt->SamplesEncoded) + { + wprintf(L"ERROR: XMA2 play region too large (%lu + %lu > %lu)", xmaFmt->PlayBegin, xmaFmt->PlayLength, xmaFmt->SamplesEncoded); + return false; + } + + if ((xmaFmt->LoopBegin + xmaFmt->LoopLength) > xmaFmt->SamplesEncoded) + { + wprintf(L"ERROR: XMA2 loop region too large (%lu + %lu > %lu)", xmaFmt->LoopBegin, xmaFmt->LoopLength, xmaFmt->SamplesEncoded); + return false; + } + + miniFmt.wFormatTag = MINIWAVEFORMAT::TAG_XMA; + miniFmt.wBlockAlign = 2 * wfx->nChannels; + miniFmt.wBitsPerSample = MINIWAVEFORMAT::BITDEPTH_16; + } + return true; + + case WAVE_FORMAT_EXTENSIBLE: + if (wfx->cbSize < (sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX))) + { + wprintf(L"ERROR: WAVEFORMATEXTENSIBLE cbSize must be at least %zu (%u)", (sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX)), wfx->cbSize); + return false; + } + else + { + static const GUID s_wfexBase = { 0x00000000, 0x0000, 0x0010, { 0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71 } }; + + auto wfex = reinterpret_cast(wfx); + + if (memcmp(reinterpret_cast(&wfex->SubFormat) + sizeof(DWORD), + reinterpret_cast(&s_wfexBase) + sizeof(DWORD), sizeof(GUID) - sizeof(DWORD)) != 0) + { + wprintf(L"ERROR: WAVEFORMATEXTENSIBLE encountered with unknown GUID ({%8.8lX-%4.4X-%4.4X-%2.2X%2.2X-%2.2X%2.2X%2.2X%2.2X%2.2X%2.2X})\n", + wfex->SubFormat.Data1, wfex->SubFormat.Data2, wfex->SubFormat.Data3, + wfex->SubFormat.Data4[0], wfex->SubFormat.Data4[1], wfex->SubFormat.Data4[2], wfex->SubFormat.Data4[3], + wfex->SubFormat.Data4[4], wfex->SubFormat.Data4[5], wfex->SubFormat.Data4[6], wfex->SubFormat.Data4[7]); + return false; + } + + switch (wfex->SubFormat.Data1) + { + case WAVE_FORMAT_PCM: + if ((wfx->wBitsPerSample != 8) && (wfx->wBitsPerSample != 16)) + { + 
wprintf(L"ERROR: Wave banks only support 8-bit or 16-bit integer PCM data (%u)\n", wfx->wBitsPerSample); + return false; + } + + if (!wfex->Samples.wValidBitsPerSample) + { + wprintf(L"WARNING: Integer PCM WAVEFORMATEXTENSIBLE format should not have wValidBitsPerSample = 0\n"); + } + else if (((wfex->Samples.wValidBitsPerSample != 8) && (wfex->Samples.wValidBitsPerSample != 16)) + || (wfex->Samples.wValidBitsPerSample > wfx->wBitsPerSample)) + { + wprintf(L"ERROR: Unexpected wValidBitsPerSample value (%u)\n", wfex->Samples.wValidBitsPerSample); + return false; + } + + if (wfx->nBlockAlign > 255) + { + wprintf(L"ERROR: Wave banks only support block alignments up to 255 (%u)\n", wfx->nBlockAlign); + return false; + } + + if (wfx->nBlockAlign != (wfx->nChannels * wfx->wBitsPerSample / 8)) + { + wprintf(L"ERROR: nBlockAlign (%u) != nChannels (%u) * wBitsPerSample (%u) / 8\n", + wfx->nBlockAlign, wfx->nChannels, wfx->wBitsPerSample); + return false; + } + + if (wfx->nAvgBytesPerSec != (wfx->nSamplesPerSec * wfx->nBlockAlign)) + { + wprintf(L"ERROR: nAvgBytesPerSec (%lu) != nSamplesPerSec (%lu) * nBlockAlign (%u)\n", + wfx->nAvgBytesPerSec, wfx->nSamplesPerSec, wfx->nBlockAlign); + return false; + } + + miniFmt.wFormatTag = MINIWAVEFORMAT::TAG_PCM; + miniFmt.wBitsPerSample = (wfex->Samples.wValidBitsPerSample == 16) ? 
MINIWAVEFORMAT::BITDEPTH_16 : MINIWAVEFORMAT::BITDEPTH_8; + miniFmt.wBlockAlign = wfx->nBlockAlign; + break; + + case WAVE_FORMAT_IEEE_FLOAT: + wprintf(L"ERROR: Wave banks do not support float PCM data\n"); + return false; + + case WAVE_FORMAT_ADPCM: + wprintf(L"ERROR: ADPCM is not supported as a WAVEFORMATEXTENSIBLE\n"); + return false; + + case WAVE_FORMAT_WMAUDIO2: + case WAVE_FORMAT_WMAUDIO3: + if (!hasSeek) + { + wprintf(L"ERROR: xWMA requires seek tables (dpds chunk)\n"); + return false; + } + + if (wfx->wBitsPerSample != 16) + { + wprintf(L"ERROR: Wave banks only support 16-bit xWMA data\n"); + return false; + } + + if (!wfx->nBlockAlign) + { + wprintf(L"ERROR: Wvae bank xWMA must have a non-zero nBlockAlign\n"); + return false; + } + + if (!wfx->nAvgBytesPerSec) + { + wprintf(L"ERROR: Wave bank xWMA must have a non-zero nAvgBytesPerSec\n"); + return false; + } + + miniFmt.wFormatTag = MINIWAVEFORMAT::TAG_WMA; + miniFmt.wBitsPerSample = (wfx->wFormatTag == WAVE_FORMAT_WMAUDIO3) ? 
MINIWAVEFORMAT::BITDEPTH_16 : MINIWAVEFORMAT::BITDEPTH_8; + { + DWORD blockAlign = EncodeWMABlockAlign(wfx->nBlockAlign, wfx->nAvgBytesPerSec); + if (blockAlign == DWORD(-1)) + { + wprintf(L"ERROR: Failed encoding nBlockAlign and nAvgBytesPerSec for xWMA\n"); + return false; + } + miniFmt.wBlockAlign = blockAlign; + } + break; + + case WAVE_FORMAT_XMA2: + wprintf(L"ERROR: XMA2 is not supported as a WAVEFORMATEXTENSIBLE\n"); + return false; + + default: + wprintf(L"ERROR: Unknown WAVEFORMATEXTENSIBLE format tag\n"); + return false; + } + + if (wfex->dwChannelMask) + { + auto channelBits = ChannelsSpecifiedInMask(wfex->dwChannelMask); + if (channelBits != wfx->nChannels) + { + wprintf(L"ERROR: WAVEFORMATEXTENSIBLE: nChannels=%u but ChannelMask has %u bits set\n", + wfx->nChannels, channelBits); + return false; + } + else + { + wprintf(L"WARNING: WAVEFORMATEXTENSIBLE ChannelMask is ignored in wave banks\n"); + } + } + + return true; + } + + default: + return false; + } + } +} + + +////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// + +enum OPTIONS +{ + OPT_RECURSIVE = 1, + OPT_STREAMING, + OPT_OUTPUTFILE, + OPT_OUTPUTHEADER, + OPT_TOLOWER, + OPT_OVERWRITE, + OPT_COMPACT, + OPT_NOCOMPACT, + OPT_FRIENDLY_NAMES, + OPT_NOLOGO, + OPT_FILELIST, + OPT_MAX +}; + +static_assert(OPT_MAX <= 32, "dwOptions is a DWORD bitfield"); + +struct SConversion +{ + wchar_t szSrc[MAX_PATH]; +}; + +struct SValue +{ + LPCWSTR pName; + DWORD dwValue; +}; + +struct WaveFile +{ + DirectX::WAVData data; + size_t conv; + MINIWAVEFORMAT miniFmt; + std::unique_ptr waveData; + + WaveFile() noexcept : + data{}, + conv(0), + miniFmt{} + {} + + WaveFile(WaveFile&) = delete; + WaveFile& operator= (WaveFile&) = delete; + + WaveFile(WaveFile&&) = default; + WaveFile& operator= (WaveFile&&) = default; +}; + +namespace +{ + 
void FileNameToIdentifier(_Inout_updates_all_(count) wchar_t* str, size_t count) + { + size_t j = 0; + for (wchar_t* c = str; j < count && *c != 0; ++c, ++j) + { + wchar_t t = towupper(*c); + if (!iswdigit(t) && !iswalpha(t)) + t = '_'; + *c = t; + } + } +} + +////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// + +const SValue g_pOptions[] = +{ + { L"r", OPT_RECURSIVE }, + { L"s", OPT_STREAMING }, + { L"o", OPT_OUTPUTFILE }, + { L"l", OPT_TOLOWER }, + { L"h", OPT_OUTPUTHEADER }, + { L"y", OPT_OVERWRITE }, + { L"c", OPT_COMPACT }, + { L"nc", OPT_NOCOMPACT }, + { L"f", OPT_FRIENDLY_NAMES }, + { L"nologo", OPT_NOLOGO }, + { L"flist", OPT_FILELIST }, + { nullptr, 0 } +}; + +////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// + +namespace +{ +#ifdef _PREFAST_ +#pragma prefast(disable : 26018, "Only used with static internal arrays") +#endif + + DWORD LookupByName(const wchar_t *pName, const SValue *pArray) + { + while (pArray->pName) + { + if (!_wcsicmp(pName, pArray->pName)) + return pArray->dwValue; + + pArray++; + } + + return 0; + } + + void SearchForFiles(const wchar_t* path, std::list& files, bool recursive) + { + // Process files + WIN32_FIND_DATAW findData = {}; + ScopedFindHandle hFile(safe_handle(FindFirstFileExW(path, + FindExInfoBasic, &findData, + FindExSearchNameMatch, nullptr, + FIND_FIRST_EX_LARGE_FETCH))); + if (hFile) + { + for (;;) + { + if (!(findData.dwFileAttributes & (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM | FILE_ATTRIBUTE_DIRECTORY))) + { + wchar_t drive[_MAX_DRIVE] = {}; + wchar_t dir[_MAX_DIR] = {}; + _wsplitpath_s(path, drive, _MAX_DRIVE, dir, _MAX_DIR, nullptr, 0, 
nullptr, 0); + + SConversion conv; + _wmakepath_s(conv.szSrc, drive, dir, findData.cFileName, nullptr); + files.push_back(conv); + } + + if (!FindNextFileW(hFile.get(), &findData)) + break; + } + } + + // Process directories + if (recursive) + { + wchar_t searchDir[MAX_PATH] = {}; + { + wchar_t drive[_MAX_DRIVE] = {}; + wchar_t dir[_MAX_DIR] = {}; + _wsplitpath_s(path, drive, _MAX_DRIVE, dir, _MAX_DIR, nullptr, 0, nullptr, 0); + _wmakepath_s(searchDir, drive, dir, L"*", nullptr); + } + + hFile.reset(safe_handle(FindFirstFileExW(searchDir, + FindExInfoBasic, &findData, + FindExSearchLimitToDirectories, nullptr, + FIND_FIRST_EX_LARGE_FETCH))); + if (!hFile) + return; + + for (;;) + { + if (findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + { + if (findData.cFileName[0] != L'.') + { + wchar_t subdir[MAX_PATH] = {}; + + { + wchar_t drive[_MAX_DRIVE] = {}; + wchar_t dir[_MAX_DIR] = {}; + wchar_t fname[_MAX_FNAME] = {}; + wchar_t ext[_MAX_FNAME] = {}; + _wsplitpath_s(path, drive, dir, fname, ext); + wcscat_s(dir, findData.cFileName); + _wmakepath_s(subdir, drive, dir, fname, ext); + } + + SearchForFiles(subdir, files, recursive); + } + } + + if (!FindNextFileW(hFile.get(), &findData)) + break; + } + } + } + + void PrintLogo() + { + wchar_t version[32] = {}; + + wchar_t appName[_MAX_PATH] = {}; + if (GetModuleFileNameW(nullptr, appName, _countof(appName))) + { + DWORD size = GetFileVersionInfoSizeW(appName, nullptr); + if (size > 0) + { + auto verInfo = std::make_unique(size); + if (GetFileVersionInfoW(appName, 0, size, verInfo.get())) + { + LPVOID lpstr = nullptr; + UINT strLen = 0; + if (VerQueryValueW(verInfo.get(), L"\\StringFileInfo\\040904B0\\ProductVersion", &lpstr, &strLen)) + { + wcsncpy_s(version, reinterpret_cast(lpstr), strLen); + } + } + } + } + + if (!*version) + { + wcscpy_s(version, L"MISSING"); + } + + wprintf(L"Microsoft (R) XACT-style Wave Bank Tool [DirectXTK] Version %ls\n", version); + wprintf(L"Copyright (C) Microsoft Corp. 
All rights reserved.\n"); +#ifdef _DEBUG + wprintf(L"*** Debug build ***\n"); +#endif + wprintf(L"\n"); + } + + void PrintUsage() + { + PrintLogo(); + + wprintf(L"Usage: xwbtool \n"); + wprintf(L"\n"); + wprintf(L" -r wildcard filename search is recursive\n"); + wprintf(L" -s creates a streaming wave bank,\n"); + wprintf(L" otherwise an in-memory bank is created\n"); + wprintf(L" -o output filename\n"); + wprintf(L" -h output C/C++ header\n"); + wprintf(L" -l force output filename to lower case\n"); + wprintf(L" -y overwrite existing output file (if any)\n"); + wprintf(L" -c force creation of compact wavebank\n"); + wprintf(L" -nc force creation of non-compact wavebank\n"); + wprintf(L" -f include entry friendly names\n"); + wprintf(L" -nologo suppress copyright message\n"); + wprintf(L" -flist use text file with a list of input files (one per line)\n"); + } + + const char* GetFormatTagName(WORD wFormatTag) + { + switch (wFormatTag) + { + case WAVE_FORMAT_PCM: return "PCM"; + case WAVE_FORMAT_ADPCM: return "MS ADPCM"; + case WAVE_FORMAT_EXTENSIBLE: return "EXTENSIBLE"; + case WAVE_FORMAT_IEEE_FLOAT: return "IEEE float"; + case WAVE_FORMAT_MPEGLAYER3: return "ISO/MPEG Layer3"; + case WAVE_FORMAT_DOLBY_AC3_SPDIF: return "Dolby Audio Codec 3 over S/PDIF"; + case WAVE_FORMAT_WMAUDIO2: return "Windows Media Audio"; + case WAVE_FORMAT_WMAUDIO3: return "Windows Media Audio Pro"; + case WAVE_FORMAT_WMASPDIF: return "Windows Media Audio over S/PDIF"; + case 0x165: /*WAVE_FORMAT_XMA*/ return "Xbox XMA"; + case 0x166: /*WAVE_FORMAT_XMA2*/ return "Xbox XMA2"; + default: return "*UNKNOWN*"; + } + } + + const char *ChannelDesc(DWORD dwChannelMask) + { + switch (dwChannelMask) + { + case 0x00000004 /*SPEAKER_MONO*/: return "Mono"; + case 0x00000003 /* SPEAKER_STEREO */: return "Stereo"; + case 0x0000000B /* SPEAKER_2POINT1 */: return "2.1"; + case 0x00000107 /* SPEAKER_SURROUND */: return "Surround"; + case 0x00000033 /* SPEAKER_QUAD */: return "Quad"; + case 0x0000003B /* 
SPEAKER_4POINT1 */: return "4.1"; + case 0x0000003F /* SPEAKER_5POINT1 */: return "5.1"; + case 0x000000FF /* SPEAKER_7POINT1 */: return "7.1"; + case 0x0000060F /* SPEAKER_5POINT1_SURROUND */: return "Surround5.1"; + case 0x0000063F /* SPEAKER_7POINT1_SURROUND */: return "Surround7.1"; + default: return "Custom"; + } + } + + void PrintInfo(const WaveFile& wave) + { + if (wave.data.wfx->wFormatTag == WAVE_FORMAT_EXTENSIBLE + && (wave.data.wfx->cbSize >= (sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX)))) + { + auto wext = reinterpret_cast(&wave.data.wfx); + + wprintf(L" (%hs %u channels, %u-bit, %lu Hz, CMask:%hs)", GetFormatTagName(wave.data.wfx->wFormatTag), wave.data.wfx->nChannels, wave.data.wfx->wBitsPerSample, wave.data.wfx->nSamplesPerSec, ChannelDesc(wext->dwChannelMask)); + } + else + { + wprintf(L" (%hs %u channels, %u-bit, %lu Hz)", GetFormatTagName(wave.data.wfx->wFormatTag), wave.data.wfx->nChannels, wave.data.wfx->wBitsPerSample, wave.data.wfx->nSamplesPerSec); + } + } +} + +////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// + +//-------------------------------------------------------------------------------------- +// Entry-point +//-------------------------------------------------------------------------------------- +#ifdef _PREFAST_ +#pragma prefast(disable : 28198, "Command-line tool, frees all memory on exit") +#endif + +int __cdecl wmain(_In_ int argc, _In_z_count_(argc) wchar_t* argv[]) +{ + // Parameters and defaults + wchar_t szOutputFile[MAX_PATH] = {}; + wchar_t szHeaderFile[MAX_PATH] = {}; + + ScopedHandle hFile; + + // Process command line + DWORD dwOptions = 0; + std::list conversion; + + for (int iArg = 1; iArg < argc; iArg++) + { + PWSTR pArg = argv[iArg]; + + if (('-' == pArg[0]) || ('/' == pArg[0])) + { + pArg++; + PWSTR pValue; + + for (pValue 
= pArg; *pValue && (':' != *pValue); pValue++); + + if (*pValue) + *pValue++ = 0; + + DWORD dwOption = LookupByName(pArg, g_pOptions); + + if (!dwOption || (dwOptions & (1 << dwOption))) + { + PrintUsage(); + return 1; + } + + dwOptions |= 1 << dwOption; + + // Handle options with additional value parameter + switch (dwOption) + { + case OPT_OUTPUTFILE: + case OPT_OUTPUTHEADER: + case OPT_FILELIST: + if (!*pValue) + { + if ((iArg + 1 >= argc)) + { + PrintUsage(); + return 1; + } + + iArg++; + pValue = argv[iArg]; + } + break; + } + + switch (dwOption) + { + case OPT_OUTPUTFILE: + wcscpy_s(szOutputFile, MAX_PATH, pValue); + break; + + case OPT_OUTPUTHEADER: + wcscpy_s(szHeaderFile, MAX_PATH, pValue); + break; + + case OPT_COMPACT: + if (dwOptions & (1 << OPT_NOCOMPACT)) + { + wprintf(L"-c and -nc are mutually exclusive options\n"); + return 1; + } + break; + + case OPT_NOCOMPACT: + if (dwOptions & (1 << OPT_COMPACT)) + { + wprintf(L"-c and -nc are mutually exclusive options\n"); + return 1; + } + break; + + case OPT_FILELIST: + { + std::wifstream inFile(pValue); + if (!inFile) + { + wprintf(L"Error opening -flist file %ls\n", pValue); + return 1; + } + wchar_t fname[1024] = {}; + for (;;) + { + inFile >> fname; + if (!inFile) + break; + + if (*fname == L'#') + { + // Comment + } + else if (*fname == L'-') + { + wprintf(L"Command-line arguments not supported in -flist file\n"); + return 1; + } + else if (wcspbrk(fname, L"?*") != nullptr) + { + wprintf(L"Wildcards not supported in -flist file\n"); + return 1; + } + else + { + SConversion conv; + wcscpy_s(conv.szSrc, MAX_PATH, fname); + conversion.push_back(conv); + } + + inFile.ignore(1000, '\n'); + } + inFile.close(); + } + break; + } + } + else if (wcspbrk(pArg, L"?*") != nullptr) + { + size_t count = conversion.size(); + SearchForFiles(pArg, conversion, (dwOptions & (1 << OPT_RECURSIVE)) != 0); + if (conversion.size() <= count) + { + wprintf(L"No matching files found for %ls\n", pArg); + return 1; + } + } + else + 
{ + SConversion conv; + wcscpy_s(conv.szSrc, MAX_PATH, pArg); + + conversion.push_back(conv); + } + } + + if (conversion.empty()) + { + wprintf(L"ERROR: Need at least 1 wave file to build wave bank\n\n"); + PrintUsage(); + return 0; + } + + if (~dwOptions & (1 << OPT_NOLOGO)) + PrintLogo(); + + // Determine output file name + if (!*szOutputFile) + { + auto pConv = conversion.begin(); + + wchar_t ext[_MAX_EXT]; + wchar_t fname[_MAX_FNAME]; + _wsplitpath_s(pConv->szSrc, nullptr, 0, nullptr, 0, fname, _MAX_FNAME, ext, _MAX_EXT); + + if (_wcsicmp(ext, L".xwb") == 0) + { + wprintf(L"ERROR: Need to specify output file via -o\n"); + return 1; + } + + _wmakepath_s(szOutputFile, nullptr, nullptr, fname, L".xwb"); + } + + if (dwOptions & (1 << OPT_TOLOWER)) + { + (void)_wcslwr_s(szOutputFile); + + if (*szHeaderFile) + { + (void)_wcslwr_s(szHeaderFile); + } + } + + if (~dwOptions & (1 << OPT_OVERWRITE)) + { + if (GetFileAttributesW(szOutputFile) != INVALID_FILE_ATTRIBUTES) + { + wprintf(L"ERROR: Output file %ls already exists, use -y to overwrite!\n", szOutputFile); + return 1; + } + + if (*szHeaderFile) + { + if (GetFileAttributesW(szHeaderFile) != INVALID_FILE_ATTRIBUTES) + { + wprintf(L"ERROR: Output header file %ls already exists!\n", szHeaderFile); + return 1; + } + } + } + + // Gather wave files + std::unique_ptr entries; + std::unique_ptr entryNames; + std::vector waves; + MINIWAVEFORMAT compactFormat = {}; + + bool xma = false; + + size_t index = 0; + for (auto pConv = conversion.begin(); pConv != conversion.end(); ++pConv, ++index) + { + wchar_t ext[_MAX_EXT]; + wchar_t fname[_MAX_FNAME]; + _wsplitpath_s(pConv->szSrc, nullptr, 0, nullptr, 0, fname, _MAX_FNAME, ext, _MAX_EXT); + + // Load source image + if (pConv != conversion.begin()) + wprintf(L"\n"); + + wprintf(L"reading %ls", pConv->szSrc); + fflush(stdout); + + WaveFile wave; + wave.conv = index; + std::unique_ptr waveData; + + HRESULT hr = DirectX::LoadWAVAudioFromFileEx(pConv->szSrc, waveData, wave.data); + if 
(FAILED(hr)) + { + wprintf(L"\nERROR: Failed to load file (%08X)\n", static_cast(hr)); + return 1; + } + + wave.waveData = std::move(waveData); + + PrintInfo(wave); + + if (wave.data.wfx->wFormatTag == WAVE_FORMAT_XMA2) + xma = true; + + waves.emplace_back(std::move(wave)); + } + + wprintf(L"\n"); + + DWORD dwAlignment = ALIGNMENT_MIN; + if (dwOptions & (1 << OPT_STREAMING)) + dwAlignment = ALIGNMENT_DVD; + else if (xma) + dwAlignment = 2048; + + // Convert wave format to miniformat, failing if any won't map + // Check to see if we can use the compact wave bank format + bool compact = (dwOptions & (1 << OPT_NOCOMPACT)) ? false : true; + int reason = 0; + uint64_t waveOffset = 0; + + for (auto it = waves.begin(); it != waves.end(); ++it) + { + if (!ConvertToMiniFormat(it->data.wfx, it->data.seek != nullptr, it->miniFmt)) + { + auto cit = conversion.cbegin(); + advance(cit, it->conv); + wprintf(L"ERROR: Failed encoding %ls\n", cit->szSrc); + return 1; + } + + if (it == waves.begin()) + { + memcpy(&compactFormat, &it->miniFmt, sizeof(MINIWAVEFORMAT)); + } + else if (memcmp(&compactFormat, &it->miniFmt, sizeof(MINIWAVEFORMAT)) != 0) + { + compact = false; + reason |= 0x1; + } + + if (it->data.loopLength > 0) + { + compact = false; + reason |= 0x2; + } + + DWORD alignedSize = BLOCKALIGNPAD(it->data.audioBytes, dwAlignment); + waveOffset += alignedSize; + } + + if (waveOffset > UINT32_MAX) + { + wprintf(L"ERROR: Audio wave data is too large to encode into wavebank (offset %llu)", waveOffset); + return 1; + } + else if (waveOffset > (MAX_COMPACT_DATA_SEGMENT_SIZE * uint64_t(dwAlignment))) + { + compact = false; + reason |= 0x4; + } + + if ((dwOptions & (1 << OPT_COMPACT)) && !compact) + { + wprintf(L"ERROR: Cannot create compact wave bank:\n"); + if (reason & 0x1) + { + wprintf(L"- Mismatched formats. All formats must be identical for a compact wavebank.\n"); + } + if (reason & 0x2) + { + wprintf(L"- Found loop points. 
Compact wavebanks do not support loop points.\n"); + } + if (reason & 0x4) + { + wprintf(L"- Audio wave data is too large to encode in compact wavebank (%llu > %llu).\n", waveOffset, (uint64_t(MAX_COMPACT_DATA_SEGMENT_SIZE) * uint64_t(dwAlignment))); + } + return 1; + } + + // Build entry metadata (and assign wave offset within data segment) + // Build entry friendly names if requested + entries.reset(new uint8_t[(compact ? sizeof(ENTRYCOMPACT) : sizeof(ENTRY)) * waves.size()]); + + if (dwOptions & (1 << OPT_FRIENDLY_NAMES)) + { + entryNames.reset(new char[waves.size() * ENTRYNAME_LENGTH]); + memset(entryNames.get(), 0, sizeof(char) * waves.size() * ENTRYNAME_LENGTH); + } + + waveOffset = 0; + size_t count = 0; + size_t seekEntries = 0; + for (auto it = waves.begin(); it != waves.end(); ++it, ++count) + { + DWORD alignedSize = BLOCKALIGNPAD(it->data.audioBytes, dwAlignment); + + auto wfx = it->data.wfx; + + uint64_t duration = 0; + + switch (it->miniFmt.wFormatTag) + { + case MINIWAVEFORMAT::TAG_XMA: + if (it->data.seekCount > 0) + seekEntries += size_t(it->data.seekCount) + 1u; + + duration = reinterpret_cast(wfx)->SamplesEncoded; + break; + + case MINIWAVEFORMAT::TAG_ADPCM: + { + auto adpcmFmt = reinterpret_cast(wfx); + duration = (uint64_t(it->data.audioBytes) / uint64_t(wfx->nBlockAlign)) * uint64_t(adpcmFmt->wSamplesPerBlock); + int partial = it->data.audioBytes % wfx->nBlockAlign; + if (partial) + { + if (partial >= (7 * wfx->nChannels)) + duration += (uint64_t(partial) * 2 / uint64_t(wfx->nChannels - 12)); + } + } + break; + + case MINIWAVEFORMAT::TAG_WMA: + if (it->data.seekCount > 0) + { + seekEntries += size_t(it->data.seekCount) + 1u; + duration = it->data.seek[it->data.seekCount - 1] / uint32_t(2 * wfx->nChannels); + } + break; + + default: // MINIWAVEFORMAT::TAG_PCM + duration = (uint64_t(it->data.audioBytes) * 8) / (uint64_t(wfx->wBitsPerSample) * uint64_t(wfx->nChannels)); + break; + } + + if (compact) + { + auto entry = 
reinterpret_cast(entries.get() + count * sizeof(ENTRYCOMPACT)); + memset(entry, 0, sizeof(ENTRYCOMPACT)); + + assert(waveOffset <= (MAX_COMPACT_DATA_SEGMENT_SIZE * uint64_t(dwAlignment))); + entry->dwOffset = uint32_t(waveOffset / dwAlignment); + + assert(dwAlignment <= 2048); + entry->dwLengthDeviation = alignedSize - it->data.audioBytes; + } + else + { + auto entry = reinterpret_cast(entries.get() + count * sizeof(ENTRY)); + memset(entry, 0, sizeof(ENTRY)); + + if (duration > 268435455) + { + wprintf(L"ERROR: Duration of audio too long to encode into wavebank (%llu > 2^28))\n", duration); + return 1; + } + + entry->Duration = uint32_t(duration); + memcpy(&entry->Format, &it->miniFmt, sizeof(MINIWAVEFORMAT)); + entry->PlayRegion.dwOffset = uint32_t(waveOffset); + entry->PlayRegion.dwLength = it->data.audioBytes; + + if (it->data.loopLength > 0) + { + entry->LoopRegion.dwStartSample = it->data.loopStart; + entry->LoopRegion.dwTotalSamples = it->data.loopLength; + } + } + + if (dwOptions & (1 << OPT_FRIENDLY_NAMES)) + { + auto cit = conversion.cbegin(); + advance(cit, it->conv); + + wchar_t wEntryName[_MAX_FNAME]; + _wsplitpath_s(cit->szSrc, nullptr, 0, nullptr, 0, wEntryName, _MAX_FNAME, nullptr, 0); + + int result = WideCharToMultiByte(CP_UTF8, WC_NO_BEST_FIT_CHARS, wEntryName, -1, &entryNames[count * ENTRYNAME_LENGTH], ENTRYNAME_LENGTH, nullptr, nullptr); + if (result <= 0) + { + memset(&entryNames[count * ENTRYNAME_LENGTH], 0, ENTRYNAME_LENGTH); + } + } + + waveOffset += alignedSize; + } + + assert(count > 0 && count == waves.size()); + + // Create wave bank + assert(*szOutputFile != 0); + + wprintf(L"writing %ls%ls wavebank %ls w/ %zu entries\n", (compact) ? L"compact " : L"", (dwOptions & (1 << OPT_STREAMING)) ? 
L"streaming" : L"in-memory", szOutputFile, waves.size()); + fflush(stdout); + + hFile.reset(safe_handle(CreateFileW(szOutputFile, GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr))); + if (!hFile) + { + wprintf(L"ERROR: Failed opening output file %ls, %lu\n", szOutputFile, GetLastError()); + return 1; + } + + // Setup wave bank header + HEADER header = {}; + header.dwSignature = HEADER::SIGNATURE; + header.dwHeaderVersion = HEADER::VERSION; + header.dwVersion = XACT_CONTENT_VERSION; + + DWORD segmentOffset = sizeof(HEADER); + + // Write bank metadata + assert((segmentOffset % 4) == 0); + + BANKDATA data = {}; + + data.dwEntryCount = uint32_t(waves.size()); + data.dwAlignment = dwAlignment; + + GetSystemTimeAsFileTime(&data.BuildTime); + + data.dwFlags = (dwOptions & (1 << OPT_STREAMING)) ? BANKDATA::TYPE_STREAMING : BANKDATA::TYPE_BUFFER; + + if (seekEntries > 0) + { + data.dwFlags |= BANKDATA::FLAGS_SEEKTABLES; + } + + if (dwOptions & (1 << OPT_FRIENDLY_NAMES)) + { + data.dwFlags |= BANKDATA::FLAGS_ENTRYNAMES; + data.dwEntryNameElementSize = ENTRYNAME_LENGTH; + } + + if (compact) + { + data.dwFlags |= BANKDATA::FLAGS_COMPACT; + data.dwEntryMetaDataElementSize = sizeof(ENTRYCOMPACT); + memcpy(&data.CompactFormat, &compactFormat, sizeof(MINIWAVEFORMAT)); + } + else + { + data.dwEntryMetaDataElementSize = sizeof(ENTRY); + } + + { + wchar_t wBankName[_MAX_FNAME]; + _wsplitpath_s(szOutputFile, nullptr, 0, nullptr, 0, wBankName, _MAX_FNAME, nullptr, 0); + + int result = WideCharToMultiByte(CP_UTF8, WC_NO_BEST_FIT_CHARS, wBankName, -1, data.szBankName, BANKDATA::BANKNAME_LENGTH, nullptr, nullptr); + if (result <= 0) + { + memset(data.szBankName, 0, BANKDATA::BANKNAME_LENGTH); + } + } + + if (SetFilePointer(hFile.get(), LONG(segmentOffset), nullptr, FILE_BEGIN) == INVALID_SET_FILE_POINTER) + { + wprintf(L"ERROR: Failed writing bank data to %ls, SFP %lu\n", szOutputFile, GetLastError()); + return 1; + } + + DWORD bytesWritten; + if 
(!WriteFile(hFile.get(), &data, sizeof(data), &bytesWritten, nullptr) + || bytesWritten != sizeof(data)) + { + wprintf(L"ERROR: Failed writing bank data to %ls, %lu\n", szOutputFile, GetLastError()); + return 1; + } + + header.Segments[HEADER::SEGIDX_BANKDATA].dwOffset = segmentOffset; + header.Segments[HEADER::SEGIDX_BANKDATA].dwLength = sizeof(BANKDATA); + segmentOffset += sizeof(BANKDATA); + + // Write entry metadata + assert((segmentOffset % 4) == 0); + + if (SetFilePointer(hFile.get(), LONG(segmentOffset), nullptr, FILE_BEGIN) == INVALID_SET_FILE_POINTER) + { + wprintf(L"ERROR: Failed writing entry metadata to %ls, SFP %lu\n", szOutputFile, GetLastError()); + return 1; + } + + uint32_t entryBytes = uint32_t(waves.size() * data.dwEntryMetaDataElementSize); + if (!WriteFile(hFile.get(), entries.get(), entryBytes, &bytesWritten, nullptr) + || bytesWritten != entryBytes) + { + wprintf(L"ERROR: Failed writing entry metadata to %ls, %lu\n", szOutputFile, GetLastError()); + return 1; + } + + header.Segments[HEADER::SEGIDX_ENTRYMETADATA].dwOffset = segmentOffset; + header.Segments[HEADER::SEGIDX_ENTRYMETADATA].dwLength = entryBytes; + segmentOffset += entryBytes; + + // Write seek tables + assert((segmentOffset % 4) == 0); + + header.Segments[HEADER::SEGIDX_SEEKTABLES].dwOffset = segmentOffset; + + if (seekEntries > 0) + { + seekEntries += waves.size(); // Room for an offset per entry + + auto seekTables = std::make_unique(seekEntries); + + if (SetFilePointer(hFile.get(), LONG(segmentOffset), nullptr, FILE_BEGIN) == INVALID_SET_FILE_POINTER) + { + wprintf(L"ERROR: Failed writing seek tables to %ls, SFP %lu\n", szOutputFile, GetLastError()); + return 1; + } + + uint32_t seekoffset = 0; + uint32_t windex = 0; + for (auto it = waves.begin(); it != waves.end(); ++it, ++windex) + { + if (it->miniFmt.wFormatTag == MINIWAVEFORMAT::TAG_WMA) + { + seekTables[windex] = seekoffset * sizeof(uint32_t); + + uint32_t baseoffset = uint32_t(waves.size() + seekoffset); + 
seekTables[baseoffset] = it->data.seekCount; + + for (uint32_t j = 0; j < it->data.seekCount; ++j) + { + seekTables[size_t(baseoffset) + size_t(j) + 1u] = it->data.seek[j]; + } + + seekoffset += size_t(it->data.seekCount) + 1u; + } + else if (it->miniFmt.wFormatTag == MINIWAVEFORMAT::TAG_XMA) + { + seekTables[windex] = seekoffset * sizeof(uint32_t); + + uint32_t baseoffset = uint32_t(waves.size() + seekoffset); + seekTables[baseoffset] = it->data.seekCount; + + for (uint32_t j = 0; j < it->data.seekCount; ++j) + { + seekTables[size_t(baseoffset) + size_t(j) + 1u] = _byteswap_ulong(it->data.seek[j]); + } + + seekoffset += it->data.seekCount + 1; + } + else + { + seekTables[windex] = uint32_t(-1); + } + } + + uint32_t seekLen = uint32_t(sizeof(uint32_t) * seekEntries); + + if (!WriteFile(hFile.get(), seekTables.get(), seekLen, &bytesWritten, nullptr) + || bytesWritten != seekLen) + { + wprintf(L"ERROR: Failed writing seek tables to %ls, %lu\n", szOutputFile, GetLastError()); + return 1; + } + + segmentOffset += seekLen; + + header.Segments[HEADER::SEGIDX_SEEKTABLES].dwLength = seekLen; + } + else + { + header.Segments[HEADER::SEGIDX_SEEKTABLES].dwLength = 0; + } + + // Write entry names + if (dwOptions & (1 << OPT_FRIENDLY_NAMES)) + { + assert((segmentOffset % 4) == 0); + + if (SetFilePointer(hFile.get(), LONG(segmentOffset), nullptr, FILE_BEGIN) == INVALID_SET_FILE_POINTER) + { + wprintf(L"ERROR: Failed writing friendly entry names to %ls, SFP %lu\n", szOutputFile, GetLastError()); + return 1; + } + + uint32_t entryNamesBytes = uint32_t(count * data.dwEntryNameElementSize); + if (!WriteFile(hFile.get(), entryNames.get(), entryNamesBytes, &bytesWritten, nullptr) + || bytesWritten != entryNamesBytes) + { + wprintf(L"ERROR: Failed writing friendly entry names to %ls, %lu\n", szOutputFile, GetLastError()); + return 1; + } + + header.Segments[HEADER::SEGIDX_ENTRYNAMES].dwOffset = segmentOffset; + header.Segments[HEADER::SEGIDX_ENTRYNAMES].dwLength = entryNamesBytes; + 
segmentOffset += entryNamesBytes; + } + + // Write wave data + segmentOffset = BLOCKALIGNPAD(segmentOffset, dwAlignment); + + header.Segments[HEADER::SEGIDX_ENTRYWAVEDATA].dwOffset = segmentOffset; + header.Segments[HEADER::SEGIDX_ENTRYWAVEDATA].dwLength = uint32_t(waveOffset); + + for (auto it = waves.begin(); it != waves.end(); ++it) + { + if (SetFilePointer(hFile.get(), LONG(segmentOffset), nullptr, FILE_BEGIN) == INVALID_SET_FILE_POINTER) + { + wprintf(L"ERROR: Failed writing audio data to %ls, SFP %lu\n", szOutputFile, GetLastError()); + return 1; + } + + if (!WriteFile(hFile.get(), it->data.startAudio, it->data.audioBytes, &bytesWritten, nullptr) + || bytesWritten != it->data.audioBytes) + { + wprintf(L"ERROR: Failed writing audio data to %ls, %lu\n", szOutputFile, GetLastError()); + return 1; + } + + DWORD alignedSize = BLOCKALIGNPAD(it->data.audioBytes, dwAlignment); + + if ((uint64_t(segmentOffset) + alignedSize) > UINT32_MAX) + { + wprintf(L"ERROR: Data exceeds maximum size for wavebank\n"); + return 1; + } + + segmentOffset += alignedSize; + } + + assert(segmentOffset == (header.Segments[HEADER::SEGIDX_ENTRYWAVEDATA].dwOffset + waveOffset)); + + // Commit wave bank + if (SetFilePointer(hFile.get(), LONG(segmentOffset), nullptr, FILE_BEGIN) == INVALID_SET_FILE_POINTER) + { + wprintf(L"ERROR: Failed committing output file %ls, EOF %lu\n", szOutputFile, GetLastError()); + return 1; + } + + if (!SetEndOfFile(hFile.get())) + { + wprintf(L"ERROR: Failed committing output file %ls, EOF %lu\n", szOutputFile, GetLastError()); + return 1; + } + + if (SetFilePointer(hFile.get(), 0, nullptr, FILE_BEGIN) == INVALID_SET_FILE_POINTER) + { + wprintf(L"ERROR: Failed committing output file %ls, HDR %lu\n", szOutputFile, GetLastError()); + return 1; + } + + if (!WriteFile(hFile.get(), &header, sizeof(header), &bytesWritten, nullptr) + || bytesWritten != sizeof(header)) + { + wprintf(L"ERROR: Failed committing output file %ls, HDR %lu\n", szOutputFile, GetLastError()); + 
return 1; + } + + // Write C header if requested + if (*szHeaderFile) + { + wprintf(L"writing C header %ls\n", szHeaderFile); + fflush(stdout); + + FILE* file = nullptr; + if (!_wfopen_s(&file, szHeaderFile, L"wt")) + { + wchar_t wBankName[_MAX_FNAME]; + _wsplitpath_s(szOutputFile, nullptr, 0, nullptr, 0, wBankName, _MAX_FNAME, nullptr, 0); + + FileNameToIdentifier(wBankName, _MAX_FNAME); + + fprintf_s(file, "#pragma once\n\nenum XACT_WAVEBANK_%ls : unsigned int\n{\n", wBankName); + + size_t windex = 0; + for (auto it = waves.begin(); it != waves.end(); ++it, ++windex) + { + auto cit = conversion.cbegin(); + advance(cit, it->conv); + + wchar_t wEntryName[_MAX_FNAME]; + _wsplitpath_s(cit->szSrc, nullptr, 0, nullptr, 0, wEntryName, _MAX_FNAME, nullptr, 0); + + FileNameToIdentifier(wEntryName, _MAX_FNAME); + + fprintf_s(file, " XACT_WAVEBANK_%ls_%ls = %zu,\n", wBankName, wEntryName, windex); + } + + fprintf_s(file, "};\n\n#define XACT_WAVEBANK_%ls_ENTRY_COUNT %zu\n", wBankName, count); + + fclose(file); + } + else + { + wprintf(L"ERROR: Failed writing wave bank C header %ls\n", szHeaderFile); + return 1; + } + } + + return 0; +} diff --git a/Sdk/External/DirectXTK/XWBTool/xwbtool.rc b/Sdk/External/DirectXTK/XWBTool/xwbtool.rc new file mode 100644 index 0000000..129ae43 --- /dev/null +++ b/Sdk/External/DirectXTK/XWBTool/xwbtool.rc @@ -0,0 +1,115 @@ +// Microsoft Visual C++ generated resource script. +// + +#define APSTUDIO_READONLY_SYMBOLS +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 2 resource. 
+// +#define IDC_STATIC -1 +#include + + + +///////////////////////////////////////////////////////////////////////////// +#undef APSTUDIO_READONLY_SYMBOLS + +///////////////////////////////////////////////////////////////////////////// +// English (United States) resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US +#pragma code_page(1252) + +///////////////////////////////////////////////////////////////////////////// +// +// Icon +// + +// Icon with lowest ID value placed first to ensure application icon +// remains consistent on all systems. +IDI_MAIN_ICON ICON "directx.ico" + + +#ifdef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// TEXTINCLUDE +// + +1 TEXTINCLUDE +BEGIN + "resource.h\0" +END + +2 TEXTINCLUDE +BEGIN + "#define IDC_STATIC -1\r\n" + "#include \r\n" + "\r\n" + "\r\n" + "\0" +END + +3 TEXTINCLUDE +BEGIN + "\r\n" + "\0" +END + +#endif // APSTUDIO_INVOKED + + +///////////////////////////////////////////////////////////////////////////// +// +// Version +// + +VS_VERSION_INFO VERSIONINFO + FILEVERSION 1,0,0,0 + PRODUCTVERSION 1,0,0,0 + FILEFLAGSMASK 0x3fL +#ifdef _DEBUG + FILEFLAGS 0x1L +#else + FILEFLAGS 0x0L +#endif + FILEOS 0x40004L + FILETYPE 0x1L + FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904b0" + BEGIN + VALUE "CompanyName", "Microsoft Corp" + VALUE "FileDescription", "XACT-style wave bank builder command-line tool" + VALUE "FileVersion", "1.0.0.0" + VALUE "InternalName", "xwbtool.exe" + VALUE "LegalCopyright", "Copyright (c) Microsoft Corp." 
+ VALUE "OriginalFilename", "xwbtool.exe" + VALUE "ProductName", "DirectX Tool Kit" + VALUE "ProductVersion", "1.0.0.0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END + +#endif // English (United States) resources +///////////////////////////////////////////////////////////////////////////// + + + +#ifndef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 3 resource. +// + + +///////////////////////////////////////////////////////////////////////////// +#endif // not APSTUDIO_INVOKED + diff --git a/Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2017.vcxproj b/Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2017.vcxproj new file mode 100644 index 0000000..f4d7ab3 --- /dev/null +++ b/Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2017.vcxproj @@ -0,0 +1,238 @@ + + + + + Debug + ARM64 + + + Debug + Win32 + + + Debug + x64 + + + Release + ARM64 + + + Release + Win32 + + + Release + x64 + + + + {C7AB4186-54B2-4244-A533-77494763EA1D} + Win32Proj + XWBTool + 10.0.17763.0 + + + + Application + true + v141 + Unicode + + + Application + true + v141 + Unicode + + + Application + true + v141 + Unicode + + + Application + false + v141 + Unicode + + + Application + false + v141 + Unicode + + + Application + false + v141 + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + XWBTool + + + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + XWBTool + + + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + XWBTool + + + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + XWBTool + + + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + XWBTool + + + 
Bin\Desktop_2017\$(Platform)\$(Configuration)\ + Bin\Desktop_2017\$(Platform)\$(Configuration)\ + XWBTool + + + + Level4 + Disabled + WIN32;_DEBUG;_CONSOLE;_WIN32_WINNT=0x0601;%(PreprocessorDefinitions) + ..\Audio;..\Src;%(AdditionalIncludeDirectories) + true + false + + + Console + true + true + kernel32.lib;user32.lib;version.lib;%(AdditionalDependencies) + + + + + Level4 + Disabled + WIN32;_DEBUG;_CONSOLE;_WIN32_WINNT=0x0601;%(PreprocessorDefinitions) + ..\Audio;..\Src;%(AdditionalIncludeDirectories) + true + false + + + Console + true + kernel32.lib;user32.lib;version.lib;%(AdditionalDependencies) + + + + + Level4 + Disabled + WIN32;_DEBUG;_CONSOLE;_WIN32_WINNT=0x0601;%(PreprocessorDefinitions) + ..\Audio;..\Src;%(AdditionalIncludeDirectories) + true + false + + + Console + true + kernel32.lib;user32.lib;version.lib;%(AdditionalDependencies) + + + + + Level4 + MaxSpeed + WIN32;NDEBUG;_CONSOLE;_WIN32_WINNT=0x0601;%(PreprocessorDefinitions) + ..\Audio;..\Src;%(AdditionalIncludeDirectories) + Guard + true + false + + + Console + true + true + true + true + kernel32.lib;user32.lib;version.lib;%(AdditionalDependencies) + + + + + Level4 + MaxSpeed + WIN32;NDEBUG;_CONSOLE;_WIN32_WINNT=0x0601;%(PreprocessorDefinitions) + ..\Audio;..\Src;%(AdditionalIncludeDirectories) + Guard + true + false + + + Console + true + true + true + kernel32.lib;user32.lib;version.lib;%(AdditionalDependencies) + + + + + Level4 + MaxSpeed + WIN32;NDEBUG;_CONSOLE;_WIN32_WINNT=0x0601;%(PreprocessorDefinitions) + ..\Audio;..\Src;%(AdditionalIncludeDirectories) + Guard + true + false + + + Console + true + true + true + kernel32.lib;user32.lib;version.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2017.vcxproj.filters b/Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2017.vcxproj.filters new file mode 100644 index 0000000..134c089 --- /dev/null +++ 
b/Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2017.vcxproj.filters @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2019.vcxproj b/Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2019.vcxproj new file mode 100644 index 0000000..4c20474 --- /dev/null +++ b/Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2019.vcxproj @@ -0,0 +1,244 @@ + + + + + Debug + ARM64 + + + Debug + Win32 + + + Debug + x64 + + + Release + ARM64 + + + Release + Win32 + + + Release + x64 + + + + {C7AB4186-54B2-4244-A533-77494763EA1D} + Win32Proj + XWBTool + 10.0 + + + + Application + true + v142 + Unicode + + + Application + true + v142 + Unicode + + + Application + true + v142 + Unicode + + + Application + false + v142 + Unicode + + + Application + false + v142 + Unicode + + + Application + false + v142 + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + XWBTool + + + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + XWBTool + + + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + XWBTool + + + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + XWBTool + + + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + XWBTool + + + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + Bin\Desktop_2019\$(Platform)\$(Configuration)\ + XWBTool + + + + Level4 + Disabled + WIN32;_DEBUG;_CONSOLE;_WIN32_WINNT=0x0601;%(PreprocessorDefinitions) + ..\Audio;..\Src;%(AdditionalIncludeDirectories) + true + false + 26812 + + + Console + true + true + kernel32.lib;user32.lib;version.lib;%(AdditionalDependencies) + + + + + Level4 + Disabled + WIN32;_DEBUG;_CONSOLE;_WIN32_WINNT=0x0601;%(PreprocessorDefinitions) + 
..\Audio;..\Src;%(AdditionalIncludeDirectories) + true + false + 26812 + + + Console + true + kernel32.lib;user32.lib;version.lib;%(AdditionalDependencies) + + + + + Level4 + Disabled + WIN32;_DEBUG;_CONSOLE;_WIN32_WINNT=0x0601;%(PreprocessorDefinitions) + ..\Audio;..\Src;%(AdditionalIncludeDirectories) + true + false + 26812 + + + Console + true + kernel32.lib;user32.lib;version.lib;%(AdditionalDependencies) + + + + + Level4 + MaxSpeed + WIN32;NDEBUG;_CONSOLE;_WIN32_WINNT=0x0601;%(PreprocessorDefinitions) + ..\Audio;..\Src;%(AdditionalIncludeDirectories) + Guard + true + false + 26812 + + + Console + true + true + true + true + kernel32.lib;user32.lib;version.lib;%(AdditionalDependencies) + + + + + Level4 + MaxSpeed + WIN32;NDEBUG;_CONSOLE;_WIN32_WINNT=0x0601;%(PreprocessorDefinitions) + ..\Audio;..\Src;%(AdditionalIncludeDirectories) + Guard + true + false + 26812 + + + Console + true + true + true + kernel32.lib;user32.lib;version.lib;%(AdditionalDependencies) + + + + + Level4 + MaxSpeed + WIN32;NDEBUG;_CONSOLE;_WIN32_WINNT=0x0601;%(PreprocessorDefinitions) + ..\Audio;..\Src;%(AdditionalIncludeDirectories) + Guard + true + false + 26812 + + + Console + true + true + true + kernel32.lib;user32.lib;version.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2019.vcxproj.filters b/Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2019.vcxproj.filters new file mode 100644 index 0000000..e950467 --- /dev/null +++ b/Sdk/External/DirectXTK/XWBTool/xwbtool_Desktop_2019.vcxproj.filters @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/Sdk/External/HopscotchMap/bhopscotch_map.h b/Sdk/External/HopscotchMap/bhopscotch_map.h new file mode 100644 index 0000000..925a1ee --- /dev/null +++ b/Sdk/External/HopscotchMap/bhopscotch_map.h @@ -0,0 +1,734 @@ +/** + * MIT License + * + * Copyright (c) 2017 Thibaut Goetghebuer-Planchon + * + * Permission is hereby 
granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_BHOPSCOTCH_MAP_H +#define TSL_BHOPSCOTCH_MAP_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hopscotch_hash.h" + +namespace tsl { + +/** + * Similar to tsl::hopscotch_map but instead of using a list for overflowing + * elements it uses a binary search tree. It thus needs an additional template + * parameter Compare. Compare should be arithmetically coherent with KeyEqual. + * + * The binary search tree allows the map to have a worst-case scenario of O(log + * n) for search and delete, even if the hash function maps all the elements to + * the same bucket. For insert, the amortized worst case is O(log n), but the + * worst case is O(n) in case of rehash. + * + * This makes the map resistant to DoS attacks (but doesn't preclude you to have + * a good hash function, as an element in the bucket array is faster to retrieve + * than in the tree). 
+ * + * @copydoc hopscotch_map + */ +template , + class KeyEqual = std::equal_to, class Compare = std::less, + class Allocator = std::allocator>, + unsigned int NeighborhoodSize = 62, bool StoreHash = false, + class GrowthPolicy = tsl::hh::power_of_two_growth_policy<2>> +class bhopscotch_map { + private: + template + using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent; + + class KeySelect { + public: + using key_type = Key; + + const key_type& operator()(const std::pair& key_value) const { + return key_value.first; + } + + const key_type& operator()(std::pair& key_value) { + return key_value.first; + } + }; + + class ValueSelect { + public: + using value_type = T; + + const value_type& operator()( + const std::pair& key_value) const { + return key_value.second; + } + + value_type& operator()(std::pair& key_value) { + return key_value.second; + } + }; + + // TODO Not optimal as we have to use std::pair as ValueType + // which forbid us to move the key in the bucket array, we have to use copy. + // Optimize. 
+ using overflow_container_type = std::map; + using ht = detail_hopscotch_hash::hopscotch_hash< + std::pair, KeySelect, ValueSelect, Hash, KeyEqual, + Allocator, NeighborhoodSize, StoreHash, GrowthPolicy, + overflow_container_type>; + + public: + using key_type = typename ht::key_type; + using mapped_type = T; + using value_type = typename ht::value_type; + using size_type = typename ht::size_type; + using difference_type = typename ht::difference_type; + using hasher = typename ht::hasher; + using key_equal = typename ht::key_equal; + using key_compare = Compare; + using allocator_type = typename ht::allocator_type; + using reference = typename ht::reference; + using const_reference = typename ht::const_reference; + using pointer = typename ht::pointer; + using const_pointer = typename ht::const_pointer; + using iterator = typename ht::iterator; + using const_iterator = typename ht::const_iterator; + + /* + * Constructors + */ + bhopscotch_map() : bhopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE) {} + + explicit bhopscotch_map(size_type bucket_count, const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator(), + const Compare& comp = Compare()) + : m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR, + comp) {} + + bhopscotch_map(size_type bucket_count, const Allocator& alloc) + : bhopscotch_map(bucket_count, Hash(), KeyEqual(), alloc) {} + + bhopscotch_map(size_type bucket_count, const Hash& hash, + const Allocator& alloc) + : bhopscotch_map(bucket_count, hash, KeyEqual(), alloc) {} + + explicit bhopscotch_map(const Allocator& alloc) + : bhopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {} + + template + bhopscotch_map(InputIt first, InputIt last, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) + : bhopscotch_map(bucket_count, hash, equal, alloc) { + insert(first, last); + } + + template + 
bhopscotch_map(InputIt first, InputIt last, size_type bucket_count, + const Allocator& alloc) + : bhopscotch_map(first, last, bucket_count, Hash(), KeyEqual(), alloc) {} + + template + bhopscotch_map(InputIt first, InputIt last, size_type bucket_count, + const Hash& hash, const Allocator& alloc) + : bhopscotch_map(first, last, bucket_count, hash, KeyEqual(), alloc) {} + + bhopscotch_map(std::initializer_list init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) + : bhopscotch_map(init.begin(), init.end(), bucket_count, hash, equal, + alloc) {} + + bhopscotch_map(std::initializer_list init, size_type bucket_count, + const Allocator& alloc) + : bhopscotch_map(init.begin(), init.end(), bucket_count, Hash(), + KeyEqual(), alloc) {} + + bhopscotch_map(std::initializer_list init, size_type bucket_count, + const Hash& hash, const Allocator& alloc) + : bhopscotch_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), + alloc) {} + + bhopscotch_map& operator=(std::initializer_list ilist) { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + + allocator_type get_allocator() const { return m_ht.get_allocator(); } + + /* + * Iterators + */ + iterator begin() noexcept { return m_ht.begin(); } + const_iterator begin() const noexcept { return m_ht.begin(); } + const_iterator cbegin() const noexcept { return m_ht.cbegin(); } + + iterator end() noexcept { return m_ht.end(); } + const_iterator end() const noexcept { return m_ht.end(); } + const_iterator cend() const noexcept { return m_ht.cend(); } + + /* + * Capacity + */ + bool empty() const noexcept { return m_ht.empty(); } + size_type size() const noexcept { return m_ht.size(); } + size_type max_size() const noexcept { return m_ht.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { m_ht.clear(); } + + std::pair insert(const 
value_type& value) { + return m_ht.insert(value); + } + + template ::value>::type* = nullptr> + std::pair insert(P&& value) { + return m_ht.insert(std::forward

(value)); + } + + std::pair insert(value_type&& value) { + return m_ht.insert(std::move(value)); + } + + iterator insert(const_iterator hint, const value_type& value) { + return m_ht.insert(hint, value); + } + + template ::value>::type* = nullptr> + iterator insert(const_iterator hint, P&& value) { + return m_ht.insert(hint, std::forward

(value)); + } + + iterator insert(const_iterator hint, value_type&& value) { + return m_ht.insert(hint, std::move(value)); + } + + template + void insert(InputIt first, InputIt last) { + m_ht.insert(first, last); + } + + void insert(std::initializer_list ilist) { + m_ht.insert(ilist.begin(), ilist.end()); + } + + template + std::pair insert_or_assign(const key_type& k, M&& obj) { + return m_ht.insert_or_assign(k, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& k, M&& obj) { + return m_ht.insert_or_assign(std::move(k), std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) { + return m_ht.insert_or_assign(hint, k, std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) { + return m_ht.insert_or_assign(hint, std::move(k), std::forward(obj)); + } + + /** + * Due to the way elements are stored, emplace will need to move or copy the + * key-value once. The method is equivalent to + * insert(value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + std::pair emplace(Args&&... args) { + return m_ht.emplace(std::forward(args)...); + } + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy + * the key-value once. The method is equivalent to insert(hint, + * value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + iterator emplace_hint(const_iterator hint, Args&&... args) { + return m_ht.emplace_hint(hint, std::forward(args)...); + } + + template + std::pair try_emplace(const key_type& k, Args&&... args) { + return m_ht.try_emplace(k, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& k, Args&&... 
args) { + return m_ht.try_emplace(std::move(k), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { + return m_ht.try_emplace(hint, k, std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) { + return m_ht.try_emplace(hint, std::move(k), std::forward(args)...); + } + + iterator erase(iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator first, const_iterator last) { + return m_ht.erase(first, last); + } + size_type erase(const key_type& key) { return m_ht.erase(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup to the value if you already have the hash. + */ + size_type erase(const key_type& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent and Compare::is_transparent exist. If so, K must + * be hashable and comparable to Key. + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + size_type erase(const K& key) { + return m_ht.erase(key); + } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup to the value if you already have the hash. 
+ */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + size_type erase(const K& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + void swap(bhopscotch_map& other) { other.m_ht.swap(m_ht); } + + /* + * Lookup + */ + T& at(const Key& key) { return m_ht.at(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + T& at(const Key& key, std::size_t precalculated_hash) { + return m_ht.at(key, precalculated_hash); + } + + const T& at(const Key& key) const { return m_ht.at(key); } + + /** + * @copydoc at(const Key& key, std::size_t precalculated_hash) + */ + const T& at(const Key& key, std::size_t precalculated_hash) const { + return m_ht.at(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent and Compare::is_transparent exist. If so, K must + * be hashable and comparable to Key. + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + T& at(const K& key) { + return m_ht.at(key); + } + + /** + * @copydoc at(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. 
+ */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + T& at(const K& key, std::size_t precalculated_hash) { + return m_ht.at(key, precalculated_hash); + } + + /** + * @copydoc at(const K& key) + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + const T& at(const K& key) const { + return m_ht.at(key); + } + + /** + * @copydoc at(const K& key, std::size_t precalculated_hash) + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + const T& at(const K& key, std::size_t precalculated_hash) const { + return m_ht.at(key, precalculated_hash); + } + + T& operator[](const Key& key) { return m_ht[key]; } + T& operator[](Key&& key) { return m_ht[std::move(key)]; } + + size_type count(const Key& key) const { return m_ht.count(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + size_type count(const Key& key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent and Compare::is_transparent exist. If so, K must + * be hashable and comparable to Key. + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + size_type count(const K& key) const { + return m_ht.count(key); + } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). 
Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + size_type count(const K& key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + iterator find(const Key& key) { return m_ht.find(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + iterator find(const Key& key, std::size_t precalculated_hash) { + return m_ht.find(key, precalculated_hash); + } + + const_iterator find(const Key& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent and Compare::is_transparent exist. If so, K must + * be hashable and comparable to Key. + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + iterator find(const K& key) { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. 
+ */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + iterator find(const K& key, std::size_t precalculated_hash) { + return m_ht.find(key, precalculated_hash); + } + + /** + * @copydoc find(const K& key) + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + const_iterator find(const K& key) const { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key, std::size_t precalculated_hash) + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + const_iterator find(const K& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + bool contains(const Key& key) const { return m_ht.contains(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + bool contains(const Key& key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + bool contains(const K& key) const { + return m_ht.contains(key); + } + + /** + * @copydoc contains(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. 
+ */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + bool contains(const K& key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) { + return m_ht.equal_range(key); + } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + std::pair equal_range(const Key& key, + std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) const { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair equal_range( + const Key& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent and Compare::is_transparent exist. If so, K must + * be hashable and comparable to Key. + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + std::pair equal_range(const K& key) { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. 
+ */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + std::pair equal_range(const K& key, + std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + std::pair equal_range(const K& key) const { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + std::pair equal_range( + const K& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /* + * Bucket interface + */ + size_type bucket_count() const { return m_ht.bucket_count(); } + size_type max_bucket_count() const { return m_ht.max_bucket_count(); } + + /* + * Hash policy + */ + float load_factor() const { return m_ht.load_factor(); } + float max_load_factor() const { return m_ht.max_load_factor(); } + void max_load_factor(float ml) { m_ht.max_load_factor(ml); } + + void rehash(size_type count_) { m_ht.rehash(count_); } + void reserve(size_type count_) { m_ht.reserve(count_); } + + /* + * Observers + */ + hasher hash_function() const { return m_ht.hash_function(); } + key_equal key_eq() const { return m_ht.key_eq(); } + key_compare key_comp() const { return m_ht.key_comp(); } + + /* + * Other + */ + + /** + * Convert a const_iterator to an iterator. 
+ */ + iterator mutable_iterator(const_iterator pos) { + return m_ht.mutable_iterator(pos); + } + + size_type overflow_size() const noexcept { return m_ht.overflow_size(); } + + friend bool operator==(const bhopscotch_map& lhs, const bhopscotch_map& rhs) { + if (lhs.size() != rhs.size()) { + return false; + } + + for (const auto& element_lhs : lhs) { + const auto it_element_rhs = rhs.find(element_lhs.first); + if (it_element_rhs == rhs.cend() || + element_lhs.second != it_element_rhs->second) { + return false; + } + } + + return true; + } + + friend bool operator!=(const bhopscotch_map& lhs, const bhopscotch_map& rhs) { + return !operator==(lhs, rhs); + } + + friend void swap(bhopscotch_map& lhs, bhopscotch_map& rhs) { lhs.swap(rhs); } + + private: + ht m_ht; +}; + +/** + * Same as `tsl::bhopscotch_map`. + */ +template , + class KeyEqual = std::equal_to, class Compare = std::less, + class Allocator = std::allocator>, + unsigned int NeighborhoodSize = 62, bool StoreHash = false> +using bhopscotch_pg_map = + bhopscotch_map; + +} // end namespace tsl + +#endif diff --git a/Sdk/External/HopscotchMap/bhopscotch_set.h b/Sdk/External/HopscotchMap/bhopscotch_set.h new file mode 100644 index 0000000..64716fb --- /dev/null +++ b/Sdk/External/HopscotchMap/bhopscotch_set.h @@ -0,0 +1,586 @@ +/** + * MIT License + * + * Copyright (c) 2017 Thibaut Goetghebuer-Planchon + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_BHOPSCOTCH_SET_H +#define TSL_BHOPSCOTCH_SET_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hopscotch_hash.h" + +namespace tsl { + +/** + * Similar to tsl::hopscotch_set but instead of using a list for overflowing + * elements it uses a binary search tree. It thus needs an additional template + * parameter Compare. Compare should be arithmetically coherent with KeyEqual. + * + * The binary search tree allows the set to have a worst-case scenario of O(log + * n) for search and delete, even if the hash function maps all the elements to + * the same bucket. For insert, the amortized worst case is O(log n), but the + * worst case is O(n) in case of rehash. + * + * This makes the set resistant to DoS attacks (but doesn't preclude you to have + * a good hash function, as an element in the bucket array is faster to retrieve + * than in the tree). 
+ * + * @copydoc hopscotch_set + */ +template , + class KeyEqual = std::equal_to, class Compare = std::less, + class Allocator = std::allocator, + unsigned int NeighborhoodSize = 62, bool StoreHash = false, + class GrowthPolicy = tsl::hh::power_of_two_growth_policy<2>> +class bhopscotch_set { + private: + template + using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent; + + class KeySelect { + public: + using key_type = Key; + + const key_type& operator()(const Key& key) const { return key; } + + key_type& operator()(Key& key) { return key; } + }; + + using overflow_container_type = std::set; + using ht = tsl::detail_hopscotch_hash::hopscotch_hash< + Key, KeySelect, void, Hash, KeyEqual, Allocator, NeighborhoodSize, + StoreHash, GrowthPolicy, overflow_container_type>; + + public: + using key_type = typename ht::key_type; + using value_type = typename ht::value_type; + using size_type = typename ht::size_type; + using difference_type = typename ht::difference_type; + using hasher = typename ht::hasher; + using key_equal = typename ht::key_equal; + using key_compare = Compare; + using allocator_type = typename ht::allocator_type; + using reference = typename ht::reference; + using const_reference = typename ht::const_reference; + using pointer = typename ht::pointer; + using const_pointer = typename ht::const_pointer; + using iterator = typename ht::iterator; + using const_iterator = typename ht::const_iterator; + + /* + * Constructors + */ + bhopscotch_set() : bhopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE) {} + + explicit bhopscotch_set(size_type bucket_count, const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator(), + const Compare& comp = Compare()) + : m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR, + comp) {} + + bhopscotch_set(size_type bucket_count, const Allocator& alloc) + : bhopscotch_set(bucket_count, Hash(), KeyEqual(), alloc) {} + + bhopscotch_set(size_type 
bucket_count, const Hash& hash, + const Allocator& alloc) + : bhopscotch_set(bucket_count, hash, KeyEqual(), alloc) {} + + explicit bhopscotch_set(const Allocator& alloc) + : bhopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {} + + template + bhopscotch_set(InputIt first, InputIt last, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) + : bhopscotch_set(bucket_count, hash, equal, alloc) { + insert(first, last); + } + + template + bhopscotch_set(InputIt first, InputIt last, size_type bucket_count, + const Allocator& alloc) + : bhopscotch_set(first, last, bucket_count, Hash(), KeyEqual(), alloc) {} + + template + bhopscotch_set(InputIt first, InputIt last, size_type bucket_count, + const Hash& hash, const Allocator& alloc) + : bhopscotch_set(first, last, bucket_count, hash, KeyEqual(), alloc) {} + + bhopscotch_set(std::initializer_list init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) + : bhopscotch_set(init.begin(), init.end(), bucket_count, hash, equal, + alloc) {} + + bhopscotch_set(std::initializer_list init, size_type bucket_count, + const Allocator& alloc) + : bhopscotch_set(init.begin(), init.end(), bucket_count, Hash(), + KeyEqual(), alloc) {} + + bhopscotch_set(std::initializer_list init, size_type bucket_count, + const Hash& hash, const Allocator& alloc) + : bhopscotch_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), + alloc) {} + + bhopscotch_set& operator=(std::initializer_list ilist) { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + + allocator_type get_allocator() const { return m_ht.get_allocator(); } + + /* + * Iterators + */ + iterator begin() noexcept { return m_ht.begin(); } + const_iterator begin() const noexcept { return m_ht.begin(); } 
+ const_iterator cbegin() const noexcept { return m_ht.cbegin(); } + + iterator end() noexcept { return m_ht.end(); } + const_iterator end() const noexcept { return m_ht.end(); } + const_iterator cend() const noexcept { return m_ht.cend(); } + + /* + * Capacity + */ + bool empty() const noexcept { return m_ht.empty(); } + size_type size() const noexcept { return m_ht.size(); } + size_type max_size() const noexcept { return m_ht.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { m_ht.clear(); } + + std::pair insert(const value_type& value) { + return m_ht.insert(value); + } + std::pair insert(value_type&& value) { + return m_ht.insert(std::move(value)); + } + + iterator insert(const_iterator hint, const value_type& value) { + return m_ht.insert(hint, value); + } + iterator insert(const_iterator hint, value_type&& value) { + return m_ht.insert(hint, std::move(value)); + } + + template + void insert(InputIt first, InputIt last) { + m_ht.insert(first, last); + } + void insert(std::initializer_list ilist) { + m_ht.insert(ilist.begin(), ilist.end()); + } + + /** + * Due to the way elements are stored, emplace will need to move or copy the + * key-value once. The method is equivalent to + * insert(value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + std::pair emplace(Args&&... args) { + return m_ht.emplace(std::forward(args)...); + } + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy + * the key-value once. The method is equivalent to insert(hint, + * value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + iterator emplace_hint(const_iterator hint, Args&&... 
args) { + return m_ht.emplace_hint(hint, std::forward(args)...); + } + + iterator erase(iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator first, const_iterator last) { + return m_ht.erase(first, last); + } + size_type erase(const key_type& key) { return m_ht.erase(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup to the value if you already have the hash. + */ + size_type erase(const key_type& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent and Compare::is_transparent exist. If so, K must + * be hashable and comparable to Key. + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + size_type erase(const K& key) { + return m_ht.erase(key); + } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup to the value if you already have the hash. + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + size_type erase(const K& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + void swap(bhopscotch_set& other) { other.m_ht.swap(m_ht); } + + /* + * Lookup + */ + size_type count(const Key& key) const { return m_ht.count(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). 
Useful to speed-up + * the lookup if you already have the hash. + */ + size_type count(const Key& key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent and Compare::is_transparent exist. If so, K must + * be hashable and comparable to Key. + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + size_type count(const K& key) const { + return m_ht.count(key); + } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + size_type count(const K& key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + iterator find(const Key& key) { return m_ht.find(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + iterator find(const Key& key, std::size_t precalculated_hash) { + return m_ht.find(key, precalculated_hash); + } + + const_iterator find(const Key& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent and Compare::is_transparent exist. 
If so, K must + * be hashable and comparable to Key. + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + iterator find(const K& key) { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + iterator find(const K& key, std::size_t precalculated_hash) { + return m_ht.find(key, precalculated_hash); + } + + /** + * @copydoc find(const K& key) + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + const_iterator find(const K& key) const { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + const_iterator find(const K& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + bool contains(const Key& key) const { return m_ht.contains(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. 
+ */ + bool contains(const Key& key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + bool contains(const K& key) const { + return m_ht.contains(key); + } + + /** + * @copydoc contains(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + bool contains(const K& key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) { + return m_ht.equal_range(key); + } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + std::pair equal_range(const Key& key, + std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) const { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair equal_range( + const Key& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent and Compare::is_transparent exist. If so, K must + * be hashable and comparable to Key. 
+ */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + std::pair equal_range(const K& key) { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + std::pair equal_range(const K& key, + std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + std::pair equal_range(const K& key) const { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template < + class K, class KE = KeyEqual, class CP = Compare, + typename std::enable_if::value && + has_is_transparent::value>::type* = nullptr> + std::pair equal_range( + const K& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /* + * Bucket interface + */ + size_type bucket_count() const { return m_ht.bucket_count(); } + size_type max_bucket_count() const { return m_ht.max_bucket_count(); } + + /* + * Hash policy + */ + float load_factor() const { return m_ht.load_factor(); } + float max_load_factor() const { return m_ht.max_load_factor(); } + void max_load_factor(float ml) { m_ht.max_load_factor(ml); } + + void rehash(size_type count_) { m_ht.rehash(count_); } + void reserve(size_type count_) { m_ht.reserve(count_); } + + /* + * Observers + */ + hasher hash_function() const { return 
m_ht.hash_function(); } + key_equal key_eq() const { return m_ht.key_eq(); } + key_compare key_comp() const { return m_ht.key_comp(); } + + /* + * Other + */ + + /** + * Convert a const_iterator to an iterator. + */ + iterator mutable_iterator(const_iterator pos) { + return m_ht.mutable_iterator(pos); + } + + size_type overflow_size() const noexcept { return m_ht.overflow_size(); } + + friend bool operator==(const bhopscotch_set& lhs, const bhopscotch_set& rhs) { + if (lhs.size() != rhs.size()) { + return false; + } + + for (const auto& element_lhs : lhs) { + const auto it_element_rhs = rhs.find(element_lhs); + if (it_element_rhs == rhs.cend()) { + return false; + } + } + + return true; + } + + friend bool operator!=(const bhopscotch_set& lhs, const bhopscotch_set& rhs) { + return !operator==(lhs, rhs); + } + + friend void swap(bhopscotch_set& lhs, bhopscotch_set& rhs) { lhs.swap(rhs); } + + private: + ht m_ht; +}; + +/** + * Same as `tsl::bhopscotch_set`. + */ +template , + class KeyEqual = std::equal_to, class Compare = std::less, + class Allocator = std::allocator, + unsigned int NeighborhoodSize = 62, bool StoreHash = false> +using bhopscotch_pg_set = + bhopscotch_set; + +} // end namespace tsl + +#endif diff --git a/Sdk/External/HopscotchMap/hopscotch_growth_policy.h b/Sdk/External/HopscotchMap/hopscotch_growth_policy.h new file mode 100644 index 0000000..0e46386 --- /dev/null +++ b/Sdk/External/HopscotchMap/hopscotch_growth_policy.h @@ -0,0 +1,404 @@ +/** + * MIT License + * + * Copyright (c) 2018 Thibaut Goetghebuer-Planchon + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_HOPSCOTCH_GROWTH_POLICY_H +#define TSL_HOPSCOTCH_GROWTH_POLICY_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * Only activate tsl_hh_assert if TSL_DEBUG is defined. + * This way we avoid the performance hit when NDEBUG is not defined with assert + * as tsl_hh_assert is used a lot (people usually compile with "-O3" and not + * "-O3 -DNDEBUG"). + */ +#ifdef TSL_DEBUG +#define tsl_hh_assert(expr) assert(expr) +#else +#define tsl_hh_assert(expr) (static_cast(0)) +#endif + +/** + * If exceptions are enabled, throw the exception passed in parameter, otherwise + * call std::terminate. + */ +#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || \ + (defined(_MSC_VER) && defined(_CPPUNWIND))) && \ + !defined(TSL_NO_EXCEPTIONS) +#define TSL_HH_THROW_OR_TERMINATE(ex, msg) throw ex(msg) +#else +#define TSL_HH_NO_EXCEPTIONS +#ifdef NDEBUG +#define TSL_HH_THROW_OR_TERMINATE(ex, msg) std::terminate() +#else +#include +#define TSL_HH_THROW_OR_TERMINATE(ex, msg) \ + do { \ + std::cerr << msg << std::endl; \ + std::terminate(); \ + } while (0) +#endif +#endif + +namespace tsl { +namespace hh { + +/** + * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a + * power of two. 
It allows the table to use a mask operation instead of a modulo + * operation to map a hash to a bucket. + * + * GrowthFactor must be a power of two >= 2. + */ +template +class power_of_two_growth_policy { + public: + /** + * Called on the hash table creation and on rehash. The number of buckets for + * the table is passed in parameter. This number is a minimum, the policy may + * update this value with a higher value if needed (but not lower). + * + * If 0 is given, min_bucket_count_in_out must still be 0 after the policy + * creation and bucket_for_hash must always return 0 in this case. + */ + explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) { + if (min_bucket_count_in_out > max_bucket_count()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The hash table exceeds its maximum size."); + } + + if (min_bucket_count_in_out > 0) { + min_bucket_count_in_out = + round_up_to_power_of_two(min_bucket_count_in_out); + m_mask = min_bucket_count_in_out - 1; + } else { + m_mask = 0; + } + } + + /** + * Return the bucket [0, bucket_count()) to which the hash belongs. + * If bucket_count() is 0, it must always return 0. + */ + std::size_t bucket_for_hash(std::size_t hash) const noexcept { + return hash & m_mask; + } + + /** + * Return the bucket count to use when the bucket array grows on rehash. + */ + std::size_t next_bucket_count() const { + if ((m_mask + 1) > max_bucket_count() / GrowthFactor) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The hash table exceeds its maximum size."); + } + + return (m_mask + 1) * GrowthFactor; + } + + /** + * Return the maximum number of buckets supported by the policy. + */ + std::size_t max_bucket_count() const { + // Largest power of two. + return (std::numeric_limits::max() / 2) + 1; + } + + /** + * Reset the growth policy as if it was created with a bucket count of 0. + * After a clear, the policy must always return 0 when bucket_for_hash is + * called. 
+ */ + void clear() noexcept { m_mask = 0; } + + private: + static std::size_t round_up_to_power_of_two(std::size_t value) { + if (is_power_of_two(value)) { + return value; + } + + if (value == 0) { + return 1; + } + + --value; + for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) { + value |= value >> i; + } + + return value + 1; + } + + static constexpr bool is_power_of_two(std::size_t value) { + return value != 0 && (value & (value - 1)) == 0; + } + + private: + static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, + "GrowthFactor must be a power of two >= 2."); + + std::size_t m_mask; +}; + +/** + * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo + * to map a hash to a bucket. Slower but it can be useful if you want a slower + * growth. + */ +template > +class mod_growth_policy { + public: + explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) { + if (min_bucket_count_in_out > max_bucket_count()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The hash table exceeds its maximum size."); + } + + if (min_bucket_count_in_out > 0) { + m_mod = min_bucket_count_in_out; + } else { + m_mod = 1; + } + } + + std::size_t bucket_for_hash(std::size_t hash) const noexcept { + return hash % m_mod; + } + + std::size_t next_bucket_count() const { + if (m_mod == max_bucket_count()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The hash table exceeds its maximum size."); + } + + const double next_bucket_count = + std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR); + if (!std::isnormal(next_bucket_count)) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The hash table exceeds its maximum size."); + } + + if (next_bucket_count > double(max_bucket_count())) { + return max_bucket_count(); + } else { + return std::size_t(next_bucket_count); + } + } + + std::size_t max_bucket_count() const { return MAX_BUCKET_COUNT; } + + void clear() noexcept { m_mod = 1; } + + private: + static constexpr 
double REHASH_SIZE_MULTIPLICATION_FACTOR = + 1.0 * GrowthFactor::num / GrowthFactor::den; + static const std::size_t MAX_BUCKET_COUNT = + std::size_t(double(std::numeric_limits::max() / + REHASH_SIZE_MULTIPLICATION_FACTOR)); + + static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, + "Growth factor should be >= 1.1."); + + std::size_t m_mod; +}; + +namespace detail { + +#if SIZE_MAX >= ULLONG_MAX +#define TSL_HH_NB_PRIMES 51 +#elif SIZE_MAX >= ULONG_MAX +#define TSL_HH_NB_PRIMES 40 +#else +#define TSL_HH_NB_PRIMES 23 +#endif + +static constexpr const std::array PRIMES = {{ + 1u, + 5u, + 17u, + 29u, + 37u, + 53u, + 67u, + 79u, + 97u, + 131u, + 193u, + 257u, + 389u, + 521u, + 769u, + 1031u, + 1543u, + 2053u, + 3079u, + 6151u, + 12289u, + 24593u, + 49157u, +#if SIZE_MAX >= ULONG_MAX + 98317ul, + 196613ul, + 393241ul, + 786433ul, + 1572869ul, + 3145739ul, + 6291469ul, + 12582917ul, + 25165843ul, + 50331653ul, + 100663319ul, + 201326611ul, + 402653189ul, + 805306457ul, + 1610612741ul, + 3221225473ul, + 4294967291ul, +#endif +#if SIZE_MAX >= ULLONG_MAX + 6442450939ull, + 12884901893ull, + 25769803751ull, + 51539607551ull, + 103079215111ull, + 206158430209ull, + 412316860441ull, + 824633720831ull, + 1649267441651ull, + 3298534883309ull, + 6597069766657ull, +#endif +}}; + +template +static constexpr std::size_t mod(std::size_t hash) { + return hash % PRIMES[IPrime]; +} + +// MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for +// faster modulo as the compiler can optimize the modulo code better with a +// constant known at the compilation. 
+static constexpr const std::array + MOD_PRIME = {{ + &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, + &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>, &mod<11>, + &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, + &mod<18>, &mod<19>, &mod<20>, &mod<21>, &mod<22>, +#if SIZE_MAX >= ULONG_MAX + &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, + &mod<29>, &mod<30>, &mod<31>, &mod<32>, &mod<33>, &mod<34>, + &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>, +#endif +#if SIZE_MAX >= ULLONG_MAX + &mod<40>, &mod<41>, &mod<42>, &mod<43>, &mod<44>, &mod<45>, + &mod<46>, &mod<47>, &mod<48>, &mod<49>, &mod<50>, +#endif + }}; + +} // namespace detail + +/** + * Grow the hash table by using prime numbers as bucket count. Slower than + * tsl::hh::power_of_two_growth_policy in general but will probably distribute + * the values around better in the buckets with a poor hash function. + * + * To allow the compiler to optimize the modulo operation, a lookup table is + * used with constant prime numbers. + * + * With a switch the code would look like: + * \code + * switch(iprime) { // iprime is the current prime of the hash table + * case 0: hash % 5ul; + * break; + * case 1: hash % 17ul; + * break; + * case 2: hash % 29ul; + * break; + * ... + * } + * \endcode + * + * Due to the constant variable in the modulo the compiler is able to optimize + * the operation by a series of multiplications, subtractions and shifts. + * + * The 'hash % 5' could become something like 'hash - ((hash * 0xCCCCCCCD) >> 34) + * * 5' in a 64-bit environment. 
+ */ +class prime_growth_policy { + public: + explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) { + auto it_prime = std::lower_bound( + detail::PRIMES.begin(), detail::PRIMES.end(), min_bucket_count_in_out); + if (it_prime == detail::PRIMES.end()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The hash table exceeds its maximum size."); + } + + m_iprime = static_cast( + std::distance(detail::PRIMES.begin(), it_prime)); + if (min_bucket_count_in_out > 0) { + min_bucket_count_in_out = *it_prime; + } else { + min_bucket_count_in_out = 0; + } + } + + std::size_t bucket_for_hash(std::size_t hash) const noexcept { + return detail::MOD_PRIME[m_iprime](hash); + } + + std::size_t next_bucket_count() const { + if (m_iprime + 1 >= detail::PRIMES.size()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The hash table exceeds its maximum size."); + } + + return detail::PRIMES[m_iprime + 1]; + } + + std::size_t max_bucket_count() const { return detail::PRIMES.back(); } + + void clear() noexcept { m_iprime = 0; } + + private: + unsigned int m_iprime; + + static_assert(std::numeric_limits::max() >= + detail::PRIMES.size(), + "The type of m_iprime is not big enough."); +}; + +} // namespace hh +} // namespace tsl + +#endif diff --git a/Sdk/External/HopscotchMap/hopscotch_hash.h b/Sdk/External/HopscotchMap/hopscotch_hash.h new file mode 100644 index 0000000..ad4f58e --- /dev/null +++ b/Sdk/External/HopscotchMap/hopscotch_hash.h @@ -0,0 +1,1883 @@ +/** + * MIT License + * + * Copyright (c) 2017 Thibaut Goetghebuer-Planchon + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_HOPSCOTCH_HASH_H +#define TSL_HOPSCOTCH_HASH_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hopscotch_growth_policy.h" + +#if (defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9)) +#define TSL_HH_NO_RANGE_ERASE_WITH_CONST_ITERATOR +#endif + +namespace tsl { +namespace detail_hopscotch_hash { + +template +struct make_void { + using type = void; +}; + +template +struct has_is_transparent : std::false_type {}; + +template +struct has_is_transparent::type> + : std::true_type {}; + +template +struct has_key_compare : std::false_type {}; + +template +struct has_key_compare::type> + : std::true_type {}; + +template +struct is_power_of_two_policy : std::false_type {}; + +template +struct is_power_of_two_policy> + : std::true_type {}; + +template +static T numeric_cast(U value, + const char* error_message = "numeric_cast() failed.") { + T ret = static_cast(value); + if (static_cast(ret) != value) { + TSL_HH_THROW_OR_TERMINATE(std::runtime_error, error_message); + } + + const bool is_same_signedness = + (std::is_unsigned::value && std::is_unsigned::value) || + (std::is_signed::value && std::is_signed::value); + if (!is_same_signedness && (ret < T{}) != (value < U{})) { + 
TSL_HH_THROW_OR_TERMINATE(std::runtime_error, error_message); + } + + return ret; +} + +/* + * smallest_type_for_min_bits::type returns the smallest type that can fit + * MinBits. + */ +static const std::size_t SMALLEST_TYPE_MAX_BITS_SUPPORTED = 64; +template +class smallest_type_for_min_bits {}; + +template +class smallest_type_for_min_bits< + MinBits, typename std::enable_if<(MinBits > 0) && (MinBits <= 8)>::type> { + public: + using type = std::uint_least8_t; +}; + +template +class smallest_type_for_min_bits< + MinBits, typename std::enable_if<(MinBits > 8) && (MinBits <= 16)>::type> { + public: + using type = std::uint_least16_t; +}; + +template +class smallest_type_for_min_bits< + MinBits, typename std::enable_if<(MinBits > 16) && (MinBits <= 32)>::type> { + public: + using type = std::uint_least32_t; +}; + +template +class smallest_type_for_min_bits< + MinBits, typename std::enable_if<(MinBits > 32) && (MinBits <= 64)>::type> { + public: + using type = std::uint_least64_t; +}; + +/* + * Each bucket may store up to three elements: + * - An aligned storage to store a value_type object with placement-new. + * - An (optional) hash of the value in the bucket. + * - An unsigned integer of type neighborhood_bitmap used to tell us which + * buckets in the neighborhood of the current bucket contain a value with a hash + * belonging to the current bucket. + * + * For a bucket 'bct', a bit 'i' (counting from 0 and from the least significant + * bit to the most significant) set to 1 means that the bucket 'bct + i' + * contains a value with a hash belonging to bucket 'bct'. The bits used for + * that, start from the third least significant bit. The two least significant + * bits are reserved: + * - The least significant bit is set to 1 if there is a value in the bucket + * storage. + * - The second least significant bit is set to 1 if there is an overflow. 
More + * than NeighborhoodSize values give the same hash, all overflow values are + * stored in the m_overflow_elements list of the map. + * + * Details regarding hopscotch hashing and its implementation can be found here: + * https://tessil.github.io/2016/08/29/hopscotch-hashing.html + */ +static const std::size_t NB_RESERVED_BITS_IN_NEIGHBORHOOD = 2; + +using truncated_hash_type = std::uint_least32_t; + +/** + * Helper class that stores a truncated hash if StoreHash is true and nothing + * otherwise. + */ +template +class hopscotch_bucket_hash { + public: + bool bucket_hash_equal(std::size_t /*hash*/) const noexcept { return true; } + + truncated_hash_type truncated_bucket_hash() const noexcept { return 0; } + + protected: + void copy_hash(const hopscotch_bucket_hash&) noexcept {} + + void set_hash(truncated_hash_type /*hash*/) noexcept {} +}; + +template <> +class hopscotch_bucket_hash { + public: + bool bucket_hash_equal(std::size_t hash) const noexcept { + return m_hash == truncated_hash_type(hash); + } + + truncated_hash_type truncated_bucket_hash() const noexcept { return m_hash; } + + protected: + void copy_hash(const hopscotch_bucket_hash& bucket) noexcept { + m_hash = bucket.m_hash; + } + + void set_hash(truncated_hash_type hash) noexcept { m_hash = hash; } + + private: + truncated_hash_type m_hash; +}; + +template +class hopscotch_bucket : public hopscotch_bucket_hash { + private: + static const std::size_t MIN_NEIGHBORHOOD_SIZE = 4; + static const std::size_t MAX_NEIGHBORHOOD_SIZE = + SMALLEST_TYPE_MAX_BITS_SUPPORTED - NB_RESERVED_BITS_IN_NEIGHBORHOOD; + + static_assert(NeighborhoodSize >= 4, "NeighborhoodSize should be >= 4."); + // We can't put a variable in the message, ensure coherence + static_assert(MIN_NEIGHBORHOOD_SIZE == 4, ""); + + static_assert(NeighborhoodSize <= 62, "NeighborhoodSize should be <= 62."); + // We can't put a variable in the message, ensure coherence + static_assert(MAX_NEIGHBORHOOD_SIZE == 62, ""); + + static_assert(!StoreHash 
|| NeighborhoodSize <= 30, + "NeighborhoodSize should be <= 30 if StoreHash is true."); + // We can't put a variable in the message, ensure coherence + static_assert(MAX_NEIGHBORHOOD_SIZE - 32 == 30, ""); + + using bucket_hash = hopscotch_bucket_hash; + + public: + using value_type = ValueType; + using neighborhood_bitmap = typename smallest_type_for_min_bits< + NeighborhoodSize + NB_RESERVED_BITS_IN_NEIGHBORHOOD>::type; + + hopscotch_bucket() noexcept : bucket_hash(), m_neighborhood_infos(0) { + tsl_hh_assert(empty()); + } + + hopscotch_bucket(const hopscotch_bucket& bucket) noexcept( + std::is_nothrow_copy_constructible::value) + : bucket_hash(bucket), m_neighborhood_infos(0) { + if (!bucket.empty()) { + ::new (static_cast(std::addressof(m_value))) + value_type(bucket.value()); + } + + m_neighborhood_infos = bucket.m_neighborhood_infos; + } + + hopscotch_bucket(hopscotch_bucket&& bucket) noexcept( + std::is_nothrow_move_constructible::value) + : bucket_hash(std::move(bucket)), m_neighborhood_infos(0) { + if (!bucket.empty()) { + ::new (static_cast(std::addressof(m_value))) + value_type(std::move(bucket.value())); + } + + m_neighborhood_infos = bucket.m_neighborhood_infos; + } + + hopscotch_bucket& operator=(const hopscotch_bucket& bucket) noexcept( + std::is_nothrow_copy_constructible::value) { + if (this != &bucket) { + remove_value(); + + bucket_hash::operator=(bucket); + if (!bucket.empty()) { + ::new (static_cast(std::addressof(m_value))) + value_type(bucket.value()); + } + + m_neighborhood_infos = bucket.m_neighborhood_infos; + } + + return *this; + } + + hopscotch_bucket& operator=(hopscotch_bucket&&) = delete; + + ~hopscotch_bucket() noexcept { + if (!empty()) { + destroy_value(); + } + } + + neighborhood_bitmap neighborhood_infos() const noexcept { + return neighborhood_bitmap(m_neighborhood_infos >> + NB_RESERVED_BITS_IN_NEIGHBORHOOD); + } + + void set_overflow(bool has_overflow) noexcept { + if (has_overflow) { + m_neighborhood_infos = 
neighborhood_bitmap(m_neighborhood_infos | 2); + } else { + m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos & ~2); + } + } + + bool has_overflow() const noexcept { return (m_neighborhood_infos & 2) != 0; } + + bool empty() const noexcept { return (m_neighborhood_infos & 1) == 0; } + + void toggle_neighbor_presence(std::size_t ineighbor) noexcept { + tsl_hh_assert(ineighbor <= NeighborhoodSize); + m_neighborhood_infos = neighborhood_bitmap( + m_neighborhood_infos ^ + (1ull << (ineighbor + NB_RESERVED_BITS_IN_NEIGHBORHOOD))); + } + + bool check_neighbor_presence(std::size_t ineighbor) const noexcept { + tsl_hh_assert(ineighbor <= NeighborhoodSize); + if (((m_neighborhood_infos >> + (ineighbor + NB_RESERVED_BITS_IN_NEIGHBORHOOD)) & + 1) == 1) { + return true; + } + + return false; + } + + value_type& value() noexcept { + tsl_hh_assert(!empty()); + return *reinterpret_cast(std::addressof(m_value)); + } + + const value_type& value() const noexcept { + tsl_hh_assert(!empty()); + return *reinterpret_cast(std::addressof(m_value)); + } + + template + void set_value_of_empty_bucket(truncated_hash_type hash, + Args&&... 
value_type_args) { + tsl_hh_assert(empty()); + + ::new (static_cast(std::addressof(m_value))) + value_type(std::forward(value_type_args)...); + set_empty(false); + this->set_hash(hash); + } + + void swap_value_into_empty_bucket(hopscotch_bucket& empty_bucket) { + tsl_hh_assert(empty_bucket.empty()); + if (!empty()) { + ::new (static_cast(std::addressof(empty_bucket.m_value))) + value_type(std::move(value())); + empty_bucket.copy_hash(*this); + empty_bucket.set_empty(false); + + destroy_value(); + set_empty(true); + } + } + + void remove_value() noexcept { + if (!empty()) { + destroy_value(); + set_empty(true); + } + } + + void clear() noexcept { + if (!empty()) { + destroy_value(); + } + + m_neighborhood_infos = 0; + tsl_hh_assert(empty()); + } + + static truncated_hash_type truncate_hash(std::size_t hash) noexcept { + return truncated_hash_type(hash); + } + + private: + void set_empty(bool is_empty) noexcept { + if (is_empty) { + m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos & ~1); + } else { + m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos | 1); + } + } + + void destroy_value() noexcept { + tsl_hh_assert(!empty()); + value().~value_type(); + } + + private: + using storage = typename std::aligned_storage::type; + + neighborhood_bitmap m_neighborhood_infos; + storage m_value; +}; + +/** + * Internal common class used by (b)hopscotch_map and (b)hopscotch_set. + * + * ValueType is what will be stored by hopscotch_hash (usually std::pair + * for a map and Key for a set). + * + * KeySelect should be a FunctionObject which takes a ValueType in parameter and + * returns a reference to the key. + * + * ValueSelect should be a FunctionObject which takes a ValueType in parameter + * and returns a reference to the value. ValueSelect should be void if there is + * no value (in a set for example). + * + * OverflowContainer will be used as containers for overflown elements. Usually + * it should be a list or a set/map. 
+ */ +template +class hopscotch_hash : private Hash, private KeyEqual, private GrowthPolicy { + private: + template + using has_mapped_type = + typename std::integral_constant::value>; + + static_assert( + noexcept(std::declval().bucket_for_hash(std::size_t(0))), + "GrowthPolicy::bucket_for_hash must be noexcept."); + static_assert(noexcept(std::declval().clear()), + "GrowthPolicy::clear must be noexcept."); + + public: + template + class hopscotch_iterator; + + using key_type = typename KeySelect::key_type; + using value_type = ValueType; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using hasher = Hash; + using key_equal = KeyEqual; + using allocator_type = Allocator; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = value_type*; + using const_pointer = const value_type*; + using iterator = hopscotch_iterator; + using const_iterator = hopscotch_iterator; + + private: + using hopscotch_bucket = + tsl::detail_hopscotch_hash::hopscotch_bucket; + using neighborhood_bitmap = typename hopscotch_bucket::neighborhood_bitmap; + + using buckets_allocator = typename std::allocator_traits< + allocator_type>::template rebind_alloc; + using buckets_container_type = + std::vector; + + using overflow_container_type = OverflowContainer; + + static_assert(std::is_same::value, + "OverflowContainer should have ValueType as type."); + + static_assert(std::is_same::value, + "Invalid allocator, not the same type as the value_type."); + + using iterator_buckets = typename buckets_container_type::iterator; + using const_iterator_buckets = + typename buckets_container_type::const_iterator; + + using iterator_overflow = typename overflow_container_type::iterator; + using const_iterator_overflow = + typename overflow_container_type::const_iterator; + + public: + /** + * The `operator*()` and `operator->()` methods return a const reference and + * const pointer respectively to the stored value type. 
+ * + * In case of a map, to get a modifiable reference to the value associated to + * a key (the `.second` in the stored pair), you have to call `value()`. + */ + template + class hopscotch_iterator { + friend class hopscotch_hash; + + private: + using iterator_bucket = typename std::conditional< + IsConst, typename hopscotch_hash::const_iterator_buckets, + typename hopscotch_hash::iterator_buckets>::type; + using iterator_overflow = typename std::conditional< + IsConst, typename hopscotch_hash::const_iterator_overflow, + typename hopscotch_hash::iterator_overflow>::type; + + hopscotch_iterator(iterator_bucket buckets_iterator, + iterator_bucket buckets_end_iterator, + iterator_overflow overflow_iterator) noexcept + : m_buckets_iterator(buckets_iterator), + m_buckets_end_iterator(buckets_end_iterator), + m_overflow_iterator(overflow_iterator) {} + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = const typename hopscotch_hash::value_type; + using difference_type = std::ptrdiff_t; + using reference = value_type&; + using pointer = value_type*; + + hopscotch_iterator() noexcept {} + + // Copy constructor from iterator to const_iterator. 
+ template ::type* = nullptr> + hopscotch_iterator(const hopscotch_iterator& other) noexcept + : m_buckets_iterator(other.m_buckets_iterator), + m_buckets_end_iterator(other.m_buckets_end_iterator), + m_overflow_iterator(other.m_overflow_iterator) {} + + hopscotch_iterator(const hopscotch_iterator& other) = default; + hopscotch_iterator(hopscotch_iterator&& other) = default; + hopscotch_iterator& operator=(const hopscotch_iterator& other) = default; + hopscotch_iterator& operator=(hopscotch_iterator&& other) = default; + + const typename hopscotch_hash::key_type& key() const { + if (m_buckets_iterator != m_buckets_end_iterator) { + return KeySelect()(m_buckets_iterator->value()); + } + + return KeySelect()(*m_overflow_iterator); + } + + template < + class U = ValueSelect, + typename std::enable_if::value>::type* = nullptr> + typename std::conditional::type + value() const { + if (m_buckets_iterator != m_buckets_end_iterator) { + return U()(m_buckets_iterator->value()); + } + + return U()(*m_overflow_iterator); + } + + reference operator*() const { + if (m_buckets_iterator != m_buckets_end_iterator) { + return m_buckets_iterator->value(); + } + + return *m_overflow_iterator; + } + + pointer operator->() const { + if (m_buckets_iterator != m_buckets_end_iterator) { + return std::addressof(m_buckets_iterator->value()); + } + + return std::addressof(*m_overflow_iterator); + } + + hopscotch_iterator& operator++() { + if (m_buckets_iterator == m_buckets_end_iterator) { + ++m_overflow_iterator; + return *this; + } + + do { + ++m_buckets_iterator; + } while (m_buckets_iterator != m_buckets_end_iterator && + m_buckets_iterator->empty()); + + return *this; + } + + hopscotch_iterator operator++(int) { + hopscotch_iterator tmp(*this); + ++*this; + + return tmp; + } + + friend bool operator==(const hopscotch_iterator& lhs, + const hopscotch_iterator& rhs) { + return lhs.m_buckets_iterator == rhs.m_buckets_iterator && + lhs.m_overflow_iterator == rhs.m_overflow_iterator; + } + + 
friend bool operator!=(const hopscotch_iterator& lhs, + const hopscotch_iterator& rhs) { + return !(lhs == rhs); + } + + private: + iterator_bucket m_buckets_iterator; + iterator_bucket m_buckets_end_iterator; + iterator_overflow m_overflow_iterator; + }; + + public: + template < + class OC = OverflowContainer, + typename std::enable_if::value>::type* = nullptr> + hopscotch_hash(size_type bucket_count, const Hash& hash, + const KeyEqual& equal, const Allocator& alloc, + float max_load_factor) + : Hash(hash), + KeyEqual(equal), + GrowthPolicy(bucket_count), + m_buckets_data(alloc), + m_overflow_elements(alloc), + m_buckets(static_empty_bucket_ptr()), + m_nb_elements(0) { + if (bucket_count > max_bucket_count()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The map exceeds its maximum size."); + } + + if (bucket_count > 0) { + static_assert(NeighborhoodSize - 1 > 0, ""); + + // Can't directly construct with the appropriate size in the initializer + // as m_buckets_data(bucket_count, alloc) is not supported by GCC 4.8 + m_buckets_data.resize(bucket_count + NeighborhoodSize - 1); + m_buckets = m_buckets_data.data(); + } + + this->max_load_factor(max_load_factor); + + // Check in the constructor instead of outside of a function to avoid + // compilation issues when value_type is not complete. 
+ static_assert(std::is_nothrow_move_constructible::value || + std::is_copy_constructible::value, + "value_type must be either copy constructible or nothrow " + "move constructible."); + } + + template < + class OC = OverflowContainer, + typename std::enable_if::value>::type* = nullptr> + hopscotch_hash(size_type bucket_count, const Hash& hash, + const KeyEqual& equal, const Allocator& alloc, + float max_load_factor, const typename OC::key_compare& comp) + : Hash(hash), + KeyEqual(equal), + GrowthPolicy(bucket_count), + m_buckets_data(alloc), + m_overflow_elements(comp, alloc), + m_buckets(static_empty_bucket_ptr()), + m_nb_elements(0) { + if (bucket_count > max_bucket_count()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, + "The map exceeds its maximum size."); + } + + if (bucket_count > 0) { + static_assert(NeighborhoodSize - 1 > 0, ""); + + // Can't directly construct with the appropriate size in the initializer + // as m_buckets_data(bucket_count, alloc) is not supported by GCC 4.8 + m_buckets_data.resize(bucket_count + NeighborhoodSize - 1); + m_buckets = m_buckets_data.data(); + } + + this->max_load_factor(max_load_factor); + + // Check in the constructor instead of outside of a function to avoid + // compilation issues when value_type is not complete. + static_assert(std::is_nothrow_move_constructible::value || + std::is_copy_constructible::value, + "value_type must be either copy constructible or nothrow " + "move constructible."); + } + + hopscotch_hash(const hopscotch_hash& other) + : Hash(other), + KeyEqual(other), + GrowthPolicy(other), + m_buckets_data(other.m_buckets_data), + m_overflow_elements(other.m_overflow_elements), + m_buckets(m_buckets_data.empty() ? 
static_empty_bucket_ptr() + : m_buckets_data.data()), + m_nb_elements(other.m_nb_elements), + m_min_load_threshold_rehash(other.m_min_load_threshold_rehash), + m_max_load_threshold_rehash(other.m_max_load_threshold_rehash), + m_max_load_factor(other.m_max_load_factor) {} + + hopscotch_hash(hopscotch_hash&& other) noexcept( + std::is_nothrow_move_constructible::value&& + std::is_nothrow_move_constructible::value&& + std::is_nothrow_move_constructible::value&& std:: + is_nothrow_move_constructible::value&& + std::is_nothrow_move_constructible< + overflow_container_type>::value) + : Hash(std::move(static_cast(other))), + KeyEqual(std::move(static_cast(other))), + GrowthPolicy(std::move(static_cast(other))), + m_buckets_data(std::move(other.m_buckets_data)), + m_overflow_elements(std::move(other.m_overflow_elements)), + m_buckets(m_buckets_data.empty() ? static_empty_bucket_ptr() + : m_buckets_data.data()), + m_nb_elements(other.m_nb_elements), + m_min_load_threshold_rehash(other.m_min_load_threshold_rehash), + m_max_load_threshold_rehash(other.m_max_load_threshold_rehash), + m_max_load_factor(other.m_max_load_factor) { + other.GrowthPolicy::clear(); + other.m_buckets_data.clear(); + other.m_overflow_elements.clear(); + other.m_buckets = static_empty_bucket_ptr(); + other.m_nb_elements = 0; + other.m_min_load_threshold_rehash = 0; + other.m_max_load_threshold_rehash = 0; + } + + hopscotch_hash& operator=(const hopscotch_hash& other) { + if (&other != this) { + Hash::operator=(other); + KeyEqual::operator=(other); + GrowthPolicy::operator=(other); + + m_buckets_data = other.m_buckets_data; + m_overflow_elements = other.m_overflow_elements; + m_buckets = m_buckets_data.empty() ? 
static_empty_bucket_ptr() + : m_buckets_data.data(); + m_nb_elements = other.m_nb_elements; + + m_min_load_threshold_rehash = other.m_min_load_threshold_rehash; + m_max_load_threshold_rehash = other.m_max_load_threshold_rehash; + m_max_load_factor = other.m_max_load_factor; + } + + return *this; + } + + hopscotch_hash& operator=(hopscotch_hash&& other) { + other.swap(*this); + other.clear(); + + return *this; + } + + allocator_type get_allocator() const { + return m_buckets_data.get_allocator(); + } + + /* + * Iterators + */ + iterator begin() noexcept { + auto begin = m_buckets_data.begin(); + while (begin != m_buckets_data.end() && begin->empty()) { + ++begin; + } + + return iterator(begin, m_buckets_data.end(), m_overflow_elements.begin()); + } + + const_iterator begin() const noexcept { return cbegin(); } + + const_iterator cbegin() const noexcept { + auto begin = m_buckets_data.cbegin(); + while (begin != m_buckets_data.cend() && begin->empty()) { + ++begin; + } + + return const_iterator(begin, m_buckets_data.cend(), + m_overflow_elements.cbegin()); + } + + iterator end() noexcept { + return iterator(m_buckets_data.end(), m_buckets_data.end(), + m_overflow_elements.end()); + } + + const_iterator end() const noexcept { return cend(); } + + const_iterator cend() const noexcept { + return const_iterator(m_buckets_data.cend(), m_buckets_data.cend(), + m_overflow_elements.cend()); + } + + /* + * Capacity + */ + bool empty() const noexcept { return m_nb_elements == 0; } + + size_type size() const noexcept { return m_nb_elements; } + + size_type max_size() const noexcept { return m_buckets_data.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { + for (auto& bucket : m_buckets_data) { + bucket.clear(); + } + + m_overflow_elements.clear(); + m_nb_elements = 0; + } + + std::pair insert(const value_type& value) { + return insert_impl(value); + } + + template ::value>::type* = nullptr> + std::pair insert(P&& value) { + return 
insert_impl(value_type(std::forward

(value))); + } + + std::pair insert(value_type&& value) { + return insert_impl(std::move(value)); + } + + iterator insert(const_iterator hint, const value_type& value) { + if (hint != cend() && + compare_keys(KeySelect()(*hint), KeySelect()(value))) { + return mutable_iterator(hint); + } + + return insert(value).first; + } + + template ::value>::type* = nullptr> + iterator insert(const_iterator hint, P&& value) { + return emplace_hint(hint, std::forward

(value)); + } + + iterator insert(const_iterator hint, value_type&& value) { + if (hint != cend() && + compare_keys(KeySelect()(*hint), KeySelect()(value))) { + return mutable_iterator(hint); + } + + return insert(std::move(value)).first; + } + + template + void insert(InputIt first, InputIt last) { + if (std::is_base_of< + std::forward_iterator_tag, + typename std::iterator_traits::iterator_category>::value) { + const auto nb_elements_insert = std::distance(first, last); + const std::size_t nb_elements_in_buckets = + m_nb_elements - m_overflow_elements.size(); + const std::size_t nb_free_buckets = + m_max_load_threshold_rehash - nb_elements_in_buckets; + tsl_hh_assert(m_nb_elements >= m_overflow_elements.size()); + tsl_hh_assert(m_max_load_threshold_rehash >= nb_elements_in_buckets); + + if (nb_elements_insert > 0 && + nb_free_buckets < std::size_t(nb_elements_insert)) { + reserve(nb_elements_in_buckets + std::size_t(nb_elements_insert)); + } + } + + for (; first != last; ++first) { + insert(*first); + } + } + + template + std::pair insert_or_assign(const key_type& k, M&& obj) { + return insert_or_assign_impl(k, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& k, M&& obj) { + return insert_or_assign_impl(std::move(k), std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) { + if (hint != cend() && compare_keys(KeySelect()(*hint), k)) { + auto it = mutable_iterator(hint); + it.value() = std::forward(obj); + + return it; + } + + return insert_or_assign(k, std::forward(obj)).first; + } + + template + iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) { + if (hint != cend() && compare_keys(KeySelect()(*hint), k)) { + auto it = mutable_iterator(hint); + it.value() = std::forward(obj); + + return it; + } + + return insert_or_assign(std::move(k), std::forward(obj)).first; + } + + template + std::pair emplace(Args&&... 
args) { + return insert(value_type(std::forward(args)...)); + } + + template + iterator emplace_hint(const_iterator hint, Args&&... args) { + return insert(hint, value_type(std::forward(args)...)); + } + + template + std::pair try_emplace(const key_type& k, Args&&... args) { + return try_emplace_impl(k, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& k, Args&&... args) { + return try_emplace_impl(std::move(k), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { + if (hint != cend() && compare_keys(KeySelect()(*hint), k)) { + return mutable_iterator(hint); + } + + return try_emplace(k, std::forward(args)...).first; + } + + template + iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) { + if (hint != cend() && compare_keys(KeySelect()(*hint), k)) { + return mutable_iterator(hint); + } + + return try_emplace(std::move(k), std::forward(args)...).first; + } + + /** + * Here to avoid `template size_type erase(const K& key)` being used + * when we use an iterator instead of a const_iterator. 
+ */ + iterator erase(iterator pos) { return erase(const_iterator(pos)); } + + iterator erase(const_iterator pos) { + const std::size_t ibucket_for_hash = bucket_for_hash(hash_key(pos.key())); + + if (pos.m_buckets_iterator != pos.m_buckets_end_iterator) { + auto it_bucket = + m_buckets_data.begin() + + std::distance(m_buckets_data.cbegin(), pos.m_buckets_iterator); + erase_from_bucket(*it_bucket, ibucket_for_hash); + + return ++iterator(it_bucket, m_buckets_data.end(), + m_overflow_elements.begin()); + } else { + auto it_next_overflow = + erase_from_overflow(pos.m_overflow_iterator, ibucket_for_hash); + return iterator(m_buckets_data.end(), m_buckets_data.end(), + it_next_overflow); + } + } + + iterator erase(const_iterator first, const_iterator last) { + if (first == last) { + return mutable_iterator(first); + } + + auto to_delete = erase(first); + while (to_delete != last) { + to_delete = erase(to_delete); + } + + return to_delete; + } + + template + size_type erase(const K& key) { + return erase(key, hash_key(key)); + } + + template + size_type erase(const K& key, std::size_t hash) { + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + hopscotch_bucket* bucket_found = + find_in_buckets(key, hash, m_buckets + ibucket_for_hash); + if (bucket_found != nullptr) { + erase_from_bucket(*bucket_found, ibucket_for_hash); + + return 1; + } + + if (m_buckets[ibucket_for_hash].has_overflow()) { + auto it_overflow = find_in_overflow(key); + if (it_overflow != m_overflow_elements.end()) { + erase_from_overflow(it_overflow, ibucket_for_hash); + + return 1; + } + } + + return 0; + } + + void swap(hopscotch_hash& other) { + using std::swap; + + swap(static_cast(*this), static_cast(other)); + swap(static_cast(*this), static_cast(other)); + swap(static_cast(*this), static_cast(other)); + swap(m_buckets_data, other.m_buckets_data); + swap(m_overflow_elements, other.m_overflow_elements); + swap(m_buckets, other.m_buckets); + swap(m_nb_elements, other.m_nb_elements); + 
swap(m_min_load_threshold_rehash, other.m_min_load_threshold_rehash); + swap(m_max_load_threshold_rehash, other.m_max_load_threshold_rehash); + swap(m_max_load_factor, other.m_max_load_factor); + } + + /* + * Lookup + */ + template ::value>::type* = nullptr> + typename U::value_type& at(const K& key) { + return at(key, hash_key(key)); + } + + template ::value>::type* = nullptr> + typename U::value_type& at(const K& key, std::size_t hash) { + return const_cast( + static_cast(this)->at(key, hash)); + } + + template ::value>::type* = nullptr> + const typename U::value_type& at(const K& key) const { + return at(key, hash_key(key)); + } + + template ::value>::type* = nullptr> + const typename U::value_type& at(const K& key, std::size_t hash) const { + using T = typename U::value_type; + + const T* value = + find_value_impl(key, hash, m_buckets + bucket_for_hash(hash)); + if (value == nullptr) { + TSL_HH_THROW_OR_TERMINATE(std::out_of_range, "Couldn't find key."); + } else { + return *value; + } + } + + template ::value>::type* = nullptr> + typename U::value_type& operator[](K&& key) { + using T = typename U::value_type; + + const std::size_t hash = hash_key(key); + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + T* value = find_value_impl(key, hash, m_buckets + ibucket_for_hash); + if (value != nullptr) { + return *value; + } else { + return insert_value(ibucket_for_hash, hash, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple()) + .first.value(); + } + } + + template + size_type count(const K& key) const { + return count(key, hash_key(key)); + } + + template + size_type count(const K& key, std::size_t hash) const { + return count_impl(key, hash, m_buckets + bucket_for_hash(hash)); + } + + template + iterator find(const K& key) { + return find(key, hash_key(key)); + } + + template + iterator find(const K& key, std::size_t hash) { + return find_impl(key, hash, m_buckets + bucket_for_hash(hash)); + } + + template 
+ const_iterator find(const K& key) const { + return find(key, hash_key(key)); + } + + template + const_iterator find(const K& key, std::size_t hash) const { + return find_impl(key, hash, m_buckets + bucket_for_hash(hash)); + } + + template + bool contains(const K& key) const { + return contains(key, hash_key(key)); + } + + template + bool contains(const K& key, std::size_t hash) const { + return count(key, hash) != 0; + } + + template + std::pair equal_range(const K& key) { + return equal_range(key, hash_key(key)); + } + + template + std::pair equal_range(const K& key, std::size_t hash) { + iterator it = find(key, hash); + return std::make_pair(it, (it == end()) ? it : std::next(it)); + } + + template + std::pair equal_range(const K& key) const { + return equal_range(key, hash_key(key)); + } + + template + std::pair equal_range( + const K& key, std::size_t hash) const { + const_iterator it = find(key, hash); + return std::make_pair(it, (it == cend()) ? it : std::next(it)); + } + + /* + * Bucket interface + */ + size_type bucket_count() const { + /* + * So that the last bucket can have NeighborhoodSize neighbors, the size of + * the bucket array is a little bigger than the real number of buckets when + * not empty. We could use some of the buckets at the beginning, but it is + * faster this way as we avoid extra checks. 
+ */ + if (m_buckets_data.empty()) { + return 0; + } + + return m_buckets_data.size() - NeighborhoodSize + 1; + } + + size_type max_bucket_count() const { + const std::size_t max_bucket_count = + std::min(GrowthPolicy::max_bucket_count(), m_buckets_data.max_size()); + return max_bucket_count - NeighborhoodSize + 1; + } + + /* + * Hash policy + */ + float load_factor() const { + if (bucket_count() == 0) { + return 0; + } + + return float(m_nb_elements) / float(bucket_count()); + } + + float max_load_factor() const { return m_max_load_factor; } + + void max_load_factor(float ml) { + m_max_load_factor = std::max(0.1f, std::min(ml, 0.95f)); + m_min_load_threshold_rehash = + size_type(float(bucket_count()) * MIN_LOAD_FACTOR_FOR_REHASH); + m_max_load_threshold_rehash = + size_type(float(bucket_count()) * m_max_load_factor); + } + + void rehash(size_type count_) { + count_ = std::max(count_, + size_type(std::ceil(float(size()) / max_load_factor()))); + rehash_impl(count_); + } + + void reserve(size_type count_) { + rehash(size_type(std::ceil(float(count_) / max_load_factor()))); + } + + /* + * Observers + */ + hasher hash_function() const { return static_cast(*this); } + + key_equal key_eq() const { return static_cast(*this); } + + /* + * Other + */ + iterator mutable_iterator(const_iterator pos) { + if (pos.m_buckets_iterator != pos.m_buckets_end_iterator) { + // Get a non-const iterator + auto it = m_buckets_data.begin() + + std::distance(m_buckets_data.cbegin(), pos.m_buckets_iterator); + return iterator(it, m_buckets_data.end(), m_overflow_elements.begin()); + } else { + // Get a non-const iterator + auto it = mutable_overflow_iterator(pos.m_overflow_iterator); + return iterator(m_buckets_data.end(), m_buckets_data.end(), it); + } + } + + size_type overflow_size() const noexcept { + return m_overflow_elements.size(); + } + + template ::value>::type* = nullptr> + typename U::key_compare key_comp() const { + return m_overflow_elements.key_comp(); + } + + private: + 
template + std::size_t hash_key(const K& key) const { + return Hash::operator()(key); + } + + template + bool compare_keys(const K1& key1, const K2& key2) const { + return KeyEqual::operator()(key1, key2); + } + + std::size_t bucket_for_hash(std::size_t hash) const { + const std::size_t bucket = GrowthPolicy::bucket_for_hash(hash); + tsl_hh_assert(bucket < m_buckets_data.size() || + (bucket == 0 && m_buckets_data.empty())); + + return bucket; + } + + template ::value>::type* = nullptr> + void rehash_impl(size_type count_) { + hopscotch_hash new_map = new_hopscotch_hash(count_); + + if (!m_overflow_elements.empty()) { + new_map.m_overflow_elements.swap(m_overflow_elements); + new_map.m_nb_elements += new_map.m_overflow_elements.size(); + + for (const value_type& value : new_map.m_overflow_elements) { + const std::size_t ibucket_for_hash = + new_map.bucket_for_hash(new_map.hash_key(KeySelect()(value))); + new_map.m_buckets[ibucket_for_hash].set_overflow(true); + } + } + +#ifndef TSL_HH_NO_EXCEPTIONS + try { +#endif + const bool use_stored_hash = + USE_STORED_HASH_ON_REHASH(new_map.bucket_count()); + for (auto it_bucket = m_buckets_data.begin(); + it_bucket != m_buckets_data.end(); ++it_bucket) { + if (it_bucket->empty()) { + continue; + } + + const std::size_t hash = + use_stored_hash ? it_bucket->truncated_bucket_hash() + : new_map.hash_key(KeySelect()(it_bucket->value())); + const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash); + + new_map.insert_value(ibucket_for_hash, hash, + std::move(it_bucket->value())); + + erase_from_bucket(*it_bucket, bucket_for_hash(hash)); + } +#ifndef TSL_HH_NO_EXCEPTIONS + } + /* + * The call to insert_value may throw an exception if an element is added to + * the overflow list and the memory allocation fails. Rollback the elements + * in this case. + */ + catch (...) 
{ + m_overflow_elements.swap(new_map.m_overflow_elements); + + const bool use_stored_hash = + USE_STORED_HASH_ON_REHASH(new_map.bucket_count()); + for (auto it_bucket = new_map.m_buckets_data.begin(); + it_bucket != new_map.m_buckets_data.end(); ++it_bucket) { + if (it_bucket->empty()) { + continue; + } + + const std::size_t hash = + use_stored_hash ? it_bucket->truncated_bucket_hash() + : hash_key(KeySelect()(it_bucket->value())); + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + // The elements we insert were not in the overflow list before the + // switch. They will not be go in the overflow list if we rollback the + // switch. + insert_value(ibucket_for_hash, hash, std::move(it_bucket->value())); + } + + throw; + } +#endif + + new_map.swap(*this); + } + + template ::value && + !std::is_nothrow_move_constructible::value>::type* = nullptr> + void rehash_impl(size_type count_) { + hopscotch_hash new_map = new_hopscotch_hash(count_); + + const bool use_stored_hash = + USE_STORED_HASH_ON_REHASH(new_map.bucket_count()); + for (const hopscotch_bucket& bucket : m_buckets_data) { + if (bucket.empty()) { + continue; + } + + const std::size_t hash = + use_stored_hash ? 
bucket.truncated_bucket_hash() + : new_map.hash_key(KeySelect()(bucket.value())); + const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash); + + new_map.insert_value(ibucket_for_hash, hash, bucket.value()); + } + + for (const value_type& value : m_overflow_elements) { + const std::size_t hash = new_map.hash_key(KeySelect()(value)); + const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash); + + new_map.insert_value(ibucket_for_hash, hash, value); + } + + new_map.swap(*this); + } + +#ifdef TSL_HH_NO_RANGE_ERASE_WITH_CONST_ITERATOR + iterator_overflow mutable_overflow_iterator(const_iterator_overflow it) { + return std::next(m_overflow_elements.begin(), + std::distance(m_overflow_elements.cbegin(), it)); + } +#else + iterator_overflow mutable_overflow_iterator(const_iterator_overflow it) { + return m_overflow_elements.erase(it, it); + } +#endif + + // iterator is in overflow list + iterator_overflow erase_from_overflow(const_iterator_overflow pos, + std::size_t ibucket_for_hash) { +#ifdef TSL_HH_NO_RANGE_ERASE_WITH_CONST_ITERATOR + auto it_next = m_overflow_elements.erase(mutable_overflow_iterator(pos)); +#else + auto it_next = m_overflow_elements.erase(pos); +#endif + m_nb_elements--; + + // Check if we can remove the overflow flag + tsl_hh_assert(m_buckets[ibucket_for_hash].has_overflow()); + for (const value_type& value : m_overflow_elements) { + const std::size_t bucket_for_value = + bucket_for_hash(hash_key(KeySelect()(value))); + if (bucket_for_value == ibucket_for_hash) { + return it_next; + } + } + + m_buckets[ibucket_for_hash].set_overflow(false); + return it_next; + } + + /** + * bucket_for_value is the bucket in which the value is. + * ibucket_for_hash is the bucket where the value belongs. 
+ */ + void erase_from_bucket(hopscotch_bucket& bucket_for_value, + std::size_t ibucket_for_hash) noexcept { + const std::size_t ibucket_for_value = + std::distance(m_buckets_data.data(), &bucket_for_value); + tsl_hh_assert(ibucket_for_value >= ibucket_for_hash); + + bucket_for_value.remove_value(); + m_buckets[ibucket_for_hash].toggle_neighbor_presence(ibucket_for_value - + ibucket_for_hash); + m_nb_elements--; + } + + template + std::pair insert_or_assign_impl(K&& key, M&& obj) { + auto it = try_emplace_impl(std::forward(key), std::forward(obj)); + if (!it.second) { + it.first.value() = std::forward(obj); + } + + return it; + } + + template + std::pair try_emplace_impl(P&& key, Args&&... args_value) { + const std::size_t hash = hash_key(key); + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + // Check if already presents + auto it_find = find_impl(key, hash, m_buckets + ibucket_for_hash); + if (it_find != end()) { + return std::make_pair(it_find, false); + } + + return insert_value( + ibucket_for_hash, hash, std::piecewise_construct, + std::forward_as_tuple(std::forward

(key)), + std::forward_as_tuple(std::forward(args_value)...)); + } + + template + std::pair insert_impl(P&& value) { + const std::size_t hash = hash_key(KeySelect()(value)); + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + // Check if already presents + auto it_find = + find_impl(KeySelect()(value), hash, m_buckets + ibucket_for_hash); + if (it_find != end()) { + return std::make_pair(it_find, false); + } + + return insert_value(ibucket_for_hash, hash, std::forward

(value)); + } + + template + std::pair insert_value(std::size_t ibucket_for_hash, + std::size_t hash, + Args&&... value_type_args) { + if ((m_nb_elements - m_overflow_elements.size()) >= + m_max_load_threshold_rehash) { + rehash(GrowthPolicy::next_bucket_count()); + ibucket_for_hash = bucket_for_hash(hash); + } + + std::size_t ibucket_empty = find_empty_bucket(ibucket_for_hash); + if (ibucket_empty < m_buckets_data.size()) { + do { + tsl_hh_assert(ibucket_empty >= ibucket_for_hash); + + // Empty bucket is in range of NeighborhoodSize, use it + if (ibucket_empty - ibucket_for_hash < NeighborhoodSize) { + auto it = insert_in_bucket(ibucket_empty, ibucket_for_hash, hash, + std::forward(value_type_args)...); + return std::make_pair( + iterator(it, m_buckets_data.end(), m_overflow_elements.begin()), + true); + } + } + // else, try to swap values to get a closer empty bucket + while (swap_empty_bucket_closer(ibucket_empty)); + } + + // Load factor is too low or a rehash will not change the neighborhood, put + // the value in overflow list + if (size() < m_min_load_threshold_rehash || + !will_neighborhood_change_on_rehash(ibucket_for_hash)) { + auto it = insert_in_overflow(ibucket_for_hash, + std::forward(value_type_args)...); + return std::make_pair( + iterator(m_buckets_data.end(), m_buckets_data.end(), it), true); + } + + rehash(GrowthPolicy::next_bucket_count()); + ibucket_for_hash = bucket_for_hash(hash); + + return insert_value(ibucket_for_hash, hash, + std::forward(value_type_args)...); + } + + /* + * Return true if a rehash will change the position of a key-value in the + * neighborhood of ibucket_neighborhood_check. In this case a rehash is needed + * instead of puting the value in overflow list. 
+ */ + bool will_neighborhood_change_on_rehash( + size_t ibucket_neighborhood_check) const { + std::size_t expand_bucket_count = GrowthPolicy::next_bucket_count(); + GrowthPolicy expand_growth_policy(expand_bucket_count); + + const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(expand_bucket_count); + for (size_t ibucket = ibucket_neighborhood_check; + ibucket < m_buckets_data.size() && + (ibucket - ibucket_neighborhood_check) < NeighborhoodSize; + ++ibucket) { + tsl_hh_assert(!m_buckets[ibucket].empty()); + + const size_t hash = + use_stored_hash ? m_buckets[ibucket].truncated_bucket_hash() + : hash_key(KeySelect()(m_buckets[ibucket].value())); + if (bucket_for_hash(hash) != expand_growth_policy.bucket_for_hash(hash)) { + return true; + } + } + + return false; + } + + /* + * Return the index of an empty bucket in m_buckets_data. + * If none, the returned index equals m_buckets_data.size() + */ + std::size_t find_empty_bucket(std::size_t ibucket_start) const { + const std::size_t limit = std::min( + ibucket_start + MAX_PROBES_FOR_EMPTY_BUCKET, m_buckets_data.size()); + for (; ibucket_start < limit; ibucket_start++) { + if (m_buckets[ibucket_start].empty()) { + return ibucket_start; + } + } + + return m_buckets_data.size(); + } + + /* + * Insert value in ibucket_empty where value originally belongs to + * ibucket_for_hash + * + * Return bucket iterator to ibucket_empty + */ + template + iterator_buckets insert_in_bucket(std::size_t ibucket_empty, + std::size_t ibucket_for_hash, + std::size_t hash, + Args&&... 
value_type_args) { + tsl_hh_assert(ibucket_empty >= ibucket_for_hash); + tsl_hh_assert(m_buckets[ibucket_empty].empty()); + m_buckets[ibucket_empty].set_value_of_empty_bucket( + hopscotch_bucket::truncate_hash(hash), + std::forward(value_type_args)...); + + tsl_hh_assert(!m_buckets[ibucket_for_hash].empty()); + m_buckets[ibucket_for_hash].toggle_neighbor_presence(ibucket_empty - + ibucket_for_hash); + m_nb_elements++; + + return m_buckets_data.begin() + ibucket_empty; + } + + template < + class... Args, class U = OverflowContainer, + typename std::enable_if::value>::type* = nullptr> + iterator_overflow insert_in_overflow(std::size_t ibucket_for_hash, + Args&&... value_type_args) { + auto it = m_overflow_elements.emplace( + m_overflow_elements.end(), std::forward(value_type_args)...); + + m_buckets[ibucket_for_hash].set_overflow(true); + m_nb_elements++; + + return it; + } + + template ::value>::type* = nullptr> + iterator_overflow insert_in_overflow(std::size_t ibucket_for_hash, + Args&&... value_type_args) { + auto it = + m_overflow_elements.emplace(std::forward(value_type_args)...) + .first; + + m_buckets[ibucket_for_hash].set_overflow(true); + m_nb_elements++; + + return it; + } + + /* + * Try to swap the bucket ibucket_empty_in_out with a bucket preceding it + * while keeping the neighborhood conditions correct. + * + * If a swap was possible, the position of ibucket_empty_in_out will be closer + * to 0 and true will re returned. 
+ */ + bool swap_empty_bucket_closer(std::size_t& ibucket_empty_in_out) { + tsl_hh_assert(ibucket_empty_in_out >= NeighborhoodSize); + const std::size_t neighborhood_start = + ibucket_empty_in_out - NeighborhoodSize + 1; + + for (std::size_t to_check = neighborhood_start; + to_check < ibucket_empty_in_out; to_check++) { + neighborhood_bitmap neighborhood_infos = + m_buckets[to_check].neighborhood_infos(); + std::size_t to_swap = to_check; + + while (neighborhood_infos != 0 && to_swap < ibucket_empty_in_out) { + if ((neighborhood_infos & 1) == 1) { + tsl_hh_assert(m_buckets[ibucket_empty_in_out].empty()); + tsl_hh_assert(!m_buckets[to_swap].empty()); + + m_buckets[to_swap].swap_value_into_empty_bucket( + m_buckets[ibucket_empty_in_out]); + + tsl_hh_assert(!m_buckets[to_check].check_neighbor_presence( + ibucket_empty_in_out - to_check)); + tsl_hh_assert( + m_buckets[to_check].check_neighbor_presence(to_swap - to_check)); + + m_buckets[to_check].toggle_neighbor_presence(ibucket_empty_in_out - + to_check); + m_buckets[to_check].toggle_neighbor_presence(to_swap - to_check); + + ibucket_empty_in_out = to_swap; + + return true; + } + + to_swap++; + neighborhood_infos = neighborhood_bitmap(neighborhood_infos >> 1); + } + } + + return false; + } + + template ::value>::type* = nullptr> + typename U::value_type* find_value_impl(const K& key, std::size_t hash, + hopscotch_bucket* bucket_for_hash) { + return const_cast( + static_cast(this)->find_value_impl( + key, hash, bucket_for_hash)); + } + + /* + * Avoid the creation of an iterator to just get the value for operator[] and + * at() in maps. Faster this way. + * + * Return null if no value for the key (TODO use std::optional when + * available). 
+ */ + template ::value>::type* = nullptr> + const typename U::value_type* find_value_impl( + const K& key, std::size_t hash, + const hopscotch_bucket* bucket_for_hash) const { + const hopscotch_bucket* bucket_found = + find_in_buckets(key, hash, bucket_for_hash); + if (bucket_found != nullptr) { + return std::addressof(ValueSelect()(bucket_found->value())); + } + + if (bucket_for_hash->has_overflow()) { + auto it_overflow = find_in_overflow(key); + if (it_overflow != m_overflow_elements.end()) { + return std::addressof(ValueSelect()(*it_overflow)); + } + } + + return nullptr; + } + + template + size_type count_impl(const K& key, std::size_t hash, + const hopscotch_bucket* bucket_for_hash) const { + if (find_in_buckets(key, hash, bucket_for_hash) != nullptr) { + return 1; + } else if (bucket_for_hash->has_overflow() && + find_in_overflow(key) != m_overflow_elements.cend()) { + return 1; + } else { + return 0; + } + } + + template + iterator find_impl(const K& key, std::size_t hash, + hopscotch_bucket* bucket_for_hash) { + hopscotch_bucket* bucket_found = + find_in_buckets(key, hash, bucket_for_hash); + if (bucket_found != nullptr) { + return iterator(m_buckets_data.begin() + + std::distance(m_buckets_data.data(), bucket_found), + m_buckets_data.end(), m_overflow_elements.begin()); + } + + if (!bucket_for_hash->has_overflow()) { + return end(); + } + + return iterator(m_buckets_data.end(), m_buckets_data.end(), + find_in_overflow(key)); + } + + template + const_iterator find_impl(const K& key, std::size_t hash, + const hopscotch_bucket* bucket_for_hash) const { + const hopscotch_bucket* bucket_found = + find_in_buckets(key, hash, bucket_for_hash); + if (bucket_found != nullptr) { + return const_iterator( + m_buckets_data.cbegin() + + std::distance(m_buckets_data.data(), bucket_found), + m_buckets_data.cend(), m_overflow_elements.cbegin()); + } + + if (!bucket_for_hash->has_overflow()) { + return cend(); + } + + return const_iterator(m_buckets_data.cend(), 
m_buckets_data.cend(), + find_in_overflow(key)); + } + + template + hopscotch_bucket* find_in_buckets(const K& key, std::size_t hash, + hopscotch_bucket* bucket_for_hash) { + const hopscotch_bucket* bucket_found = + static_cast(this)->find_in_buckets( + key, hash, bucket_for_hash); + return const_cast(bucket_found); + } + + /** + * Return a pointer to the bucket which has the value, nullptr otherwise. + */ + template + const hopscotch_bucket* find_in_buckets( + const K& key, std::size_t hash, + const hopscotch_bucket* bucket_for_hash) const { + (void)hash; // Avoid warning of unused variable when StoreHash is false; + + // TODO Try to optimize the function. + // I tried to use ffs and __builtin_ffs functions but I could not reduce + // the time the function takes with -march=native + + neighborhood_bitmap neighborhood_infos = + bucket_for_hash->neighborhood_infos(); + while (neighborhood_infos != 0) { + if ((neighborhood_infos & 1) == 1) { + // Check StoreHash before calling bucket_hash_equal. Functionally it + // doesn't change anythin. If StoreHash is false, bucket_hash_equal is a + // no-op. Avoiding the call is there to help GCC optimizes `hash` + // parameter away, it seems to not be able to do without this hint. 
+ if ((!StoreHash || bucket_for_hash->bucket_hash_equal(hash)) && + compare_keys(KeySelect()(bucket_for_hash->value()), key)) { + return bucket_for_hash; + } + } + + ++bucket_for_hash; + neighborhood_infos = neighborhood_bitmap(neighborhood_infos >> 1); + } + + return nullptr; + } + + template < + class K, class U = OverflowContainer, + typename std::enable_if::value>::type* = nullptr> + iterator_overflow find_in_overflow(const K& key) { + return std::find_if(m_overflow_elements.begin(), m_overflow_elements.end(), + [&](const value_type& value) { + return compare_keys(key, KeySelect()(value)); + }); + } + + template < + class K, class U = OverflowContainer, + typename std::enable_if::value>::type* = nullptr> + const_iterator_overflow find_in_overflow(const K& key) const { + return std::find_if(m_overflow_elements.cbegin(), + m_overflow_elements.cend(), + [&](const value_type& value) { + return compare_keys(key, KeySelect()(value)); + }); + } + + template ::value>::type* = nullptr> + iterator_overflow find_in_overflow(const K& key) { + return m_overflow_elements.find(key); + } + + template ::value>::type* = nullptr> + const_iterator_overflow find_in_overflow(const K& key) const { + return m_overflow_elements.find(key); + } + + template < + class U = OverflowContainer, + typename std::enable_if::value>::type* = nullptr> + hopscotch_hash new_hopscotch_hash(size_type bucket_count) { + return hopscotch_hash(bucket_count, static_cast(*this), + static_cast(*this), get_allocator(), + m_max_load_factor); + } + + template ::value>::type* = nullptr> + hopscotch_hash new_hopscotch_hash(size_type bucket_count) { + return hopscotch_hash(bucket_count, static_cast(*this), + static_cast(*this), get_allocator(), + m_max_load_factor, m_overflow_elements.key_comp()); + } + + public: + static const size_type DEFAULT_INIT_BUCKETS_SIZE = 0; + static constexpr float DEFAULT_MAX_LOAD_FACTOR = + (NeighborhoodSize <= 30) ? 
0.8f : 0.9f; + + private: + static const std::size_t MAX_PROBES_FOR_EMPTY_BUCKET = 12 * NeighborhoodSize; + static constexpr float MIN_LOAD_FACTOR_FOR_REHASH = 0.1f; + + /** + * We can only use the hash on rehash if the size of the hash type is the same + * as the stored one or if we use a power of two modulo. In the case of the + * power of two modulo, we just mask the least significant bytes, we just have + * to check that the truncated_hash_type didn't truncated too much bytes. + */ + template ::value>::type* = nullptr> + static bool USE_STORED_HASH_ON_REHASH(size_type /*bucket_count*/) { + return StoreHash; + } + + template ::value>::type* = nullptr> + static bool USE_STORED_HASH_ON_REHASH(size_type bucket_count) { + (void)bucket_count; + if (StoreHash && is_power_of_two_policy::value) { + tsl_hh_assert(bucket_count > 0); + return (bucket_count - 1) <= + std::numeric_limits::max(); + } else { + return false; + } + } + + /** + * Return an always valid pointer to an static empty hopscotch_bucket. + */ + hopscotch_bucket* static_empty_bucket_ptr() { + static hopscotch_bucket empty_bucket; + return &empty_bucket; + } + + private: + buckets_container_type m_buckets_data; + overflow_container_type m_overflow_elements; + + /** + * Points to m_buckets_data.data() if !m_buckets_data.empty() otherwise points + * to static_empty_bucket_ptr. This variable is useful to avoid the cost of + * checking if m_buckets_data is empty when trying to find an element. + * + * TODO Remove m_buckets_data and only use a pointer+size instead of a + * pointer+vector to save some space in the hopscotch_hash object. + */ + hopscotch_bucket* m_buckets; + + size_type m_nb_elements; + + /** + * Min size of the hash table before a rehash can occurs automatically (except + * if m_max_load_threshold_rehash os reached). If the neighborhood of a bucket + * is full before the min is reacher, the elements are put into + * m_overflow_elements. 
+ */ + size_type m_min_load_threshold_rehash; + + /** + * Max size of the hash table before a rehash occurs automatically to grow the + * table. + */ + size_type m_max_load_threshold_rehash; + + float m_max_load_factor; +}; + +} // end namespace detail_hopscotch_hash + +} // end namespace tsl + +#endif diff --git a/Sdk/External/HopscotchMap/hopscotch_map.h b/Sdk/External/HopscotchMap/hopscotch_map.h new file mode 100644 index 0000000..15c9e39 --- /dev/null +++ b/Sdk/External/HopscotchMap/hopscotch_map.h @@ -0,0 +1,735 @@ +/** + * MIT License + * + * Copyright (c) 2017 Thibaut Goetghebuer-Planchon + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_HOPSCOTCH_MAP_H +#define TSL_HOPSCOTCH_MAP_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hopscotch_hash.h" + +namespace tsl { + +/** + * Implementation of a hash map using the hopscotch hashing algorithm. 
+ * + * The Key and the value T must be either nothrow move-constructible, + * copy-constructible or both. + * + * The size of the neighborhood (NeighborhoodSize) must be > 0 and <= 62 if + * StoreHash is false. When StoreHash is true, 32-bits of the hash will be + * stored alongside the neighborhood limiting the NeighborhoodSize to <= 30. + * There is no memory usage difference between 'NeighborhoodSize 62; StoreHash + * false' and 'NeighborhoodSize 30; StoreHash true'. + * + * Storing the hash may improve performance on insert during the rehash process + * if the hash takes time to compute. It may also improve read performance if + * the KeyEqual function takes time (or incurs a cache-miss). If used with + * simple Hash and KeyEqual it may slow things down. + * + * StoreHash can only be set if the GrowthPolicy is set to + * tsl::power_of_two_growth_policy. + * + * GrowthPolicy defines how the map grows and consequently how a hash value is + * mapped to a bucket. By default the map uses tsl::power_of_two_growth_policy. + * This policy keeps the number of buckets to a power of two and uses a mask to + * map the hash to a bucket instead of the slow modulo. You may define your own + * growth policy, check tsl::power_of_two_growth_policy for the interface. + * + * If the destructors of Key or T throw an exception, behaviour of the class is + * undefined. + * + * Iterators invalidation: + * - clear, operator=, reserve, rehash: always invalidate the iterators. + * - insert, emplace, emplace_hint, operator[]: if there is an effective + * insert, invalidate the iterators if a displacement is needed to resolve a + * collision (which mean that most of the time, insert will invalidate the + * iterators). Or if there is a rehash. + * - erase: iterator on the erased element is the only one which become + * invalid. 
+ */ +template , + class KeyEqual = std::equal_to, + class Allocator = std::allocator>, + unsigned int NeighborhoodSize = 62, bool StoreHash = false, + class GrowthPolicy = tsl::hh::power_of_two_growth_policy<2>> +class hopscotch_map { + private: + template + using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent; + + class KeySelect { + public: + using key_type = Key; + + const key_type& operator()(const std::pair& key_value) const { + return key_value.first; + } + + key_type& operator()(std::pair& key_value) { + return key_value.first; + } + }; + + class ValueSelect { + public: + using value_type = T; + + const value_type& operator()(const std::pair& key_value) const { + return key_value.second; + } + + value_type& operator()(std::pair& key_value) { + return key_value.second; + } + }; + + using overflow_container_type = std::list, Allocator>; + using ht = detail_hopscotch_hash::hopscotch_hash< + std::pair, KeySelect, ValueSelect, Hash, KeyEqual, Allocator, + NeighborhoodSize, StoreHash, GrowthPolicy, overflow_container_type>; + + public: + using key_type = typename ht::key_type; + using mapped_type = T; + using value_type = typename ht::value_type; + using size_type = typename ht::size_type; + using difference_type = typename ht::difference_type; + using hasher = typename ht::hasher; + using key_equal = typename ht::key_equal; + using allocator_type = typename ht::allocator_type; + using reference = typename ht::reference; + using const_reference = typename ht::const_reference; + using pointer = typename ht::pointer; + using const_pointer = typename ht::const_pointer; + using iterator = typename ht::iterator; + using const_iterator = typename ht::const_iterator; + + /* + * Constructors + */ + hopscotch_map() : hopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE) {} + + explicit hopscotch_map(size_type bucket_count, const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) + : m_ht(bucket_count, hash, 
equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) {} + + hopscotch_map(size_type bucket_count, const Allocator& alloc) + : hopscotch_map(bucket_count, Hash(), KeyEqual(), alloc) {} + + hopscotch_map(size_type bucket_count, const Hash& hash, + const Allocator& alloc) + : hopscotch_map(bucket_count, hash, KeyEqual(), alloc) {} + + explicit hopscotch_map(const Allocator& alloc) + : hopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {} + + template + hopscotch_map(InputIt first, InputIt last, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) + : hopscotch_map(bucket_count, hash, equal, alloc) { + insert(first, last); + } + + template + hopscotch_map(InputIt first, InputIt last, size_type bucket_count, + const Allocator& alloc) + : hopscotch_map(first, last, bucket_count, Hash(), KeyEqual(), alloc) {} + + template + hopscotch_map(InputIt first, InputIt last, size_type bucket_count, + const Hash& hash, const Allocator& alloc) + : hopscotch_map(first, last, bucket_count, hash, KeyEqual(), alloc) {} + + hopscotch_map(std::initializer_list init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) + : hopscotch_map(init.begin(), init.end(), bucket_count, hash, equal, + alloc) {} + + hopscotch_map(std::initializer_list init, size_type bucket_count, + const Allocator& alloc) + : hopscotch_map(init.begin(), init.end(), bucket_count, Hash(), + KeyEqual(), alloc) {} + + hopscotch_map(std::initializer_list init, size_type bucket_count, + const Hash& hash, const Allocator& alloc) + : hopscotch_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), + alloc) {} + + hopscotch_map& operator=(std::initializer_list ilist) { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + + allocator_type 
get_allocator() const { return m_ht.get_allocator(); } + + /* + * Iterators + */ + iterator begin() noexcept { return m_ht.begin(); } + const_iterator begin() const noexcept { return m_ht.begin(); } + const_iterator cbegin() const noexcept { return m_ht.cbegin(); } + + iterator end() noexcept { return m_ht.end(); } + const_iterator end() const noexcept { return m_ht.end(); } + const_iterator cend() const noexcept { return m_ht.cend(); } + + /* + * Capacity + */ + bool empty() const noexcept { return m_ht.empty(); } + size_type size() const noexcept { return m_ht.size(); } + size_type max_size() const noexcept { return m_ht.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { m_ht.clear(); } + + std::pair insert(const value_type& value) { + return m_ht.insert(value); + } + + template ::value>::type* = nullptr> + std::pair insert(P&& value) { + return m_ht.insert(std::forward

(value)); + } + + std::pair insert(value_type&& value) { + return m_ht.insert(std::move(value)); + } + + iterator insert(const_iterator hint, const value_type& value) { + return m_ht.insert(hint, value); + } + + template ::value>::type* = nullptr> + iterator insert(const_iterator hint, P&& value) { + return m_ht.insert(hint, std::forward

(value)); + } + + iterator insert(const_iterator hint, value_type&& value) { + return m_ht.insert(hint, std::move(value)); + } + + template + void insert(InputIt first, InputIt last) { + m_ht.insert(first, last); + } + + void insert(std::initializer_list ilist) { + m_ht.insert(ilist.begin(), ilist.end()); + } + + template + std::pair insert_or_assign(const key_type& k, M&& obj) { + return m_ht.insert_or_assign(k, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& k, M&& obj) { + return m_ht.insert_or_assign(std::move(k), std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) { + return m_ht.insert_or_assign(hint, k, std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) { + return m_ht.insert_or_assign(hint, std::move(k), std::forward(obj)); + } + + /** + * Due to the way elements are stored, emplace will need to move or copy the + * key-value once. The method is equivalent to + * insert(value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + std::pair emplace(Args&&... args) { + return m_ht.emplace(std::forward(args)...); + } + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy + * the key-value once. The method is equivalent to insert(hint, + * value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + iterator emplace_hint(const_iterator hint, Args&&... args) { + return m_ht.emplace_hint(hint, std::forward(args)...); + } + + template + std::pair try_emplace(const key_type& k, Args&&... args) { + return m_ht.try_emplace(k, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& k, Args&&... 
args) { + return m_ht.try_emplace(std::move(k), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { + return m_ht.try_emplace(hint, k, std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) { + return m_ht.try_emplace(hint, std::move(k), std::forward(args)...); + } + + iterator erase(iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator first, const_iterator last) { + return m_ht.erase(first, last); + } + size_type erase(const key_type& key) { return m_ht.erase(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup to the value if you already have the hash. + */ + size_type erase(const key_type& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + size_type erase(const K& key) { + return m_ht.erase(key); + } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup to the value if you already have the hash. 
+ */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + size_type erase(const K& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + void swap(hopscotch_map& other) { other.m_ht.swap(m_ht); } + + /* + * Lookup + */ + T& at(const Key& key) { return m_ht.at(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + T& at(const Key& key, std::size_t precalculated_hash) { + return m_ht.at(key, precalculated_hash); + } + + const T& at(const Key& key) const { return m_ht.at(key); } + + /** + * @copydoc at(const Key& key, std::size_t precalculated_hash) + */ + const T& at(const Key& key, std::size_t precalculated_hash) const { + return m_ht.at(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + T& at(const K& key) { + return m_ht.at(key); + } + + /** + * @copydoc at(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. 
+ */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + T& at(const K& key, std::size_t precalculated_hash) { + return m_ht.at(key, precalculated_hash); + } + + /** + * @copydoc at(const K& key) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + const T& at(const K& key) const { + return m_ht.at(key); + } + + /** + * @copydoc at(const K& key, std::size_t precalculated_hash) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + const T& at(const K& key, std::size_t precalculated_hash) const { + return m_ht.at(key, precalculated_hash); + } + + T& operator[](const Key& key) { return m_ht[key]; } + T& operator[](Key&& key) { return m_ht[std::move(key)]; } + + size_type count(const Key& key) const { return m_ht.count(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + size_type count(const Key& key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + size_type count(const K& key) const { + return m_ht.count(key); + } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. 
+ */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + size_type count(const K& key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + iterator find(const Key& key) { return m_ht.find(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + iterator find(const Key& key, std::size_t precalculated_hash) { + return m_ht.find(key, precalculated_hash); + } + + const_iterator find(const Key& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + iterator find(const K& key) { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. 
+ */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + iterator find(const K& key, std::size_t precalculated_hash) { + return m_ht.find(key, precalculated_hash); + } + + /** + * @copydoc find(const K& key) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + const_iterator find(const K& key) const { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + const_iterator find(const K& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + bool contains(const Key& key) const { return m_ht.contains(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + bool contains(const Key& key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + bool contains(const K& key) const { + return m_ht.contains(key); + } + + /** + * @copydoc contains(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. 
+ */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + bool contains(const K& key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) { + return m_ht.equal_range(key); + } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + std::pair equal_range(const Key& key, + std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) const { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair equal_range( + const Key& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + std::pair equal_range(const K& key) { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. 
+ */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + std::pair equal_range(const K& key, + std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + std::pair equal_range(const K& key) const { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + std::pair equal_range( + const K& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /* + * Bucket interface + */ + size_type bucket_count() const { return m_ht.bucket_count(); } + size_type max_bucket_count() const { return m_ht.max_bucket_count(); } + + /* + * Hash policy + */ + float load_factor() const { return m_ht.load_factor(); } + float max_load_factor() const { return m_ht.max_load_factor(); } + void max_load_factor(float ml) { m_ht.max_load_factor(ml); } + + void rehash(size_type count_) { m_ht.rehash(count_); } + void reserve(size_type count_) { m_ht.reserve(count_); } + + /* + * Observers + */ + hasher hash_function() const { return m_ht.hash_function(); } + key_equal key_eq() const { return m_ht.key_eq(); } + + /* + * Other + */ + + /** + * Convert a const_iterator to an iterator. 
+ */ + iterator mutable_iterator(const_iterator pos) { + return m_ht.mutable_iterator(pos); + } + + size_type overflow_size() const noexcept { return m_ht.overflow_size(); } + + friend bool operator==(const hopscotch_map& lhs, const hopscotch_map& rhs) { + if (lhs.size() != rhs.size()) { + return false; + } + + for (const auto& element_lhs : lhs) { + const auto it_element_rhs = rhs.find(element_lhs.first); + if (it_element_rhs == rhs.cend() || + element_lhs.second != it_element_rhs->second) { + return false; + } + } + + return true; + } + + friend bool operator!=(const hopscotch_map& lhs, const hopscotch_map& rhs) { + return !operator==(lhs, rhs); + } + + friend void swap(hopscotch_map& lhs, hopscotch_map& rhs) { lhs.swap(rhs); } + + private: + ht m_ht; +}; + +/** + * Same as `tsl::hopscotch_map`. + */ +template , + class KeyEqual = std::equal_to, + class Allocator = std::allocator>, + unsigned int NeighborhoodSize = 62, bool StoreHash = false> +using hopscotch_pg_map = + hopscotch_map; + +} // end namespace tsl + +#endif diff --git a/Sdk/External/HopscotchMap/hopscotch_set.h b/Sdk/External/HopscotchMap/hopscotch_set.h new file mode 100644 index 0000000..203b4b7 --- /dev/null +++ b/Sdk/External/HopscotchMap/hopscotch_set.h @@ -0,0 +1,592 @@ +/** + * MIT License + * + * Copyright (c) 2017 Thibaut Goetghebuer-Planchon + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_HOPSCOTCH_SET_H +#define TSL_HOPSCOTCH_SET_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hopscotch_hash.h" + +namespace tsl { + +/** + * Implementation of a hash set using the hopscotch hashing algorithm. + * + * The Key must be either nothrow move-constructible, copy-constructible or + * both. + * + * The size of the neighborhood (NeighborhoodSize) must be > 0 and <= 62 if + * StoreHash is false. When StoreHash is true, 32-bits of the hash will be + * stored alongside the neighborhood limiting the NeighborhoodSize to <= 30. + * There is no memory usage difference between 'NeighborhoodSize 62; StoreHash + * false' and 'NeighborhoodSize 30; StoreHash true'. + * + * Storing the hash may improve performance on insert during the rehash process + * if the hash takes time to compute. It may also improve read performance if + * the KeyEqual function takes time (or incurs a cache-miss). If used with + * simple Hash and KeyEqual it may slow things down. + * + * StoreHash can only be set if the GrowthPolicy is set to + * tsl::power_of_two_growth_policy. + * + * GrowthPolicy defines how the set grows and consequently how a hash value is + * mapped to a bucket. By default the set uses tsl::power_of_two_growth_policy. + * This policy keeps the number of buckets to a power of two and uses a mask to + * set the hash to a bucket instead of the slow modulo. 
You may define your own + * growth policy, check tsl::power_of_two_growth_policy for the interface. + * + * If the destructor of Key throws an exception, behaviour of the class is + * undefined. + * + * Iterator invalidation: + * - clear, operator=, reserve, rehash: always invalidate the iterators. + * - insert, emplace, emplace_hint, operator[]: if there is an effective + * insert, invalidate the iterators if a displacement is needed to resolve a + * collision (which means that most of the time, insert will invalidate the + * iterators). Or if there is a rehash. + * - erase: iterator on the erased element is the only one which becomes + * invalid. + */ +template , + class KeyEqual = std::equal_to, + class Allocator = std::allocator, + unsigned int NeighborhoodSize = 62, bool StoreHash = false, + class GrowthPolicy = tsl::hh::power_of_two_growth_policy<2>> +class hopscotch_set { + private: + template + using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent; + + class KeySelect { + public: + using key_type = Key; + + const key_type& operator()(const Key& key) const { return key; } + + key_type& operator()(Key& key) { return key; } + }; + + using overflow_container_type = std::list; + using ht = detail_hopscotch_hash::hopscotch_hash< + Key, KeySelect, void, Hash, KeyEqual, Allocator, NeighborhoodSize, + StoreHash, GrowthPolicy, overflow_container_type>; + + public: + using key_type = typename ht::key_type; + using value_type = typename ht::value_type; + using size_type = typename ht::size_type; + using difference_type = typename ht::difference_type; + using hasher = typename ht::hasher; + using key_equal = typename ht::key_equal; + using allocator_type = typename ht::allocator_type; + using reference = typename ht::reference; + using const_reference = typename ht::const_reference; + using pointer = typename ht::pointer; + using const_pointer = typename ht::const_pointer; + using iterator = typename ht::iterator; + using const_iterator = typename 
ht::const_iterator; + + /* + * Constructors + */ + hopscotch_set() : hopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE) {} + + explicit hopscotch_set(size_type bucket_count, const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) + : m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) {} + + hopscotch_set(size_type bucket_count, const Allocator& alloc) + : hopscotch_set(bucket_count, Hash(), KeyEqual(), alloc) {} + + hopscotch_set(size_type bucket_count, const Hash& hash, + const Allocator& alloc) + : hopscotch_set(bucket_count, hash, KeyEqual(), alloc) {} + + explicit hopscotch_set(const Allocator& alloc) + : hopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {} + + template + hopscotch_set(InputIt first, InputIt last, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) + : hopscotch_set(bucket_count, hash, equal, alloc) { + insert(first, last); + } + + template + hopscotch_set(InputIt first, InputIt last, size_type bucket_count, + const Allocator& alloc) + : hopscotch_set(first, last, bucket_count, Hash(), KeyEqual(), alloc) {} + + template + hopscotch_set(InputIt first, InputIt last, size_type bucket_count, + const Hash& hash, const Allocator& alloc) + : hopscotch_set(first, last, bucket_count, hash, KeyEqual(), alloc) {} + + hopscotch_set(std::initializer_list init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) + : hopscotch_set(init.begin(), init.end(), bucket_count, hash, equal, + alloc) {} + + hopscotch_set(std::initializer_list init, size_type bucket_count, + const Allocator& alloc) + : hopscotch_set(init.begin(), init.end(), bucket_count, Hash(), + KeyEqual(), alloc) {} + + hopscotch_set(std::initializer_list init, size_type bucket_count, + const Hash& hash, const 
Allocator& alloc) + : hopscotch_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), + alloc) {} + + hopscotch_set& operator=(std::initializer_list ilist) { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + + allocator_type get_allocator() const { return m_ht.get_allocator(); } + + /* + * Iterators + */ + iterator begin() noexcept { return m_ht.begin(); } + const_iterator begin() const noexcept { return m_ht.begin(); } + const_iterator cbegin() const noexcept { return m_ht.cbegin(); } + + iterator end() noexcept { return m_ht.end(); } + const_iterator end() const noexcept { return m_ht.end(); } + const_iterator cend() const noexcept { return m_ht.cend(); } + + /* + * Capacity + */ + bool empty() const noexcept { return m_ht.empty(); } + size_type size() const noexcept { return m_ht.size(); } + size_type max_size() const noexcept { return m_ht.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { m_ht.clear(); } + + std::pair insert(const value_type& value) { + return m_ht.insert(value); + } + std::pair insert(value_type&& value) { + return m_ht.insert(std::move(value)); + } + + iterator insert(const_iterator hint, const value_type& value) { + return m_ht.insert(hint, value); + } + iterator insert(const_iterator hint, value_type&& value) { + return m_ht.insert(hint, std::move(value)); + } + + template + void insert(InputIt first, InputIt last) { + m_ht.insert(first, last); + } + void insert(std::initializer_list ilist) { + m_ht.insert(ilist.begin(), ilist.end()); + } + + /** + * Due to the way elements are stored, emplace will need to move or copy the + * key-value once. The method is equivalent to + * insert(value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + std::pair emplace(Args&&... 
args) { + return m_ht.emplace(std::forward(args)...); + } + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy + * the key-value once. The method is equivalent to insert(hint, + * value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + iterator emplace_hint(const_iterator hint, Args&&... args) { + return m_ht.emplace_hint(hint, std::forward(args)...); + } + + iterator erase(iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator first, const_iterator last) { + return m_ht.erase(first, last); + } + size_type erase(const key_type& key) { return m_ht.erase(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup to the value if you already have the hash. + */ + size_type erase(const key_type& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + size_type erase(const K& key) { + return m_ht.erase(key); + } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup to the value if you already have the hash. 
+ */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + size_type erase(const K& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + void swap(hopscotch_set& other) { other.m_ht.swap(m_ht); } + + /* + * Lookup + */ + size_type count(const Key& key) const { return m_ht.count(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + size_type count(const Key& key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + size_type count(const K& key) const { + return m_ht.count(key); + } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + size_type count(const K& key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + iterator find(const Key& key) { return m_ht.find(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. 
+ */ + iterator find(const Key& key, std::size_t precalculated_hash) { + return m_ht.find(key, precalculated_hash); + } + + const_iterator find(const Key& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + iterator find(const K& key) { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + iterator find(const K& key, std::size_t precalculated_hash) { + return m_ht.find(key, precalculated_hash); + } + + /** + * @copydoc find(const K& key) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + const_iterator find(const K& key) const { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. 
+ */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + const_iterator find(const K& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + bool contains(const Key& key) const { return m_ht.contains(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + bool contains(const Key& key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + bool contains(const K& key) const { + return m_ht.contains(key); + } + + /** + * @copydoc contains(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + bool contains(const K& key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) { + return m_ht.equal_range(key); + } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. 
+ */ + std::pair equal_range(const Key& key, + std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) const { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair equal_range( + const Key& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + std::pair equal_range(const K& key) { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. 
+ */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + std::pair equal_range(const K& key, + std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + std::pair equal_range(const K& key) const { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template < + class K, class KE = KeyEqual, + typename std::enable_if::value>::type* = nullptr> + std::pair equal_range( + const K& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /* + * Bucket interface + */ + size_type bucket_count() const { return m_ht.bucket_count(); } + size_type max_bucket_count() const { return m_ht.max_bucket_count(); } + + /* + * Hash policy + */ + float load_factor() const { return m_ht.load_factor(); } + float max_load_factor() const { return m_ht.max_load_factor(); } + void max_load_factor(float ml) { m_ht.max_load_factor(ml); } + + void rehash(size_type count_) { m_ht.rehash(count_); } + void reserve(size_type count_) { m_ht.reserve(count_); } + + /* + * Observers + */ + hasher hash_function() const { return m_ht.hash_function(); } + key_equal key_eq() const { return m_ht.key_eq(); } + + /* + * Other + */ + + /** + * Convert a const_iterator to an iterator. 
+ */ + iterator mutable_iterator(const_iterator pos) { + return m_ht.mutable_iterator(pos); + } + + size_type overflow_size() const noexcept { return m_ht.overflow_size(); } + + friend bool operator==(const hopscotch_set& lhs, const hopscotch_set& rhs) { + if (lhs.size() != rhs.size()) { + return false; + } + + for (const auto& element_lhs : lhs) { + const auto it_element_rhs = rhs.find(element_lhs); + if (it_element_rhs == rhs.cend()) { + return false; + } + } + + return true; + } + + friend bool operator!=(const hopscotch_set& lhs, const hopscotch_set& rhs) { + return !operator==(lhs, rhs); + } + + friend void swap(hopscotch_set& lhs, hopscotch_set& rhs) { lhs.swap(rhs); } + + private: + ht m_ht; +}; + +/** + * Same as `tsl::hopscotch_set`. + */ +template , + class KeyEqual = std::equal_to, + class Allocator = std::allocator, + unsigned int NeighborhoodSize = 62, bool StoreHash = false> +using hopscotch_pg_set = + hopscotch_set; + +} // end namespace tsl + +#endif diff --git a/Sdk/External/beehive/LICENSE b/Sdk/External/beehive/LICENSE new file mode 100644 index 0000000..03ce0ce --- /dev/null +++ b/Sdk/External/beehive/LICENSE @@ -0,0 +1,17 @@ +Copyright (c) 2018 Chris Bush + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. 
This notice may not be removed or altered from any source distribution. \ No newline at end of file diff --git a/Sdk/External/beehive/beehive.hpp b/Sdk/External/beehive/beehive.hpp new file mode 100644 index 0000000..10327ff --- /dev/null +++ b/Sdk/External/beehive/beehive.hpp @@ -0,0 +1,726 @@ +#ifndef BEEHIVE_BEHAVIOR_TREE_HPP +#define BEEHIVE_BEHAVIOR_TREE_HPP + +#include +#include +#include +#include +#include +#include +#include + +/*! + \file beehive.hpp +*/ + +namespace beehive +{ + +/*! + \brief The status returned by process functions. +*/ +enum class Status +{ + FAILURE = 0, //!< Returned when the process function has failed. + RUNNING, //!< Returned when the outcome of process has not been determined yet. + SUCCESS //!< Returned when the process has succeeded. +}; + +/*! + \brief Pass a TreeState instance to #beehive::Tree's process function in order to resume Running nodes. Instantiate with #beehive::Tree::make_state. +*/ +struct TreeState { + // For internal use only. + size_t resume_index() const { + return _resume_index; + } + // For internal use only. + size_t offset() const { + return _offset; + } +private: + TreeState(size_t tree_id): _tree_id(tree_id) {} + + size_t _tree_id; + size_t _resume_index{}; + size_t _offset{}; + + template + friend class Tree; + template + friend struct Node; +}; + +enum class NodeType : unsigned char +{ + None = 0, + + Root, + Leaf, + Selector, + Sequence, + Inverter, + Succeeder +}; + +/*! + \brief A handle on a process function. This should not be built directly, see #beehive::Builder. 
+*/ +template +struct Node +{ + using ProcessFunction = std::function; + + Node(ProcessFunction process, NodeType nodeType): _process(move(process)), _type(nodeType) {} + + Status process(C &context, TreeState &state) const + { + return _process(context, *this, state); + } + + size_t child_count() const { + return _child_count; + } + + size_t descendant_count() const { + // Only calculate on the first call + if (_descendant_count == 0 && _child_count > 0) { + _descendant_count = _child_count; + auto *child = first_child(); + for (size_t i = 0; i < _child_count; ++i) { + _descendant_count += child->descendant_count(); + child = child->next_sibling(); + } + } + return _descendant_count; + } + + void add_child() { + ++_child_count; + } + + Node const *first_child() const { + if (_child_count == 0) { + return nullptr; + } + // Tree nodes are stored contiguously in depth-first order. + // Therefore, first child is always current pointer plus 1. + return this + 1; + } + + Node const *next_sibling() const { + // Tree nodes are stored contiguously in depth-first order. + return this + descendant_count() + 1; + } + + /*! + \brief Returns this node's index in its tree. + */ + size_t index() const { + return _index; + } + + /*! + \brief Updates the given tree state so that the tree can resume at this (composite) node with the child generator starting at the given child index. + */ + void save_state_at_child_index(TreeState &state, size_t child_index) const { + if (_type == NodeType::Selector) + return; + + state._resume_index = index(); + assert(child_index < child_count()); + state._offset = child_index; + } + + /*! + \brief Clears the given tree state so that subsequent process() calls do not resume. 
+ */ + void clear_state(TreeState &state) const { + state._resume_index = 0; + state._offset = 0; + } + +private: + template + friend class Tree; + + size_t _index{}; + size_t _child_count{}; + NodeType _type{}; + mutable size_t _descendant_count{}; + ProcessFunction _process; +}; + +template +using Generator = std::function const *()>; + +/*! + \brief Composites define how to run the process() function on the child range. + + The generator function returns the next child in the child array or nullptr after the + end of the child array. If the previous call to process() returned RUNNING status, + the first result of the generator will be the same child as was returned when the + previous call returned the RUNNING status. This allows composites to resume + where they left off. + + The child pointer returned is only valid within the scope of the composite function + body. +*/ +template +using Composite = std::function const &, TreeState &)>; + +/*! + \brief Composite that returns success if all children return success. +*/ +template +Status sequence(C &context, Generator const &next_child, TreeState &state) +{ + while (auto const *child = next_child()) + { + auto status = child->process(context, state); + if (status != Status::SUCCESS) + { + return status; + } + } + return Status::SUCCESS; +} + +/*! + \brief Composite that returns success on the first successful call. +*/ +template +Status selector(C &context, Generator const &next_child, TreeState &state) +{ + while (auto const *child = next_child()) + { + auto status = child->process(context, state); + if (status != Status::FAILURE) + { + return status; + } + } + return Status::FAILURE; +} + +/*! + \brief A decorator is a composite that may only have a single child. +*/ +template +using Decorator = std::function const &child, TreeState &state)>; + +/*! + \brief Decorator that just returns the result of the child. Not very useful... 
+*/ +template +Status forwarder(C &context, Node const &child, TreeState &state) +{ + return child.process(context, state); +} + +/*! + \brief Decorator that inverts the result of its child node. +*/ +template +Status inverter(C &context, Node const &child, TreeState &state) +{ + const auto status = child.process(context, state); + if (status == Status::RUNNING) + { + return status; + } + return status == Status::FAILURE ? Status::SUCCESS : Status::FAILURE; +} + +/*! + \brief Decorator that returns success regardless of the child result. +*/ +template +Status succeeder(C &context, Node const &child, TreeState &state) +{ + child.process(context, state); + return Status::SUCCESS; +} + + +template +using BasicLeaf = std::function; //!< Leaf nodes are the `process()` function taking the mutable context and must return a status. + +template +using Leaf = BasicLeaf; //!< A Leaf function takes a Context & and returns a Status. + +template +using BoolLeaf = BasicLeaf; //!< A Leaf function returning bool returns SUCCESS on true and FAILURE on false. It is not possible to return RUNNING from such a function. + +template +using VoidLeaf = BasicLeaf; //!< A Leaf function returning anything other than bool or Status can be added using #beehive::BuilderBase::void_leaf. The return value is ignored and SUCCESS is returned. + +/*! + \brief A leaf that always succeeds. Not very useful... +*/ +template +Status noop(C &) +{ + return Status::SUCCESS; +} + +/*! + \brief The behavior tree class which passes the ContextType around. See #beehive::Builder for making one. +*/ +template>> +class Tree +{ +public: + using Context = ContextType; + + /*! + \brief Process with the given context reference. + */ + Status process(Context &context) const; + + /*! + \brief Process with the given state and context reference. + */ + Status process(TreeState &state, Context &context) const; + + /*! + \brief Retrieves the nodes, for debugging purposes. 
+ */ + std::vector, A> const &nodes() const { + return _nodes; + } + + /*! + \brief Creates a state object that can be passed to subsequent process() calls. + */ + TreeState make_state() const { + return {_id}; + } + +private: + static size_t id() { + static size_t id{}; + return ++id; + } + + template + friend class BuilderBase; + + template + friend class Builder; + + /*! + \brief Constructs a tree with the given nodes. + See #beehive::Builder. + */ + Tree(std::vector, A> nodes); + + std::vector, A> _nodes; + size_t _id{id()}; +}; + +template +Tree::Tree(std::vector, A> nodes) + : _nodes(move(nodes)) +{ + size_t i = 0; + for (auto &node : _nodes) { + node._index = i++; + } +} + +template +Status Tree::process(Context &context) const +{ + TreeState state{_id}; + return _nodes[0].process(context, state); +} + +template +Status Tree::process(TreeState &state, Context &context) const +{ + assert(state._tree_id == _id); // another tree's state used with this tree + return _nodes.at(state.resume_index()).process(context, state); +} + +/// @cond +template +auto make_branch(Decorator f) -> typename Node::ProcessFunction; + +template +auto make_branch(Composite f) -> typename Node::ProcessFunction; + +template +auto make_leaf(Leaf f) -> typename Node::ProcessFunction; + +template +auto make_leaf(VoidLeaf f) -> typename Node::ProcessFunction; + +template +auto make_leaf(BoolLeaf f) -> typename Node::ProcessFunction; +/// @endcond + +template +class Builder; + +/*! + \brief A helper for building trees which can be instantiated as #beehive::Builder. +*/ +template +class BuilderBase +{ +public: + /// @cond + enum class Type + { + COMPOSITE, + DECORATOR, + }; + /// @endcond + + /*! + \brief Adds the given composite to the tree. Composites have one or more children. + + \note The composite builder must call end() to signify end of child list. + */ + BuilderBase composite(Composite composite); + + /*! + \brief Adds the given decorator to the tree. 
Decorators have exactly one child. + + \note The decorator builder must call end() to signify the end of the child list. + */ + BuilderBase decorator(Decorator decorator); + + // Note: "no known conversion" warnings here could indicate that you forgot to return something from your lambda. + /*! + \brief Adds the given leaf to the tree. Leaves have no children. + */ + BuilderBase &leaf(Leaf leaf); + + /*! + \brief Convenience wrapper so that bool functions can be used. Translates true + result to Status::SUCCESS, false to Status::FAILURE and never returns Status::RUNNING. + */ + BuilderBase &leaf(BoolLeaf leaf); + + /*! + \brief Convenience wrapper for a void function, or really a function returning any type other than bool or Status. This always returns Status::SUCCESS. + */ + BuilderBase &void_leaf(VoidLeaf leaf); + + /*! + \brief Copies another tree as a subtree at the current node. + */ + BuilderBase &tree(Tree const &subtree); + + /*! + \brief Closes the composite or decorator branch. + + Each call to composite() or decorator() must have a corresponding end(). + */ + BuilderBase &end(); + + /*! + \brief Finalizes the tree by returning a copy. This will assert if done while + a decorator or composite branch is still 'open'. + */ + virtual Tree build() const &; + + /*! + \brief Finalizes the tree by returning a tree constructed with the builder's + root node. The builder is then invalid. + */ + virtual Tree build() &&; + + /*! + \brief Shorthand for `composite(&sequence)`. + */ + BuilderBase sequence(); + + /*! + \brief Shorthand for `composite(&selector)`. + */ + BuilderBase selector(); + + /*! + \brief Shorthand for `decorator(&inverter)`. + */ + BuilderBase inverter(); + + /*! + \brief Shorthand for `decorator(&succeeder)`. 
+ */ + BuilderBase succeeder(); + +protected: + /// @cond + BuilderBase(BuilderBase &parent, size_t offset, Type type) + : _parent(parent) + , _offset(offset) + , _type(type) + {} + + Node &node() { + return nodes()[_offset]; + } + + virtual std::vector, A> &nodes() { + return _parent.nodes(); + } + +private: + size_t add_child(typename Node::ProcessFunction &&fn, NodeType nodeType) { + node().add_child(); + nodes().emplace_back(Node(std::move(fn), nodeType)); + return nodes().size() - 1; + } + + template + BuilderBase &_leaf(LeafType &&leaf); + + template + BuilderBase _branch(BranchType &&branch, NodeType nodeType); + + BuilderBase &_parent; + size_t _offset{}; + Type _type{}; + + /// @endcond +}; + +/*! + \brief Defines the tree structure and instantiates it. + + This Builder pattern is inspired by arvidsson's implementation, BrainTree. + \sa #beehive::BuilderBase +*/ +template>> +class Builder + : public BuilderBase +{ +public: + /*! + \brief The context type. + */ + using Context = C; + + /*! + \brief Begins construction of a tree. + */ + Builder() + : BuilderBase(*this, 0, BuilderBase::Type::DECORATOR) + { + auto root = make_branch(Decorator(&forwarder)); + _nodes.emplace_back(Node(std::move(root), NodeType::Root)); + } + + Builder(Builder const &) = delete; //!< Deleted copy constructor. + Builder(Builder &&) = default; //!< Move constructor. + Builder &operator=(Builder const &) = delete; //!< Deleted copy assignment operator. + Builder &operator=(Builder &&) = default; //!< Move assignment operator. 
+ + virtual Tree build() const & override + { + assert(_nodes[0].child_count() > 0); // must have at least one leaf node added + return {_nodes}; + } + + virtual Tree build() && override + { + assert(_nodes[0].child_count() > 0); // must have at least one leaf node added + return {std::move(_nodes)}; + } + +private: + virtual std::vector, Allocator> &nodes() override { + return _nodes; + } + + std::vector, Allocator> _nodes; +}; + +/// @cond +template +auto make_branch(Decorator f) -> typename Node::ProcessFunction +{ + return [process = move(f)](C &context, Node const &self, TreeState &state) + { + assert(self.child_count() == 1); // invariant violation! + auto &child = *(&self + 1); + return process(context, child, state); + }; +} + +template +auto make_branch(Composite f) -> typename Node::ProcessFunction +{ + return [process = move(f)](C &context, Node const &self, TreeState &state) + { + size_t i = 0; + auto *child = self.first_child(); + if (self.index() == state.resume_index()) { + for (; i < state.offset(); ++i) { + child = child->next_sibling(); + } + } + auto generator = [&self, &i, &child]() -> Node const * { + if (i++ == self.child_count()) { + return nullptr; + } + auto c = child; + child = child->next_sibling(); + return c; + }; + auto status = process(context, generator, state); + if (status == Status::RUNNING) { + self.save_state_at_child_index(state, i - 1); + } else { + self.clear_state(state); + } + return status; + }; +} + +template +auto make_leaf(Leaf f) -> typename Node::ProcessFunction +{ + return [process = move(f)](C &context, Node const &self, TreeState &state) + { + assert(self.child_count() == 0); // invariant violation! 
+ return process(context); + }; +} + +template +auto make_leaf(VoidLeaf f) -> typename Node::ProcessFunction +{ + return make_leaf(Leaf{[void_process = move(f)](C &context) + { + void_process(context); + return Status::SUCCESS; + }}); +} + +template +auto make_leaf(BoolLeaf f) -> typename Node::ProcessFunction +{ + return make_leaf(Leaf{[bool_process = move(f)](C &context) + { + const bool result = bool_process(context); + return result ? Status::SUCCESS : Status::FAILURE; + }}); +} + +template +auto BuilderBase::composite(Composite composite) -> BuilderBase +{ + return _branch(std::move(composite)); // NOTE(review): _branch() is declared as (branch, nodeType) but no NodeType is passed here - confirm the intended value +} + +template +auto BuilderBase::decorator(Decorator decorator) -> BuilderBase +{ + return _branch(std::move(decorator)); // NOTE(review): _branch() is declared as (branch, nodeType) but no NodeType is passed here - confirm the intended value +} + +template +template +auto BuilderBase::_branch(BranchType &&branch, NodeType nodeType) -> BuilderBase +{ + assert((_type != Type::DECORATOR) || node().child_count() == 0); // Decorators may only have one child! + auto type = std::is_same< + typename std::decay::type, + Decorator + >::value ? Type::DECORATOR : Type::COMPOSITE; + auto child_offset = add_child(make_branch(move(branch)), nodeType); + return {*this, child_offset, type}; +} + +template +template +auto BuilderBase::_leaf(LeafType &&leaf) -> BuilderBase & +{ + assert((_type != Type::DECORATOR) || node().child_count() == 0); // Decorators may only have one child! + add_child(make_leaf(move(leaf)), NodeType::Leaf); + return *this; +} + +template +auto BuilderBase::leaf(Leaf leaf) -> BuilderBase & +{ + return _leaf(std::move(leaf)); +} + +template +auto BuilderBase::leaf(BoolLeaf leaf) -> BuilderBase & +{ + return _leaf(std::move(leaf)); +} + +template +auto BuilderBase::void_leaf(VoidLeaf leaf) -> BuilderBase & +{ + return _leaf(std::move(leaf)); +} + +template +auto BuilderBase::tree(Tree const &subtree) -> BuilderBase & +{ + assert((_type != Type::DECORATOR) || node().child_count() == 0); // Decorators may only have one child! 
+ auto const &subtree_nodes = subtree.nodes(); + copy(subtree_nodes.begin(), subtree_nodes.end(), back_inserter(nodes())); + node().add_child(); + return *this; +} + +template +auto BuilderBase::end() -> BuilderBase & +{ + assert(node().child_count() > 0); // can't have composite/decorator without children! + return _parent; +} + +template +auto BuilderBase::build() const & -> Tree +{ + assert(false); // unterminated tree! + return {{}}; +} + +template +auto BuilderBase::build() && -> Tree +{ + assert(false); // unterminated tree! + return {{}}; +} + +template +auto BuilderBase::selector()->BuilderBase +{ + return _branch(Composite{&beehive::selector}, NodeType::Selector); +} + +template +auto BuilderBase::sequence()->BuilderBase +{ + return _branch(Composite{&beehive::sequence}, NodeType::Sequence); +} + +template +auto BuilderBase::inverter()->BuilderBase +{ + return _branch(Decorator{&beehive::inverter}, NodeType::Inverter); // inverter takes a single child node: wrap it as Decorator so _branch() tags the node Type::DECORATOR +} + +template +auto BuilderBase::succeeder()->BuilderBase +{ + return _branch(Decorator{&beehive::succeeder}, NodeType::Succeeder); // succeeder takes a single child node: wrap it as Decorator so _branch() tags the node Type::DECORATOR +} + +/// @endcond + +} // namespace beehive + +#endif diff --git a/Sdk/External/entt/LICENSE b/Sdk/External/entt/LICENSE new file mode 100644 index 0000000..1c5f480 --- /dev/null +++ b/Sdk/External/entt/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2017-2020 Michele Caini + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Sdk/External/entt/entt.hpp b/Sdk/External/entt/entt.hpp new file mode 100644 index 0000000..b085a2d --- /dev/null +++ b/Sdk/External/entt/entt.hpp @@ -0,0 +1,20185 @@ +// #include "core/algorithm.hpp" +#ifndef ENTT_CORE_ALGORITHM_HPP +#define ENTT_CORE_ALGORITHM_HPP + + +#include +#include +#include +#include +#include +// #include "utility.hpp" +#ifndef ENTT_CORE_UTILITY_HPP +#define ENTT_CORE_UTILITY_HPP + + +#include +// #include "../config/config.h" +#ifndef ENTT_CONFIG_CONFIG_H +#define ENTT_CONFIG_CONFIG_H + + +#ifndef ENTT_NOEXCEPT +# define ENTT_NOEXCEPT noexcept +#endif + + +#ifndef ENTT_HS_SUFFIX +# define ENTT_HS_SUFFIX _hs +#endif + + +#ifndef ENTT_HWS_SUFFIX +# define ENTT_HWS_SUFFIX _hws +#endif + + +#ifndef ENTT_USE_ATOMIC +# define ENTT_MAYBE_ATOMIC(Type) Type +#else +# include +# define ENTT_MAYBE_ATOMIC(Type) std::atomic +#endif + + +#ifndef ENTT_ID_TYPE +# include +# define ENTT_ID_TYPE std::uint32_t +#endif + + +#ifndef ENTT_PAGE_SIZE +# define ENTT_PAGE_SIZE 32768 +#endif + + +#ifndef ENTT_ASSERT +# include +# define ENTT_ASSERT(condition) assert(condition) +#endif + + +#ifndef ENTT_NO_ETO +# include +# define ENTT_IS_EMPTY(Type) std::is_empty +#else +# include +# define ENTT_IS_EMPTY(Type) std::false_type +#endif + + +#ifndef ENTT_STANDARD_CPP +# if defined __clang__ || (defined __GNUC__ && __GNUC__ > 8) +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define 
ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined __GNUC__ +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined _MSC_VER +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __FUNCSIG__ +# define ENTT_PRETTY_FUNCTION_PREFIX '<' +# define ENTT_PRETTY_FUNCTION_SUFFIX '>' +# endif +#endif + + +#ifndef ENTT_STANDALONE +# define ENTT_FAST_PATH(...) false +#else +# define ENTT_FAST_PATH(Cond) Cond +#endif + + +#endif + + + +namespace entt { + + +/*! @brief Identity function object (waiting for C++20). */ +struct identity { + /** + * @brief Returns its argument unchanged. + * @tparam Type Type of the argument. + * @param value The actual argument. + * @return The submitted value as-is. + */ + template + [[nodiscard]] constexpr Type && operator()(Type &&value) const ENTT_NOEXCEPT { + return std::forward(value); + } +}; + + +/** + * @brief Constant utility to disambiguate overloaded members of a class. + * @tparam Type Type of the desired overload. + * @tparam Class Type of class to which the member belongs. + * @param member A valid pointer to a member. + * @return Pointer to the member. + */ +template +[[nodiscard]] constexpr auto overload(Type Class:: *member) ENTT_NOEXCEPT { return member; } + + +/** + * @brief Constant utility to disambiguate overloaded functions. + * @tparam Func Function type of the desired overload. + * @param func A valid pointer to a function. + * @return Pointer to the function. + */ +template +[[nodiscard]] constexpr auto overload(Func *func) ENTT_NOEXCEPT { return func; } + + +/** + * @brief Helper type for visitors. + * @tparam Func Types of function objects. + */ +template +struct overloaded: Func... { + using Func::operator()...; +}; + + +/** + * @brief Deduction guide. + * @tparam Func Types of function objects. + */ +template +overloaded(Func...) -> overloaded; + + +/** + * @brief Basic implementation of a y-combinator. 
+ * @tparam Func Type of a potentially recursive function. + */ +template +struct y_combinator { + /** + * @brief Constructs a y-combinator from a given function. + * @param recursive A potentially recursive function. + */ + y_combinator(Func recursive): + func{std::move(recursive)} + {} + + /** + * @brief Invokes a y-combinator and therefore its underlying function. + * @tparam Args Types of arguments to use to invoke the underlying function. + * @param args Parameters to use to invoke the underlying function. + * @return Return value of the underlying function, if any. + */ + template + decltype(auto) operator()(Args &&... args) const { + return func(*this, std::forward(args)...); + } + + /*! @copydoc operator()() */ + template + decltype(auto) operator()(Args &&... args) { + return func(*this, std::forward(args)...); + } + +private: + Func func; +}; + + +} + + +#endif + + + +namespace entt { + + +/** + * @brief Function object to wrap `std::sort` in a class type. + * + * Unfortunately, `std::sort` cannot be passed as template argument to a class + * template or a function template.
+ * This class fills the gap by wrapping some flavors of `std::sort` in a + * function object. + */ +struct std_sort { + /** + * @brief Sorts the elements in a range. + * + * Sorts the elements in a range using the given binary comparison function. + * + * @tparam It Type of random access iterator. + * @tparam Compare Type of comparison function object. + * @tparam Args Types of arguments to forward to the sort function. + * @param first An iterator to the first element of the range to sort. + * @param last An iterator past the last element of the range to sort. + * @param compare A valid comparison function object. + * @param args Arguments to forward to the sort function, if any. + */ + template, typename... Args> + void operator()(It first, It last, Compare compare = Compare{}, Args &&... args) const { + std::sort(std::forward(args)..., std::move(first), std::move(last), std::move(compare)); + } +}; + + +/*! @brief Function object for performing insertion sort. */ +struct insertion_sort { + /** + * @brief Sorts the elements in a range. + * + * Sorts the elements in a range using the given binary comparison function. + * + * @tparam It Type of random access iterator. + * @tparam Compare Type of comparison function object. + * @param first An iterator to the first element of the range to sort. + * @param last An iterator past the last element of the range to sort. + * @param compare A valid comparison function object. + */ + template> + void operator()(It first, It last, Compare compare = Compare{}) const { + if(first < last) { + for(auto it = first+1; it < last; ++it) { + auto value = std::move(*it); + auto pre = it; + + for(; pre > first && compare(value, *(pre-1)); --pre) { + *pre = std::move(*(pre-1)); + } + + *pre = std::move(value); + } + } + } +}; + + +/** + * @brief Function object for performing LSD radix sort. + * @tparam Bit Number of bits processed per pass. + * @tparam N Maximum number of bits to sort. 
+ */ +template +struct radix_sort { + static_assert((N % Bit) == 0, "The maximum number of bits to sort must be a multiple of the number of bits processed per pass"); + + /** + * @brief Sorts the elements in a range. + * + * Sorts the elements in a range using the given _getter_ to access the + * actual data to be sorted. + * + * This implementation is inspired by the online book + * [Physically Based Rendering](http://www.pbr-book.org/3ed-2018/Primitives_and_Intersection_Acceleration/Bounding_Volume_Hierarchies.html#RadixSort). + * + * @tparam It Type of random access iterator. + * @tparam Getter Type of _getter_ function object. + * @param first An iterator to the first element of the range to sort. + * @param last An iterator past the last element of the range to sort. + * @param getter A valid _getter_ function object. + */ + template + void operator()(It first, It last, Getter getter = Getter{}) const { + if(first < last) { + static constexpr auto mask = (1 << Bit) - 1; + static constexpr auto buckets = 1 << Bit; + static constexpr auto passes = N / Bit; + + using value_type = typename std::iterator_traits::value_type; + std::vector aux(std::distance(first, last)); + + auto part = [getter = std::move(getter)](auto from, auto to, auto out, auto start) { + std::size_t index[buckets]{}; + std::size_t count[buckets]{}; + + std::for_each(from, to, [&getter, &count, start](const value_type &item) { + ++count[(getter(item) >> start) & mask]; + }); + + std::for_each(std::next(std::begin(index)), std::end(index), [index = std::begin(index), count = std::begin(count)](auto &item) mutable { + item = *(index++) + *(count++); + }); + + std::for_each(from, to, [&getter, &out, &index, start](value_type &item) { + out[index[(getter(item) >> start) & mask]++] = std::move(item); + }); + }; + + for(std::size_t pass = 0; pass < (passes & ~1); pass += 2) { + part(first, last, aux.begin(), pass * Bit); + part(aux.begin(), aux.end(), first, (pass + 1) * Bit); + } + + if 
constexpr(passes & 1) { + part(first, last, aux.begin(), (passes - 1) * Bit); + std::move(aux.begin(), aux.end(), first); + } + } + } +}; + + +} + + +#endif + +// #include "core/attribute.h" +#ifndef ENTT_CORE_ATTRIBUTE_H +#define ENTT_CORE_ATTRIBUTE_H + + +#ifndef ENTT_EXPORT +# if defined _WIN32 || defined __CYGWIN__ || defined _MSC_VER +# define ENTT_EXPORT __declspec(dllexport) +# define ENTT_IMPORT __declspec(dllimport) +# define ENTT_HIDDEN +# elif defined __GNUC__ && __GNUC__ >= 4 +# define ENTT_EXPORT __attribute__((visibility("default"))) +# define ENTT_IMPORT __attribute__((visibility("default"))) +# define ENTT_HIDDEN __attribute__((visibility("hidden"))) +# else /* Unsupported compiler */ +# define ENTT_EXPORT +# define ENTT_IMPORT +# define ENTT_HIDDEN +# endif +#endif + + +#ifndef ENTT_API +# if defined ENTT_API_EXPORT +# define ENTT_API ENTT_EXPORT +# elif defined ENTT_API_IMPORT +# define ENTT_API ENTT_IMPORT +# else /* No API */ +# define ENTT_API +# endif +#endif + + +#endif + +// #include "core/family.hpp" +#ifndef ENTT_CORE_FAMILY_HPP +#define ENTT_CORE_FAMILY_HPP + + +// #include "../config/config.h" + +// #include "fwd.hpp" +#ifndef ENTT_CORE_FWD_HPP +#define ENTT_CORE_FWD_HPP + + +// #include "../config/config.h" + + + +namespace entt { + + +/*! @brief Alias declaration for type identifiers. */ +using id_type = ENTT_ID_TYPE; + + +} + + +#endif + + + +namespace entt { + + +/** + * @brief Dynamic identifier generator. + * + * Utility class template that can be used to assign unique identifiers to types + * at runtime. Use different specializations to create separate sets of + * identifiers. + */ +template +class family { + inline static ENTT_MAYBE_ATOMIC(id_type) identifier{}; + +public: + /*! @brief Unsigned integer type. */ + using family_type = id_type; + + /*! @brief Statically generated unique identifier for the given type. 
*/ + template + // at the time I'm writing, clang crashes during compilation if auto is used instead of family_type + inline static const family_type type = identifier++; +}; + + +} + + +#endif + +// #include "core/hashed_string.hpp" +#ifndef ENTT_CORE_HASHED_STRING_HPP +#define ENTT_CORE_HASHED_STRING_HPP + + +#include +#include +// #include "../config/config.h" + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @cond TURN_OFF_DOXYGEN + * Internal details not to be documented. + */ + + +namespace internal { + + +template +struct fnv1a_traits; + + +template<> +struct fnv1a_traits { + using type = std::uint32_t; + static constexpr std::uint32_t offset = 2166136261; + static constexpr std::uint32_t prime = 16777619; +}; + + +template<> +struct fnv1a_traits { + using type = std::uint64_t; + static constexpr std::uint64_t offset = 14695981039346656037ull; + static constexpr std::uint64_t prime = 1099511628211ull; +}; + + +} + + +/** + * Internal details not to be documented. + * @endcond + */ + + +/** + * @brief Zero overhead unique identifier. + * + * A hashed string is a compile-time tool that allows users to use + * human-readable identifiers in the codebase while using their numeric + * counterparts at runtime. 
+ * Because of that, a hashed string can also be used in constant expressions if + * required. + * + * @tparam Char Character type. + */ +template +class basic_hashed_string { + using traits_type = internal::fnv1a_traits; + + struct const_wrapper { + // non-explicit constructor on purpose + constexpr const_wrapper(const Char *curr) ENTT_NOEXCEPT: str{curr} {} + const Char *str; + }; + + // Fowler–Noll–Vo hash function v. 1a - the good + [[nodiscard]] static constexpr id_type helper(const Char *curr) ENTT_NOEXCEPT { + auto value = traits_type::offset; + + while(*curr != 0) { + value = (value ^ static_cast(*(curr++))) * traits_type::prime; + } + + return value; + } + +public: + /*! @brief Character type. */ + using value_type = Char; + /*! @brief Unsigned integer type. */ + using hash_type = id_type; + + /** + * @brief Returns directly the numeric representation of a string. + * + * Forcing template resolution avoids implicit conversions. A + * human-readable identifier can be anything but a plain, old bunch of + * characters. 
+ * Example of use: + * @code{.cpp} + * const auto value = basic_hashed_string::value("my.png"); + * @endcode + * + * @tparam N Number of characters of the identifier. + * @param str Human-readable identifier. + * @return The numeric representation of the string. + */ + template + [[nodiscard]] static constexpr hash_type value(const value_type (&str)[N]) ENTT_NOEXCEPT { + return helper(str); + } + + /** + * @brief Returns directly the numeric representation of a string. + * @param wrapper Helps achieving the purpose by relying on overloading. + * @return The numeric representation of the string. + */ + [[nodiscard]] static hash_type value(const_wrapper wrapper) ENTT_NOEXCEPT { + return helper(wrapper.str); + } + + /** + * @brief Returns directly the numeric representation of a string view. + * @param str Human-readable identifier. + * @param size Length of the string to hash. + * @return The numeric representation of the string. + */ + [[nodiscard]] static hash_type value(const value_type *str, std::size_t size) ENTT_NOEXCEPT { + id_type partial{traits_type::offset}; + while(size--) { partial = (partial^(str++)[0])*traits_type::prime; } + return partial; + } + + /*! @brief Constructs an empty hashed string. */ + constexpr basic_hashed_string() ENTT_NOEXCEPT + : str{nullptr}, hash{} + {} + + /** + * @brief Constructs a hashed string from an array of const characters. + * + * Forcing template resolution avoids implicit conversions. A + * human-readable identifier can be anything but a plain, old bunch of + * characters. 
+ * Example of use: + * @code{.cpp} + * basic_hashed_string hs{"my.png"}; + * @endcode + * + * @tparam N Number of characters of the identifier. + * @param curr Human-readable identifier. + */ + template + constexpr basic_hashed_string(const value_type (&curr)[N]) ENTT_NOEXCEPT + : str{curr}, hash{helper(curr)} + {} + + /** + * @brief Explicit constructor on purpose to avoid constructing a hashed + * string directly from a `const value_type *`. + * @param wrapper Helps achieving the purpose by relying on overloading. + */ + explicit constexpr basic_hashed_string(const_wrapper wrapper) ENTT_NOEXCEPT + : str{wrapper.str}, hash{helper(wrapper.str)} + {} + + /** + * @brief Returns the human-readable representation of a hashed string. + * @return The string used to initialize the instance. + */ + [[nodiscard]] constexpr const value_type * data() const ENTT_NOEXCEPT { + return str; + } + + /** + * @brief Returns the numeric representation of a hashed string. + * @return The numeric representation of the instance. + */ + [[nodiscard]] constexpr hash_type value() const ENTT_NOEXCEPT { + return hash; + } + + /*! @copydoc data */ + [[nodiscard]] constexpr operator const value_type *() const ENTT_NOEXCEPT { return data(); } + + /** + * @brief Returns the numeric representation of a hashed string. + * @return The numeric representation of the instance. + */ + [[nodiscard]] constexpr operator hash_type() const ENTT_NOEXCEPT { return value(); } + + /** + * @brief Compares two hashed strings. + * @param other Hashed string with which to compare. + * @return True if the two hashed strings are identical, false otherwise. + */ + [[nodiscard]] constexpr bool operator==(const basic_hashed_string &other) const ENTT_NOEXCEPT { + return hash == other.hash; + } + +private: + const value_type *str; + hash_type hash; +}; + + +/** + * @brief Deduction guide. 
+ * + * It allows to deduce the character type of the hashed string directly from a + * human-readable identifier provided to the constructor. + * + * @tparam Char Character type. + * @tparam N Number of characters of the identifier. + * @param str Human-readable identifier. + */ +template +basic_hashed_string(const Char (&str)[N]) ENTT_NOEXCEPT +-> basic_hashed_string; + + +/** + * @brief Compares two hashed strings. + * @tparam Char Character type. + * @param lhs A valid hashed string. + * @param rhs A valid hashed string. + * @return True if the two hashed strings are identical, false otherwise. + */ +template +[[nodiscard]] constexpr bool operator!=(const basic_hashed_string &lhs, const basic_hashed_string &rhs) ENTT_NOEXCEPT { + return !(lhs == rhs); +} + + +/*! @brief Aliases for common character types. */ +using hashed_string = basic_hashed_string; + + +/*! @brief Aliases for common character types. */ +using hashed_wstring = basic_hashed_string; + + +} + + +/** + * @brief User defined literal for hashed strings. + * @param str The literal without its suffix. + * @return A properly initialized hashed string. + */ +[[nodiscard]] constexpr entt::hashed_string operator"" ENTT_HS_SUFFIX(const char *str, std::size_t) ENTT_NOEXCEPT { + return entt::hashed_string{str}; +} + + +/** + * @brief User defined literal for hashed wstrings. + * @param str The literal without its suffix. + * @return A properly initialized hashed wstring. + */ +[[nodiscard]] constexpr entt::hashed_wstring operator"" ENTT_HWS_SUFFIX(const wchar_t *str, std::size_t) ENTT_NOEXCEPT { + return entt::hashed_wstring{str}; +} + + +#endif + +// #include "core/ident.hpp" +#ifndef ENTT_CORE_IDENT_HPP +#define ENTT_CORE_IDENT_HPP + + +#include +#include +#include +#include +// #include "../config/config.h" + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @brief Types identifiers. + * + * Variable template used to generate identifiers at compile-time for the given + * types. 
Use the `get` member function to know what's the identifier associated + * to the specific type. + * + * @note + * Identifiers are constant expression and can be used in any context where such + * an expression is required. As an example: + * @code{.cpp} + * using id = entt::identifier; + * + * switch(a_type_identifier) { + * case id::type: + * // ... + * break; + * case id::type: + * // ... + * break; + * default: + * // ... + * } + * @endcode + * + * @tparam Types List of types for which to generate identifiers. + */ +template +class identifier { + using tuple_type = std::tuple...>; + + template + [[nodiscard]] static constexpr id_type get(std::index_sequence) { + static_assert(std::disjunction_v...>, "Invalid type"); + return (0 + ... + (std::is_same_v> ? id_type(Indexes) : id_type{})); + } + +public: + /*! @brief Unsigned integer type. */ + using identifier_type = id_type; + + /*! @brief Statically generated unique identifier for the given type. */ + template + static constexpr identifier_type type = get>(std::index_sequence_for{}); +}; + + +} + + +#endif + +// #include "core/monostate.hpp" +#ifndef ENTT_CORE_MONOSTATE_HPP +#define ENTT_CORE_MONOSTATE_HPP + + +// #include "../config/config.h" + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @brief Minimal implementation of the monostate pattern. + * + * A minimal, yet complete configuration system built on top of the monostate + * pattern. Thread safe by design, it works only with basic types like `int`s or + * `bool`s.
+ * Multiple types and therefore more than one value can be associated with a + * single key. Because of this, users must pay attention to use the same type + * both during an assignment and when they try to read back their data. + * Otherwise, they can incur in unexpected results. + */ +template +struct monostate { + /** + * @brief Assigns a value of a specific type to a given key. + * @tparam Type Type of the value to assign. + * @param val User data to assign to the given key. + */ + template + void operator=(Type val) const ENTT_NOEXCEPT { + value = val; + } + + /** + * @brief Gets a value of a specific type for a given key. + * @tparam Type Type of the value to get. + * @return Stored value, if any. + */ + template + operator Type() const ENTT_NOEXCEPT { + return value; + } + +private: + template + inline static ENTT_MAYBE_ATOMIC(Type) value{}; +}; + + +/** + * @brief Helper variable template. + * @tparam Value Value used to differentiate between different variables. + */ +template +inline monostate monostate_v = {}; + + +} + + +#endif + +// #include "core/type_info.hpp" +#ifndef ENTT_CORE_TYPE_INFO_HPP +#define ENTT_CORE_TYPE_INFO_HPP + + +#include +// #include "../config/config.h" + +// #include "../core/attribute.h" +#ifndef ENTT_CORE_ATTRIBUTE_H +#define ENTT_CORE_ATTRIBUTE_H + + +#ifndef ENTT_EXPORT +# if defined _WIN32 || defined __CYGWIN__ || defined _MSC_VER +# define ENTT_EXPORT __declspec(dllexport) +# define ENTT_IMPORT __declspec(dllimport) +# define ENTT_HIDDEN +# elif defined __GNUC__ && __GNUC__ >= 4 +# define ENTT_EXPORT __attribute__((visibility("default"))) +# define ENTT_IMPORT __attribute__((visibility("default"))) +# define ENTT_HIDDEN __attribute__((visibility("hidden"))) +# else /* Unsupported compiler */ +# define ENTT_EXPORT +# define ENTT_IMPORT +# define ENTT_HIDDEN +# endif +#endif + + +#ifndef ENTT_API +# if defined ENTT_API_EXPORT +# define ENTT_API ENTT_EXPORT +# elif defined ENTT_API_IMPORT +# define ENTT_API ENTT_IMPORT +# else 
/* No API */ +# define ENTT_API +# endif +#endif + + +#endif + +// #include "hashed_string.hpp" + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @cond TURN_OFF_DOXYGEN + * Internal details not to be documented. + */ + + +namespace internal { + + +struct ENTT_API type_index { + [[nodiscard]] static id_type next() ENTT_NOEXCEPT { + static ENTT_MAYBE_ATOMIC(id_type) value{}; + return value++; + } +}; + + +template +[[nodiscard]] constexpr auto type_name() ENTT_NOEXCEPT { +#if defined ENTT_PRETTY_FUNCTION + std::string_view pretty_function{ENTT_PRETTY_FUNCTION}; + auto first = pretty_function.find_first_not_of(' ', pretty_function.find_first_of(ENTT_PRETTY_FUNCTION_PREFIX)+1); + auto value = pretty_function.substr(first, pretty_function.find_last_of(ENTT_PRETTY_FUNCTION_SUFFIX) - first); + return value; +#else + return std::string_view{}; +#endif +} + + +} + + +/** + * Internal details not to be documented. + * @endcond + */ + + +/** + * @brief Type index. + * @tparam Type Type for which to generate a sequential identifier. + */ +template +struct ENTT_API type_index { + /** + * @brief Returns the sequential identifier of a given type. + * @return The sequential identifier of a given type. + */ + [[nodiscard]] static id_type value() ENTT_NOEXCEPT { + static const id_type value = internal::type_index::next(); + return value; + } +}; + + +/** + * @brief Provides the member constant `value` to true if a given type is + * indexable, false otherwise. + * @tparam Type Potentially indexable type. + */ +template +struct has_type_index: std::false_type {}; + + +/*! @brief has_type_index */ +template +struct has_type_index::value())>>: std::true_type {}; + + +/** + * @brief Helper variable template. + * @tparam Type Potentially indexable type. + */ +template +inline constexpr bool has_type_index_v = has_type_index::value; + + +/** + * @brief Type info. + * @tparam Type Type for which to generate information. 
+ */ +template +struct type_info { + /** + * @brief Returns the numeric representation of a given type. + * @return The numeric representation of the given type. + */ +#if defined ENTT_PRETTY_FUNCTION_CONSTEXPR + [[nodiscard]] static constexpr id_type id() ENTT_NOEXCEPT { + constexpr auto value = hashed_string::value(ENTT_PRETTY_FUNCTION); + return value; + } +#elif defined ENTT_PRETTY_FUNCTION + [[nodiscard]] static id_type id() ENTT_NOEXCEPT { + static const auto value = hashed_string::value(ENTT_PRETTY_FUNCTION); + return value; + } +#else + [[nodiscard]] static id_type id() ENTT_NOEXCEPT { + return type_index::value(); + } +#endif + + /** + * @brief Returns the name of a given type. + * @return The name of the given type. + */ +#if defined ENTT_PRETTY_FUNCTION_CONSTEXPR + [[nodiscard]] static constexpr std::string_view name() ENTT_NOEXCEPT { + constexpr auto value = internal::type_name(); + return value; + } +#elif defined ENTT_PRETTY_FUNCTION + [[nodiscard]] static std::string_view name() ENTT_NOEXCEPT { + static const auto value = internal::type_name(); + return value; + } +#else + [[nodiscard]] static constexpr std::string_view name() ENTT_NOEXCEPT { + return internal::type_name(); + } +#endif +}; + + +} + + +#endif + +// #include "core/type_traits.hpp" +#ifndef ENTT_CORE_TYPE_TRAITS_HPP +#define ENTT_CORE_TYPE_TRAITS_HPP + + +#include +#include +#include +// #include "../config/config.h" + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @brief Using declaration to be used to _repeat_ the same type a number of + * times equal to the size of a given parameter pack. + * @tparam Type A type to repeat. + */ +template +using unpack_as_t = Type; + + +/** + * @brief Helper variable template to be used to _repeat_ the same value a + * number of times equal to the size of a given parameter pack. + * @tparam Value A value to repeat. + */ +template +inline constexpr auto unpack_as_v = Value; + + +/** + * @brief Wraps a static constant. 
+ * @tparam Value A static constant. + */ +template +using integral_constant = std::integral_constant; + + +/** + * @brief Alias template to ease the creation of named values. + * @tparam Value A constant value at least convertible to `id_type`. + */ +template +using tag = integral_constant; + + +/** + * @brief Utility class to disambiguate overloaded functions. + * @tparam N Number of choices available. + */ +template +struct choice_t + // Unfortunately, doxygen cannot parse such a construct. + /*! @cond TURN_OFF_DOXYGEN */ + : choice_t + /*! @endcond */ +{}; + + +/*! @copybrief choice_t */ +template<> +struct choice_t<0> {}; + + +/** + * @brief Variable template for the choice trick. + * @tparam N Number of choices available. + */ +template +inline constexpr choice_t choice{}; + + +/*! @brief A class to use to push around lists of types, nothing more. */ +template +struct type_list {}; + + +/*! @brief Primary template isn't defined on purpose. */ +template +struct type_list_size; + + +/** + * @brief Compile-time number of elements in a type list. + * @tparam Type Types provided by the type list. + */ +template +struct type_list_size> + : std::integral_constant +{}; + + +/** + * @brief Helper variable template. + * @tparam List Type list. + */ +template +inline constexpr auto type_list_size_v = type_list_size::value; + + +/*! @brief Primary template isn't defined on purpose. */ +template +struct type_list_cat; + + +/*! @brief Concatenates multiple type lists. */ +template<> +struct type_list_cat<> { + /*! @brief A type list composed by the types of all the type lists. */ + using type = type_list<>; +}; + + +/** + * @brief Concatenates multiple type lists. + * @tparam Type Types provided by the first type list. + * @tparam Other Types provided by the second type list. + * @tparam List Other type lists, if any. + */ +template +struct type_list_cat, type_list, List...> { + /*! @brief A type list composed by the types of all the type lists. 
*/ + using type = typename type_list_cat, List...>::type; +}; + + +/** + * @brief Concatenates multiple type lists. + * @tparam Type Types provided by the type list. + */ +template +struct type_list_cat> { + /*! @brief A type list composed by the types of all the type lists. */ + using type = type_list; +}; + + +/** + * @brief Helper type. + * @tparam List Type lists to concatenate. + */ +template +using type_list_cat_t = typename type_list_cat::type; + + +/*! @brief Primary template isn't defined on purpose. */ +template +struct type_list_unique; + + +/** + * @brief Removes duplicates types from a type list. + * @tparam Type One of the types provided by the given type list. + * @tparam Other The other types provided by the given type list. + */ +template +struct type_list_unique> { + /*! @brief A type list without duplicate types. */ + using type = std::conditional_t< + std::disjunction_v...>, + typename type_list_unique>::type, + type_list_cat_t, typename type_list_unique>::type> + >; +}; + + +/*! @brief Removes duplicates types from a type list. */ +template<> +struct type_list_unique> { + /*! @brief A type list without duplicate types. */ + using type = type_list<>; +}; + + +/** + * @brief Helper type. + * @tparam Type A type list. + */ +template +using type_list_unique_t = typename type_list_unique::type; + + +/** + * @brief Provides the member constant `value` to true if a given type is + * equality comparable, false otherwise. + * @tparam Type Potentially equality comparable type. + */ +template> +struct is_equality_comparable: std::false_type {}; + + +/*! @copydoc is_equality_comparable */ +template +struct is_equality_comparable() == std::declval())>> + : std::true_type +{}; + + +/** + * @brief Helper variable template. + * @tparam Type Potentially equality comparable type. 
+ */ +template +inline constexpr auto is_equality_comparable_v = is_equality_comparable::value; + + +/** + * @brief Provides the member constant `value` to true if a given type is empty + * and the empty type optimization is enabled, false otherwise. + * @tparam Type Potential empty type. + */ +template +struct is_eto_eligible + : ENTT_IS_EMPTY(Type) +{}; + + +/** + * @brief Helper variable template. + * @tparam Type Potential empty type. + */ +template +inline constexpr auto is_eto_eligible_v = is_eto_eligible::value; + + +/** + * @brief Extracts the class of a non-static member object or function. + * @tparam Member A pointer to a non-static member object or function. + */ +template +class member_class { + static_assert(std::is_member_pointer_v, "Invalid pointer type to non-static member object or function"); + + template + static Class * clazz(Ret(Class:: *)(Args...)); + + template + static Class * clazz(Ret(Class:: *)(Args...) const); + + template + static Class * clazz(Type Class:: *); + +public: + /*! @brief The class of the given non-static member object or function. */ + using type = std::remove_pointer_t()))>; +}; + + +/** + * @brief Helper type. + * @tparam Member A pointer to a non-static member object or function. 
+ */ +template +using member_class_t = typename member_class::type; + + +} + + +#endif + +// #include "core/utility.hpp" + +// #include "entity/actor.hpp" +#ifndef ENTT_ENTITY_ACTOR_HPP +#define ENTT_ENTITY_ACTOR_HPP + + +#include +#include +// #include "../config/config.h" +#ifndef ENTT_CONFIG_CONFIG_H +#define ENTT_CONFIG_CONFIG_H + + +#ifndef ENTT_NOEXCEPT +# define ENTT_NOEXCEPT noexcept +#endif + + +#ifndef ENTT_HS_SUFFIX +# define ENTT_HS_SUFFIX _hs +#endif + + +#ifndef ENTT_HWS_SUFFIX +# define ENTT_HWS_SUFFIX _hws +#endif + + +#ifndef ENTT_USE_ATOMIC +# define ENTT_MAYBE_ATOMIC(Type) Type +#else +# include +# define ENTT_MAYBE_ATOMIC(Type) std::atomic +#endif + + +#ifndef ENTT_ID_TYPE +# include +# define ENTT_ID_TYPE std::uint32_t +#endif + + +#ifndef ENTT_PAGE_SIZE +# define ENTT_PAGE_SIZE 32768 +#endif + + +#ifndef ENTT_ASSERT +# include +# define ENTT_ASSERT(condition) assert(condition) +#endif + + +#ifndef ENTT_NO_ETO +# include +# define ENTT_IS_EMPTY(Type) std::is_empty +#else +# include +# define ENTT_IS_EMPTY(Type) std::false_type +#endif + + +#ifndef ENTT_STANDARD_CPP +# if defined __clang__ || (defined __GNUC__ && __GNUC__ > 8) +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined __GNUC__ +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined _MSC_VER +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __FUNCSIG__ +# define ENTT_PRETTY_FUNCTION_PREFIX '<' +# define ENTT_PRETTY_FUNCTION_SUFFIX '>' +# endif +#endif + + +#ifndef ENTT_STANDALONE +# define ENTT_FAST_PATH(...) 
false +#else +# define ENTT_FAST_PATH(Cond) Cond +#endif + + +#endif + +// #include "registry.hpp" +#ifndef ENTT_ENTITY_REGISTRY_HPP +#define ENTT_ENTITY_REGISTRY_HPP + + +#include +#include +#include +#include +#include +#include +#include +#include +// #include "../config/config.h" + +// #include "../core/algorithm.hpp" +#ifndef ENTT_CORE_ALGORITHM_HPP +#define ENTT_CORE_ALGORITHM_HPP + + +#include +#include +#include +#include +#include +// #include "utility.hpp" +#ifndef ENTT_CORE_UTILITY_HPP +#define ENTT_CORE_UTILITY_HPP + + +#include +// #include "../config/config.h" +#ifndef ENTT_CONFIG_CONFIG_H +#define ENTT_CONFIG_CONFIG_H + + +#ifndef ENTT_NOEXCEPT +# define ENTT_NOEXCEPT noexcept +#endif + + +#ifndef ENTT_HS_SUFFIX +# define ENTT_HS_SUFFIX _hs +#endif + + +#ifndef ENTT_HWS_SUFFIX +# define ENTT_HWS_SUFFIX _hws +#endif + + +#ifndef ENTT_USE_ATOMIC +# define ENTT_MAYBE_ATOMIC(Type) Type +#else +# include +# define ENTT_MAYBE_ATOMIC(Type) std::atomic +#endif + + +#ifndef ENTT_ID_TYPE +# include +# define ENTT_ID_TYPE std::uint32_t +#endif + + +#ifndef ENTT_PAGE_SIZE +# define ENTT_PAGE_SIZE 32768 +#endif + + +#ifndef ENTT_ASSERT +# include +# define ENTT_ASSERT(condition) assert(condition) +#endif + + +#ifndef ENTT_NO_ETO +# include +# define ENTT_IS_EMPTY(Type) std::is_empty +#else +# include +# define ENTT_IS_EMPTY(Type) std::false_type +#endif + + +#ifndef ENTT_STANDARD_CPP +# if defined __clang__ || (defined __GNUC__ && __GNUC__ > 8) +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined __GNUC__ +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined _MSC_VER +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __FUNCSIG__ +# define ENTT_PRETTY_FUNCTION_PREFIX '<' +# define ENTT_PRETTY_FUNCTION_SUFFIX '>' 
+# endif +#endif + + +#ifndef ENTT_STANDALONE +# define ENTT_FAST_PATH(...) false +#else +# define ENTT_FAST_PATH(Cond) Cond +#endif + + +#endif + + + +namespace entt { + + +/*! @brief Identity function object (waiting for C++20). */ +struct identity { + /** + * @brief Returns its argument unchanged. + * @tparam Type Type of the argument. + * @param value The actual argument. + * @return The submitted value as-is. + */ + template + [[nodiscard]] constexpr Type && operator()(Type &&value) const ENTT_NOEXCEPT { + return std::forward(value); + } +}; + + +/** + * @brief Constant utility to disambiguate overloaded members of a class. + * @tparam Type Type of the desired overload. + * @tparam Class Type of class to which the member belongs. + * @param member A valid pointer to a member. + * @return Pointer to the member. + */ +template +[[nodiscard]] constexpr auto overload(Type Class:: *member) ENTT_NOEXCEPT { return member; } + + +/** + * @brief Constant utility to disambiguate overloaded functions. + * @tparam Func Function type of the desired overload. + * @param func A valid pointer to a function. + * @return Pointer to the function. + */ +template +[[nodiscard]] constexpr auto overload(Func *func) ENTT_NOEXCEPT { return func; } + + +/** + * @brief Helper type for visitors. + * @tparam Func Types of function objects. + */ +template +struct overloaded: Func... { + using Func::operator()...; +}; + + +/** + * @brief Deduction guide. + * @tparam Func Types of function objects. + */ +template +overloaded(Func...) -> overloaded; + + +/** + * @brief Basic implementation of a y-combinator. + * @tparam Func Type of a potentially recursive function. + */ +template +struct y_combinator { + /** + * @brief Constructs a y-combinator from a given function. + * @param recursive A potentially recursive function. + */ + y_combinator(Func recursive): + func{std::move(recursive)} + {} + + /** + * @brief Invokes a y-combinator and therefore its underlying function. 
+ * @tparam Args Types of arguments to use to invoke the underlying function. + * @param args Parameters to use to invoke the underlying function. + * @return Return value of the underlying function, if any. + */ + template + decltype(auto) operator()(Args &&... args) const { + return func(*this, std::forward(args)...); + } + + /*! @copydoc operator()() */ + template + decltype(auto) operator()(Args &&... args) { + return func(*this, std::forward(args)...); + } + +private: + Func func; +}; + + +} + + +#endif + + + +namespace entt { + + +/** + * @brief Function object to wrap `std::sort` in a class type. + * + * Unfortunately, `std::sort` cannot be passed as template argument to a class + * template or a function template.
+ * This class fills the gap by wrapping some flavors of `std::sort` in a + * function object. + */ +struct std_sort { + /** + * @brief Sorts the elements in a range. + * + * Sorts the elements in a range using the given binary comparison function. + * + * @tparam It Type of random access iterator. + * @tparam Compare Type of comparison function object. + * @tparam Args Types of arguments to forward to the sort function. + * @param first An iterator to the first element of the range to sort. + * @param last An iterator past the last element of the range to sort. + * @param compare A valid comparison function object. + * @param args Arguments to forward to the sort function, if any. + */ + template, typename... Args> + void operator()(It first, It last, Compare compare = Compare{}, Args &&... args) const { + std::sort(std::forward(args)..., std::move(first), std::move(last), std::move(compare)); + } +}; + + +/*! @brief Function object for performing insertion sort. */ +struct insertion_sort { + /** + * @brief Sorts the elements in a range. + * + * Sorts the elements in a range using the given binary comparison function. + * + * @tparam It Type of random access iterator. + * @tparam Compare Type of comparison function object. + * @param first An iterator to the first element of the range to sort. + * @param last An iterator past the last element of the range to sort. + * @param compare A valid comparison function object. + */ + template> + void operator()(It first, It last, Compare compare = Compare{}) const { + if(first < last) { + for(auto it = first+1; it < last; ++it) { + auto value = std::move(*it); + auto pre = it; + + for(; pre > first && compare(value, *(pre-1)); --pre) { + *pre = std::move(*(pre-1)); + } + + *pre = std::move(value); + } + } + } +}; + + +/** + * @brief Function object for performing LSD radix sort. + * @tparam Bit Number of bits processed per pass. + * @tparam N Maximum number of bits to sort. 
+ */ +template +struct radix_sort { + static_assert((N % Bit) == 0, "The maximum number of bits to sort must be a multiple of the number of bits processed per pass"); + + /** + * @brief Sorts the elements in a range. + * + * Sorts the elements in a range using the given _getter_ to access the + * actual data to be sorted. + * + * This implementation is inspired by the online book + * [Physically Based Rendering](http://www.pbr-book.org/3ed-2018/Primitives_and_Intersection_Acceleration/Bounding_Volume_Hierarchies.html#RadixSort). + * + * @tparam It Type of random access iterator. + * @tparam Getter Type of _getter_ function object. + * @param first An iterator to the first element of the range to sort. + * @param last An iterator past the last element of the range to sort. + * @param getter A valid _getter_ function object. + */ + template + void operator()(It first, It last, Getter getter = Getter{}) const { + if(first < last) { + static constexpr auto mask = (1 << Bit) - 1; + static constexpr auto buckets = 1 << Bit; + static constexpr auto passes = N / Bit; + + using value_type = typename std::iterator_traits::value_type; + std::vector aux(std::distance(first, last)); + + auto part = [getter = std::move(getter)](auto from, auto to, auto out, auto start) { + std::size_t index[buckets]{}; + std::size_t count[buckets]{}; + + std::for_each(from, to, [&getter, &count, start](const value_type &item) { + ++count[(getter(item) >> start) & mask]; + }); + + std::for_each(std::next(std::begin(index)), std::end(index), [index = std::begin(index), count = std::begin(count)](auto &item) mutable { + item = *(index++) + *(count++); + }); + + std::for_each(from, to, [&getter, &out, &index, start](value_type &item) { + out[index[(getter(item) >> start) & mask]++] = std::move(item); + }); + }; + + for(std::size_t pass = 0; pass < (passes & ~1); pass += 2) { + part(first, last, aux.begin(), pass * Bit); + part(aux.begin(), aux.end(), first, (pass + 1) * Bit); + } + + if 
constexpr(passes & 1) { + part(first, last, aux.begin(), (passes - 1) * Bit); + std::move(aux.begin(), aux.end(), first); + } + } + } +}; + + +} + + +#endif + +// #include "../core/fwd.hpp" +#ifndef ENTT_CORE_FWD_HPP +#define ENTT_CORE_FWD_HPP + + +// #include "../config/config.h" + + + +namespace entt { + + +/*! @brief Alias declaration for type identifiers. */ +using id_type = ENTT_ID_TYPE; + + +} + + +#endif + +// #include "../core/type_info.hpp" +#ifndef ENTT_CORE_TYPE_INFO_HPP +#define ENTT_CORE_TYPE_INFO_HPP + + +#include +// #include "../config/config.h" + +// #include "../core/attribute.h" +#ifndef ENTT_CORE_ATTRIBUTE_H +#define ENTT_CORE_ATTRIBUTE_H + + +#ifndef ENTT_EXPORT +# if defined _WIN32 || defined __CYGWIN__ || defined _MSC_VER +# define ENTT_EXPORT __declspec(dllexport) +# define ENTT_IMPORT __declspec(dllimport) +# define ENTT_HIDDEN +# elif defined __GNUC__ && __GNUC__ >= 4 +# define ENTT_EXPORT __attribute__((visibility("default"))) +# define ENTT_IMPORT __attribute__((visibility("default"))) +# define ENTT_HIDDEN __attribute__((visibility("hidden"))) +# else /* Unsupported compiler */ +# define ENTT_EXPORT +# define ENTT_IMPORT +# define ENTT_HIDDEN +# endif +#endif + + +#ifndef ENTT_API +# if defined ENTT_API_EXPORT +# define ENTT_API ENTT_EXPORT +# elif defined ENTT_API_IMPORT +# define ENTT_API ENTT_IMPORT +# else /* No API */ +# define ENTT_API +# endif +#endif + + +#endif + +// #include "hashed_string.hpp" +#ifndef ENTT_CORE_HASHED_STRING_HPP +#define ENTT_CORE_HASHED_STRING_HPP + + +#include +#include +// #include "../config/config.h" + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @cond TURN_OFF_DOXYGEN + * Internal details not to be documented. 
+ */ + + +namespace internal { + + +template +struct fnv1a_traits; + + +template<> +struct fnv1a_traits { + using type = std::uint32_t; + static constexpr std::uint32_t offset = 2166136261; + static constexpr std::uint32_t prime = 16777619; +}; + + +template<> +struct fnv1a_traits { + using type = std::uint64_t; + static constexpr std::uint64_t offset = 14695981039346656037ull; + static constexpr std::uint64_t prime = 1099511628211ull; +}; + + +} + + +/** + * Internal details not to be documented. + * @endcond + */ + + +/** + * @brief Zero overhead unique identifier. + * + * A hashed string is a compile-time tool that allows users to use + * human-readable identifers in the codebase while using their numeric + * counterparts at runtime.
+ * Because of that, a hashed string can also be used in constant expressions if + * required. + * + * @tparam Char Character type. + */ +template +class basic_hashed_string { + using traits_type = internal::fnv1a_traits; + + struct const_wrapper { + // non-explicit constructor on purpose + constexpr const_wrapper(const Char *curr) ENTT_NOEXCEPT: str{curr} {} + const Char *str; + }; + + // Fowler–Noll–Vo hash function v. 1a - the good + [[nodiscard]] static constexpr id_type helper(const Char *curr) ENTT_NOEXCEPT { + auto value = traits_type::offset; + + while(*curr != 0) { + value = (value ^ static_cast(*(curr++))) * traits_type::prime; + } + + return value; + } + +public: + /*! @brief Character type. */ + using value_type = Char; + /*! @brief Unsigned integer type. */ + using hash_type = id_type; + + /** + * @brief Returns directly the numeric representation of a string. + * + * Forcing template resolution avoids implicit conversions. An + * human-readable identifier can be anything but a plain, old bunch of + * characters.
+ * Example of use: + * @code{.cpp} + * const auto value = basic_hashed_string::to_value("my.png"); + * @endcode + * + * @tparam N Number of characters of the identifier. + * @param str Human-readable identifer. + * @return The numeric representation of the string. + */ + template + [[nodiscard]] static constexpr hash_type value(const value_type (&str)[N]) ENTT_NOEXCEPT { + return helper(str); + } + + /** + * @brief Returns directly the numeric representation of a string. + * @param wrapper Helps achieving the purpose by relying on overloading. + * @return The numeric representation of the string. + */ + [[nodiscard]] static hash_type value(const_wrapper wrapper) ENTT_NOEXCEPT { + return helper(wrapper.str); + } + + /** + * @brief Returns directly the numeric representation of a string view. + * @param str Human-readable identifer. + * @param size Length of the string to hash. + * @return The numeric representation of the string. + */ + [[nodiscard]] static hash_type value(const value_type *str, std::size_t size) ENTT_NOEXCEPT { + id_type partial{traits_type::offset}; + while(size--) { partial = (partial^(str++)[0])*traits_type::prime; } + return partial; + } + + /*! @brief Constructs an empty hashed string. */ + constexpr basic_hashed_string() ENTT_NOEXCEPT + : str{nullptr}, hash{} + {} + + /** + * @brief Constructs a hashed string from an array of const characters. + * + * Forcing template resolution avoids implicit conversions. An + * human-readable identifier can be anything but a plain, old bunch of + * characters.
+ * Example of use: + * @code{.cpp} + * basic_hashed_string hs{"my.png"}; + * @endcode + * + * @tparam N Number of characters of the identifier. + * @param curr Human-readable identifer. + */ + template + constexpr basic_hashed_string(const value_type (&curr)[N]) ENTT_NOEXCEPT + : str{curr}, hash{helper(curr)} + {} + + /** + * @brief Explicit constructor on purpose to avoid constructing a hashed + * string directly from a `const value_type *`. + * @param wrapper Helps achieving the purpose by relying on overloading. + */ + explicit constexpr basic_hashed_string(const_wrapper wrapper) ENTT_NOEXCEPT + : str{wrapper.str}, hash{helper(wrapper.str)} + {} + + /** + * @brief Returns the human-readable representation of a hashed string. + * @return The string used to initialize the instance. + */ + [[nodiscard]] constexpr const value_type * data() const ENTT_NOEXCEPT { + return str; + } + + /** + * @brief Returns the numeric representation of a hashed string. + * @return The numeric representation of the instance. + */ + [[nodiscard]] constexpr hash_type value() const ENTT_NOEXCEPT { + return hash; + } + + /*! @copydoc data */ + [[nodiscard]] constexpr operator const value_type *() const ENTT_NOEXCEPT { return data(); } + + /** + * @brief Returns the numeric representation of a hashed string. + * @return The numeric representation of the instance. + */ + [[nodiscard]] constexpr operator hash_type() const ENTT_NOEXCEPT { return value(); } + + /** + * @brief Compares two hashed strings. + * @param other Hashed string with which to compare. + * @return True if the two hashed strings are identical, false otherwise. + */ + [[nodiscard]] constexpr bool operator==(const basic_hashed_string &other) const ENTT_NOEXCEPT { + return hash == other.hash; + } + +private: + const value_type *str; + hash_type hash; +}; + + +/** + * @brief Deduction guide. 
+ * + * It allows to deduce the character type of the hashed string directly from a + * human-readable identifer provided to the constructor. + * + * @tparam Char Character type. + * @tparam N Number of characters of the identifier. + * @param str Human-readable identifer. + */ +template +basic_hashed_string(const Char (&str)[N]) ENTT_NOEXCEPT +-> basic_hashed_string; + + +/** + * @brief Compares two hashed strings. + * @tparam Char Character type. + * @param lhs A valid hashed string. + * @param rhs A valid hashed string. + * @return True if the two hashed strings are identical, false otherwise. + */ +template +[[nodiscard]] constexpr bool operator!=(const basic_hashed_string &lhs, const basic_hashed_string &rhs) ENTT_NOEXCEPT { + return !(lhs == rhs); +} + + +/*! @brief Aliases for common character types. */ +using hashed_string = basic_hashed_string; + + +/*! @brief Aliases for common character types. */ +using hashed_wstring = basic_hashed_string; + + +} + + +/** + * @brief User defined literal for hashed strings. + * @param str The literal without its suffix. + * @return A properly initialized hashed string. + */ +[[nodiscard]] constexpr entt::hashed_string operator"" ENTT_HS_SUFFIX(const char *str, std::size_t) ENTT_NOEXCEPT { + return entt::hashed_string{str}; +} + + +/** + * @brief User defined literal for hashed wstrings. + * @param str The literal without its suffix. + * @return A properly initialized hashed wstring. + */ +[[nodiscard]] constexpr entt::hashed_wstring operator"" ENTT_HWS_SUFFIX(const wchar_t *str, std::size_t) ENTT_NOEXCEPT { + return entt::hashed_wstring{str}; +} + + +#endif + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @cond TURN_OFF_DOXYGEN + * Internal details not to be documented. 
+ */ + + +namespace internal { + + +struct ENTT_API type_index { + [[nodiscard]] static id_type next() ENTT_NOEXCEPT { + static ENTT_MAYBE_ATOMIC(id_type) value{}; + return value++; + } +}; + + +template +[[nodiscard]] constexpr auto type_name() ENTT_NOEXCEPT { +#if defined ENTT_PRETTY_FUNCTION + std::string_view pretty_function{ENTT_PRETTY_FUNCTION}; + auto first = pretty_function.find_first_not_of(' ', pretty_function.find_first_of(ENTT_PRETTY_FUNCTION_PREFIX)+1); + auto value = pretty_function.substr(first, pretty_function.find_last_of(ENTT_PRETTY_FUNCTION_SUFFIX) - first); + return value; +#else + return std::string_view{}; +#endif +} + + +} + + +/** + * Internal details not to be documented. + * @endcond + */ + + +/** + * @brief Type index. + * @tparam Type Type for which to generate a sequential identifier. + */ +template +struct ENTT_API type_index { + /** + * @brief Returns the sequential identifier of a given type. + * @return The sequential identifier of a given type. + */ + [[nodiscard]] static id_type value() ENTT_NOEXCEPT { + static const id_type value = internal::type_index::next(); + return value; + } +}; + + +/** + * @brief Provides the member constant `value` to true if a given type is + * indexable, false otherwise. + * @tparam Type Potentially indexable type. + */ +template +struct has_type_index: std::false_type {}; + + +/*! @brief has_type_index */ +template +struct has_type_index::value())>>: std::true_type {}; + + +/** + * @brief Helper variable template. + * @tparam Type Potentially indexable type. + */ +template +inline constexpr bool has_type_index_v = has_type_index::value; + + +/** + * @brief Type info. + * @tparam Type Type for which to generate information. + */ +template +struct type_info { + /** + * @brief Returns the numeric representation of a given type. + * @return The numeric representation of the given type. 
+ */ +#if defined ENTT_PRETTY_FUNCTION_CONSTEXPR + [[nodiscard]] static constexpr id_type id() ENTT_NOEXCEPT { + constexpr auto value = hashed_string::value(ENTT_PRETTY_FUNCTION); + return value; + } +#elif defined ENTT_PRETTY_FUNCTION + [[nodiscard]] static id_type id() ENTT_NOEXCEPT { + static const auto value = hashed_string::value(ENTT_PRETTY_FUNCTION); + return value; + } +#else + [[nodiscard]] static id_type id() ENTT_NOEXCEPT { + return type_index::value(); + } +#endif + + /** + * @brief Returns the name of a given type. + * @return The name of the given type. + */ +#if defined ENTT_PRETTY_FUNCTION_CONSTEXPR + [[nodiscard]] static constexpr std::string_view name() ENTT_NOEXCEPT { + constexpr auto value = internal::type_name(); + return value; + } +#elif defined ENTT_PRETTY_FUNCTION + [[nodiscard]] static std::string_view name() ENTT_NOEXCEPT { + static const auto value = internal::type_name(); + return value; + } +#else + [[nodiscard]] static constexpr std::string_view name() ENTT_NOEXCEPT { + return internal::type_name(); + } +#endif +}; + + +} + + +#endif + +// #include "../core/type_traits.hpp" +#ifndef ENTT_CORE_TYPE_TRAITS_HPP +#define ENTT_CORE_TYPE_TRAITS_HPP + + +#include +#include +#include +// #include "../config/config.h" + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @brief Using declaration to be used to _repeat_ the same type a number of + * times equal to the size of a given parameter pack. + * @tparam Type A type to repeat. + */ +template +using unpack_as_t = Type; + + +/** + * @brief Helper variable template to be used to _repeat_ the same value a + * number of times equal to the size of a given parameter pack. + * @tparam Value A value to repeat. + */ +template +inline constexpr auto unpack_as_v = Value; + + +/** + * @brief Wraps a static constant. + * @tparam Value A static constant. + */ +template +using integral_constant = std::integral_constant; + + +/** + * @brief Alias template to ease the creation of named values. 
+ * @tparam Value A constant value at least convertible to `id_type`. + */ +template +using tag = integral_constant; + + +/** + * @brief Utility class to disambiguate overloaded functions. + * @tparam N Number of choices available. + */ +template +struct choice_t + // Unfortunately, doxygen cannot parse such a construct. + /*! @cond TURN_OFF_DOXYGEN */ + : choice_t + /*! @endcond */ +{}; + + +/*! @copybrief choice_t */ +template<> +struct choice_t<0> {}; + + +/** + * @brief Variable template for the choice trick. + * @tparam N Number of choices available. + */ +template +inline constexpr choice_t choice{}; + + +/*! @brief A class to use to push around lists of types, nothing more. */ +template +struct type_list {}; + + +/*! @brief Primary template isn't defined on purpose. */ +template +struct type_list_size; + + +/** + * @brief Compile-time number of elements in a type list. + * @tparam Type Types provided by the type list. + */ +template +struct type_list_size> + : std::integral_constant +{}; + + +/** + * @brief Helper variable template. + * @tparam List Type list. + */ +template +inline constexpr auto type_list_size_v = type_list_size::value; + + +/*! @brief Primary template isn't defined on purpose. */ +template +struct type_list_cat; + + +/*! @brief Concatenates multiple type lists. */ +template<> +struct type_list_cat<> { + /*! @brief A type list composed by the types of all the type lists. */ + using type = type_list<>; +}; + + +/** + * @brief Concatenates multiple type lists. + * @tparam Type Types provided by the first type list. + * @tparam Other Types provided by the second type list. + * @tparam List Other type lists, if any. + */ +template +struct type_list_cat, type_list, List...> { + /*! @brief A type list composed by the types of all the type lists. */ + using type = typename type_list_cat, List...>::type; +}; + + +/** + * @brief Concatenates multiple type lists. + * @tparam Type Types provided by the type list. 
+ */ +template +struct type_list_cat> { + /*! @brief A type list composed by the types of all the type lists. */ + using type = type_list; +}; + + +/** + * @brief Helper type. + * @tparam List Type lists to concatenate. + */ +template +using type_list_cat_t = typename type_list_cat::type; + + +/*! @brief Primary template isn't defined on purpose. */ +template +struct type_list_unique; + + +/** + * @brief Removes duplicates types from a type list. + * @tparam Type One of the types provided by the given type list. + * @tparam Other The other types provided by the given type list. + */ +template +struct type_list_unique> { + /*! @brief A type list without duplicate types. */ + using type = std::conditional_t< + std::disjunction_v...>, + typename type_list_unique>::type, + type_list_cat_t, typename type_list_unique>::type> + >; +}; + + +/*! @brief Removes duplicates types from a type list. */ +template<> +struct type_list_unique> { + /*! @brief A type list without duplicate types. */ + using type = type_list<>; +}; + + +/** + * @brief Helper type. + * @tparam Type A type list. + */ +template +using type_list_unique_t = typename type_list_unique::type; + + +/** + * @brief Provides the member constant `value` to true if a given type is + * equality comparable, false otherwise. + * @tparam Type Potentially equality comparable type. + */ +template> +struct is_equality_comparable: std::false_type {}; + + +/*! @copydoc is_equality_comparable */ +template +struct is_equality_comparable() == std::declval())>> + : std::true_type +{}; + + +/** + * @brief Helper variable template. + * @tparam Type Potentially equality comparable type. + */ +template +inline constexpr auto is_equality_comparable_v = is_equality_comparable::value; + + +/** + * @brief Provides the member constant `value` to true if a given type is empty + * and the empty type optimization is enabled, false otherwise. + * @tparam Type Potential empty type. 
+ */ +template +struct is_eto_eligible + : ENTT_IS_EMPTY(Type) +{}; + + +/** + * @brief Helper variable template. + * @tparam Type Potential empty type. + */ +template +inline constexpr auto is_eto_eligible_v = is_eto_eligible::value; + + +/** + * @brief Extracts the class of a non-static member object or function. + * @tparam Member A pointer to a non-static member object or function. + */ +template +class member_class { + static_assert(std::is_member_pointer_v, "Invalid pointer type to non-static member object or function"); + + template + static Class * clazz(Ret(Class:: *)(Args...)); + + template + static Class * clazz(Ret(Class:: *)(Args...) const); + + template + static Class * clazz(Type Class:: *); + +public: + /*! @brief The class of the given non-static member object or function. */ + using type = std::remove_pointer_t()))>; +}; + + +/** + * @brief Helper type. + * @tparam Member A pointer to a non-static member object or function. + */ +template +using member_class_t = typename member_class::type; + + +} + + +#endif + +// #include "../signal/sigh.hpp" +#ifndef ENTT_SIGNAL_SIGH_HPP +#define ENTT_SIGNAL_SIGH_HPP + + +#include +#include +#include +#include +#include +#include +// #include "../config/config.h" +#ifndef ENTT_CONFIG_CONFIG_H +#define ENTT_CONFIG_CONFIG_H + + +#ifndef ENTT_NOEXCEPT +# define ENTT_NOEXCEPT noexcept +#endif + + +#ifndef ENTT_HS_SUFFIX +# define ENTT_HS_SUFFIX _hs +#endif + + +#ifndef ENTT_HWS_SUFFIX +# define ENTT_HWS_SUFFIX _hws +#endif + + +#ifndef ENTT_USE_ATOMIC +# define ENTT_MAYBE_ATOMIC(Type) Type +#else +# include +# define ENTT_MAYBE_ATOMIC(Type) std::atomic +#endif + + +#ifndef ENTT_ID_TYPE +# include +# define ENTT_ID_TYPE std::uint32_t +#endif + + +#ifndef ENTT_PAGE_SIZE +# define ENTT_PAGE_SIZE 32768 +#endif + + +#ifndef ENTT_ASSERT +# include +# define ENTT_ASSERT(condition) assert(condition) +#endif + + +#ifndef ENTT_NO_ETO +# include +# define ENTT_IS_EMPTY(Type) std::is_empty +#else +# include +# define 
ENTT_IS_EMPTY(Type) std::false_type +#endif + + +#ifndef ENTT_STANDARD_CPP +# if defined __clang__ || (defined __GNUC__ && __GNUC__ > 8) +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined __GNUC__ +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined _MSC_VER +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __FUNCSIG__ +# define ENTT_PRETTY_FUNCTION_PREFIX '<' +# define ENTT_PRETTY_FUNCTION_SUFFIX '>' +# endif +#endif + + +#ifndef ENTT_STANDALONE +# define ENTT_FAST_PATH(...) false +#else +# define ENTT_FAST_PATH(Cond) Cond +#endif + + +#endif + +// #include "delegate.hpp" +#ifndef ENTT_SIGNAL_DELEGATE_HPP +#define ENTT_SIGNAL_DELEGATE_HPP + + +#include +#include +#include +#include +#include +// #include "../config/config.h" + + + +namespace entt { + + +/** + * @cond TURN_OFF_DOXYGEN + * Internal details not to be documented. + */ + + +namespace internal { + + +template +auto function_pointer(Ret(*)(Args...)) -> Ret(*)(Args...); + + +template +auto function_pointer(Ret(*)(Type, Args...), Other &&) -> Ret(*)(Args...); + + +template +auto function_pointer(Ret(Class:: *)(Args...), Other &&...) -> Ret(*)(Args...); + + +template +auto function_pointer(Ret(Class:: *)(Args...) const, Other &&...) -> Ret(*)(Args...); + + +template +auto function_pointer(Type Class:: *, Other &&...) -> Type(*)(); + + +template +using function_pointer_t = decltype(internal::function_pointer(std::declval()...)); + + +template +[[nodiscard]] constexpr auto index_sequence_for(Ret(*)(Args...)) { + return std::index_sequence_for{}; +} + + +} + + +/** + * Internal details not to be documented. + * @endcond + */ + + +/*! @brief Used to wrap a function or a member of a specified type. */ +template +struct connect_arg_t {}; + + +/*! 
@brief Constant of type connect_arg_t used to disambiguate calls. */ +template +inline constexpr connect_arg_t connect_arg{}; + + +/** + * @brief Basic delegate implementation. + * + * Primary template isn't defined on purpose. All the specializations give a + * compile-time error unless the template parameter is a function type. + */ +template +class delegate; + + +/** + * @brief Utility class to use to send around functions and members. + * + * Unmanaged delegate for function pointers and members. Users of this class are + * in charge of disconnecting instances before deleting them. + * + * A delegate can be used as a general purpose invoker without memory overhead + * for free functions possibly with payloads and bound or unbound members. + * + * @tparam Ret Return type of a function type. + * @tparam Args Types of arguments of a function type. + */ +template +class delegate { + template + [[nodiscard]] auto wrap(std::index_sequence) ENTT_NOEXCEPT { + return [](const void *, Args... args) -> Ret { + [[maybe_unused]] const auto arguments = std::forward_as_tuple(std::forward(args)...); + return Ret(std::invoke(Candidate, std::forward>>(std::get(arguments))...)); + }; + } + + template + [[nodiscard]] auto wrap(Type &, std::index_sequence) ENTT_NOEXCEPT { + return [](const void *payload, Args... args) -> Ret { + [[maybe_unused]] const auto arguments = std::forward_as_tuple(std::forward(args)...); + Type *curr = static_cast(const_cast, const void *, void *>>(payload)); + return Ret(std::invoke(Candidate, *curr, std::forward>>(std::get(arguments))...)); + }; + } + + template + [[nodiscard]] auto wrap(Type *, std::index_sequence) ENTT_NOEXCEPT { + return [](const void *payload, Args... args) -> Ret { + [[maybe_unused]] const auto arguments = std::forward_as_tuple(std::forward(args)...); + Type *curr = static_cast(const_cast, const void *, void *>>(payload)); + return Ret(std::invoke(Candidate, curr, std::forward>>(std::get(arguments))...)); + }; + } + +public: + /*! 
@brief Function type of the contained target. */ + using function_type = Ret(const void *, Args...); + /*! @brief Function type of the delegate. */ + using type = Ret(Args...); + /*! @brief Return type of the delegate. */ + using result_type = Ret; + + /*! @brief Default constructor. */ + delegate() ENTT_NOEXCEPT + : fn{nullptr}, data{nullptr} + {} + + /** + * @brief Constructs a delegate and connects a free function or an unbound + * member. + * @tparam Candidate Function or member to connect to the delegate. + */ + template + delegate(connect_arg_t) ENTT_NOEXCEPT { + connect(); + } + + /** + * @brief Constructs a delegate and connects a free function with payload or + * a bound member. + * @tparam Candidate Function or member to connect to the delegate. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. + */ + template + delegate(connect_arg_t, Type &&value_or_instance) ENTT_NOEXCEPT { + connect(std::forward(value_or_instance)); + } + + /** + * @brief Constructs a delegate and connects an user defined function with + * optional payload. + * @param function Function to connect to the delegate. + * @param payload User defined arbitrary data. + */ + delegate(function_type *function, const void *payload = nullptr) ENTT_NOEXCEPT { + connect(function, payload); + } + + /** + * @brief Connects a free function or an unbound member to a delegate. + * @tparam Candidate Function or member to connect to the delegate. + */ + template + void connect() ENTT_NOEXCEPT { + data = nullptr; + + if constexpr(std::is_invocable_r_v) { + fn = [](const void *, Args... 
args) -> Ret { + return Ret(std::invoke(Candidate, std::forward(args)...)); + }; + } else if constexpr(std::is_member_pointer_v) { + fn = wrap(internal::index_sequence_for>>(internal::function_pointer_t{})); + } else { + fn = wrap(internal::index_sequence_for(internal::function_pointer_t{})); + } + } + + /** + * @brief Connects a free function with payload or a bound member to a + * delegate. + * + * The delegate isn't responsible for the connected object or the payload. + * Users must always guarantee that the lifetime of the instance overcomes + * the one of the delegate.
+ * When used to connect a free function with payload, its signature must be + * such that the instance is the first argument before the ones used to + * define the delegate itself. + * + * @tparam Candidate Function or member to connect to the delegate. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid reference that fits the purpose. + */ + template + void connect(Type &value_or_instance) ENTT_NOEXCEPT { + data = &value_or_instance; + + if constexpr(std::is_invocable_r_v) { + fn = [](const void *payload, Args... args) -> Ret { + Type *curr = static_cast(const_cast, const void *, void *>>(payload)); + return Ret(std::invoke(Candidate, *curr, std::forward(args)...)); + }; + } else { + fn = wrap(value_or_instance, internal::index_sequence_for(internal::function_pointer_t{})); + } + } + + /** + * @brief Connects a free function with payload or a bound member to a + * delegate. + * + * @sa connect(Type &) + * + * @tparam Candidate Function or member to connect to the delegate. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid pointer that fits the purpose. + */ + template + void connect(Type *value_or_instance) ENTT_NOEXCEPT { + data = value_or_instance; + + if constexpr(std::is_invocable_r_v) { + fn = [](const void *payload, Args... args) -> Ret { + Type *curr = static_cast(const_cast, const void *, void *>>(payload)); + return Ret(std::invoke(Candidate, curr, std::forward(args)...)); + }; + } else { + fn = wrap(value_or_instance, internal::index_sequence_for(internal::function_pointer_t{})); + } + } + + /** + * @brief Connects an user defined function with optional payload to a + * delegate. + * + * The delegate isn't responsible for the connected object or the payload. + * Users must always guarantee that the lifetime of an instance overcomes + * the one of the delegate.
+ * The payload is returned as the first argument to the target function in + * all cases. + * + * @param function Function to connect to the delegate. + * @param payload User defined arbitrary data. + */ + void connect(function_type *function, const void *payload = nullptr) ENTT_NOEXCEPT { + fn = function; + data = payload; + } + + /** + * @brief Resets a delegate. + * + * After a reset, a delegate cannot be invoked anymore. + */ + void reset() ENTT_NOEXCEPT { + fn = nullptr; + data = nullptr; + } + + /** + * @brief Returns the instance or the payload linked to a delegate, if any. + * @return An opaque pointer to the underlying data. + */ + [[nodiscard]] const void * instance() const ENTT_NOEXCEPT { + return data; + } + + /** + * @brief Triggers a delegate. + * + * The delegate invokes the underlying function and returns the result. + * + * @warning + * Attempting to trigger an invalid delegate results in undefined + * behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * delegate has not yet been set. + * + * @param args Arguments to use to invoke the underlying function. + * @return The value returned by the underlying function. + */ + Ret operator()(Args... args) const { + ENTT_ASSERT(fn); + return fn(data, std::forward(args)...); + } + + /** + * @brief Checks whether a delegate actually stores a listener. + * @return False if the delegate is empty, true otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + // no need to test also data + return !(fn == nullptr); + } + + /** + * @brief Compares the contents of two delegates. + * @param other Delegate with which to compare. + * @return False if the two contents differ, true otherwise. + */ + [[nodiscard]] bool operator==(const delegate &other) const ENTT_NOEXCEPT { + return fn == other.fn && data == other.data; + } + +private: + function_type *fn; + const void *data; +}; + + +/** + * @brief Compares the contents of two delegates. + * @tparam Ret Return type of a function type. + * @tparam Args Types of arguments of a function type. + * @param lhs A valid delegate object. + * @param rhs A valid delegate object. + * @return True if the two contents differ, false otherwise. + */ +template +[[nodiscard]] bool operator!=(const delegate &lhs, const delegate &rhs) ENTT_NOEXCEPT { + return !(lhs == rhs); +} + + +/** + * @brief Deduction guide. + * @tparam Candidate Function or member to connect to the delegate. + */ +template +delegate(connect_arg_t) ENTT_NOEXCEPT +-> delegate>>; + + +/** + * @brief Deduction guide. + * @tparam Candidate Function or member to connect to the delegate. + * @tparam Type Type of class or type of payload. + */ +template +delegate(connect_arg_t, Type &&) ENTT_NOEXCEPT +-> delegate>>; + + +/*! @brief Deduction guide. 
*/ +template +delegate(Ret(*)(const void *, Args...), const void * = nullptr) ENTT_NOEXCEPT +-> delegate; + + +} + + +#endif + +// #include "fwd.hpp" +#ifndef ENTT_SIGNAL_FWD_HPP +#define ENTT_SIGNAL_FWD_HPP + + +namespace entt { + + +template +class delegate; + + +class dispatcher; + + +template +class emitter; + + +class connection; + + +struct scoped_connection; + + +template +class sink; + + +template +class sigh; + + +} + + +#endif + + + +namespace entt { + + +/** + * @brief Sink class. + * + * Primary template isn't defined on purpose. All the specializations give a + * compile-time error unless the template parameter is a function type. + * + * @tparam Function A valid function type. + */ +template +class sink; + + +/** + * @brief Unmanaged signal handler. + * + * Primary template isn't defined on purpose. All the specializations give a + * compile-time error unless the template parameter is a function type. + * + * @tparam Function A valid function type. + */ +template +class sigh; + + +/** + * @brief Unmanaged signal handler. + * + * It works directly with references to classes and pointers to member functions + * as well as pointers to free functions. Users of this class are in charge of + * disconnecting instances before deleting them. + * + * This class serves mainly two purposes: + * + * * Creating signals to use later to notify a bunch of listeners. + * * Collecting results from a set of functions like in a voting system. + * + * @tparam Ret Return type of a function type. + * @tparam Args Types of arguments of a function type. + */ +template +class sigh { + /*! @brief A sink is allowed to modify a signal. */ + friend class sink; + +public: + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + /*! @brief Sink type. */ + using sink_type = sink; + + /** + * @brief Instance type when it comes to connecting member functions. + * @tparam Class Type of class to which the member function belongs. 
+ */ + template + using instance_type = Class *; + + /** + * @brief Number of listeners connected to the signal. + * @return Number of listeners currently connected. + */ + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return calls.size(); + } + + /** + * @brief Returns false if at least a listener is connected to the signal. + * @return True if the signal has no listeners connected, false otherwise. + */ + [[nodiscard]] bool empty() const ENTT_NOEXCEPT { + return calls.empty(); + } + + /** + * @brief Triggers a signal. + * + * All the listeners are notified. Order isn't guaranteed. + * + * @param args Arguments to use to invoke listeners. + */ + void publish(Args... args) const { + for(auto &&call: std::as_const(calls)) { + call(args...); + } + } + + /** + * @brief Collects return values from the listeners. + * + * The collector must expose a call operator with the following properties: + * + * * The return type is either `void` or such that it's convertible to + * `bool`. In the second case, a true value will stop the iteration. + * * The list of parameters is empty if `Ret` is `void`, otherwise it + * contains a single element such that `Ret` is convertible to it. + * + * @tparam Func Type of collector to use, if any. + * @param func A valid function object. + * @param args Arguments to use to invoke listeners. + */ + template + void collect(Func func, Args... args) const { + for(auto &&call: calls) { + if constexpr(std::is_void_v) { + if constexpr(std::is_invocable_r_v) { + call(args...); + if(func()) { break; } + } else { + call(args...); + func(); + } + } else { + if constexpr(std::is_invocable_r_v) { + if(func(call(args...))) { break; } + } else { + func(call(args...)); + } + } + } + } + +private: + std::vector> calls; +}; + + +/** + * @brief Connection class. + * + * Opaque object the aim of which is to allow users to release an already + * estabilished connection without having to keep a reference to the signal or + * the sink that generated it. 
+ */ +class connection { + /*! @brief A sink is allowed to create connection objects. */ + template + friend class sink; + + connection(delegate fn, void *ref) + : disconnect{fn}, signal{ref} + {} + +public: + /*! @brief Default constructor. */ + connection() = default; + + /** + * @brief Checks whether a connection is properly initialized. + * @return True if the connection is properly initialized, false otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return static_cast(disconnect); + } + + /*! @brief Breaks the connection. */ + void release() { + if(disconnect) { + disconnect(signal); + disconnect.reset(); + } + } + +private: + delegate disconnect; + void *signal{}; +}; + + +/** + * @brief Scoped connection class. + * + * Opaque object the aim of which is to allow users to release an already + * estabilished connection without having to keep a reference to the signal or + * the sink that generated it.
+ * A scoped connection automatically breaks the link between the two objects + * when it goes out of scope. + */ +struct scoped_connection { + /*! @brief Default constructor. */ + scoped_connection() = default; + + /** + * @brief Constructs a scoped connection from a basic connection. + * @param other A valid connection object. + */ + scoped_connection(const connection &other) + : conn{other} + {} + + /*! @brief Default copy constructor, deleted on purpose. */ + scoped_connection(const scoped_connection &) = delete; + + /*! @brief Automatically breaks the link on destruction. */ + ~scoped_connection() { + conn.release(); + } + + /** + * @brief Default copy assignment operator, deleted on purpose. + * @return This scoped connection. + */ + scoped_connection & operator=(const scoped_connection &) = delete; + + /** + * @brief Acquires a connection. + * @param other The connection object to acquire. + * @return This scoped connection. + */ + scoped_connection & operator=(connection other) { + conn = std::move(other); + return *this; + } + + /** + * @brief Checks whether a scoped connection is properly initialized. + * @return True if the connection is properly initialized, false otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return static_cast(conn); + } + + /*! @brief Breaks the connection. */ + void release() { + conn.release(); + } + +private: + connection conn; +}; + + +/** + * @brief Sink class. + * + * A sink is used to connect listeners to signals and to disconnect them.
+ * The function type for a listener is the one of the signal to which it + * belongs. + * + * The clear separation between a signal and a sink permits to store the former + * as private data member without exposing the publish functionality to the + * users of the class. + * + * @warning + * Lifetime of a sink must not overcome that of the signal to which it refers. + * In any other case, attempting to use a sink results in undefined behavior. + * + * @tparam Ret Return type of a function type. + * @tparam Args Types of arguments of a function type. + */ +template +class sink { + using signal_type = sigh; + using difference_type = typename std::iterator_traits::difference_type; + + template + static void release(Type value_or_instance, void *signal) { + sink{*static_cast(signal)}.disconnect(value_or_instance); + } + + template + static void release(void *signal) { + sink{*static_cast(signal)}.disconnect(); + } + +public: + /** + * @brief Constructs a sink that is allowed to modify a given signal. + * @param ref A valid reference to a signal object. + */ + sink(sigh &ref) ENTT_NOEXCEPT + : offset{}, + signal{&ref} + {} + + /** + * @brief Returns false if at least a listener is connected to the sink. + * @return True if the sink has no listeners connected, false otherwise. + */ + [[nodiscard]] bool empty() const ENTT_NOEXCEPT { + return signal->calls.empty(); + } + + /** + * @brief Returns a sink that connects before a given free function or an + * unbound member. + * @tparam Function A valid free function pointer. + * @return A properly initialized sink object. + */ + template + [[nodiscard]] sink before() { + delegate call{}; + call.template connect(); + + const auto &calls = signal->calls; + const auto it = std::find(calls.cbegin(), calls.cend(), std::move(call)); + + sink other{*this}; + other.offset = std::distance(it, calls.cend()); + return other; + } + + /** + * @brief Returns a sink that connects before a free function with payload + * or a bound member. 
+ * @tparam Candidate Member or free function to look for. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. + * @return A properly initialized sink object. + */ + template + [[nodiscard]] sink before(Type &&value_or_instance) { + delegate call{}; + call.template connect(value_or_instance); + + const auto &calls = signal->calls; + const auto it = std::find(calls.cbegin(), calls.cend(), std::move(call)); + + sink other{*this}; + other.offset = std::distance(it, calls.cend()); + return other; + } + + /** + * @brief Returns a sink that connects before a given instance or specific + * payload. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. + * @return A properly initialized sink object. + */ + template + [[nodiscard]] sink before(Type &value_or_instance) { + return before(&value_or_instance); + } + + /** + * @brief Returns a sink that connects before a given instance or specific + * payload. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid pointer that fits the purpose. + * @return A properly initialized sink object. + */ + template + [[nodiscard]] sink before(Type *value_or_instance) { + sink other{*this}; + + if(value_or_instance) { + const auto &calls = signal->calls; + const auto it = std::find_if(calls.cbegin(), calls.cend(), [value_or_instance](const auto &delegate) { + return delegate.instance() == value_or_instance; + }); + + other.offset = std::distance(it, calls.cend()); + } + + return other; + } + + /** + * @brief Returns a sink that connects before anything else. + * @return A properly initialized sink object. + */ + [[nodiscard]] sink before() { + sink other{*this}; + other.offset = signal->calls.size(); + return other; + } + + /** + * @brief Connects a free function or an unbound member to a signal. 
+ * + * The signal handler performs checks to avoid multiple connections for the + * same function. + * + * @tparam Candidate Function or member to connect to the signal. + * @return A properly initialized connection object. + */ + template + connection connect() { + disconnect(); + + delegate call{}; + call.template connect(); + signal->calls.insert(signal->calls.end() - offset, std::move(call)); + + delegate conn{}; + conn.template connect<&release>(); + return { std::move(conn), signal }; + } + + /** + * @brief Connects a free function with payload or a bound member to a + * signal. + * + * The signal isn't responsible for the connected object or the payload. + * Users must always guarantee that the lifetime of the instance overcomes + * the one of the signal. On the other side, the signal handler performs + * checks to avoid multiple connections for the same function.
+ * When used to connect a free function with payload, its signature must be + * such that the instance is the first argument before the ones used to + * define the signal itself. + * + * @tparam Candidate Function or member to connect to the signal. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. + * @return A properly initialized connection object. + */ + template + connection connect(Type &&value_or_instance) { + disconnect(value_or_instance); + + delegate call{}; + call.template connect(value_or_instance); + signal->calls.insert(signal->calls.end() - offset, std::move(call)); + + delegate conn{}; + conn.template connect<&release>(value_or_instance); + return { std::move(conn), signal }; + } + + /** + * @brief Disconnects a free function or an unbound member from a signal. + * @tparam Candidate Function or member to disconnect from the signal. + */ + template + void disconnect() { + auto &calls = signal->calls; + delegate call{}; + call.template connect(); + calls.erase(std::remove(calls.begin(), calls.end(), std::move(call)), calls.end()); + } + + /** + * @brief Disconnects a free function with payload or a bound member from a + * signal. + * @tparam Candidate Function or member to disconnect from the signal. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. + */ + template + void disconnect(Type &&value_or_instance) { + auto &calls = signal->calls; + delegate call{}; + call.template connect(value_or_instance); + calls.erase(std::remove(calls.begin(), calls.end(), std::move(call)), calls.end()); + } + + /** + * @brief Disconnects free functions with payload or bound members from a + * signal. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. 
+ */ + template + void disconnect(Type &value_or_instance) { + disconnect(&value_or_instance); + } + + /** + * @brief Disconnects free functions with payload or bound members from a + * signal. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. + */ + template + void disconnect(Type *value_or_instance) { + if(value_or_instance) { + auto &calls = signal->calls; + calls.erase(std::remove_if(calls.begin(), calls.end(), [value_or_instance](const auto &delegate) { + return delegate.instance() == value_or_instance; + }), calls.end()); + } + } + + /*! @brief Disconnects all the listeners from a signal. */ + void disconnect() { + signal->calls.clear(); + } + +private: + difference_type offset; + signal_type *signal; +}; + + +/** + * @brief Deduction guide. + * + * It allows to deduce the function type of a sink directly from the signal it + * refers to. + * + * @tparam Ret Return type of a function type. + * @tparam Args Types of arguments of a function type. + */ +template +sink(sigh &) ENTT_NOEXCEPT -> sink; + + +} + + +#endif + +// #include "entity.hpp" +#ifndef ENTT_ENTITY_ENTITY_HPP +#define ENTT_ENTITY_ENTITY_HPP + + +#include +#include +#include +// #include "../config/config.h" + + + +namespace entt { + + +/** + * @brief Entity traits. + * + * Primary template isn't defined on purpose. All the specializations give a + * compile-time error unless the template parameter is an accepted entity type. + */ +template +struct entt_traits; + + +/** + * @brief Entity traits for enumeration types. + * @tparam Type The type to check. + */ +template +struct entt_traits>> + : entt_traits> +{}; + + +/** + * @brief Entity traits for a 16 bits entity identifier. + * + * A 16 bits entity identifier guarantees: + * + * * 12 bits for the entity number (up to 4k entities). + * * 4 bit for the version (resets in [0-15]). + */ +template<> +struct entt_traits { + /*! @brief Underlying entity type. 
*/ + using entity_type = std::uint16_t; + /*! @brief Underlying version type. */ + using version_type = std::uint8_t; + /*! @brief Difference type. */ + using difference_type = std::int16_t; + + /*! @brief Mask to use to get the entity number out of an identifier. */ + static constexpr entity_type entity_mask = 0xFFF; + /*! @brief Mask to use to get the version out of an identifier. */ + static constexpr entity_type version_mask = 0xF; + /*! @brief Extent of the entity number within an identifier. */ + static constexpr std::size_t entity_shift = 12u; +}; + + +/** + * @brief Entity traits for a 32 bits entity identifier. + * + * A 32 bits entity identifier guarantees: + * + * * 20 bits for the entity number (suitable for almost all the games). + * * 12 bit for the version (resets in [0-4095]). + */ +template<> +struct entt_traits { + /*! @brief Underlying entity type. */ + using entity_type = std::uint32_t; + /*! @brief Underlying version type. */ + using version_type = std::uint16_t; + /*! @brief Difference type. */ + using difference_type = std::int32_t; + + /*! @brief Mask to use to get the entity number out of an identifier. */ + static constexpr entity_type entity_mask = 0xFFFFF; + /*! @brief Mask to use to get the version out of an identifier. */ + static constexpr entity_type version_mask = 0xFFF; + /*! @brief Extent of the entity number within an identifier. */ + static constexpr std::size_t entity_shift = 20u; +}; + + +/** + * @brief Entity traits for a 64 bits entity identifier. + * + * A 64 bits entity identifier guarantees: + * + * * 32 bits for the entity number (an indecently large number). + * * 32 bit for the version (an indecently large number). + */ +template<> +struct entt_traits { + /*! @brief Underlying entity type. */ + using entity_type = std::uint64_t; + /*! @brief Underlying version type. */ + using version_type = std::uint32_t; + /*! @brief Difference type. */ + using difference_type = std::int64_t; + + /*! 
@brief Mask to use to get the entity number out of an identifier. */ + static constexpr entity_type entity_mask = 0xFFFFFFFF; + /*! @brief Mask to use to get the version out of an identifier. */ + static constexpr entity_type version_mask = 0xFFFFFFFF; + /*! @brief Extent of the entity number within an identifier. */ + static constexpr std::size_t entity_shift = 32u; +}; + + +/** + * @brief Converts an entity type to its underlying type. + * @tparam Entity The value type. + * @param entity The value to convert. + * @return The integral representation of the given value. + */ +template +[[nodiscard]] constexpr auto to_integral(const Entity entity) ENTT_NOEXCEPT { + return static_cast::entity_type>(entity); +} + + +/*! @brief Null object for all entity identifiers. */ +struct null_t { + /** + * @brief Converts the null object to identifiers of any type. + * @tparam Entity Type of entity identifier. + * @return The null representation for the given identifier. + */ + template + [[nodiscard]] constexpr operator Entity() const ENTT_NOEXCEPT { + return Entity{entt_traits::entity_mask}; + } + + /** + * @brief Compares two null objects. + * @return True in all cases. + */ + [[nodiscard]] constexpr bool operator==(null_t) const ENTT_NOEXCEPT { + return true; + } + + /** + * @brief Compares two null objects. + * @return False in all cases. + */ + [[nodiscard]] constexpr bool operator!=(null_t) const ENTT_NOEXCEPT { + return false; + } + + /** + * @brief Compares a null object and an entity identifier of any type. + * @tparam Entity Type of entity identifier. + * @param entity Entity identifier with which to compare. + * @return False if the two elements differ, true otherwise. + */ + template + [[nodiscard]] constexpr bool operator==(const Entity entity) const ENTT_NOEXCEPT { + return (to_integral(entity) & entt_traits::entity_mask) == to_integral(static_cast(*this)); + } + + /** + * @brief Compares a null object and an entity identifier of any type. 
+ * @tparam Entity Type of entity identifier. + * @param entity Entity identifier with which to compare. + * @return True if the two elements differ, false otherwise. + */ + template + [[nodiscard]] constexpr bool operator!=(const Entity entity) const ENTT_NOEXCEPT { + return !(entity == *this); + } +}; + + +/** + * @brief Compares a null object and an entity identifier of any type. + * @tparam Entity Type of entity identifier. + * @param entity Entity identifier with which to compare. + * @param other A null object yet to be converted. + * @return False if the two elements differ, true otherwise. + */ +template +[[nodiscard]] constexpr bool operator==(const Entity entity, null_t other) ENTT_NOEXCEPT { + return other.operator==(entity); +} + + +/** + * @brief Compares a null object and an entity identifier of any type. + * @tparam Entity Type of entity identifier. + * @param entity Entity identifier with which to compare. + * @param other A null object yet to be converted. + * @return True if the two elements differ, false otherwise. + */ +template +[[nodiscard]] constexpr bool operator!=(const Entity entity, null_t other) ENTT_NOEXCEPT { + return !(other == entity); +} + + +/** + * Internal details not to be documented. + * @endcond + */ + + +/** + * @brief Compile-time constant for null entities. + * + * There exist implicit conversions from this variable to entity identifiers of + * any allowed type. Similarly, there exist comparision operators between the + * null entity and any other entity identifier. 
+ */ +inline constexpr null_t null{}; + + +} + + +#endif + +// #include "fwd.hpp" +#ifndef ENTT_ENTITY_FWD_HPP +#define ENTT_ENTITY_FWD_HPP + + +// #include "../core/fwd.hpp" + + + +namespace entt { + + +template +class basic_registry; + + +template +class basic_view; + + +template +class basic_runtime_view; + + +template +class basic_group; + + +template +class basic_observer; + + +template +struct basic_actor; + + +template +struct basic_handle; + + +template +class basic_snapshot; + + +template +class basic_snapshot_loader; + + +template +class basic_continuous_loader; + + +/*! @brief Default entity identifier. */ +enum class entity: id_type {}; + + +/*! @brief Alias declaration for the most common use case. */ +using registry = basic_registry; + + +/*! @brief Alias declaration for the most common use case. */ +using observer = basic_observer; + + +/*! @brief Alias declaration for the most common use case. */ +using actor [[deprecated("Consider using the handle class instead")]] = basic_actor; + + +/*! @brief Alias declaration for the most common use case. */ +using handle = basic_handle; + + +/*! @brief Alias declaration for the most common use case. */ +using const_handle = basic_handle; + + +/*! @brief Alias declaration for the most common use case. */ +using snapshot = basic_snapshot; + + +/*! @brief Alias declaration for the most common use case. */ +using snapshot_loader = basic_snapshot_loader; + + +/*! @brief Alias declaration for the most common use case. */ +using continuous_loader = basic_continuous_loader; + + +/** + * @brief Alias declaration for the most common use case. + * @tparam Types Types of components iterated by the view. + */ +template +using view = basic_view; + + +/*! @brief Alias declaration for the most common use case. */ +using runtime_view = basic_runtime_view; + + +/** + * @brief Alias declaration for the most common use case. + * @tparam Types Types of components iterated by the group. 
+ */ +template +using group = basic_group; + + +} + + +#endif + +// #include "group.hpp" +#ifndef ENTT_ENTITY_GROUP_HPP +#define ENTT_ENTITY_GROUP_HPP + + +#include +#include +#include +// #include "../config/config.h" + +// #include "../core/type_traits.hpp" + +// #include "entity.hpp" + +// #include "fwd.hpp" + +// #include "pool.hpp" +#ifndef ENTT_ENTITY_POOL_HPP +#define ENTT_ENTITY_POOL_HPP + + +#include +// #include "storage.hpp" +#ifndef ENTT_ENTITY_STORAGE_HPP +#define ENTT_ENTITY_STORAGE_HPP + + +#include +#include +#include +#include +#include +#include +// #include "../config/config.h" + +// #include "../core/algorithm.hpp" + +// #include "../core/type_traits.hpp" + +// #include "entity.hpp" + +// #include "sparse_set.hpp" +#ifndef ENTT_ENTITY_SPARSE_SET_HPP +#define ENTT_ENTITY_SPARSE_SET_HPP + + +#include +#include +#include +#include +#include +#include +// #include "../config/config.h" + +// #include "../core/algorithm.hpp" + +// #include "entity.hpp" + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @brief Basic sparse set implementation. + * + * Sparse set or packed array or whatever is the name users give it.
+ * Two arrays: an _external_ one and an _internal_ one; a _sparse_ one and a + * _packed_ one; one used for direct access through contiguous memory, the other + * one used to get the data through an extra level of indirection.
+ * This is largely used by the registry to offer users the fastest access ever + * to the components. Views and groups in general are almost entirely designed + * around sparse sets. + * + * This type of data structure is widely documented in the literature and on the + * web. This is nothing more than a customized implementation suitable for the + * purpose of the framework. + * + * @note + * Internal data structures arrange elements to maximize performance. There are + * no guarantees that entities are returned in the insertion order when iterate + * a sparse set. Do not make assumption on the order in any case. + * + * @tparam Entity A valid entity type (see entt_traits for more details). + */ +template +class sparse_set { + static_assert(ENTT_PAGE_SIZE && ((ENTT_PAGE_SIZE & (ENTT_PAGE_SIZE - 1)) == 0), "ENTT_PAGE_SIZE must be a power of two"); + static constexpr auto entt_per_page = ENTT_PAGE_SIZE / sizeof(Entity); + + using traits_type = entt_traits; + using page_type = std::unique_ptr; + + class sparse_set_iterator final { + friend class sparse_set; + + using packed_type = std::vector; + using index_type = typename traits_type::difference_type; + + sparse_set_iterator(const packed_type &ref, const index_type idx) ENTT_NOEXCEPT + : packed{&ref}, index{idx} + {} + + public: + using difference_type = index_type; + using value_type = Entity; + using pointer = const value_type *; + using reference = const value_type &; + using iterator_category = std::random_access_iterator_tag; + + sparse_set_iterator() ENTT_NOEXCEPT = default; + + sparse_set_iterator & operator++() ENTT_NOEXCEPT { + return --index, *this; + } + + sparse_set_iterator operator++(int) ENTT_NOEXCEPT { + iterator orig = *this; + return ++(*this), orig; + } + + sparse_set_iterator & operator--() ENTT_NOEXCEPT { + return ++index, *this; + } + + sparse_set_iterator operator--(int) ENTT_NOEXCEPT { + sparse_set_iterator orig = *this; + return operator--(), orig; + } + + sparse_set_iterator & 
operator+=(const difference_type value) ENTT_NOEXCEPT { + index -= value; + return *this; + } + + sparse_set_iterator operator+(const difference_type value) const ENTT_NOEXCEPT { + sparse_set_iterator copy = *this; + return (copy += value); + } + + sparse_set_iterator & operator-=(const difference_type value) ENTT_NOEXCEPT { + return (*this += -value); + } + + sparse_set_iterator operator-(const difference_type value) const ENTT_NOEXCEPT { + return (*this + -value); + } + + difference_type operator-(const sparse_set_iterator &other) const ENTT_NOEXCEPT { + return other.index - index; + } + + [[nodiscard]] reference operator[](const difference_type value) const { + const auto pos = size_type(index-value-1u); + return (*packed)[pos]; + } + + [[nodiscard]] bool operator==(const sparse_set_iterator &other) const ENTT_NOEXCEPT { + return other.index == index; + } + + [[nodiscard]] bool operator!=(const sparse_set_iterator &other) const ENTT_NOEXCEPT { + return !(*this == other); + } + + [[nodiscard]] bool operator<(const sparse_set_iterator &other) const ENTT_NOEXCEPT { + return index > other.index; + } + + [[nodiscard]] bool operator>(const sparse_set_iterator &other) const ENTT_NOEXCEPT { + return index < other.index; + } + + [[nodiscard]] bool operator<=(const sparse_set_iterator &other) const ENTT_NOEXCEPT { + return !(*this > other); + } + + [[nodiscard]] bool operator>=(const sparse_set_iterator &other) const ENTT_NOEXCEPT { + return !(*this < other); + } + + [[nodiscard]] pointer operator->() const { + const auto pos = size_type(index-1u); + return &(*packed)[pos]; + } + + [[nodiscard]] reference operator*() const { + return *operator->(); + } + + private: + const packed_type *packed; + index_type index; + }; + + [[nodiscard]] auto page(const Entity entt) const ENTT_NOEXCEPT { + return size_type{(to_integral(entt) & traits_type::entity_mask) / entt_per_page}; + } + + [[nodiscard]] auto offset(const Entity entt) const ENTT_NOEXCEPT { + return 
size_type{to_integral(entt) & (entt_per_page - 1)}; + } + + [[nodiscard]] page_type & assure(const std::size_t pos) { + if(!(pos < sparse.size())) { + sparse.resize(pos+1); + } + + if(!sparse[pos]) { + sparse[pos].reset(new entity_type[entt_per_page]); + // null is safe in all cases for our purposes + for(auto *first = sparse[pos].get(), *last = first + entt_per_page; first != last; ++first) { + *first = null; + } + } + + return sparse[pos]; + } + +public: + /*! @brief Underlying entity identifier. */ + using entity_type = Entity; + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + /*! @brief Random access iterator type. */ + using iterator = sparse_set_iterator; + /*! @brief Reverse iterator type. */ + using reverse_iterator = const entity_type *; + + /*! @brief Default constructor. */ + sparse_set() = default; + + /*! @brief Default move constructor. */ + sparse_set(sparse_set &&) = default; + + /*! @brief Default destructor. */ + virtual ~sparse_set() = default; + + /*! @brief Default move assignment operator. @return This sparse set. */ + sparse_set & operator=(sparse_set &&) = default; + + /** + * @brief Increases the capacity of a sparse set. + * + * If the new capacity is greater than the current capacity, new storage is + * allocated, otherwise the method does nothing. + * + * @param cap Desired capacity. + */ + void reserve(const size_type cap) { + packed.reserve(cap); + } + + /** + * @brief Returns the number of elements that a sparse set has currently + * allocated space for. + * @return Capacity of the sparse set. + */ + [[nodiscard]] size_type capacity() const ENTT_NOEXCEPT { + return packed.capacity(); + } + + /*! @brief Requests the removal of unused capacity. */ + void shrink_to_fit() { + // conservative approach + if(packed.empty()) { + sparse.clear(); + } + + sparse.shrink_to_fit(); + packed.shrink_to_fit(); + } + + /** + * @brief Returns the extent of a sparse set. 
+ * + * The extent of a sparse set is also the size of the internal sparse array. + * There is no guarantee that the internal packed array has the same size. + * Usually the size of the internal sparse array is equal or greater than + * the one of the internal packed array. + * + * @return Extent of the sparse set. + */ + [[nodiscard]] size_type extent() const ENTT_NOEXCEPT { + return sparse.size() * entt_per_page; + } + + /** + * @brief Returns the number of elements in a sparse set. + * + * The number of elements is also the size of the internal packed array. + * There is no guarantee that the internal sparse array has the same size. + * Usually the size of the internal sparse array is equal or greater than + * the one of the internal packed array. + * + * @return Number of elements. + */ + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return packed.size(); + } + + /** + * @brief Checks whether a sparse set is empty. + * @return True if the sparse set is empty, false otherwise. + */ + [[nodiscard]] bool empty() const ENTT_NOEXCEPT { + return packed.empty(); + } + + /** + * @brief Direct access to the internal packed array. + * + * The returned pointer is such that range `[data(), data() + size()]` is + * always a valid range, even if the container is empty. + * + * @note + * Entities are in the reverse order as returned by the `begin`/`end` + * iterators. + * + * @return A pointer to the internal packed array. + */ + [[nodiscard]] const entity_type * data() const ENTT_NOEXCEPT { + return packed.data(); + } + + /** + * @brief Returns an iterator to the beginning. + * + * The returned iterator points to the first entity of the internal packed + * array. If the sparse set is empty, the returned iterator will be equal to + * `end()`. + * + * @return An iterator to the first entity of the internal packed array. 
+ */ + [[nodiscard]] iterator begin() const ENTT_NOEXCEPT { + const typename traits_type::difference_type pos = packed.size(); + return iterator{packed, pos}; + } + + /** + * @brief Returns an iterator to the end. + * + * The returned iterator points to the element following the last entity in + * the internal packed array. Attempting to dereference the returned + * iterator results in undefined behavior. + * + * @return An iterator to the element following the last entity of the + * internal packed array. + */ + [[nodiscard]] iterator end() const ENTT_NOEXCEPT { + return iterator{packed, {}}; + } + + /** + * @brief Returns a reverse iterator to the beginning. + * + * The returned iterator points to the first entity of the reversed internal + * packed array. If the sparse set is empty, the returned iterator will be + * equal to `rend()`. + * + * @return An iterator to the first entity of the reversed internal packed + * array. + */ + [[nodiscard]] reverse_iterator rbegin() const ENTT_NOEXCEPT { + return packed.data(); + } + + /** + * @brief Returns a reverse iterator to the end. + * + * The returned iterator points to the element following the last entity in + * the reversed internal packed array. Attempting to dereference the + * returned iterator results in undefined behavior. + * + * @return An iterator to the element following the last entity of the + * reversed internal packed array. + */ + [[nodiscard]] reverse_iterator rend() const ENTT_NOEXCEPT { + return rbegin() + packed.size(); + } + + /** + * @brief Finds an entity. + * @param entt A valid entity identifier. + * @return An iterator to the given entity if it's found, past the end + * iterator otherwise. + */ + [[nodiscard]] iterator find(const entity_type entt) const { + return contains(entt) ? --(end() - index(entt)) : end(); + } + + /** + * @brief Checks if a sparse set contains an entity. + * @param entt A valid entity identifier. 
+ * @return True if the sparse set contains the entity, false otherwise. + */ + [[nodiscard]] bool contains(const entity_type entt) const { + const auto curr = page(entt); + // testing against null permits to avoid accessing the packed array + return (curr < sparse.size() && sparse[curr] && sparse[curr][offset(entt)] != null); + } + + /** + * @brief Returns the position of an entity in a sparse set. + * + * @warning + * Attempting to get the position of an entity that doesn't belong to the + * sparse set results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * sparse set doesn't contain the given entity. + * + * @param entt A valid entity identifier. + * @return The position of the entity in the sparse set. + */ + [[nodiscard]] size_type index(const entity_type entt) const { + ENTT_ASSERT(contains(entt)); + return size_type{to_integral(sparse[page(entt)][offset(entt)])}; + } + + /** + * @brief Assigns an entity to a sparse set. + * + * @warning + * Attempting to assign an entity that already belongs to the sparse set + * results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * sparse set already contains the given entity. + * + * @param entt A valid entity identifier. + */ + void emplace(const entity_type entt) { + ENTT_ASSERT(!contains(entt)); + assure(page(entt))[offset(entt)] = entity_type(static_cast(packed.size())); + packed.push_back(entt); + } + + /** + * @brief Assigns one or more entities to a sparse set. + * + * @warning + * Attempting to assign an entity that already belongs to the sparse set + * results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * sparse set already contains the given entity. + * + * @tparam It Type of input iterator. + * @param first An iterator to the first element of the range of entities. + * @param last An iterator past the last element of the range of entities. + */ + template + void insert(It first, It last) { + auto next = static_cast(packed.size()); + packed.insert(packed.end(), first, last); + + while(first != last) { + const auto entt = *(first++); + ENTT_ASSERT(!contains(entt)); + assure(page(entt))[offset(entt)] = entity_type(next++); + } + } + + /** + * @brief Removes an entity from a sparse set. + * + * @warning + * Attempting to remove an entity that doesn't belong to the sparse set + * results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * sparse set doesn't contain the given entity. + * + * @param entt A valid entity identifier. + */ + void erase(const entity_type entt) { + ENTT_ASSERT(contains(entt)); + const auto curr = page(entt); + const auto pos = offset(entt); + packed[size_type{to_integral(sparse[curr][pos])}] = packed.back(); + sparse[page(packed.back())][offset(packed.back())] = sparse[curr][pos]; + sparse[curr][pos] = null; + packed.pop_back(); + } + + /** + * @brief Swaps two entities in the internal packed array. + * + * For what it's worth, this function affects both the internal sparse array + * and the internal packed array. Users should not care of that anyway. + * + * @warning + * Attempting to swap entities that don't belong to the sparse set results + * in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * sparse set doesn't contain the given entities. + * + * @param lhs A valid entity identifier. + * @param rhs A valid entity identifier. + */ + virtual void swap(const entity_type lhs, const entity_type rhs) { + auto &from = sparse[page(lhs)][offset(lhs)]; + auto &to = sparse[page(rhs)][offset(rhs)]; + std::swap(packed[size_type{to_integral(from)}], packed[size_type{to_integral(to)}]); + std::swap(from, to); + } + + /** + * @brief Sort elements according to the given comparison function. + * + * Sort the elements so that iterating the range with a couple of iterators + * returns them in the expected order. See `begin` and `end` for more + * details. + * + * The comparison function object must return `true` if the first element + * is _less_ than the second one, `false` otherwise. The signature of the + * comparison function should be equivalent to the following: + * + * @code{.cpp} + * bool(const Entity, const Entity); + * @endcode + * + * Moreover, the comparison function object shall induce a + * _strict weak ordering_ on the values. + * + * The sort function oject must offer a member function template + * `operator()` that accepts three arguments: + * + * * An iterator to the first element of the range to sort. + * * An iterator past the last element of the range to sort. + * * A comparison function to use to compare the elements. + * + * @tparam Compare Type of comparison function object. + * @tparam Sort Type of sort function object. + * @tparam Args Types of arguments to forward to the sort function object. + * @param first An iterator to the first element of the range to sort. + * @param last An iterator past the last element of the range to sort. + * @param compare A valid comparison function object. + * @param algo A valid sort function object. + * @param args Arguments to forward to the sort function object, if any. 
+ */ + template + void sort(iterator first, iterator last, Compare compare, Sort algo = Sort{}, Args &&... args) { + ENTT_ASSERT(!(last < first)); + ENTT_ASSERT(!(last > end())); + + const auto length = std::distance(first, last); + const auto skip = std::distance(last, end()); + const auto to = packed.rend() - skip; + const auto from = to - length; + + algo(from, to, std::move(compare), std::forward(args)...); + + for(size_type pos = skip, end = skip+length; pos < end; ++pos) { + sparse[page(packed[pos])][offset(packed[pos])] = entity_type(static_cast(pos)); + } + } + + /** + * @brief Sort elements according to the given comparison function. + * + * @sa sort + * + * This function is a slightly slower version of `sort` that invokes the + * caller to indicate which entities are swapped.
+ * It's recommended when the caller wants to sort its own data structures to + * align them with the order induced in the sparse set. + * + * The signature of the callback should be equivalent to the following: + * + * @code{.cpp} + * bool(const Entity, const Entity); + * @endcode + * + * @tparam Apply Type of function object to invoke to notify the caller. + * @tparam Compare Type of comparison function object. + * @tparam Sort Type of sort function object. + * @tparam Args Types of arguments to forward to the sort function object. + * @param first An iterator to the first element of the range to sort. + * @param last An iterator past the last element of the range to sort. + * @param apply A valid function object to use as a callback. + * @param compare A valid comparison function object. + * @param algo A valid sort function object. + * @param args Arguments to forward to the sort function object, if any. + */ + template + void arrange(iterator first, iterator last, Apply apply, Compare compare, Sort algo = Sort{}, Args &&... args) { + ENTT_ASSERT(!(last < first)); + ENTT_ASSERT(!(last > end())); + + const auto length = std::distance(first, last); + const auto skip = std::distance(last, end()); + const auto to = packed.rend() - skip; + const auto from = to - length; + + algo(from, to, std::move(compare), std::forward(args)...); + + for(size_type pos = skip, end = skip+length; pos < end; ++pos) { + auto curr = pos; + auto next = index(packed[curr]); + + while(curr != next) { + apply(packed[curr], packed[next]); + sparse[page(packed[curr])][offset(packed[curr])] = entity_type(static_cast(curr)); + + curr = next; + next = index(packed[curr]); + } + } + } + + /** + * @brief Sort entities according to their order in another sparse set. + * + * Entities that are part of both the sparse sets are ordered internally + * according to the order they have in `other`. All the other entities goes + * to the end of the list and there are no guarantees on their order.
+ * In other terms, this function can be used to impose the same order on two + * sets by using one of them as a master and the other one as a slave. + * + * Iterating the sparse set with a couple of iterators returns elements in + * the expected order after a call to `respect`. See `begin` and `end` for + * more details. + * + * @param other The sparse sets that imposes the order of the entities. + */ + void respect(const sparse_set &other) { + const auto to = other.end(); + auto from = other.begin(); + + size_type pos = packed.size() - 1; + + while(pos && from != to) { + if(contains(*from)) { + if(*from != packed[pos]) { + swap(packed[pos], *from); + } + + --pos; + } + + ++from; + } + } + + /** + * @brief Clears a sparse set. + */ + void clear() ENTT_NOEXCEPT { + sparse.clear(); + packed.clear(); + } + +private: + std::vector sparse; + std::vector packed; +}; + + +} + + +#endif + + + +namespace entt { + + +/** + * @brief Basic storage implementation. + * + * This class is a refinement of a sparse set that associates an object to an + * entity. The main purpose of this class is to extend sparse sets to store + * components in a registry. It guarantees fast access both to the elements and + * to the entities. + * + * @note + * Entities and objects have the same order. It's guaranteed both in case of raw + * access (either to entities or objects) and when using random or input access + * iterators. + * + * @note + * Internal data structures arrange elements to maximize performance. There are + * no guarantees that objects are returned in the insertion order when iterate + * a storage. Do not make assumption on the order in any case. + * + * @warning + * Empty types aren't explicitly instantiated. Therefore, many of the functions + * normally available for non-empty types will not be available for empty ones. + * + * @sa sparse_set + * + * @tparam Entity A valid entity type (see entt_traits for more details). + * @tparam Type Type of objects assigned to the entities. 
+ */ +template> +class storage: public sparse_set { + static_assert(std::is_move_constructible_v && std::is_move_assignable_v, "The managed type must be at least move constructible and assignable"); + + using underlying_type = sparse_set; + using traits_type = entt_traits; + + template + class storage_iterator final { + friend class storage; + + using instance_type = std::conditional_t, std::vector>; + using index_type = typename traits_type::difference_type; + + storage_iterator(instance_type &ref, const index_type idx) ENTT_NOEXCEPT + : instances{&ref}, index{idx} + {} + + public: + using difference_type = index_type; + using value_type = Type; + using pointer = std::conditional_t; + using reference = std::conditional_t; + using iterator_category = std::random_access_iterator_tag; + + storage_iterator() ENTT_NOEXCEPT = default; + + storage_iterator & operator++() ENTT_NOEXCEPT { + return --index, *this; + } + + storage_iterator operator++(int) ENTT_NOEXCEPT { + storage_iterator orig = *this; + return ++(*this), orig; + } + + storage_iterator & operator--() ENTT_NOEXCEPT { + return ++index, *this; + } + + storage_iterator operator--(int) ENTT_NOEXCEPT { + storage_iterator orig = *this; + return operator--(), orig; + } + + storage_iterator & operator+=(const difference_type value) ENTT_NOEXCEPT { + index -= value; + return *this; + } + + storage_iterator operator+(const difference_type value) const ENTT_NOEXCEPT { + storage_iterator copy = *this; + return (copy += value); + } + + storage_iterator & operator-=(const difference_type value) ENTT_NOEXCEPT { + return (*this += -value); + } + + storage_iterator operator-(const difference_type value) const ENTT_NOEXCEPT { + return (*this + -value); + } + + difference_type operator-(const storage_iterator &other) const ENTT_NOEXCEPT { + return other.index - index; + } + + [[nodiscard]] reference operator[](const difference_type value) const ENTT_NOEXCEPT { + const auto pos = size_type(index-value-1); + return 
(*instances)[pos]; + } + + [[nodiscard]] bool operator==(const storage_iterator &other) const ENTT_NOEXCEPT { + return other.index == index; + } + + [[nodiscard]] bool operator!=(const storage_iterator &other) const ENTT_NOEXCEPT { + return !(*this == other); + } + + [[nodiscard]] bool operator<(const storage_iterator &other) const ENTT_NOEXCEPT { + return index > other.index; + } + + [[nodiscard]] bool operator>(const storage_iterator &other) const ENTT_NOEXCEPT { + return index < other.index; + } + + [[nodiscard]] bool operator<=(const storage_iterator &other) const ENTT_NOEXCEPT { + return !(*this > other); + } + + [[nodiscard]] bool operator>=(const storage_iterator &other) const ENTT_NOEXCEPT { + return !(*this < other); + } + + [[nodiscard]] pointer operator->() const ENTT_NOEXCEPT { + const auto pos = size_type(index-1u); + return &(*instances)[pos]; + } + + [[nodiscard]] reference operator*() const ENTT_NOEXCEPT { + return *operator->(); + } + + private: + instance_type *instances; + index_type index; + }; + +public: + /*! @brief Type of the objects associated with the entities. */ + using object_type = Type; + /*! @brief Underlying entity identifier. */ + using entity_type = Entity; + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + /*! @brief Random access iterator type. */ + using iterator = storage_iterator; + /*! @brief Constant random access iterator type. */ + using const_iterator = storage_iterator; + /*! @brief Reverse iterator type. */ + using reverse_iterator = Type *; + /*! @brief Constant reverse iterator type. */ + using const_reverse_iterator = const Type *; + + + /** + * @brief Increases the capacity of a storage. + * + * If the new capacity is greater than the current capacity, new storage is + * allocated, otherwise the method does nothing. + * + * @param cap Desired capacity. + */ + void reserve(const size_type cap) { + underlying_type::reserve(cap); + instances.reserve(cap); + } + + /*! 
@brief Requests the removal of unused capacity. */ + void shrink_to_fit() { + underlying_type::shrink_to_fit(); + instances.shrink_to_fit(); + } + + /** + * @brief Direct access to the array of objects. + * + * The returned pointer is such that range `[raw(), raw() + size()]` is + * always a valid range, even if the container is empty. + * + * @note + * Objects are in the reverse order as returned by the `begin`/`end` + * iterators. + * + * @return A pointer to the array of objects. + */ + [[nodiscard]] const object_type * raw() const ENTT_NOEXCEPT { + return instances.data(); + } + + /*! @copydoc raw */ + [[nodiscard]] object_type * raw() ENTT_NOEXCEPT { + return const_cast(std::as_const(*this).raw()); + } + + /** + * @brief Returns an iterator to the beginning. + * + * The returned iterator points to the first instance of the internal array. + * If the storage is empty, the returned iterator will be equal to `end()`. + * + * @return An iterator to the first instance of the internal array. + */ + [[nodiscard]] const_iterator cbegin() const ENTT_NOEXCEPT { + const typename traits_type::difference_type pos = underlying_type::size(); + return const_iterator{instances, pos}; + } + + /*! @copydoc cbegin */ + [[nodiscard]] const_iterator begin() const ENTT_NOEXCEPT { + return cbegin(); + } + + /*! @copydoc begin */ + [[nodiscard]] iterator begin() ENTT_NOEXCEPT { + const typename traits_type::difference_type pos = underlying_type::size(); + return iterator{instances, pos}; + } + + /** + * @brief Returns an iterator to the end. + * + * The returned iterator points to the element following the last instance + * of the internal array. Attempting to dereference the returned iterator + * results in undefined behavior. + * + * @return An iterator to the element following the last instance of the + * internal array. + */ + [[nodiscard]] const_iterator cend() const ENTT_NOEXCEPT { + return const_iterator{instances, {}}; + } + + /*! 
@copydoc cend */ + [[nodiscard]] const_iterator end() const ENTT_NOEXCEPT { + return cend(); + } + + /*! @copydoc end */ + [[nodiscard]] iterator end() ENTT_NOEXCEPT { + return iterator{instances, {}}; + } + + /** + * @brief Returns a reverse iterator to the beginning. + * + * The returned iterator points to the first instance of the reversed + * internal array. If the storage is empty, the returned iterator will be + * equal to `rend()`. + * + * @return An iterator to the first instance of the reversed internal array. + */ + [[nodiscard]] const_reverse_iterator crbegin() const ENTT_NOEXCEPT { + return instances.data(); + } + + /*! @copydoc crbegin */ + [[nodiscard]] const_reverse_iterator rbegin() const ENTT_NOEXCEPT { + return crbegin(); + } + + /*! @copydoc rbegin */ + [[nodiscard]] reverse_iterator rbegin() ENTT_NOEXCEPT { + return instances.data(); + } + + /** + * @brief Returns a reverse iterator to the end. + * + * The returned iterator points to the element following the last instance + * of the reversed internal array. Attempting to dereference the returned + * iterator results in undefined behavior. + * + * @return An iterator to the element following the last instance of the + * reversed internal array. + */ + [[nodiscard]] const_reverse_iterator crend() const ENTT_NOEXCEPT { + return crbegin() + instances.size(); + } + + /*! @copydoc crend */ + [[nodiscard]] const_reverse_iterator rend() const ENTT_NOEXCEPT { + return crend(); + } + + /*! @copydoc rend */ + [[nodiscard]] reverse_iterator rend() ENTT_NOEXCEPT { + return rbegin() + instances.size(); + } + + /** + * @brief Returns the object associated with an entity. + * + * @warning + * Attempting to use an entity that doesn't belong to the storage results in + * undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * storage doesn't contain the given entity. + * + * @param entt A valid entity identifier. + * @return The object associated with the entity. + */ + [[nodiscard]] const object_type & get(const entity_type entt) const { + return instances[underlying_type::index(entt)]; + } + + /*! @copydoc get */ + [[nodiscard]] object_type & get(const entity_type entt) { + return const_cast(std::as_const(*this).get(entt)); + } + + /** + * @brief Returns a pointer to the object associated with an entity, if any. + * @param entt A valid entity identifier. + * @return The object associated with the entity, if any. + */ + [[nodiscard]] const object_type * try_get(const entity_type entt) const { + return underlying_type::contains(entt) ? (instances.data() + underlying_type::index(entt)) : nullptr; + } + + /*! @copydoc try_get */ + [[nodiscard]] object_type * try_get(const entity_type entt) { + return const_cast(std::as_const(*this).try_get(entt)); + } + + /** + * @brief Assigns an entity to a storage and constructs its object. + * + * This version accepts both types that can be constructed in place directly + * and types like aggregates that do not work well with a placement new as + * performed usually under the hood during an _emplace back_. + * + * @warning + * Attempting to use an entity that already belongs to the storage results + * in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * storage already contains the given entity. + * + * @tparam Args Types of arguments to use to construct the object. + * @param entt A valid entity identifier. + * @param args Parameters to use to construct an object for the entity. + */ + template + void emplace(const entity_type entt, Args &&... args) { + if constexpr(std::is_aggregate_v) { + instances.push_back(Type{std::forward(args)...}); + } else { + instances.emplace_back(std::forward(args)...); + } + + // entity goes after component in case constructor throws + underlying_type::emplace(entt); + } + + /** + * @brief Assigns one or more entities to a storage and constructs their + * objects from a given instance. + * + * @warning + * Attempting to assign an entity that already belongs to the storage + * results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * storage already contains the given entity. + * + * @tparam It Type of input iterator. + * @param first An iterator to the first element of the range of entities. + * @param last An iterator past the last element of the range of entities. + * @param value An instance of the object to construct. + */ + template + void insert(It first, It last, const object_type &value = {}) { + instances.insert(instances.end(), std::distance(first, last), value); + // entities go after components in case constructors throw + underlying_type::insert(first, last); + } + + /** + * @brief Assigns one or more entities to a storage and constructs their + * objects from a given range. + * + * @sa construct + * + * @tparam EIt Type of input iterator. + * @tparam CIt Type of input iterator. + * @param first An iterator to the first element of the range of entities. + * @param last An iterator past the last element of the range of entities. + * @param from An iterator to the first element of the range of objects. + * @param to An iterator past the last element of the range of objects. + */ + template + void insert(EIt first, EIt last, CIt from, CIt to) { + instances.insert(instances.end(), from, to); + // entities go after components in case constructors throw + underlying_type::insert(first, last); + } + + /** + * @brief Removes an entity from a storage and destroys its object. + * + * @warning + * Attempting to use an entity that doesn't belong to the storage results in + * undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * storage doesn't contain the given entity. + * + * @param entt A valid entity identifier. + */ + void erase(const entity_type entt) { + auto other = std::move(instances.back()); + instances[underlying_type::index(entt)] = std::move(other); + instances.pop_back(); + underlying_type::erase(entt); + } + + /** + * @brief Swaps entities and objects in the internal packed arrays. + * + * @warning + * Attempting to swap entities that don't belong to the sparse set results + * in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * sparse set doesn't contain the given entities. + * + * @param lhs A valid entity identifier. + * @param rhs A valid entity identifier. + */ + void swap(const entity_type lhs, const entity_type rhs) override { + std::swap(instances[underlying_type::index(lhs)], instances[underlying_type::index(rhs)]); + underlying_type::swap(lhs, rhs); + } + + /** + * @brief Sort elements according to the given comparison function. + * + * Sort the elements so that iterating the range with a couple of iterators + * returns them in the expected order. See `begin` and `end` for more + * details. + * + * The comparison function object must return `true` if the first element + * is _less_ than the second one, `false` otherwise. The signature of the + * comparison function should be equivalent to one of the following: + * + * @code{.cpp} + * bool(const Entity, const Entity); + * bool(const Type &, const Type &); + * @endcode + * + * Moreover, the comparison function object shall induce a + * _strict weak ordering_ on the values. + * + * The sort function object must offer a member function template + * `operator()` that accepts three arguments: + * + * * An iterator to the first element of the range to sort. + * * An iterator past the last element of the range to sort. + * * A comparison function to use to compare the elements. + * + * @warning + * Empty types are never instantiated. Therefore, only comparison function + * objects that require to return entities rather than components are + * accepted. + * + * @tparam Compare Type of comparison function object. + * @tparam Sort Type of sort function object. + * @tparam Args Types of arguments to forward to the sort function object. + * @param first An iterator to the first element of the range to sort. + * @param last An iterator past the last element of the range to sort. + * @param compare A valid comparison function object.
+ * @param algo A valid sort function object. + * @param args Arguments to forward to the sort function object, if any. + */ + template + void sort(iterator first, iterator last, Compare compare, Sort algo = Sort{}, Args &&... args) { + ENTT_ASSERT(!(last < first)); + ENTT_ASSERT(!(last > end())); + + const auto from = underlying_type::begin() + std::distance(begin(), first); + const auto to = from + std::distance(first, last); + + const auto apply = [this](const auto lhs, const auto rhs) { + std::swap(instances[underlying_type::index(lhs)], instances[underlying_type::index(rhs)]); + }; + + if constexpr(std::is_invocable_v) { + underlying_type::arrange(from, to, std::move(apply), [this, compare = std::move(compare)](const auto lhs, const auto rhs) { + return compare(std::as_const(instances[underlying_type::index(lhs)]), std::as_const(instances[underlying_type::index(rhs)])); + }, std::move(algo), std::forward(args)...); + } else { + underlying_type::arrange(from, to, std::move(apply), std::move(compare), std::move(algo), std::forward(args)...); + } + } + + /*! @brief Clears a storage. */ + void clear() { + underlying_type::clear(); + instances.clear(); + } + +private: + std::vector instances; +}; + + +/*! @copydoc storage */ +template +class storage>>: public sparse_set { + using underlying_type = sparse_set; + +public: + /*! @brief Type of the objects associated with the entities. */ + using object_type = Type; + /*! @brief Underlying entity identifier. */ + using entity_type = Entity; + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + + /** + * @brief Assigns an entity to a storage and constructs its object. + * + * @warning + * Attempting to use an entity that already belongs to the storage results + * in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * storage already contains the given entity. + * + * @tparam Args Types of arguments to use to construct the object. + * @param entt A valid entity identifier. + * @param args Parameters to use to construct an object for the entity. + */ + template + void emplace(const entity_type entt, Args &&... args) { + [[maybe_unused]] object_type instance{std::forward(args)...}; + underlying_type::emplace(entt); + } + + /** + * @brief Assigns one or more entities to a storage. + * + * @warning + * Attempting to assign an entity that already belongs to the storage + * results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * storage already contains the given entity. + * + * @tparam It Type of input iterator. + * @param first An iterator to the first element of the range of entities. + * @param last An iterator past the last element of the range of entities. + */ + template + void insert(It first, It last, const object_type & = {}) { + underlying_type::insert(first, last); + } +}; + + +} + + +#endif + + + +namespace entt { + + +/** + * @brief Applies component-to-pool conversion and defines the resulting type as + * the member typedef type. + * + * Formally: + * + * * If the component type is a non-const one, the member typedef type is the + * declared storage type. + * * If the component type is a const one, the member typedef type is the + * declared storage type, except it has a const-qualifier added. + * + * @tparam Entity A valid entity type (see entt_traits for more details). + * @tparam Type Type of objects assigned to the entities. + */ +template +struct pool { + /*! @brief Resulting type after component-to-pool conversion. */ + using type = storage; +}; + + +/*! @copydoc pool */ +template +struct pool { + /*! @brief Resulting type after component-to-pool conversion. */ + using type = std::add_const_t>::type>; +}; + + +/** + * @brief Alias declaration to use to make component-to-pool conversions. + * @tparam Entity A valid entity type (see entt_traits for more details). + * @tparam Type Type of objects assigned to the entities. + */ +template +using pool_t = typename pool::type; + + +} + + +#endif + +// #include "sparse_set.hpp" + +// #include "utility.hpp" +#ifndef ENTT_ENTITY_UTILITY_HPP +#define ENTT_ENTITY_UTILITY_HPP + + +// #include "../core/type_traits.hpp" + + + +namespace entt { + + +/** + * @brief Alias for exclusion lists. + * @tparam Type List of types. + */ +template +struct exclude_t: type_list {}; + + +/** + * @brief Variable template for exclusion lists. + * @tparam Type List of types. 
+ */ +template +inline constexpr exclude_t exclude{}; + + +/** + * @brief Alias for lists of observed components. + * @tparam Type List of types. + */ +template +struct get_t: type_list{}; + + +/** + * @brief Variable template for lists of observed components. + * @tparam Type List of types. + */ +template +inline constexpr get_t get{}; + + +} + + +#endif + + + +namespace entt { + + +/** + * @brief Group. + * + * Primary template isn't defined on purpose. All the specializations give a + * compile-time error, but for a few reasonable cases. + */ +template +class basic_group; + + +/** + * @brief Non-owning group. + * + * A non-owning group returns all entities and only the entities that have at + * least the given components. Moreover, it's guaranteed that the entity list + * is tightly packed in memory for fast iterations. + * + * @b Important + * + * Iterators aren't invalidated if: + * + * * New instances of the given components are created and assigned to entities. + * * The entity currently pointed is modified (as an example, if one of the + * given components is removed from the entity to which the iterator points). + * * The entity currently pointed is destroyed. + * + * In all other cases, modifying the pools iterated by the group in any way + * invalidates all the iterators and using them results in undefined behavior. + * + * @note + * Groups share references to the underlying data structures of the registry + * that generated them. Therefore any change to the entities and to the + * components made by means of the registry are immediately reflected by all the + * groups.
+ * Moreover, sorting a non-owning group affects all the instances of the same + * group (it means that users don't have to call `sort` on each instance to sort + * all of them because they _share_ entities and components). + * + * @warning + * Lifetime of a group must not overcome that of the registry that generated it. + * In any other case, attempting to use a group results in undefined behavior. + * + * @tparam Entity A valid entity type (see entt_traits for more details). + * @tparam Exclude Types of components used to filter the group. + * @tparam Get Type of components observed by the group. + */ +template +class basic_group, get_t> { + /*! @brief A registry is allowed to create groups. */ + friend class basic_registry; + + template + using pool_type = pool_t; + + class group_proxy { + friend class basic_group, get_t>; + + class proxy_iterator { + friend class group_proxy; + + using it_type = typename sparse_set::iterator; + using ref_type = decltype(std::tuple_cat(std::declval, std::tuple<>, std::tuple *>>>()...)); + + proxy_iterator(it_type from, ref_type ref) ENTT_NOEXCEPT + : it{from}, + pools{ref} + {} + + public: + using difference_type = std::ptrdiff_t; + using value_type = decltype(std::tuple_cat( + std::declval>(), + std::declval, std::tuple<>, std::tuple>>()... + )); + using pointer = void; + using reference = decltype(std::tuple_cat( + std::declval>(), + std::declval, std::tuple<>, std::tuple>>()... + )); + using iterator_category = std::input_iterator_tag; + + proxy_iterator & operator++() ENTT_NOEXCEPT { + return ++it, *this; + } + + proxy_iterator operator++(int) ENTT_NOEXCEPT { + proxy_iterator orig = *this; + return ++(*this), orig; + } + + [[nodiscard]] reference operator*() const ENTT_NOEXCEPT { + return std::apply([entt = *it](auto *... 
cpool) { return reference{entt, cpool->get(entt)...}; }, pools); + } + + [[nodiscard]] bool operator==(const proxy_iterator &other) const ENTT_NOEXCEPT { + return other.it == it; + } + + [[nodiscard]] bool operator!=(const proxy_iterator &other) const ENTT_NOEXCEPT { + return !(*this == other); + } + + private: + it_type it{}; + ref_type pools{}; + }; + + group_proxy(const sparse_set &ref, std::tuple *...> gpools) + : handler{&ref}, + pools{gpools} + {} + + public: + using iterator = proxy_iterator; + + [[nodiscard]] iterator begin() const ENTT_NOEXCEPT { + return proxy_iterator{handler->begin(), std::tuple_cat([](auto *cpool) { + if constexpr(is_eto_eligible_v::object_type>) { + return std::make_tuple(); + } else { + return std::make_tuple(cpool); + } + }(std::get *>(pools))...)}; + } + + [[nodiscard]] iterator end() const ENTT_NOEXCEPT { + return proxy_iterator{handler->end(), std::tuple_cat([](auto *cpool) { + if constexpr(is_eto_eligible_v::object_type>) { + return std::make_tuple(); + } else { + return std::make_tuple(cpool); + } + }(std::get *>(pools))...)}; + } + + private: + const sparse_set *handler; + std::tuple *...> pools; + }; + + basic_group(sparse_set &ref, pool_type &... gpool) ENTT_NOEXCEPT + : handler{&ref}, + pools{&gpool...} + {} + + template + void traverse(Func func, type_list) const { + for(const auto entt: *handler) { + if constexpr(std::is_invocable_v({}))...>) { + func(std::get *>(pools)->get(entt)...); + } else { + func(entt, std::get *>(pools)->get(entt)...); + } + } + } + +public: + /*! @brief Underlying entity identifier. */ + using entity_type = Entity; + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + /*! @brief Random access iterator type. */ + using iterator = typename sparse_set::iterator; + /*! @brief Reversed iterator type. */ + using reverse_iterator = typename sparse_set::reverse_iterator; + + /** + * @brief Returns the number of existing components of the given type. 
+ * @tparam Component Type of component of which to return the size. + * @return Number of existing components of the given type. + */ + template + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return std::get *>(pools)->size(); + } + + /** + * @brief Returns the number of entities that have the given components. + * @return Number of entities that have the given components. + */ + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return handler->size(); + } + + /** + * @brief Returns the number of elements that a group has currently + * allocated space for. + * @return Capacity of the group. + */ + [[nodiscard]] size_type capacity() const ENTT_NOEXCEPT { + return handler->capacity(); + } + + /*! @brief Requests the removal of unused capacity. */ + void shrink_to_fit() { + handler->shrink_to_fit(); + } + + /** + * @brief Checks whether a group or some pools are empty. + * @tparam Component Types of components in which one is interested. + * @return True if the group or the pools are empty, false otherwise. + */ + template + [[nodiscard]] bool empty() const ENTT_NOEXCEPT { + if constexpr(sizeof...(Component) == 0) { + return handler->empty(); + } else { + return (std::get *>(pools)->empty() && ...); + } + } + + /** + * @brief Direct access to the list of components of a given pool. + * + * The returned pointer is such that range + * `[raw(), raw() + size()]` is always a + * valid range, even if the container is empty. + * + * @note + * Components are in the reverse order as returned by the `begin`/`end` + * iterators. + * + * @tparam Component Type of component in which one is interested. + * @return A pointer to the array of components. + */ + template + [[nodiscard]] Component * raw() const ENTT_NOEXCEPT { + return std::get *>(pools)->raw(); + } + + /** + * @brief Direct access to the list of entities of a given pool. 
+ * + * The returned pointer is such that range + * `[data(), data() + size()]` is always a + * valid range, even if the container is empty. + * + * @note + * Entities are in the reverse order as returned by the `begin`/`end` + * iterators. + * + * @tparam Component Type of component in which one is interested. + * @return A pointer to the array of entities. + */ + template + [[nodiscard]] const entity_type * data() const ENTT_NOEXCEPT { + return std::get *>(pools)->data(); + } + + /** + * @brief Direct access to the list of entities. + * + * The returned pointer is such that range `[data(), data() + size()]` is + * always a valid range, even if the container is empty. + * + * @note + * Entities are in the reverse order as returned by the `begin`/`end` + * iterators. + * + * @return A pointer to the array of entities. + */ + [[nodiscard]] const entity_type * data() const ENTT_NOEXCEPT { + return handler->data(); + } + + /** + * @brief Returns an iterator to the first entity of the group. + * + * The returned iterator points to the first entity of the group. If the + * group is empty, the returned iterator will be equal to `end()`. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the first entity of the group. + */ + [[nodiscard]] iterator begin() const ENTT_NOEXCEPT { + return handler->begin(); + } + + /** + * @brief Returns an iterator that is past the last entity of the group. + * + * The returned iterator points to the entity following the last entity of + * the group. Attempting to dereference the returned iterator results in + * undefined behavior. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the entity following the last entity of the + * group. 
+ */ + [[nodiscard]] iterator end() const ENTT_NOEXCEPT { + return handler->end(); + } + + /** + * @brief Returns an iterator to the first entity of the reversed group. + * + * The returned iterator points to the first entity of the reversed group. + * If the group is empty, the returned iterator will be equal to `rend()`. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the first entity of the reversed group. + */ + [[nodiscard]] reverse_iterator rbegin() const ENTT_NOEXCEPT { + return handler->rbegin(); + } + + /** + * @brief Returns an iterator that is past the last entity of the reversed + * group. + * + * The returned iterator points to the entity following the last entity of + * the reversed group. Attempting to dereference the returned iterator + * results in undefined behavior. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the entity following the last entity of the + * reversed group. + */ + [[nodiscard]] reverse_iterator rend() const ENTT_NOEXCEPT { + return handler->rend(); + } + + /** + * @brief Returns the first entity of the group, if any. + * @return The first entity of the group if one exists, the null entity + * otherwise. + */ + [[nodiscard]] entity_type front() const { + const auto it = begin(); + return it != end() ? *it : null; + } + + /** + * @brief Returns the last entity of the group, if any. + * @return The last entity of the group if one exists, the null entity + * otherwise. + */ + [[nodiscard]] entity_type back() const { + const auto it = rbegin(); + return it != rend() ? *it : null; + } + + /** + * @brief Finds an entity. + * @param entt A valid entity identifier. + * @return An iterator to the given entity if it's found, past the end + * iterator otherwise. 
+ */ + [[nodiscard]] iterator find(const entity_type entt) const { + const auto it = handler->find(entt); + return it != end() && *it == entt ? it : end(); + } + + /** + * @brief Returns the identifier that occupies the given position. + * @param pos Position of the element to return. + * @return The identifier that occupies the given position. + */ + [[nodiscard]] entity_type operator[](const size_type pos) const { + return begin()[pos]; + } + + /** + * @brief Checks if a group contains an entity. + * @param entt A valid entity identifier. + * @return True if the group contains the given entity, false otherwise. + */ + [[nodiscard]] bool contains(const entity_type entt) const { + return handler->contains(entt); + } + + /** + * @brief Returns the components assigned to the given entity. + * + * Prefer this function instead of `registry::get` during iterations. It has + * far better performance than its counterpart. + * + * @warning + * Attempting to use an invalid component type results in a compilation + * error. Attempting to use an entity that doesn't belong to the group + * results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * group doesn't contain the given entity. + * + * @tparam Component Types of components to get. + * @param entt A valid entity identifier. + * @return The components assigned to the entity. + */ + template + [[nodiscard]] decltype(auto) get([[maybe_unused]] const entity_type entt) const { + ENTT_ASSERT(contains(entt)); + + if constexpr(sizeof...(Component) == 1) { + return (std::get *>(pools)->get(entt), ...); + } else { + return std::tuple({}))...>{get(entt)...}; + } + } + + /** + * @brief Iterates entities and components and applies the given function + * object to them. + * + * The function object is invoked for each entity. It is provided with the + * entity itself and a set of references to non-empty components. The + * _constness_ of the components is as requested.
+ * The signature of the function must be equivalent to one of the following + * forms: + * + * @code{.cpp} + * void(const entity_type, Type &...); + * void(Type &...); + * @endcode + * + * @note + * Empty types aren't explicitly instantiated and therefore they are never + * returned during iterations. + * + * @tparam Func Type of the function object to invoke. + * @param func A valid function object. + */ + template + void each(Func func) const { + using get_type_list = type_list_cat_t, type_list<>, type_list>...>; + traverse(std::move(func), get_type_list{}); + } + + /** + * @brief Returns an iterable object to use to _visit_ the group. + * + * The iterable object returns tuples that contain the current entity and a + * set of references to its non-empty components. The _constness_ of the + * components is as requested. + * + * @note + * Empty types aren't explicitly instantiated and therefore they are never + * returned during iterations. + * + * @return An iterable object to use to _visit_ the group. + */ + [[nodiscard]] auto proxy() const ENTT_NOEXCEPT { + return group_proxy{*handler, pools}; + } + + /** + * @brief Sort a group according to the given comparison function. + * + * Sort the group so that iterating it with a couple of iterators returns + * entities and components in the expected order. See `begin` and `end` for + * more details. + * + * The comparison function object must return `true` if the first element + * is _less_ than the second one, `false` otherwise. The signature of the + * comparison function should be equivalent to one of the following: + * + * @code{.cpp} + * bool(std::tuple, std::tuple); + * bool(const Component &..., const Component &...); + * bool(const Entity, const Entity); + * @endcode + * + * Where `Component` are such that they are iterated by the group.
+ * Moreover, the comparison function object shall induce a + * _strict weak ordering_ on the values. + * + * The sort function object must offer a member function template + * `operator()` that accepts three arguments: + * + * * An iterator to the first element of the range to sort. + * * An iterator past the last element of the range to sort. + * * A comparison function to use to compare the elements. + * + * @tparam Component Optional types of components to compare. + * @tparam Compare Type of comparison function object. + * @tparam Sort Type of sort function object. + * @tparam Args Types of arguments to forward to the sort function object. + * @param compare A valid comparison function object. + * @param algo A valid sort function object. + * @param args Arguments to forward to the sort function object, if any. + */ + template + void sort(Compare compare, Sort algo = Sort{}, Args &&... args) { + if constexpr(sizeof...(Component) == 0) { + static_assert(std::is_invocable_v, "Invalid comparison function"); + handler->sort(handler->begin(), handler->end(), std::move(compare), std::move(algo), std::forward(args)...); + } else if constexpr(sizeof...(Component) == 1) { + handler->sort(handler->begin(), handler->end(), [this, compare = std::move(compare)](const entity_type lhs, const entity_type rhs) { + return compare((std::get *>(pools)->get(lhs), ...), (std::get *>(pools)->get(rhs), ...)); + }, std::move(algo), std::forward(args)...); + } else { + handler->sort(handler->begin(), handler->end(), [this, compare = std::move(compare)](const entity_type lhs, const entity_type rhs) { + return compare(std::tuple({}))...>{std::get *>(pools)->get(lhs)...}, std::tuple({}))...>{std::get *>(pools)->get(rhs)...}); + }, std::move(algo), std::forward(args)...); + } + } + + /** + * @brief Sort the shared pool of entities according to the given component.
+ * + * Non-owning groups of the same type share with the registry a pool of + * entities with its own order that doesn't depend on the order of any pool + * of components. Users can order the underlying data structure so that it + * respects the order of the pool of the given component. + * + * @note + * The shared pool of entities and thus its order is affected by the changes + * to each and every pool that it tracks. Therefore changes to those pools + * can quickly ruin the order imposed to the pool of entities shared between + * the non-owning groups. + * + * @tparam Component Type of component to use to impose the order. + */ + template + void sort() const { + handler->respect(*std::get *>(pools)); + } + +private: + sparse_set *handler; + const std::tuple *...> pools; +}; + + +/** + * @brief Owning group. + * + * Owning groups return all entities and only the entities that have at least + * the given components. Moreover: + * + * * It's guaranteed that the entity list is tightly packed in memory for fast + * iterations. + * * It's guaranteed that the lists of owned components are tightly packed in + * memory for even faster iterations and to allow direct access. + * * They stay true to the order of the owned components and all instances have + * the same order in memory. + * + * The more types of components are owned by a group, the faster it is to + * iterate them. + * + * @b Important + * + * Iterators aren't invalidated if: + * + * * New instances of the given components are created and assigned to entities. + * * The entity currently pointed is modified (as an example, if one of the + * given components is removed from the entity to which the iterator points). + * * The entity currently pointed is destroyed. + * + * In all other cases, modifying the pools iterated by the group in any way + * invalidates all the iterators and using them results in undefined behavior. 
+ * + * @note + * Groups share references to the underlying data structures of the registry + * that generated them. Therefore any change to the entities and to the + * components made by means of the registry are immediately reflected by all the + * groups. + * Moreover, sorting an owning group affects all the instance of the same group + * (it means that users don't have to call `sort` on each instance to sort all + * of them because they share the underlying data structure). + * + * @warning + * Lifetime of a group must not overcome that of the registry that generated it. + * In any other case, attempting to use a group results in undefined behavior. + * + * @tparam Entity A valid entity type (see entt_traits for more details). + * @tparam Exclude Types of components used to filter the group. + * @tparam Get Types of components observed by the group. + * @tparam Owned Types of components owned by the group. + */ +template +class basic_group, get_t, Owned...> { + /*! @brief A registry is allowed to create groups. */ + friend class basic_registry; + + template + using pool_type = pool_t; + + template + using component_iterator = decltype(std::declval>().begin()); + + class group_proxy { + friend class basic_group, get_t, Owned...>; + + class proxy_iterator { + friend class group_proxy; + + using it_type = typename sparse_set::iterator; + using owned_type = decltype(std::tuple_cat(std::declval, std::tuple<>, std::tuple>>>()...)); + using get_type = decltype(std::tuple_cat(std::declval, std::tuple<>, std::tuple *>>>()...)); + + proxy_iterator(it_type from, owned_type oref, get_type gref) ENTT_NOEXCEPT + : it{from}, + owned{oref}, + get{gref} + {} + + public: + using difference_type = std::ptrdiff_t; + using value_type = decltype(std::tuple_cat( + std::declval>(), + std::declval, std::tuple<>, std::tuple>>()..., + std::declval, std::tuple<>, std::tuple>>()... 
+ )); + using pointer = void; + using reference = decltype(std::tuple_cat( + std::declval>(), + std::declval, std::tuple<>, std::tuple>>()..., + std::declval, std::tuple<>, std::tuple>>()... + )); + using iterator_category = std::input_iterator_tag; + + proxy_iterator & operator++() ENTT_NOEXCEPT { + return ++it, std::apply([](auto &&... curr) { (++curr, ...); }, owned), *this; + } + + proxy_iterator operator++(int) ENTT_NOEXCEPT { + proxy_iterator orig = *this; + return ++(*this), orig; + } + + [[nodiscard]] reference operator*() const ENTT_NOEXCEPT { + return std::tuple_cat( + std::make_tuple(*it), + std::apply([](auto &&... curr) { return std::forward_as_tuple(*curr...); }, owned), + std::apply([entt = *it](auto &&... curr) { return std::forward_as_tuple(curr->get(entt)...); }, get) + ); + } + + [[nodiscard]] bool operator==(const proxy_iterator &other) const ENTT_NOEXCEPT { + return other.it == it; + } + + [[nodiscard]] bool operator!=(const proxy_iterator &other) const ENTT_NOEXCEPT { + return !(*this == other); + } + + private: + it_type it{}; + owned_type owned{}; + get_type get{}; + }; + + group_proxy(std::tuple *..., pool_type *...> cpools, const std::size_t &extent) + : pools{cpools}, + length{&extent} + {} + + public: + using iterator = proxy_iterator; + + [[nodiscard]] iterator begin() const ENTT_NOEXCEPT { + return proxy_iterator{ + std::get<0>(pools)->sparse_set::end() - *length, + std::tuple_cat([length = *length](auto *cpool) { + if constexpr(is_eto_eligible_v::object_type>) { + return std::make_tuple(); + } else { + return std::make_tuple(cpool->end() - length); + } + }(std::get *>(pools))...), + std::tuple_cat([](auto *cpool) { + if constexpr(is_eto_eligible_v::object_type>) { + return std::make_tuple(); + } else { + return std::make_tuple(cpool); + } + }(std::get *>(pools))...) 
+ }; + } + + [[nodiscard]] iterator end() const ENTT_NOEXCEPT { + return proxy_iterator{ + std::get<0>(pools)->sparse_set::end(), + std::tuple_cat([](auto *cpool) { + if constexpr(is_eto_eligible_v::object_type>) { + return std::make_tuple(); + } else { + return std::make_tuple(cpool->end()); + } + }(std::get *>(pools))...), + std::tuple_cat([](auto *cpool) { + if constexpr(is_eto_eligible_v::object_type>) { + return std::make_tuple(); + } else { + return std::make_tuple(cpool); + } + }(std::get *>(pools))...) + }; + } + + private: + const std::tuple *..., pool_type *...> pools; + const std::size_t *length; + }; + + basic_group(const std::size_t &extent, pool_type &... opool, pool_type &... gpool) ENTT_NOEXCEPT + : pools{&opool..., &gpool...}, + length{&extent} + {} + + template + void traverse(Func func, type_list, type_list) const { + [[maybe_unused]] auto it = std::make_tuple((std::get *>(pools)->end() - *length)...); + [[maybe_unused]] auto data = std::get<0>(pools)->sparse_set::end() - *length; + + for(auto next = *length; next; --next) { + if constexpr(std::is_invocable_v({}))..., decltype(get({}))...>) { + if constexpr(sizeof...(Weak) == 0) { + func(*(std::get>(it)++)...); + } else { + const auto entt = *(data++); + func(*(std::get>(it)++)..., std::get *>(pools)->get(entt)...); + } + } else { + const auto entt = *(data++); + func(entt, *(std::get>(it)++)..., std::get *>(pools)->get(entt)...); + } + } + } + +public: + /*! @brief Underlying entity identifier. */ + using entity_type = Entity; + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + /*! @brief Random access iterator type. */ + using iterator = typename sparse_set::iterator; + /*! @brief Reversed iterator type. */ + using reverse_iterator = typename sparse_set::reverse_iterator; + + /** + * @brief Returns the number of existing components of the given type. + * @tparam Component Type of component of which to return the size. 
+ * @return Number of existing components of the given type. + */ + template + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return std::get *>(pools)->size(); + } + + /** + * @brief Returns the number of entities that have the given components. + * @return Number of entities that have the given components. + */ + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return *length; + } + + /** + * @brief Checks whether a group or some pools are empty. + * @tparam Component Types of components in which one is interested. + * @return True if the group or the pools are empty, false otherwise. + */ + template + [[nodiscard]] bool empty() const ENTT_NOEXCEPT { + if constexpr(sizeof...(Component) == 0) { + return !*length; + } else { + return (std::get *>(pools)->empty() && ...); + } + } + + /** + * @brief Direct access to the list of components of a given pool. + * + * The returned pointer is such that range + * `[raw(), raw() + size()]` is always a + * valid range, even if the container is empty.
+ * Moreover, in case the group owns the given component, the range + * `[raw(), raw() + size()]` is such that it contains + * the instances that are part of the group itself. + * + * @note + * Components are in the reverse order as returned by the `begin`/`end` + * iterators. + * + * @tparam Component Type of component in which one is interested. + * @return A pointer to the array of components. + */ + template + [[nodiscard]] Component * raw() const ENTT_NOEXCEPT { + return std::get *>(pools)->raw(); + } + + /** + * @brief Direct access to the list of entities of a given pool. + * + * The returned pointer is such that range + * `[data(), data() + size()]` is always a + * valid range, even if the container is empty.
+ * Moreover, in case the group owns the given component, the range + * `[data(), data() + size()]` is such that it + * contains the entities that are part of the group itself. + * + * @note + * Entities are in the reverse order as returned by the `begin`/`end` + * iterators. + * + * @tparam Component Type of component in which one is interested. + * @return A pointer to the array of entities. + */ + template + [[nodiscard]] const entity_type * data() const ENTT_NOEXCEPT { + return std::get *>(pools)->data(); + } + + /** + * @brief Direct access to the list of entities. + * + * The returned pointer is such that range `[data(), data() + size()]` is + * always a valid range, even if the container is empty. + * + * @note + * Entities are in the reverse order as returned by the `begin`/`end` + * iterators. + * + * @return A pointer to the array of entities. + */ + [[nodiscard]] const entity_type * data() const ENTT_NOEXCEPT { + return std::get<0>(pools)->data(); + } + + /** + * @brief Returns an iterator to the first entity of the group. + * + * The returned iterator points to the first entity of the group. If the + * group is empty, the returned iterator will be equal to `end()`. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the first entity of the group. + */ + [[nodiscard]] iterator begin() const ENTT_NOEXCEPT { + return std::get<0>(pools)->sparse_set::end() - *length; + } + + /** + * @brief Returns an iterator that is past the last entity of the group. + * + * The returned iterator points to the entity following the last entity of + * the group. Attempting to dereference the returned iterator results in + * undefined behavior. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the entity following the last entity of the + * group. 
+ */ + [[nodiscard]] iterator end() const ENTT_NOEXCEPT { + return std::get<0>(pools)->sparse_set::end(); + } + + /** + * @brief Returns an iterator to the first entity of the reversed group. + * + * The returned iterator points to the first entity of the reversed group. + * If the group is empty, the returned iterator will be equal to `rend()`. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the first entity of the reversed group. + */ + [[nodiscard]] reverse_iterator rbegin() const ENTT_NOEXCEPT { + return std::get<0>(pools)->sparse_set::rbegin(); + } + + /** + * @brief Returns an iterator that is past the last entity of the reversed + * group. + * + * The returned iterator points to the entity following the last entity of + * the reversed group. Attempting to dereference the returned iterator + * results in undefined behavior. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the entity following the last entity of the + * reversed group. + */ + [[nodiscard]] reverse_iterator rend() const ENTT_NOEXCEPT { + return std::get<0>(pools)->sparse_set::rbegin() + *length; + } + + /** + * @brief Returns the first entity of the group, if any. + * @return The first entity of the group if one exists, the null entity + * otherwise. + */ + [[nodiscard]] entity_type front() const { + const auto it = begin(); + return it != end() ? *it : null; + } + + /** + * @brief Returns the last entity of the group, if any. + * @return The last entity of the group if one exists, the null entity + * otherwise. + */ + [[nodiscard]] entity_type back() const { + const auto it = rbegin(); + return it != rend() ? *it : null; + } + + /** + * @brief Finds an entity. + * @param entt A valid entity identifier. + * @return An iterator to the given entity if it's found, past the end + * iterator otherwise. 
+ */ + [[nodiscard]] iterator find(const entity_type entt) const { + const auto it = std::get<0>(pools)->find(entt); + return it != end() && it >= begin() && *it == entt ? it : end(); + } + + /** + * @brief Returns the identifier that occupies the given position. + * @param pos Position of the element to return. + * @return The identifier that occupies the given position. + */ + [[nodiscard]] entity_type operator[](const size_type pos) const { + return begin()[pos]; + } + + /** + * @brief Checks if a group contains an entity. + * @param entt A valid entity identifier. + * @return True if the group contains the given entity, false otherwise. + */ + [[nodiscard]] bool contains(const entity_type entt) const { + return std::get<0>(pools)->contains(entt) && (std::get<0>(pools)->index(entt) < (*length)); + } + + /** + * @brief Returns the components assigned to the given entity. + * + * Prefer this function instead of `registry::get` during iterations. It has + * far better performance than its counterpart. + * + * @warning + * Attempting to use an invalid component type results in a compilation + * error. Attempting to use an entity that doesn't belong to the group + * results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * group doesn't contain the given entity. + * + * @tparam Component Types of components to get. + * @param entt A valid entity identifier. + * @return The components assigned to the entity. + */ + template + [[nodiscard]] decltype(auto) get([[maybe_unused]] const entity_type entt) const { + ENTT_ASSERT(contains(entt)); + + if constexpr(sizeof...(Component) == 1) { + return (std::get *>(pools)->get(entt), ...); + } else { + return std::tuple({}))...>{get(entt)...}; + } + } + + /** + * @brief Iterates entities and components and applies the given function + * object to them. + * + * The function object is invoked for each entity. It is provided with the + * entity itself and a set of references to non-empty components. The + * _constness_ of the components is as requested.
+ * The signature of the function must be equivalent to one of the following + * forms: + * + * @code{.cpp} + * void(const entity_type, Type &...); + * void(Type &...); + * @endcode + * + * @note + * Empty types aren't explicitly instantiated and therefore they are never + * returned during iterations. + * + * @tparam Func Type of the function object to invoke. + * @param func A valid function object. + */ + template + void each(Func func) const { + using owned_type_list = type_list_cat_t, type_list<>, type_list>...>; + using get_type_list = type_list_cat_t, type_list<>, type_list>...>; + traverse(std::move(func), owned_type_list{}, get_type_list{}); + } + + /** + * @brief Returns an iterable object to use to _visit_ the group. + * + * The iterable object returns tuples that contain the current entity and a + * set of references to its non-empty components. The _constness_ of the + * components is as requested. + * + * @note + * Empty types aren't explicitly instantiated and therefore they are never + * returned during iterations. + * + * @return An iterable object to use to _visit_ the group. + */ + [[nodiscard]] auto proxy() const ENTT_NOEXCEPT { + return group_proxy{pools, *length}; + } + + /** + * @brief Sort a group according to the given comparison function. + * + * Sort the group so that iterating it with a couple of iterators returns + * entities and components in the expected order. See `begin` and `end` for + * more details. + * + * The comparison function object must return `true` if the first element + * is _less_ than the second one, `false` otherwise. The signature of the + * comparison function should be equivalent to one of the following: + * + * @code{.cpp} + * bool(std::tuple, std::tuple); + * bool(const Component &, const Component &); + * bool(const Entity, const Entity); + * @endcode + * + * Where `Component` are either owned types or not but still such that they + * are iterated by the group.
+ * Moreover, the comparison function object shall induce a + * _strict weak ordering_ on the values. + * + * The sort function oject must offer a member function template + * `operator()` that accepts three arguments: + * + * * An iterator to the first element of the range to sort. + * * An iterator past the last element of the range to sort. + * * A comparison function to use to compare the elements. + * + * @tparam Component Optional types of components to compare. + * @tparam Compare Type of comparison function object. + * @tparam Sort Type of sort function object. + * @tparam Args Types of arguments to forward to the sort function object. + * @param compare A valid comparison function object. + * @param algo A valid sort function object. + * @param args Arguments to forward to the sort function object, if any. + */ + template + void sort(Compare compare, Sort algo = Sort{}, Args &&... args) { + auto *cpool = std::get<0>(pools); + + if constexpr(sizeof...(Component) == 0) { + static_assert(std::is_invocable_v, "Invalid comparison function"); + cpool->sort(cpool->end()-*length, cpool->end(), std::move(compare), std::move(algo), std::forward(args)...); + } else if constexpr(sizeof...(Component) == 1) { + cpool->sort(cpool->end()-*length, cpool->end(), [this, compare = std::move(compare)](const entity_type lhs, const entity_type rhs) { + return compare((std::get *>(pools)->get(lhs), ...), (std::get *>(pools)->get(rhs), ...)); + }, std::move(algo), std::forward(args)...); + } else { + cpool->sort(cpool->end()-*length, cpool->end(), [this, compare = std::move(compare)](const entity_type lhs, const entity_type rhs) { + return compare(std::tuple({}))...>{std::get *>(pools)->get(lhs)...}, std::tuple({}))...>{std::get *>(pools)->get(rhs)...}); + }, std::move(algo), std::forward(args)...); + } + + [this](auto *head, auto *... 
other) { + for(auto next = *length; next; --next) { + const auto pos = next - 1; + [[maybe_unused]] const auto entt = head->data()[pos]; + (other->swap(other->data()[pos], entt), ...); + } + }(std::get *>(pools)...); + } + +private: + const std::tuple *..., pool_type *...> pools; + const size_type *length; +}; + + +} + + +#endif + +// #include "runtime_view.hpp" +#ifndef ENTT_ENTITY_RUNTIME_VIEW_HPP +#define ENTT_ENTITY_RUNTIME_VIEW_HPP + + +#include +#include +#include +#include +#include +// #include "../config/config.h" + +// #include "sparse_set.hpp" + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @brief Runtime view. + * + * Runtime views iterate over those entities that have at least all the given + * components in their bags. During initialization, a runtime view looks at the + * number of entities available for each component and picks up a reference to + * the smallest set of candidate entities in order to get a performance boost + * when iterate.
+ * Order of elements during iterations are highly dependent on the order of the + * underlying data structures. See sparse_set and its specializations for more + * details. + * + * @b Important + * + * Iterators aren't invalidated if: + * + * * New instances of the given components are created and assigned to entities. + * * The entity currently pointed is modified (as an example, if one of the + * given components is removed from the entity to which the iterator points). + * * The entity currently pointed is destroyed. + * + * In all the other cases, modifying the pools of the given components in any + * way invalidates all the iterators and using them results in undefined + * behavior. + * + * @note + * Views share references to the underlying data structures of the registry that + * generated them. Therefore any change to the entities and to the components + * made by means of the registry are immediately reflected by the views, unless + * a pool was missing when the view was built (in this case, the view won't + * have a valid reference and won't be updated accordingly). + * + * @warning + * Lifetime of a view must not overcome that of the registry that generated it. + * In any other case, attempting to use a view results in undefined behavior. + * + * @tparam Entity A valid entity type (see entt_traits for more details). + */ +template +class basic_runtime_view { + /*! @brief A registry is allowed to create views. 
*/ + friend class basic_registry; + + using underlying_iterator = typename sparse_set::iterator; + + class view_iterator final { + friend class basic_runtime_view; + + view_iterator(const std::vector *> &cpools, const std::vector *> &ignore, underlying_iterator curr) ENTT_NOEXCEPT + : pools{&cpools}, + filter{&ignore}, + it{curr} + { + if(it != (*pools)[0]->end() && !valid()) { + ++(*this); + } + } + + [[nodiscard]] bool valid() const { + return std::all_of(pools->begin()++, pools->end(), [entt = *it](const auto *curr) { return curr->contains(entt); }) + && std::none_of(filter->cbegin(), filter->cend(), [entt = *it](const auto *curr) { return curr && curr->contains(entt); }); + } + + public: + using difference_type = typename underlying_iterator::difference_type; + using value_type = typename underlying_iterator::value_type; + using pointer = typename underlying_iterator::pointer; + using reference = typename underlying_iterator::reference; + using iterator_category = std::bidirectional_iterator_tag; + + view_iterator() ENTT_NOEXCEPT = default; + + view_iterator & operator++() { + while(++it != (*pools)[0]->end() && !valid()); + return *this; + } + + view_iterator operator++(int) { + view_iterator orig = *this; + return ++(*this), orig; + } + + view_iterator & operator--() ENTT_NOEXCEPT { + while(--it != (*pools)[0]->begin() && !valid()); + return *this; + } + + view_iterator operator--(int) ENTT_NOEXCEPT { + view_iterator orig = *this; + return operator--(), orig; + } + + [[nodiscard]] bool operator==(const view_iterator &other) const ENTT_NOEXCEPT { + return other.it == it; + } + + [[nodiscard]] bool operator!=(const view_iterator &other) const ENTT_NOEXCEPT { + return !(*this == other); + } + + [[nodiscard]] pointer operator->() const { + return it.operator->(); + } + + [[nodiscard]] reference operator*() const { + return *operator->(); + } + + private: + const std::vector *> *pools; + const std::vector *> *filter; + underlying_iterator it; + }; + + 
basic_runtime_view(std::vector *> cpools, std::vector *> epools) ENTT_NOEXCEPT + : pools{std::move(cpools)}, + filter{std::move(epools)} + { + const auto it = std::min_element(pools.begin(), pools.end(), [](const auto *lhs, const auto *rhs) { + return (!lhs && rhs) || (lhs && rhs && lhs->size() < rhs->size()); + }); + + // brings the best candidate (if any) on front of the vector + std::rotate(pools.begin(), it, pools.end()); + } + + [[nodiscard]] bool valid() const { + return !pools.empty() && pools.front(); + } + +public: + /*! @brief Underlying entity identifier. */ + using entity_type = Entity; + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + /*! @brief Bidirectional iterator type. */ + using iterator = view_iterator; + + /** + * @brief Estimates the number of entities that have the given components. + * @return Estimated number of entities that have the given components. + */ + [[nodiscard]] size_type size() const { + return valid() ? pools.front()->size() : size_type{}; + } + + /** + * @brief Checks if the view is definitely empty. + * @return True if the view is definitely empty, false otherwise. + */ + [[nodiscard]] bool empty() const { + return !valid() || pools.front()->empty(); + } + + /** + * @brief Returns an iterator to the first entity that has the given + * components. + * + * The returned iterator points to the first entity that has the given + * components. If the view is empty, the returned iterator will be equal to + * `end()`. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the first entity that has the given components. + */ + [[nodiscard]] iterator begin() const { + return valid() ? iterator{pools, filter, pools[0]->begin()} : iterator{}; + } + + /** + * @brief Returns an iterator that is past the last entity that has the + * given components. 
+ * + * The returned iterator points to the entity following the last entity that + * has the given components. Attempting to dereference the returned iterator + * results in undefined behavior. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the entity following the last entity that has the + * given components. + */ + [[nodiscard]] iterator end() const { + return valid() ? iterator{pools, filter, pools[0]->end()} : iterator{}; + } + + /** + * @brief Checks if a view contains an entity. + * @param entt A valid entity identifier. + * @return True if the view contains the given entity, false otherwise. + */ + [[nodiscard]] bool contains(const entity_type entt) const { + return valid() && std::all_of(pools.cbegin(), pools.cend(), [entt](const auto *curr) { return curr->contains(entt); }) + && std::none_of(filter.cbegin(), filter.cend(), [entt](const auto *curr) { return curr && curr->contains(entt); }); + } + + /** + * @brief Iterates entities and applies the given function object to them. + * + * The function object is invoked for each entity. It is provided only with + * the entity itself. To get the components, users can use the registry with + * which the view was built.
+ * The signature of the function should be equivalent to the following: + * + * @code{.cpp} + * void(const entity_type); + * @endcode + * + * @tparam Func Type of the function object to invoke. + * @param func A valid function object. + */ + template + void each(Func func) const { + for(const auto entity: *this) { + func(entity); + } + } + +private: + std::vector *> pools; + std::vector *> filter; +}; + + +} + + +#endif + +// #include "sparse_set.hpp" + +// #include "storage.hpp" + +// #include "utility.hpp" + +// #include "view.hpp" +#ifndef ENTT_ENTITY_VIEW_HPP +#define ENTT_ENTITY_VIEW_HPP + + +#include +#include +#include +#include +#include +#include +// #include "../config/config.h" + +// #include "../core/type_traits.hpp" + +// #include "entity.hpp" + +// #include "fwd.hpp" + +// #include "pool.hpp" + +// #include "sparse_set.hpp" + +// #include "utility.hpp" + + + +namespace entt { + + +/** + * @brief View. + * + * Primary template isn't defined on purpose. All the specializations give a + * compile-time error, but for a few reasonable cases. + */ +template +class basic_view; + + +/** + * @brief Multi component view. + * + * Multi component views iterate over those entities that have at least all the + * given components in their bags. During initialization, a multi component view + * looks at the number of entities available for each component and uses the + * smallest set in order to get a performance boost when iterate. + * + * @b Important + * + * Iterators aren't invalidated if: + * + * * New instances of the given components are created and assigned to entities. + * * The entity currently pointed is modified (as an example, if one of the + * given components is removed from the entity to which the iterator points). + * * The entity currently pointed is destroyed. + * + * In all other cases, modifying the pools iterated by the view in any way + * invalidates all the iterators and using them results in undefined behavior. 
+ * + * @note + * Views share references to the underlying data structures of the registry that + * generated them. Therefore any change to the entities and to the components + * made by means of the registry are immediately reflected by views. + * + * @warning + * Lifetime of a view must not overcome that of the registry that generated it. + * In any other case, attempting to use a view results in undefined behavior. + * + * @tparam Entity A valid entity type (see entt_traits for more details). + * @tparam Exclude Types of components used to filter the view. + * @tparam Component Types of components iterated by the view. + */ +template +class basic_view, Component...> { + /*! @brief A registry is allowed to create views. */ + friend class basic_registry; + + template + using pool_type = pool_t; + + template + using component_iterator = decltype(std::declval>().begin()); + + using unchecked_type = std::array *, (sizeof...(Component) - 1)>; + using filter_type = std::array *, sizeof...(Exclude)>; + + template + class view_iterator final { + friend class basic_view, Component...>; + + view_iterator(It from, It to, It curr, unchecked_type other, filter_type ignore) ENTT_NOEXCEPT + : first{from}, + last{to}, + it{curr}, + unchecked{other}, + filter{ignore} + { + if(it != last && !valid()) { + ++(*this); + } + } + + [[nodiscard]] bool valid() const { + return std::all_of(unchecked.cbegin(), unchecked.cend(), [entt = *it](const sparse_set *curr) { return curr->contains(entt); }) + && (sizeof...(Exclude) == 0 || std::none_of(filter.cbegin(), filter.cend(), [entt = *it](const sparse_set *cpool) { return cpool->contains(entt); })); + } + + public: + using difference_type = typename std::iterator_traits::difference_type; + using value_type = typename std::iterator_traits::value_type; + using pointer = typename std::iterator_traits::pointer; + using reference = typename std::iterator_traits::reference; + using iterator_category = std::bidirectional_iterator_tag; + + 
view_iterator() ENTT_NOEXCEPT = default; + + view_iterator & operator++() { + while(++it != last && !valid()); + return *this; + } + + view_iterator operator++(int) { + view_iterator orig = *this; + return ++(*this), orig; + } + + view_iterator & operator--() ENTT_NOEXCEPT { + while(--it != first && !valid()); + return *this; + } + + view_iterator operator--(int) ENTT_NOEXCEPT { + view_iterator orig = *this; + return operator--(), orig; + } + + [[nodiscard]] bool operator==(const view_iterator &other) const ENTT_NOEXCEPT { + return other.it == it; + } + + [[nodiscard]] bool operator!=(const view_iterator &other) const ENTT_NOEXCEPT { + return !(*this == other); + } + + [[nodiscard]] pointer operator->() const { + return &*it; + } + + [[nodiscard]] reference operator*() const { + return *operator->(); + } + + private: + It first; + It last; + It it; + unchecked_type unchecked; + filter_type filter; + }; + + class view_proxy { + friend class basic_view, Component...>; + + using proxy_view_iterator = view_iterator::iterator>; + + class proxy_iterator { + friend class view_proxy; + + using ref_type = decltype(std::tuple_cat(std::declval, std::tuple<>, std::tuple *>>>()...)); + + proxy_iterator(proxy_view_iterator from, ref_type ref) ENTT_NOEXCEPT + : it{from}, + pools{ref} + {} + + public: + using difference_type = std::ptrdiff_t; + using value_type = decltype(std::tuple_cat( + std::declval>(), + std::declval, std::tuple<>, std::tuple>>()... + )); + using pointer = void; + using reference = decltype(std::tuple_cat( + std::declval>(), + std::declval, std::tuple<>, std::tuple>>()... + )); + using iterator_category = std::input_iterator_tag; + + proxy_iterator & operator++() ENTT_NOEXCEPT { + return ++it, *this; + } + + proxy_iterator operator++(int) ENTT_NOEXCEPT { + proxy_iterator orig = *this; + return ++(*this), orig; + } + + [[nodiscard]] reference operator*() const ENTT_NOEXCEPT { + return std::apply([entt = *it](auto *... 
cpool) { return reference{entt, cpool->get(entt)...}; }, pools); + } + + [[nodiscard]] bool operator==(const proxy_iterator &other) const ENTT_NOEXCEPT { + return other.it == it; + } + + [[nodiscard]] bool operator!=(const proxy_iterator &other) const ENTT_NOEXCEPT { + return !(*this == other); + } + + private: + proxy_view_iterator it{}; + const ref_type pools{}; + }; + + view_proxy(proxy_view_iterator from, proxy_view_iterator to, std::tuple *...> ref) + : first{from}, + last{to}, + pools{ref} + {} + + public: + using iterator = proxy_iterator; + + [[nodiscard]] iterator begin() const ENTT_NOEXCEPT { + return proxy_iterator{first, std::tuple_cat([](auto *cpool) { + if constexpr(is_eto_eligible_v::object_type>) { + return std::make_tuple(); + } else { + return std::make_tuple(cpool); + } + }(std::get *>(pools))...)}; + } + + [[nodiscard]] iterator end() const ENTT_NOEXCEPT { + return proxy_iterator{last, std::tuple_cat([](auto *cpool) { + if constexpr(is_eto_eligible_v::object_type>) { + return std::make_tuple(); + } else { + return std::make_tuple(cpool); + } + }(std::get *>(pools))...)}; + } + + private: + proxy_view_iterator first; + proxy_view_iterator last; + const std::tuple *...> pools; + }; + + basic_view(pool_type &... component, unpack_as_t, Exclude> &... epool) ENTT_NOEXCEPT + : pools{&component...}, + view{candidate()}, + filter{&epool...} + {} + + [[nodiscard]] const sparse_set * candidate() const ENTT_NOEXCEPT { + return (std::min)({ static_cast *>(std::get *>(pools))... }, [](const auto *lhs, const auto *rhs) { + return lhs->size() < rhs->size(); + }); + } + + [[nodiscard]] unchecked_type unchecked(const sparse_set *cpool) const { + std::size_t pos{}; + unchecked_type other{}; + ((std::get *>(pools) == cpool ? 
nullptr : (other[pos] = std::get *>(pools), other[pos++])), ...); + return other; + } + + template + [[nodiscard]] decltype(auto) get([[maybe_unused]] component_iterator &it, [[maybe_unused]] pool_type *cpool, [[maybe_unused]] const Entity entt) const { + if constexpr(std::is_same_v) { + return *it; + } else { + return cpool->get(entt); + } + } + + template + void traverse(Func func, type_list) const { + if constexpr(std::disjunction_v...>) { + auto it = std::get *>(pools)->begin(); + + for(const auto entt: static_cast &>(*std::get *>(pools))) { + if(((std::is_same_v || std::get *>(pools)->contains(entt)) && ...) + && (sizeof...(Exclude) == 0 || std::none_of(filter.cbegin(), filter.cend(), [entt](const sparse_set *cpool) { return cpool->contains(entt); }))) + { + if constexpr(std::is_invocable_v({}))...>) { + func(get(it, std::get *>(pools), entt)...); + } else { + func(entt, get(it, std::get *>(pools), entt)...); + } + } + + ++it; + } + } else { + for(const auto entt: static_cast &>(*std::get *>(pools))) { + if(((std::is_same_v || std::get *>(pools)->contains(entt)) && ...) + && (sizeof...(Exclude) == 0 || std::none_of(filter.cbegin(), filter.cend(), [entt](const sparse_set *cpool) { return cpool->contains(entt); }))) + { + if constexpr(std::is_invocable_v({}))...>) { + func(std::get *>(pools)->get(entt)...); + } else { + func(entt, std::get *>(pools)->get(entt)...); + } + } + } + } + } + + template + void iterate(Func func, type_list) const { + const auto last = view->data() + view->size(); + auto first = view->data(); + + while(first != last) { + if((std::get *>(pools)->contains(*first) && ...) + && (sizeof...(Exclude) == 0 || std::none_of(filter.cbegin(), filter.cend(), [entt = *first](const sparse_set *cpool) { return cpool->contains(entt); }))) + { + const auto base = *(first++); + const auto chunk = (std::min)({ (std::get *>(pools)->size() - std::get *>(pools)->index(base))... 
}); + size_type length{}; + + for(++length; + length < chunk + && ((*(std::get *>(pools)->data() + std::get *>(pools)->index(base) + length) == *first) && ...) + && (sizeof...(Exclude) == 0 || std::none_of(filter.cbegin(), filter.cend(), [entt = *first](const sparse_set *cpool) { return cpool->contains(entt); })); + ++length, ++first); + + func(view->data() + view->index(base), (std::get *>(pools)->raw() + std::get *>(pools)->index(base))..., length); + } else { + ++first; + } + } + } + +public: + /*! @brief Underlying entity identifier. */ + using entity_type = Entity; + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + /*! @brief Bidirectional iterator type. */ + using iterator = view_iterator::iterator>; + /*! @brief Reverse iterator type. */ + using reverse_iterator = view_iterator::reverse_iterator>; + + /** + * @brief Returns the number of existing components of the given type. + * + * This isn't the number of entities iterated by the view. + * + * @tparam Comp Type of component of which to return the size. + * @return Number of existing components of the given type. + */ + template + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return std::get *>(pools)->size(); + } + + /** + * @brief Estimates the number of entities iterated by the view. + * @return Estimated number of entities iterated by the view. + */ + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return (std::min)({ std::get *>(pools)->size()... }); + } + + /** + * @brief Checks whether a view or some pools are empty. + * + * The view is definitely empty if one of the pools it uses is empty. In all + * other cases, the view may be empty and not return entities even if this + * function returns false. + * + * @tparam Comp Types of components in which one is interested. + * @return True if the view or the pools are empty, false otherwise. 
+ */ + template + [[nodiscard]] bool empty() const ENTT_NOEXCEPT { + if constexpr(sizeof...(Comp) == 0) { + return (std::get *>(pools)->empty() || ...); + } else { + return (std::get *>(pools)->empty() && ...); + } + } + + /** + * @brief Direct access to the list of components of a given pool. + * + * The returned pointer is such that range + * `[raw(), raw() + size()]` is always a valid range, even + * if the container is empty. + * + * @note + * Components are in the reverse order as returned by the `begin`/`end` + * iterators. + * + * @tparam Comp Type of component in which one is interested. + * @return A pointer to the array of components. + */ + template + [[nodiscard]] Comp * raw() const ENTT_NOEXCEPT { + return std::get *>(pools)->raw(); + } + + /** + * @brief Direct access to the list of entities of a given pool. + * + * The returned pointer is such that range + * `[data(), data() + size()]` is always a valid range, + * even if the container is empty. + * + * @note + * Entities are in the reverse order as returned by the `begin`/`end` + * iterators. + * + * @tparam Comp Type of component in which one is interested. + * @return A pointer to the array of entities. + */ + template + [[nodiscard]] const entity_type * data() const ENTT_NOEXCEPT { + return std::get *>(pools)->data(); + } + + /** + * @brief Returns an iterator to the first entity of the view. + * + * The returned iterator points to the first entity of the view. If the view + * is empty, the returned iterator will be equal to `end()`. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the first entity of the view. + */ + [[nodiscard]] iterator begin() const { + return iterator{view->begin(), view->end(), view->begin(), unchecked(view), filter}; + } + + /** + * @brief Returns an iterator that is past the last entity of the view. + * + * The returned iterator points to the entity following the last entity of + * the view. 
Attempting to dereference the returned iterator results in + * undefined behavior. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the entity following the last entity of the view. + */ + [[nodiscard]] iterator end() const { + return iterator{view->begin(), view->end(), view->end(), unchecked(view), filter}; + } + + /** + * @brief Returns an iterator to the first entity of the reversed view. + * + * The returned iterator points to the first entity of the reversed view. If + * the view is empty, the returned iterator will be equal to `rend()`. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the first entity of the reversed view. + */ + [[nodiscard]] reverse_iterator rbegin() const { + return reverse_iterator{view->rbegin(), view->rend(), view->rbegin(), unchecked(view), filter}; + } + + /** + * @brief Returns an iterator that is past the last entity of the reversed + * view. + * + * The returned iterator points to the entity following the last entity of + * the reversed view. Attempting to dereference the returned iterator + * results in undefined behavior. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the entity following the last entity of the + * reversed view. + */ + [[nodiscard]] reverse_iterator rend() const { + return reverse_iterator{view->rbegin(), view->rend(), view->rend(), unchecked(view), filter}; + } + + /** + * @brief Returns the first entity of the view, if any. + * @return The first entity of the view if one exists, the null entity + * otherwise. + */ + [[nodiscard]] entity_type front() const { + const auto it = begin(); + return it != end() ? *it : null; + } + + /** + * @brief Returns the last entity of the view, if any. + * @return The last entity of the view if one exists, the null entity + * otherwise. 
+ */ + [[nodiscard]] entity_type back() const { + const auto it = rbegin(); + return it != rend() ? *it : null; + } + + /** + * @brief Finds an entity. + * @param entt A valid entity identifier. + * @return An iterator to the given entity if it's found, past the end + * iterator otherwise. + */ + [[nodiscard]] iterator find(const entity_type entt) const { + iterator it{view->begin(), view->end(), view->find(entt), unchecked(view), filter}; + return (it != end() && *it == entt) ? it : end(); + } + + /** + * @brief Checks if a view contains an entity. + * @param entt A valid entity identifier. + * @return True if the view contains the given entity, false otherwise. + */ + [[nodiscard]] bool contains(const entity_type entt) const { + return (std::get *>(pools)->contains(entt) && ...) + && (sizeof...(Exclude) == 0 || std::none_of(filter.begin(), filter.end(), [entt](const auto *cpool) { return cpool->contains(entt); })); + } + + /** + * @brief Returns the components assigned to the given entity. + * + * Prefer this function instead of `registry::get` during iterations. It has + * far better performance than its counterpart. + * + * @warning + * Attempting to use an invalid component type results in a compilation + * error. Attempting to use an entity that doesn't belong to the view + * results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * view doesn't contain the given entity. + * + * @tparam Comp Types of components to get. + * @param entt A valid entity identifier. + * @return The components assigned to the entity. + */ + template + [[nodiscard]] decltype(auto) get([[maybe_unused]] const entity_type entt) const { + ENTT_ASSERT(contains(entt)); + + if constexpr(sizeof...(Comp) == 0) { + static_assert(sizeof...(Component) == 1, "Invalid component type"); + return (std::get *>(pools)->get(entt), ...); + } else if constexpr(sizeof...(Comp) == 1) { + return (std::get *>(pools)->get(entt), ...); + } else { + return std::tuple({}))...>{get(entt)...}; + } + } + + /** + * @brief Iterates entities and components and applies the given function + * object to them. + * + * The function object is invoked for each entity. It is provided with the + * entity itself and a set of references to non-empty components. The + * _constness_ of the components is as requested.
+ * The signature of the function must be equivalent to one of the following + * forms: + * + * @code{.cpp} + * void(const entity_type, Type &...); + * void(Type &...); + * @endcode + * + * @note + * Empty types aren't explicitly instantiated and therefore they are never + * returned during iterations. + * + * @tparam Func Type of the function object to invoke. + * @param func A valid function object. + */ + template + void each(Func func) const { + view = candidate(); + ((std::get *>(pools) == view ? each(std::move(func)) : void()), ...); + } + + /** + * @brief Iterates entities and components and applies the given function + * object to them. + * + * The pool of the suggested component is used to lead the iterations. The + * returned entities will therefore respect the order of the pool associated + * with that type.
+ * It is no longer guaranteed that the performance is the best possible, but + * there will be greater control over the order of iteration. + * + * @sa each + * + * @tparam Comp Type of component to use to enforce the iteration order. + * @tparam Func Type of the function object to invoke. + * @param func A valid function object. + */ + template + void each(Func func) const { + using non_empty_type = type_list_cat_t, type_list<>, type_list>...>; + traverse(std::move(func), non_empty_type{}); + } + + /** + * @brief Returns an iterable object to use to _visit_ the view. + * + * The iterable object returns tuples that contain the current entity and a + * set of references to its non-empty components. The _constness_ of the + * components is as requested. + * + * @note + * Empty types aren't explicitly instantiated and therefore they are never + * returned during iterations. + * + * @return An iterable object to use to _visit_ the view. + */ + [[nodiscard]] auto proxy() const ENTT_NOEXCEPT { + view = candidate(); + return view_proxy{begin(), end(), pools}; + } + + /** + * @brief Returns an iterable object to use to _visit_ the view. + * + * The pool of the suggested component is used to lead the iterations. The + * returned elements will therefore respect the order of the pool associated + * with that type.
+ * It is no longer guaranteed that the performance is the best possible, but + * there will be greater control over the order of iteration. + * + * @sa each + * + * @tparam Comp Type of component to use to enforce the iteration order. + * @return An iterable object to use to _visit_ the view. + */ + template + [[nodiscard]] auto proxy() const ENTT_NOEXCEPT { + const sparse_set *cpool = std::get *>(pools); + iterator first{cpool->begin(), cpool->end(), cpool->begin(), unchecked(cpool), filter}; + iterator last{cpool->begin(), cpool->end(), cpool->end(), unchecked(cpool), filter}; + return view_proxy{std::move(first), std::move(last), pools}; + } + + /** + * @brief Chunked iteration for entities and components + * + * Chunked iteration tries to spot chunks in the sets of entities and + * components and return them one at a time along with their sizes.
+ * This type of iteration is intended where it's known a priori that the + * creation of entities and components takes place in chunk, which is + * actually quite common. In this case, various optimizations can be applied + * downstream to obtain even better performances from the views. + * + * The signature of the function must be equivalent to the following: + * + * @code{.cpp} + * void(const entity_type *, Type *..., size_type); + * @endcode + * + * The arguments are as follows: + * + * * A pointer to the entities belonging to the chunk. + * * Pointers to the components associated with the returned entities. + * * The length of the chunk. + * + * Note that the callback can be invoked 0 or more times and no guarantee is + * given on the order of the elements. + * + * @note + * Empty types aren't explicitly instantiated and therefore they are never + * returned during iterations. + * + * @tparam Func Type of the function object to invoke. + * @param func A valid function object. + */ + template + void chunked(Func func) const { + using non_empty_type = type_list_cat_t, type_list<>, type_list>...>; + view = candidate(); + iterate(std::move(func), non_empty_type{}); + } + +private: + const std::tuple *...> pools; + mutable const sparse_set* view; + filter_type filter; +}; + + +/** + * @brief Single component view specialization. + * + * Single component views are specialized in order to get a boost in terms of + * performance. This kind of views can access the underlying data structure + * directly and avoid superfluous checks. + * + * @b Important + * + * Iterators aren't invalidated if: + * + * * New instances of the given component are created and assigned to entities. + * * The entity currently pointed is modified (as an example, the given + * component is removed from the entity to which the iterator points). + * * The entity currently pointed is destroyed. 
+ * + * In all other cases, modifying the pool iterated by the view in any way + * invalidates all the iterators and using them results in undefined behavior. + * + * @note + * Views share a reference to the underlying data structure of the registry that + * generated them. Therefore any change to the entities and to the components + * made by means of the registry are immediately reflected by views. + * + * @warning + * Lifetime of a view must not overcome that of the registry that generated it. + * In any other case, attempting to use a view results in undefined behavior. + * + * @tparam Entity A valid entity type (see entt_traits for more details). + * @tparam Component Type of component iterated by the view. + */ +template +class basic_view, Component> { + /*! @brief A registry is allowed to create views. */ + friend class basic_registry; + + using pool_type = pool_t; + + class view_proxy { + friend class basic_view, Component>; + + class proxy_iterator { + friend class view_proxy; + + using it_type = std::conditional_t< + is_eto_eligible_v, + std::tuple::iterator>, + std::tuple::iterator, decltype(std::declval().begin())> + >; + + proxy_iterator(it_type from) ENTT_NOEXCEPT + : it{from} + {} + + public: + using difference_type = std::ptrdiff_t; + using value_type = std::conditional_t, std::tuple, std::tuple>; + using pointer = void; + using reference = std::conditional_t, std::tuple, std::tuple>; + using iterator_category = std::input_iterator_tag; + + proxy_iterator & operator++() ENTT_NOEXCEPT { + return std::apply([](auto &&... curr) { (++curr, ...); }, it), *this; + } + + proxy_iterator operator++(int) ENTT_NOEXCEPT { + proxy_iterator orig = *this; + return ++(*this), orig; + } + + [[nodiscard]] reference operator*() const ENTT_NOEXCEPT { + return std::apply([](auto &&... 
curr) { return reference{*curr...}; }, it); + } + + [[nodiscard]] bool operator==(const proxy_iterator &other) const ENTT_NOEXCEPT { + return std::get<0>(other.it) == std::get<0>(it); + } + + [[nodiscard]] bool operator!=(const proxy_iterator &other) const ENTT_NOEXCEPT { + return !(*this == other); + } + + private: + it_type it{}; + }; + + view_proxy(pool_type &ref) + : pool{&ref} + {} + + public: + using iterator = proxy_iterator; + + [[nodiscard]] iterator begin() const ENTT_NOEXCEPT { + if constexpr(is_eto_eligible_v) { + return proxy_iterator{std::make_tuple(pool->sparse_set::begin())}; + } else { + return proxy_iterator{std::make_tuple(pool->sparse_set::begin(), pool->begin())}; + } + } + + [[nodiscard]] iterator end() const ENTT_NOEXCEPT { + if constexpr(is_eto_eligible_v) { + return proxy_iterator{std::make_tuple(pool->sparse_set::end())}; + } else { + return proxy_iterator{std::make_tuple(pool->sparse_set::end(), pool->end())}; + } + } + + private: + pool_type *pool; + }; + + basic_view(pool_type &ref) ENTT_NOEXCEPT + : pool{&ref} + {} + +public: + /*! @brief Type of component iterated by the view. */ + using raw_type = Component; + /*! @brief Underlying entity identifier. */ + using entity_type = Entity; + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + /*! @brief Random access iterator type. */ + using iterator = typename sparse_set::iterator; + /*! @brief Reversed iterator type. */ + using reverse_iterator = typename sparse_set::reverse_iterator; + + /** + * @brief Returns the number of entities that have the given component. + * @return Number of entities that have the given component. + */ + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return pool->size(); + } + + /** + * @brief Checks whether a view is empty. + * @return True if the view is empty, false otherwise. + */ + [[nodiscard]] bool empty() const ENTT_NOEXCEPT { + return pool->empty(); + } + + /** + * @brief Direct access to the list of components. 
+ * + * The returned pointer is such that range `[raw(), raw() + size()]` is + * always a valid range, even if the container is empty. + * + * @note + * Components are in the reverse order as returned by the `begin`/`end` + * iterators. + * + * @return A pointer to the array of components. + */ + [[nodiscard]] raw_type * raw() const ENTT_NOEXCEPT { + return pool->raw(); + } + + /** + * @brief Direct access to the list of entities. + * + * The returned pointer is such that range `[data(), data() + size()]` is + * always a valid range, even if the container is empty. + * + * @note + * Entities are in the reverse order as returned by the `begin`/`end` + * iterators. + * + * @return A pointer to the array of entities. + */ + [[nodiscard]] const entity_type * data() const ENTT_NOEXCEPT { + return pool->data(); + } + + /** + * @brief Returns an iterator to the first entity of the view. + * + * The returned iterator points to the first entity of the view. If the view + * is empty, the returned iterator will be equal to `end()`. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the first entity of the view. + */ + [[nodiscard]] iterator begin() const ENTT_NOEXCEPT { + return pool->sparse_set::begin(); + } + + /** + * @brief Returns an iterator that is past the last entity of the view. + * + * The returned iterator points to the entity following the last entity of + * the view. Attempting to dereference the returned iterator results in + * undefined behavior. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the entity following the last entity of the view. + */ + [[nodiscard]] iterator end() const ENTT_NOEXCEPT { + return pool->sparse_set::end(); + } + + /** + * @brief Returns an iterator to the first entity of the reversed view. + * + * The returned iterator points to the first entity of the reversed view. 
If + * the view is empty, the returned iterator will be equal to `rend()`. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the first entity of the reversed view. + */ + [[nodiscard]] reverse_iterator rbegin() const ENTT_NOEXCEPT { + return pool->sparse_set::rbegin(); + } + + /** + * @brief Returns an iterator that is past the last entity of the reversed + * view. + * + * The returned iterator points to the entity following the last entity of + * the reversed view. Attempting to dereference the returned iterator + * results in undefined behavior. + * + * @note + * Iterators stay true to the order imposed to the underlying data + * structures. + * + * @return An iterator to the entity following the last entity of the + * reversed view. + */ + [[nodiscard]] reverse_iterator rend() const ENTT_NOEXCEPT { + return pool->sparse_set::rend(); + } + + /** + * @brief Returns the first entity of the view, if any. + * @return The first entity of the view if one exists, the null entity + * otherwise. + */ + [[nodiscard]] entity_type front() const { + const auto it = begin(); + return it != end() ? *it : null; + } + + /** + * @brief Returns the last entity of the view, if any. + * @return The last entity of the view if one exists, the null entity + * otherwise. + */ + [[nodiscard]] entity_type back() const { + const auto it = rbegin(); + return it != rend() ? *it : null; + } + + /** + * @brief Finds an entity. + * @param entt A valid entity identifier. + * @return An iterator to the given entity if it's found, past the end + * iterator otherwise. + */ + [[nodiscard]] iterator find(const entity_type entt) const { + const auto it = pool->find(entt); + return it != end() && *it == entt ? it : end(); + } + + /** + * @brief Returns the identifier that occupies the given position. + * @param pos Position of the element to return. + * @return The identifier that occupies the given position. 
+ */ + [[nodiscard]] entity_type operator[](const size_type pos) const { + return begin()[pos]; + } + + /** + * @brief Checks if a view contains an entity. + * @param entt A valid entity identifier. + * @return True if the view contains the given entity, false otherwise. + */ + [[nodiscard]] bool contains(const entity_type entt) const { + return pool->contains(entt); + } + + /** + * @brief Returns the component assigned to the given entity. + * + * Prefer this function instead of `registry::get` during iterations. It has + * far better performance than its counterpart. + * + * @warning + * Attempting to use an entity that doesn't belong to the view results in + * undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * view doesn't contain the given entity. + * + * @param entt A valid entity identifier. + * @return The component assigned to the entity. + */ + template + [[nodiscard]] decltype(auto) get(const entity_type entt) const { + static_assert(std::is_same_v, "Invalid component type"); + ENTT_ASSERT(contains(entt)); + return pool->get(entt); + } + + /** + * @brief Iterates entities and components and applies the given function + * object to them. + * + * The function object is invoked for each entity. It is provided with the + * entity itself and a reference to the component if it's a non-empty one. + * The _constness_ of the component is as requested.
+ * The signature of the function must be equivalent to one of the following + * forms: + * + * @code{.cpp} + * void(const entity_type, Component &); + * void(Component &); + * @endcode + * + * @note + * Empty types aren't explicitly instantiated and therefore they are never + * returned during iterations. + * + * @tparam Func Type of the function object to invoke. + * @param func A valid function object. + */ + template + void each(Func func) const { + if constexpr(is_eto_eligible_v) { + if constexpr(std::is_invocable_v) { + for(auto pos = pool->size(); pos; --pos) { + func(); + } + } else { + for(const auto entt: *this) { + func(entt); + } + } + } else { + if constexpr(std::is_invocable_v) { + for(auto &&component: *pool) { + func(component); + } + } else { + auto raw = pool->begin(); + + for(const auto entt: *this) { + func(entt, *(raw++)); + } + } + } + } + + /** + * @brief Returns an iterable object to use to _visit_ the view. + * + * The iterable object returns tuples that contain the current entity and a + * reference to its component if it's a non-empty one. The _constness_ of + * the component is as requested. + * + * @note + * Empty types aren't explicitly instantiated and therefore they are never + * returned during iterations. + * + * @return An iterable object to use to _visit_ the view. + */ + [[nodiscard]] auto proxy() const ENTT_NOEXCEPT { + return view_proxy{*pool}; + } + +private: + pool_type *pool; +}; + + +} + + +#endif + + + +namespace entt { + + +/** + * @brief Fast and reliable entity-component system. + * + * The registry is the core class of the entity-component framework.
+ * It stores entities and arranges pools of components on a per request basis. + * By means of a registry, users can manage entities and components, then create + * views or groups to iterate them. + * + * @tparam Entity A valid entity type (see entt_traits for more details). + */ +template +class basic_registry { + using traits_type = entt_traits; + + template + struct pool_handler final: storage { + static_assert(std::is_same_v>, "Invalid component type"); + + [[nodiscard]] auto on_construct() ENTT_NOEXCEPT { + return sink{construction}; + } + + [[nodiscard]] auto on_update() ENTT_NOEXCEPT { + return sink{update}; + } + + [[nodiscard]] auto on_destroy() ENTT_NOEXCEPT { + return sink{destruction}; + } + + template + decltype(auto) emplace(basic_registry &owner, const Entity entt, Args &&... args) { + storage::emplace(entt, std::forward(args)...); + construction.publish(owner, entt); + + if constexpr(!is_eto_eligible_v) { + return this->get(entt); + } + } + + template + void insert(basic_registry &owner, It first, It last, Args &&... args) { + storage::insert(first, last, std::forward(args)...); + + if(!construction.empty()) { + while(first != last) { construction.publish(owner, *(first++)); } + } + } + + void remove(basic_registry &owner, const Entity entt) { + destruction.publish(owner, entt); + this->erase(entt); + } + + template + void remove(basic_registry &owner, It first, It last) { + if(std::distance(first, last) == std::distance(this->begin(), this->end())) { + if(!destruction.empty()) { + while(first != last) { destruction.publish(owner, *(first++)); } + } + + this->clear(); + } else { + while(first != last) { this->remove(owner, *(first++)); } + } + } + + template + decltype(auto) patch(basic_registry &owner, const Entity entt, [[maybe_unused]] Func &&... 
func) { + if constexpr(is_eto_eligible_v) { + update.publish(owner, entt); + } else { + (std::forward(func)(this->get(entt)), ...); + update.publish(owner, entt); + return this->get(entt); + } + } + + decltype(auto) replace(basic_registry &owner, const Entity entt, Component component) { + return patch(owner, entt, [&component](auto &&curr) { curr = std::move(component); }); + } + + private: + sigh construction{}; + sigh destruction{}; + sigh update{}; + }; + + struct pool_data { + id_type type_id{}; + std::unique_ptr> pool{}; + void(* remove)(sparse_set &, basic_registry &, const Entity){}; + }; + + template + struct group_handler; + + template + struct group_handler, get_t, Owned...> { + static_assert(std::conjunction_v>..., std::is_same>..., std::is_same>...>, "One or more component types are invalid"); + std::conditional_t, std::size_t> current{}; + + template + void maybe_valid_if(basic_registry &owner, const Entity entt) { + [[maybe_unused]] const auto cpools = std::forward_as_tuple(owner.assure()...); + + const auto is_valid = ((std::is_same_v || std::get &>(cpools).contains(entt)) && ...) + && ((std::is_same_v || owner.assure().contains(entt)) && ...) 
+ && ((std::is_same_v || !owner.assure().contains(entt)) && ...); + + if constexpr(sizeof...(Owned) == 0) { + if(is_valid && !current.contains(entt)) { + current.emplace(entt); + } + } else { + if(is_valid && !(std::get<0>(cpools).index(entt) < current)) { + const auto pos = current++; + (std::get &>(cpools).swap(std::get &>(cpools).data()[pos], entt), ...); + } + } + } + + void discard_if([[maybe_unused]] basic_registry &owner, const Entity entt) { + if constexpr(sizeof...(Owned) == 0) { + if(current.contains(entt)) { + current.erase(entt); + } + } else { + if(const auto cpools = std::forward_as_tuple(owner.assure()...); std::get<0>(cpools).contains(entt) && (std::get<0>(cpools).index(entt) < current)) { + const auto pos = --current; + (std::get &>(cpools).swap(std::get &>(cpools).data()[pos], entt), ...); + } + } + } + }; + + struct group_data { + std::size_t size; + std::unique_ptr group; + bool (* owned)(const id_type) ENTT_NOEXCEPT; + bool (* get)(const id_type) ENTT_NOEXCEPT; + bool (* exclude)(const id_type) ENTT_NOEXCEPT; + }; + + struct variable_data { + id_type type_id; + std::unique_ptr value; + }; + + template + [[nodiscard]] const pool_handler & assure() const { + const sparse_set *cpool; + + if constexpr(ENTT_FAST_PATH(has_type_index_v)) { + const auto index = type_index::value(); + + if(!(index < pools.size())) { + pools.resize(size_type(index+1u)); + } + + if(auto &&pdata = pools[index]; !pdata.pool) { + pdata.type_id = type_info::id(); + pdata.pool.reset(new pool_handler()); + pdata.remove = [](sparse_set &target, basic_registry &owner, const entity_type entt) { + static_cast &>(target).remove(owner, entt); + }; + } + + cpool = pools[index].pool.get(); + } else { + if(const auto it = std::find_if(pools.cbegin(), pools.cend(), [id = type_info::id()](const auto &pdata) { return id == pdata.type_id; }); it == pools.cend()) { + cpool = pools.emplace_back(pool_data{ + type_info::id(), + std::unique_ptr>{new pool_handler()}, + [](sparse_set &target, 
basic_registry &owner, const entity_type entt) { + static_cast &>(target).remove(owner, entt); + } + }).pool.get(); + } else { + cpool = it->pool.get(); + } + } + + return *static_cast *>(cpool); + } + + template + [[nodiscard]] pool_handler & assure() { + return const_cast &>(std::as_const(*this).template assure()); + } + +public: + /*! @brief Underlying entity identifier. */ + using entity_type = Entity; + /*! @brief Underlying version type. */ + using version_type = typename traits_type::version_type; + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + + /*! @brief Default constructor. */ + basic_registry() = default; + + /*! @brief Default move constructor. */ + basic_registry(basic_registry &&) = default; + + /*! @brief Default move assignment operator. @return This registry. */ + basic_registry & operator=(basic_registry &&) = default; + + /** + * @brief Prepares a pool for the given type if required. + * @tparam Component Type of component for which to prepare a pool. + */ + template + void prepare() { + // suppress the warning due to the [[nodiscard]] attribute + static_cast(assure()); + } + + /** + * @brief Returns the number of existing components of the given type. + * @tparam Component Type of component of which to return the size. + * @return Number of existing components of the given type. + */ + template + [[nodiscard]] size_type size() const { + return assure().size(); + } + + /** + * @brief Returns the number of entities created so far. + * @return Number of entities created so far. + */ + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return entities.size(); + } + + /** + * @brief Returns the number of entities still in use. + * @return Number of entities still in use. 
+ */ + [[nodiscard]] size_type alive() const { + auto sz = entities.size(); + auto curr = destroyed; + + for(; curr != null; --sz) { + curr = entities[to_integral(curr) & traits_type::entity_mask]; + } + + return sz; + } + + /** + * @brief Increases the capacity of the registry or of the pools for the + * given components. + * + * If no components are specified, the capacity of the registry is + * increased, that is the number of entities it contains. Otherwise the + * capacity of the pools for the given components is increased.
+ * In both cases, if the new capacity is greater than the current capacity, + * new storage is allocated, otherwise the method does nothing. + * + * @tparam Component Types of components for which to reserve storage. + * @param cap Desired capacity. + */ + template + void reserve(const size_type cap) { + if constexpr(sizeof...(Component) == 0) { + entities.reserve(cap); + } else { + (assure().reserve(cap), ...); + } + } + + /** + * @brief Returns the capacity of the pool for the given component. + * @tparam Component Type of component in which one is interested. + * @return Capacity of the pool of the given component. + */ + template + [[nodiscard]] size_type capacity() const { + return assure().capacity(); + } + + /** + * @brief Returns the number of entities that a registry has currently + * allocated space for. + * @return Capacity of the registry. + */ + [[nodiscard]] size_type capacity() const ENTT_NOEXCEPT { + return entities.capacity(); + } + + /** + * @brief Requests the removal of unused capacity for the given components. + * @tparam Component Types of components for which to reclaim unused + * capacity. + */ + template + void shrink_to_fit() { + (assure().shrink_to_fit(), ...); + } + + /** + * @brief Checks whether the registry or the pools of the given components + * are empty. + * + * A registry is considered empty when it doesn't contain entities that are + * still in use. + * + * @tparam Component Types of components in which one is interested. + * @return True if the registry or the pools of the given components are + * empty, false otherwise. + */ + template + [[nodiscard]] bool empty() const { + if constexpr(sizeof...(Component) == 0) { + return !alive(); + } else { + return (assure().empty() && ...); + } + } + + /** + * @brief Direct access to the list of components of a given pool. + * + * The returned pointer is such that range + * `[raw(), raw() + size()]` is always a + * valid range, even if the container is empty. 
+ * + * Components are in the reverse order as imposed by the sorting + * functionalities. + * + * @note + * Empty components aren't explicitly instantiated. Therefore, this function + * isn't available for them. A compilation error will occur if invoked. + * + * @tparam Component Type of component in which one is interested. + * @return A pointer to the array of components of the given type. + */ + template + [[nodiscard]] const Component * raw() const { + return assure().raw(); + } + + /*! @copydoc raw */ + template + [[nodiscard]] Component * raw() { + return const_cast(std::as_const(*this).template raw()); /* reuse the const overload and drop constness from its result */ + } + + /** + * @brief Direct access to the list of entities of a given pool. + * + * The returned pointer is such that the half-open range + * `[data(), data() + size())` is always a + * valid range, even if the container is empty. + * + * Entities are in the reverse order as imposed by the sorting + * functionalities. + * + * @tparam Component Type of component in which one is interested. + * @return A pointer to the array of entities of the given pool. + */ + template + [[nodiscard]] const entity_type * data() const { + return assure().data(); + } + + /** + * @brief Direct access to the list of entities of a registry. + * + * The returned pointer is such that range `[data(), data() + size())` is + * always a valid half-open range, even if the container is empty. + * + * @warning + * This list contains both valid and destroyed entities and isn't suitable + * for direct use. + * + * @return A pointer to the array of entities. + */ + [[nodiscard]] const entity_type * data() const ENTT_NOEXCEPT { + return entities.data(); + } + + /** + * @brief Checks if an entity identifier refers to a valid entity. + * @param entity An entity identifier, either valid or not. + * @return True if the identifier is valid, false otherwise.
+ */ + [[nodiscard]] bool valid(const entity_type entity) const { + const auto pos = size_type(to_integral(entity) & traits_type::entity_mask); /* position of the entity in the list, version stripped */ + return (pos < entities.size() && entities[pos] == entity); /* the slot must exist and hold exactly this identifier, version included */ + } + + /** + * @brief Returns the entity identifier without the version. + * @param entity An entity identifier, either valid or not. + * @return The entity identifier without the version (only the bits selected + * by `traits_type::entity_mask` are kept). + */ + [[nodiscard]] static entity_type entity(const entity_type entity) ENTT_NOEXCEPT { + return entity_type{to_integral(entity) & traits_type::entity_mask}; + } + + /** + * @brief Returns the version stored along with an entity identifier. + * @param entity An entity identifier, either valid or not. + * @return The version stored along with the given entity identifier (the + * identifier shifted right by `traits_type::entity_shift`). + */ + [[nodiscard]] static version_type version(const entity_type entity) ENTT_NOEXCEPT { + return version_type(to_integral(entity) >> traits_type::entity_shift); + } + + /** + * @brief Returns the actual version for an entity identifier. + * + * @warning + * Attempting to use an entity that doesn't belong to the registry results + * in undefined behavior. An entity belongs to the registry even if it has + * been previously destroyed and/or recycled.
+ * An assertion will abort the execution at runtime in debug mode if the + * registry doesn't own the given entity. + * + * @param entity A valid entity identifier. + * @return Actual version for the given entity identifier. + */ + [[nodiscard]] version_type current(const entity_type entity) const { + const auto pos = size_type(to_integral(entity) & traits_type::entity_mask); + ENTT_ASSERT(pos < entities.size()); + return version_type(to_integral(entities[pos]) >> traits_type::entity_shift); /* version read from the slot, not from the argument */ + } + + /** + * @brief Creates a new entity and returns it. + * + * There are two kinds of possible entity identifiers: + * + * * Newly created ones in case there are no destroyed entities to recycle. + * * Recycled ones with updated versions. + * + * @return A valid entity identifier. + */ + entity_type create() { + entity_type entt; + + if(destroyed == null) { + entt = entities.emplace_back(entity_type{static_cast(entities.size())}); + // traits_type::entity_mask is reserved to allow for null identifiers + ENTT_ASSERT(to_integral(entt) < traits_type::entity_mask); + } else { + const auto curr = to_integral(destroyed); /* recycle the head of the list of destroyed entities */ + const auto version = to_integral(entities[curr]) & (traits_type::version_mask << traits_type::entity_shift); /* keep the version stored in the slot */ + destroyed = entity_type{to_integral(entities[curr]) & traits_type::entity_mask}; /* the slot stores the next destroyed entity in its entity bits */ + entt = entities[curr] = entity_type{curr | version}; + } + + return entt; + } + + /** + * @brief Creates a new entity and returns it. + * + * @sa create + * + * If the requested entity isn't in use, the suggested identifier is created + * and returned. Otherwise, a new one will be generated for this purpose. + * + * @param hint A desired entity identifier. + * @return A valid entity identifier.
+ */ + [[nodiscard]] entity_type create(const entity_type hint) { + ENTT_ASSERT(hint != null); + entity_type entt; + + if(const auto req = (to_integral(hint) & traits_type::entity_mask); !(req < entities.size())) { /* requested position is past the end of the list */ + entities.reserve(req + 1); + + for(auto pos = entities.size(); pos < req; ++pos) { /* every slot before the requested one joins the list of destroyed entities */ + entities.emplace_back(destroyed); + destroyed = entity_type{static_cast(pos)}; + } + + entt = entities.emplace_back(hint); + } else if(const auto curr = (to_integral(entities[req]) & traits_type::entity_mask); req == curr) { /* the slot refers to itself, that is, it is in use */ + entt = create(); /* fall back to a regular creation */ + } else { + auto *it = &destroyed; /* the slot is destroyed: find the link that points to it */ + for(; (to_integral(*it) & traits_type::entity_mask) != req; it = &entities[to_integral(*it) & traits_type::entity_mask]); + *it = entity_type{curr | (to_integral(*it) & (traits_type::version_mask << traits_type::entity_shift))}; /* unlink the slot, the version bits of the link are preserved */ + entt = entities[req] = hint; + } + + return entt; + } + + /** + * @brief Assigns each element in a range an entity. + * + * Every element is set to the result of a call to the overload of `create` + * that takes no arguments. + * + * @sa create + * + * @tparam It Type of forward iterator. + * @param first An iterator to the first element of the range to generate. + * @param last An iterator past the last element of the range to generate. + */ + template + void create(It first, It last) { + std::generate(first, last, [this]() { return create(); }); + } + + /** + * @brief Assigns entities to an empty registry. + * + * This function is intended for use in conjunction with `raw`.
+ * Don't try to inject ranges of randomly generated entities. There is no + * guarantee that a registry will continue to work properly in this case. + * + * @warning + * An assertion will abort the execution at runtime in debug mode if all + * pools aren't empty. + * + * @tparam It Type of input iterator. + * @param first An iterator to the first element of the range of entities. + * @param last An iterator past the last element of the range of entities. + */ + template + void assign(It first, It last) { + ENTT_ASSERT(std::all_of(pools.cbegin(), pools.cend(), [](auto &&pdata) { return !pdata.pool || pdata.pool->empty(); })); /* every pool must be either unused or empty */ + entities.assign(first, last); + destroyed = null; /* the list of destroyed entities is rebuilt from scratch below */ + + for(std::size_t pos{}, end = entities.size(); pos < end; ++pos) { + if((to_integral(entities[pos]) & traits_type::entity_mask) != pos) { /* a slot that doesn't refer to itself is treated as destroyed */ + const auto version = to_integral(entities[pos]) & (traits_type::version_mask << traits_type::entity_shift); + entities[pos] = entity_type{to_integral(destroyed) | version}; /* link to the previous head, keep the version */ + destroyed = entity_type{static_cast(pos)}; + } + } + } + + /** + * @brief Destroys an entity. + * + * When an entity is destroyed, its version is increased by one and the + * identifier can be recycled at any time. + * + * @sa remove_all + * + * @param entity A valid entity identifier. + */ + void destroy(const entity_type entity) { + destroy(entity, version_type((to_integral(entity) >> traits_type::entity_shift) + 1)); + } + + /** + * @brief Destroys an entity. + * + * If the entity isn't already destroyed, the suggested version is used + * instead of the implicitly generated one. + * + * @sa remove_all + * + * @param entity A valid entity identifier. + * @param version A desired version upon destruction.
+ */ + void destroy(const entity_type entity, const version_type version) { + remove_all(entity); + // lengthens the implicit list of destroyed entities + const auto entt = to_integral(entity) & traits_type::entity_mask; + entities[entt] = entity_type{to_integral(destroyed) | (typename traits_type::entity_type{version} << traits_type::entity_shift)}; + destroyed = entity_type{entt}; + } + + /** + * @brief Destroys all the entities in a range, one at a time. + * + * Each entity is passed to the overload of `destroy` that takes only the + * entity, hence its version is the implicitly generated one. + * + * @sa destroy + * + * @tparam It Type of input iterator. + * @param first An iterator to the first element of the range of entities. + * @param last An iterator past the last element of the range of entities. + */ + template + void destroy(It first, It last) { + while(first != last) { destroy(*(first++)); } + } + + /** + * @brief Assigns the given component to an entity. + * + * A new instance of the given component is created and initialized with the + * arguments provided (the component must have a proper constructor or be of + * aggregate type). Then the component is assigned to the given entity. + * + * @warning + * Attempting to use an invalid entity or to assign a component to an entity + * that already owns it results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode in case of + * invalid entity or if the entity already owns an instance of the given + * component. + * + * @tparam Component Type of component to create. + * @tparam Args Types of arguments to use to construct the component. + * @param entity A valid entity identifier. + * @param args Parameters to use to initialize the component. + * @return A reference to the newly created component. + */ + template + decltype(auto) emplace(const entity_type entity, Args &&... args) { + ENTT_ASSERT(valid(entity)); + return assure().emplace(*this, entity, std::forward(args)...); /* the registry itself is handed to the pool along with the entity */ + } + + /** + * @brief Assigns each entity in a range the given component. + * + * @sa emplace + * + * @tparam Component Type of component to create. + * @tparam It Type of iterator. The range is also traversed by the assertion + * in debug mode, therefore it should support multiple passes. + * @param first An iterator to the first element of the range of entities. + * @param last An iterator past the last element of the range of entities. + * @param value An instance of the component to assign (value-initialized if + * omitted). + */ + template + void insert(It first, It last, const Component &value = {}) { + ENTT_ASSERT(std::all_of(first, last, [this](const auto entity) { return valid(entity); })); + assure().insert(*this, first, last, value); + } + + /** + * @brief Assigns each entity in a range the given components. + * + * @sa emplace + * + * @tparam Component Type of component to create. + * @tparam EIt Type of input iterator. + * @tparam CIt Type of input iterator. + * @param first An iterator to the first element of the range of entities. + * @param last An iterator past the last element of the range of entities. + * @param from An iterator to the first element of the range of components. + * @param to An iterator past the last element of the range of components.
+ */ + template + void insert(EIt first, EIt last, CIt from, CIt to) { + static_assert(std::is_constructible_v::value_type>, "Invalid value type"); /* compile-time check on the value type of the range of components */ + ENTT_ASSERT(std::all_of(first, last, [this](const auto entity) { return valid(entity); })); /* debug only: walks the whole range of entities before the insertion */ + assure().insert(*this, first, last, from, to); + } + + /** + * @brief Assigns or replaces the given component for an entity. + * + * Equivalent to the following snippet (pseudocode): + * + * @code{.cpp} + * auto &component = registry.has(entity) ? registry.replace(entity, args...) : registry.emplace(entity, args...); + * @endcode + * + * Prefer this function anyway because it has slightly better performance. + * + * @warning + * Attempting to use an invalid entity results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode in case of + * invalid entity. + * + * @tparam Component Type of component to assign or replace. + * @tparam Args Types of arguments to use to construct the component. + * @param entity A valid entity identifier. + * @param args Parameters to use to initialize the component. + * @return A reference to the newly created component. + */ + template + decltype(auto) emplace_or_replace(const entity_type entity, Args &&... args) { + ENTT_ASSERT(valid(entity)); + auto &cpool = assure(); /* look the pool up once and use it for both the check and the update */ + + return cpool.contains(entity) + ? cpool.replace(*this, entity, Component{std::forward(args)...}) /* already owned: a new instance is built and passed to replace */ + : cpool.emplace(*this, entity, std::forward(args)...); /* not owned: the arguments are forwarded to emplace */ + } + + /** + * @brief Patches the given component for an entity. + * + * The signature of the functions should be equivalent to the following: + * + * @code{.cpp} + * void(Component &); + * @endcode + * + * @note + * Empty types aren't explicitly instantiated and therefore they are never + * returned. However, this function can be used to trigger an update signal + * for them. + * + * @warning + * Attempting to use an invalid entity or to patch a component of an entity + * that doesn't own it results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode in case of + * invalid entity or if the entity doesn't own an instance of the given + * component. + * + * @tparam Component Type of component to patch. + * @tparam Func Types of the function objects to invoke. + * @param entity A valid entity identifier. + * @param func Valid function objects. + * @return A reference to the patched component. + */ + template + decltype(auto) patch(const entity_type entity, Func &&... func) { + ENTT_ASSERT(valid(entity)); + return assure().patch(*this, entity, std::forward(func)...); /* the function objects are forwarded to the pool, which is in charge of the update */ + } + + /** + * @brief Replaces the given component for an entity. + * + * A new instance of the given component is created and initialized with the + * arguments provided (the component must have a proper constructor or be of + * aggregate type). Then the component is assigned to the given entity. + * + * @warning + * Attempting to use an invalid entity or to replace a component of an + * entity that doesn't own it results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode in case of + * invalid entity or if the entity doesn't own an instance of the given + * component. + * + * @tparam Component Type of component to replace. + * @tparam Args Types of arguments to use to construct the component. + * @param entity A valid entity identifier. + * @param args Parameters to use to initialize the component. + * @return A reference to the component being replaced. + */ + template + decltype(auto) replace(const entity_type entity, Args &&... args) { + return assure().replace(*this, entity, Component{std::forward(args)...}); + } + + /** + * @brief Removes the given components from an entity. + * + * @warning + * Attempting to use an invalid entity or to remove a component from an + * entity that doesn't own it results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode in case of + * invalid entity or if the entity doesn't own an instance of the given + * component. + * + * @tparam Component Types of components to remove. + * @param entity A valid entity identifier. + */ + template + void remove(const entity_type entity) { + ENTT_ASSERT(valid(entity)); + (assure().remove(*this, entity), ...); /* one removal per given component */ + } + + /** + * @brief Removes the given components from all the entities in a range. + * + * @sa remove + * + * @tparam Component Types of components to remove. + * @tparam It Type of iterator. The range is traversed once per component + * (and once more by the assertion in debug mode), therefore it should + * support multiple passes. + * @param first An iterator to the first element of the range of entities. + * @param last An iterator past the last element of the range of entities. + */ + template + void remove(It first, It last) { + ENTT_ASSERT(std::all_of(first, last, [this](const auto entity) { return valid(entity); })); + (assure().remove(*this, first, last), ...); + } + + /** + * @brief Removes the given components from an entity. + * + * Equivalent to the following snippet (pseudocode): + * + * @code{.cpp} + * if(registry.has(entity)) { registry.remove(entity) } + * @endcode + * + * Prefer this function anyway because it has slightly better performance. + * + * @warning + * Attempting to use an invalid entity results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode in case of + * invalid entity. + * + * @tparam Component Types of components to remove. + * @param entity A valid entity identifier. + * @return The number of components actually removed. + */ + template + size_type remove_if_exists(const entity_type entity) { + ENTT_ASSERT(valid(entity)); + + return ([this, entity](auto &&cpool) { + return cpool.contains(entity) ? (cpool.remove(*this, entity), true) : false; /* true only if this pool actually removed the component */ + }(assure()) + ... + size_type{}); /* the boolean results are summed up starting from zero */ + } + + /** + * @brief Removes all the components from an entity and makes it orphaned. + * + * @warning + * In case there are listeners that observe the destruction of components + * and assign other components to the entity in their bodies, the result of + * invoking this function may not be as expected. In the worst case, it + * could lead to undefined behavior. + * + * @warning + * Attempting to use an invalid entity results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode in case of + * invalid entity. + * + * @param entity A valid entity identifier. + */ + void remove_all(const entity_type entity) { + ENTT_ASSERT(valid(entity)); + + for(auto pos = pools.size(); pos; --pos) { /* visit the pools from the last one to the first one */ + if(auto &pdata = pools[pos-1]; pdata.pool && pdata.pool->contains(entity)) { /* skip unused slots and pools that don't contain the entity */ + pdata.remove(*pdata.pool, *this, entity); /* removal goes through the function stored along with the pool */ + } + } + } + + /** + * @brief Checks if an entity has all the given components. + * + * @warning + * Attempting to use an invalid entity results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode in case of + * invalid entity. + * + * @tparam Component Components for which to perform the check. + * @param entity A valid entity identifier. + * @return True if the entity has all the components, false otherwise. + */ + template + [[nodiscard]] bool has(const entity_type entity) const { + ENTT_ASSERT(valid(entity)); + return (assure().contains(entity) && ...); /* fold over the components: an empty list yields true */ + } + + /** + * @brief Checks if an entity has at least one of the given components. + * + * @warning + * Attempting to use an invalid entity results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode in case of + * invalid entity. + * + * @tparam Component Components for which to perform the check. + * @param entity A valid entity identifier. + * @return True if the entity has at least one of the given components, + * false otherwise. + */ + template + [[nodiscard]] bool any(const entity_type entity) const { + ENTT_ASSERT(valid(entity)); + return (assure().contains(entity) || ...); /* fold over the components: an empty list yields false */ + } + + /** + * @brief Returns references to the given components for an entity. + * + * @warning + * Attempting to use an invalid entity or to get a component from an entity + * that doesn't own it results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode in case of + * invalid entity or if the entity doesn't own an instance of the given + * component. + * + * @tparam Component Types of components to get. + * @param entity A valid entity identifier. + * @return References to the components owned by the entity. + */ + template + [[nodiscard]] decltype(auto) get([[maybe_unused]] const entity_type entity) const { + ENTT_ASSERT(valid(entity)); + + if constexpr(sizeof...(Component) == 1) { + return (assure().get(entity), ...); /* single component: what the pool returns is passed through, no tuple involved */ + } else { + return std::forward_as_tuple(get(entity)...); /* otherwise: one single-component lookup per type, packed in a tuple */ + } + } + + /*! @copydoc get */ + template + [[nodiscard]] decltype(auto) get([[maybe_unused]] const entity_type entity) { + ENTT_ASSERT(valid(entity)); + + if constexpr(sizeof...(Component) == 1) { + return (assure().get(entity), ...); /* single component: what the pool returns is passed through, no tuple involved */ + } else { + return std::forward_as_tuple(get(entity)...); /* otherwise: one single-component lookup per type, packed in a tuple */ + } + } + + /** + * @brief Returns a reference to the given component for an entity. + * + * In case the entity doesn't own the component, the parameters provided are + * used to construct it.
+ * Equivalent to the following snippet (pseudocode): + * + * @code{.cpp} + * auto &component = registry.has(entity) ? registry.get(entity) : registry.emplace(entity, args...); + * @endcode + * + * Prefer this function anyway because it has slightly better performance. + * + * @warning + * Attempting to use an invalid entity results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode in case of + * invalid entity. + * + * @tparam Component Type of component to get. + * @tparam Args Types of arguments to use to construct the component. + * @param entity A valid entity identifier. + * @param args Parameters to use to initialize the component. + * @return Reference to the component owned by the entity. + */ + template + [[nodiscard]] decltype(auto) get_or_emplace(const entity_type entity, Args &&... args) { + ENTT_ASSERT(valid(entity)); + auto &cpool = assure(); /* look the pool up once and use it for both the check and the access */ + return cpool.contains(entity) ? cpool.get(entity) : cpool.emplace(*this, entity, std::forward(args)...); /* the arguments are used only when the component has to be created */ + } + + /** + * @brief Returns pointers to the given components for an entity. + * + * @warning + * Attempting to use an invalid entity results in undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode in case of + * invalid entity. + * + * @tparam Component Types of components to get. + * @param entity A valid entity identifier. + * @return Pointers to the components owned by the entity. + */ + template + [[nodiscard]] auto try_get([[maybe_unused]] const entity_type entity) const { + ENTT_ASSERT(valid(entity)); + + if constexpr(sizeof...(Component) == 1) { + return (assure().try_get(entity), ...); /* single component: the result of the pool is returned as is */ + } else { + return std::make_tuple(try_get(entity)...); /* otherwise: one single-component lookup per type, packed in a tuple */ + } + } + + /*! @copydoc try_get */ + template + [[nodiscard]] auto try_get([[maybe_unused]] const entity_type entity) { + ENTT_ASSERT(valid(entity)); + + if constexpr(sizeof...(Component) == 1) { + return (assure().try_get(entity), ...); /* single component: the result of the pool is returned as is */ + } else { + return std::make_tuple(try_get(entity)...); /* otherwise: one single-component lookup per type, packed in a tuple */ + } + } + + /** + * @brief Clears a whole registry or the pools for the given components. + * @tparam Component Types of components to remove from their entities (if none is given, every entity still in use is destroyed instead). + */ + template + void clear() { + if constexpr(sizeof...(Component) == 0) { + // useless this-> used to suppress a warning with clang + each([this](const auto entity) { this->destroy(entity); }); + } else { + ([this](auto &&cpool) { + cpool.remove(*this, cpool.sparse_set::begin(), cpool.sparse_set::end()); + }(assure()), ...); + } + } + + /** + * @brief Iterates all the entities that are still in use. + * + * The function object is invoked for each entity that is still in use.
+ * The signature of the function should be equivalent to the following: + * + * @code{.cpp} + * void(const Entity); + * @endcode + * + * This function is fairly slow and should not be used frequently. However, + * it's useful for iterating all the entities still in use, regardless of + * their components. + * + * @tparam Func Type of the function object to invoke. + * @param func A valid function object. + */ + template + void each(Func func) const { + if(destroyed == null) { /* no destroyed entities: every slot is in use and the per-slot check can be skipped */ + for(auto pos = entities.size(); pos; --pos) { /* from the last slot to the first one */ + func(entities[pos-1]); + } + } else { + for(auto pos = entities.size(); pos; --pos) { + if(const auto entt = entities[pos - 1]; (to_integral(entt) & traits_type::entity_mask) == (pos - 1)) { /* a slot is in use when it refers to its own position */ + func(entt); + } + } + } + } + + /** + * @brief Checks if an entity is an orphan, that is, if it has no components assigned. + * @param entity A valid entity identifier. + * @return True if the entity has no components assigned, false otherwise. + */ + [[nodiscard]] bool orphan(const entity_type entity) const { + ENTT_ASSERT(valid(entity)); + return std::none_of(pools.cbegin(), pools.cend(), [entity](auto &&pdata) { return pdata.pool && pdata.pool->contains(entity); }); /* unused pool slots are ignored */ + } + + /** + * @brief Iterates orphans and applies them the given function object. + * + * The function object is invoked for each entity that is still in use and + * has no components assigned.
+ * The signature of the function should be equivalent to the following: + * + * @code{.cpp} + * void(const Entity); + * @endcode + * + * This function can be very slow and should not be used frequently. + * + * @tparam Func Type of the function object to invoke. + * @param func A valid function object. + */ + template + void orphans(Func func) const { + each([this, &func](const auto entity) { /* each visits only the entities still in use */ + if(orphan(entity)) { /* orphan tests every pool for the entity, hence the cost */ + func(entity); + } + }); + } + + /** + * @brief Returns a sink object for the given component. + * + * A sink is an opaque object used to connect listeners to components.
+ * The sink returned by this function can be used to receive notifications + * whenever a new instance of the given component is created and assigned to + * an entity. + * + * The function type for a listener is equivalent to: + * + * @code{.cpp} + * void(registry &, Entity); + * @endcode + * + * Listeners are invoked **after** the component has been assigned to the + * entity. + * + * @sa sink + * + * @tparam Component Type of component of which to get the sink. + * @return A temporary sink object. + */ + template + [[nodiscard]] auto on_construct() { + return assure().on_construct(); /* the sink is obtained from the pool of the component */ + } + + /** + * @brief Returns a sink object for the given component. + * + * A sink is an opaque object used to connect listeners to components.
+ * The sink returned by this function can be used to receive notifications + * whenever an instance of the given component is explicitly updated. + * + * The function type for a listener is equivalent to: + * + * @code{.cpp} + * void(registry &, Entity); + * @endcode + * + * Listeners are invoked **after** the component has been updated. + * + * @sa sink + * + * @tparam Component Type of component of which to get the sink. + * @return A temporary sink object. + */ + template + [[nodiscard]] auto on_update() { + return assure().on_update(); /* the sink is obtained from the pool of the component */ + } + + /** + * @brief Returns a sink object for the given component. + * + * A sink is an opaque object used to connect listeners to components.
+ * The sink returned by this function can be used to receive notifications + * whenever an instance of the given component is removed from an entity and + * thus destroyed. + * + * The function type for a listener is equivalent to: + * + * @code{.cpp} + * void(registry &, Entity); + * @endcode + * + * Listeners are invoked **before** the component has been removed from the + * entity. + * + * @sa sink + * + * @tparam Component Type of component of which to get the sink. + * @return A temporary sink object. + */ + template + [[nodiscard]] auto on_destroy() { + return assure().on_destroy(); /* the sink is obtained from the pool of the component */ + } + + /** + * @brief Returns a view for the given components. + * + * This kind of objects are created on the fly and share with the registry + * its internal data structures.
+ * Feel free to discard a view after use. Creating and destroying a view + * is an incredibly cheap operation because it does not require any type of + * initialization.
+ * As a rule of thumb, storing a view should never be an option. + * + * Views do their best to iterate the smallest set of candidate entities. + * In particular: + * + * * Single component views are incredibly fast and iterate a packed array + * of entities, all of which have the given component. + * * Multi component views look at the number of entities available for each + * component and pick up a reference to the smallest set of candidates to + * test for the given components. + * + * Views in no way affect the functionalities of the registry nor those of + * the underlying pools. + * + * @note + * Multi component views are pretty fast. However their performance tends to + * degenerate when the number of components to iterate grows and most + * of the entities have all the given components.
+ * To get a performance boost, consider using a group instead. + * + * @tparam Component Type of components used to construct the view. + * @tparam Exclude Types of components used to filter the view. + * @return A newly created view. + */ + template + [[nodiscard]] basic_view, Component...> view(exclude_t = {}) const { + static_assert(sizeof...(Component) > 0, "Exclusion-only views are not supported"); /* at least one component to iterate is required */ + return { assure>()..., assure()... }; /* the view is built from what assure returns for the types involved */ + } + + /*! @copydoc view */ + template + [[nodiscard]] basic_view, Component...> view(exclude_t = {}) { + static_assert(sizeof...(Component) > 0, "Exclusion-only views are not supported"); /* at least one component to iterate is required */ + return { assure>()..., assure()... }; /* the view is built from what assure returns for the types involved */ + } + + /** + * @brief Returns a runtime view for the given components. + * + * This kind of objects are created on the fly and share with the registry + * its internal data structures.
+ * Users should throw away the view after use. Fortunately, creating and + * destroying a runtime view is an incredibly cheap operation because it + * does not require any type of initialization.
+ * As a rule of thumb, storing a view should never be an option. + * + * Runtime views are to be used when users want to construct a view from + * some external inputs and don't know at compile-time what are the required + * components. + * + * @tparam ItComp Type of input iterator for the components to use to + * construct the view. + * @tparam ItExcl Type of input iterator for the components to use to filter + * the view. + * @param first An iterator to the first element of the range of components + * to use to construct the view. + * @param last An iterator past the last element of the range of components + * to use to construct the view. + * @param from An iterator to the first element of the range of components + * to use to filter the view. + * @param to An iterator past the last element of the range of components to + * use to filter the view. + * @return A newly created runtime view. + */ + template + [[nodiscard]] basic_runtime_view runtime_view(ItComp first, ItComp last, ItExcl from = {}, ItExcl to = {}) const { + std::vector *> component(std::distance(first, last)); /* sized upfront: both ranges are traversed twice, once here and once below */ + std::vector *> filter(std::distance(from, to)); + + std::transform(first, last, component.begin(), [this](const auto ctype) { + const auto it = std::find_if(pools.cbegin(), pools.cend(), [ctype](auto &&pdata) { return pdata.pool && pdata.type_id == ctype; }); /* linear search among the pools in use */ + return it == pools.cend() ? nullptr : it->pool.get(); /* a type without a pool maps to a null pointer */ + }); + + std::transform(from, to, filter.begin(), [this](const auto ctype) { + const auto it = std::find_if(pools.cbegin(), pools.cend(), [ctype](auto &&pdata) { return pdata.pool && pdata.type_id == ctype; }); /* same lookup as above, for the excluded types */ + return it == pools.cend() ? nullptr : it->pool.get(); + }); + + return { std::move(component), std::move(filter) }; + } + + /** + * @brief Returns a group for the given components. + * + * This kind of objects are created on the fly and share with the registry + * its internal data structures.
+ * Feel free to discard a group after use. Creating and destroying a + * group is an incredibly cheap operation because it does not require any + * type of initialization, except for the first time it is requested.
+ * As a rule of thumb, storing a group should never be an option. + * + * Groups support exclusion lists and can own types of components. The more + * types are owned by a group, the faster it is to iterate entities and + * components.
+ * However, groups also affect some features of the registry such as the + * creation and destruction of components, which will consequently be + * slightly slower (nothing that can be noticed in most cases). + * + * @note + * Pools of components that are owned by a group cannot be sorted anymore. + * The group takes the ownership of the pools and arranges components so as + * to iterate them as fast as possible. + * + * @tparam Owned Types of components owned by the group. + * @tparam Get Types of components observed by the group. + * @tparam Exclude Types of components used to filter the group. + * @return A newly created group. + */ + template + [[nodiscard]] basic_group, get_t, Owned...> group(get_t, exclude_t = {}) { + static_assert(sizeof...(Owned) + sizeof...(Get) > 0, "Exclusion-only groups are not supported"); + static_assert(sizeof...(Owned) + sizeof...(Get) + sizeof...(Exclude) > 1, "Single component groups are not allowed"); + + using handler_type = group_handler, get_t...>, std::decay_t...>; + + const auto cpools = std::forward_as_tuple(assure>()..., assure>()...); + constexpr auto size = sizeof...(Owned) + sizeof...(Get) + sizeof...(Exclude); + handler_type *handler = nullptr; + + if(auto it = std::find_if(groups.cbegin(), groups.cend(), [size](const auto &gdata) { + return gdata.size == size + && (gdata.owned(type_info>::id()) && ...) + && (gdata.get(type_info>::id()) && ...) 
+ && (gdata.exclude(type_info::id()) && ...); + }); it != groups.cend()) + { + handler = static_cast(it->group.get()); + } + + /* no matching group registered yet: create its handler, store it and connect it to the construction/destruction signals */ if(!handler) { + group_data candidate = { + size, + { new handler_type{}, [](void *instance) { delete static_cast(instance); } }, + []([[maybe_unused]] const id_type ctype) ENTT_NOEXCEPT { return ((ctype == type_info>::id()) || ...); }, + []([[maybe_unused]] const id_type ctype) ENTT_NOEXCEPT { return ((ctype == type_info>::id()) || ...); }, + []([[maybe_unused]] const id_type ctype) ENTT_NOEXCEPT { return ((ctype == type_info::id()) || ...); }, + }; + + handler = static_cast(candidate.group.get()); + + const void *maybe_valid_if = nullptr; + const void *discard_if = nullptr; + + if constexpr(sizeof...(Owned) == 0) { + groups.push_back(std::move(candidate)); + } else { + ENTT_ASSERT(std::all_of(groups.cbegin(), groups.cend(), [size](const auto &gdata) { + const auto overlapping = (0u + ... + gdata.owned(type_info>::id())); + const auto sz = overlapping + (0u + ... + gdata.get(type_info>::id())) + (0u + ... + gdata.exclude(type_info::id())); + return !overlapping || ((sz == size) || (sz == gdata.size)); + })); + + const auto next = std::find_if_not(groups.cbegin(), groups.cend(), [size](const auto &gdata) { + return !(0u + ... + gdata.owned(type_info>::id())) || (size > gdata.size); + }); + + const auto prev = std::find_if(std::make_reverse_iterator(next), groups.crend(), [](const auto &gdata) { + return (0u + ... + gdata.owned(type_info>::id())); + }); + + maybe_valid_if = (next == groups.cend() ? maybe_valid_if : next->group.get()); + discard_if = (prev == groups.crend() ? 
discard_if : prev->group.get()); + groups.insert(next, std::move(candidate)); + } + + (on_construct>().before(maybe_valid_if).template connect<&handler_type::template maybe_valid_if>>(*handler), ...); + (on_construct>().before(maybe_valid_if).template connect<&handler_type::template maybe_valid_if>>(*handler), ...); + (on_destroy().before(maybe_valid_if).template connect<&handler_type::template maybe_valid_if>(*handler), ...); + + (on_destroy>().before(discard_if).template connect<&handler_type::discard_if>(*handler), ...); + (on_destroy>().before(discard_if).template connect<&handler_type::discard_if>(*handler), ...); + (on_construct().before(discard_if).template connect<&handler_type::discard_if>(*handler), ...); + + if constexpr(sizeof...(Owned) == 0) { + for(const auto entity: view(exclude)) { + handler->current.emplace(entity); + } + } else { + // we cannot iterate backwards because we want to leave behind valid entities in case of owned types + for(auto *first = std::get<0>(cpools).data(), *last = first + std::get<0>(cpools).size(); first != last; ++first) { + handler->template maybe_valid_if...>>>(*this, *first); + } + } + } + + if constexpr(sizeof...(Owned) == 0) { + return { handler->current, std::get> &>(cpools)... }; + } else { + return { handler->current, std::get> &>(cpools)... , std::get> &>(cpools)... }; + } + } + + /** + * @brief Returns a group for the given components. + * + * @sa group + * + * @tparam Owned Types of components owned by the group. + * @tparam Get Types of components observed by the group. + * @tparam Exclude Types of components used to filter the group. + * @return A newly created group. + */ + template + [[nodiscard]] basic_group, get_t, Owned...> group(get_t, exclude_t = {}) const { + static_assert(std::conjunction_v..., std::is_const...>, "Invalid non-const type"); + return const_cast(this)->group(get_t{}, exclude); + } + + /** + * @brief Returns a group for the given components. 
+ * + * @sa group + * + * @tparam Owned Types of components owned by the group. + * @tparam Exclude Types of components used to filter the group. + * @return A newly created group. + */ + template + [[nodiscard]] basic_group, get_t<>, Owned...> group(exclude_t = {}) { + return group(get_t<>{}, exclude); + } + + /** + * @brief Returns a group for the given components. + * + * @sa group + * + * @tparam Owned Types of components owned by the group. + * @tparam Exclude Types of components used to filter the group. + * @return A newly created group. + */ + template + [[nodiscard]] basic_group, get_t<>, Owned...> group(exclude_t = {}) const { + static_assert(std::conjunction_v...>, "Invalid non-const type"); + return const_cast(this)->group(exclude); + } + + /** + * @brief Checks whether the given components belong to any group. + * @tparam Component Types of components in which one is interested. + * @return True if the pools of the given components are sortable, false + * otherwise. + */ + template + [[nodiscard]] bool sortable() const { + return std::none_of(groups.cbegin(), groups.cend(), [](auto &&gdata) { return (gdata.owned(type_info>::id()) || ...); }); + } + + /** + * @brief Checks whether a group can be sorted. + * @tparam Owned Types of components owned by the group. + * @tparam Get Types of components observed by the group. + * @tparam Exclude Types of components used to filter the group. + * @return True if the group can be sorted, false otherwise. + */ + template + [[nodiscard]] bool sortable(const basic_group, get_t, Owned...> &) ENTT_NOEXCEPT { + constexpr auto size = sizeof...(Owned) + sizeof...(Get) + sizeof...(Exclude); + return std::find_if(groups.cbegin(), groups.cend(), [size](const auto &gdata) { + return (0u + ... + gdata.owned(type_info>::id())) && (size < gdata.size); + }) == groups.cend(); + } + + /** + * @brief Sorts the pool of entities for the given component. 
+ * + * The order of the elements in a pool is highly affected by assignments + * of components to entities and deletions. Components are arranged to + * maximize the performance during iterations and users should not make any + * assumption on the order.
+ * This function can be used to impose an order to the elements in the pool + * of the given component. The order is kept valid until a component of the + * given type is assigned to or removed from an entity. + * + * The comparison function object must return `true` if the first element + * is _less_ than the second one, `false` otherwise. The signature of the + * comparison function should be equivalent to one of the following: + * + * @code{.cpp} + * bool(const Entity, const Entity); + * bool(const Component &, const Component &); + * @endcode + * + * Moreover, the comparison function object shall induce a + * _strict weak ordering_ on the values. + * + * The sort function object must offer a member function template + * `operator()` that accepts three arguments: + * + * * An iterator to the first element of the range to sort. + * * An iterator past the last element of the range to sort. + * * A comparison function to use to compare the elements. + * + * The comparison function object received by the sort function object doesn't + * necessarily have the type of the one passed along with the other parameters to + * this member function. + * + * @warning + * Pools of components owned by a group cannot be sorted.
+ * An assertion will abort the execution at runtime in debug mode in case + * the pool is owned by a group. + * + * @tparam Component Type of components to sort. + * @tparam Compare Type of comparison function object. + * @tparam Sort Type of sort function object. + * @tparam Args Types of arguments to forward to the sort function object. + * @param compare A valid comparison function object. + * @param algo A valid sort function object. + * @param args Arguments to forward to the sort function object, if any. + */ + template + void sort(Compare compare, Sort algo = Sort{}, Args &&... args) { + ENTT_ASSERT(sortable()); + auto &cpool = assure(); + cpool.sort(cpool.begin(), cpool.end(), std::move(compare), std::move(algo), std::forward(args)...); + } + + /** + * @brief Sorts two pools of components in the same way. + * + * The order of the elements in a pool is highly affected by assignments + * of components to entities and deletions. Components are arranged to + * maximize the performance during iterations and users should not make any + * assumption on the order. + * + * It happens that different pools of components must be sorted the same way + * because of runtime and/or performance constraints. This function can be + * used to order a pool of components according to the order between the + * entities in another pool of components. + * + * @b How @b it @b works + * + * Being `A` and `B` the two sets where `B` is the master (the one the order + * of which rules) and `A` is the slave (the one to sort), after a call to + * this function an iterator for `A` will return the entities according to + * the following rules: + * + * * All the entities in `A` that are also in `B` are returned first + * according to the order they have in `B`. + * * All the entities in `A` that are not in `B` are returned in no + * particular order after all the other entities. + * + * Any subsequent change to `B` won't affect the order in `A`. 
+ * + * @warning + * Pools of components owned by a group cannot be sorted.
+ * An assertion will abort the execution at runtime in debug mode in case + * the pool is owned by a group. + * + * @tparam To Type of components to sort. + * @tparam From Type of components to use to sort. + */ + template + void sort() { + ENTT_ASSERT(sortable()); + assure().respect(assure()); + } + + /** + * @brief Visits an entity and returns the types for its components. + * + * The signature of the function should be equivalent to the following: + * + * @code{.cpp} + * void(const id_type); + * @endcode + * + * Returned identifiers are those of the components owned by the entity. + * + * @sa type_info + * + * @warning + * It's not specified whether a component attached to or removed from the + * given entity during the visit is returned or not to the caller. + * + * @tparam Func Type of the function object to invoke. + * @param entity A valid entity identifier. + * @param func A valid function object. + */ + template + void visit(entity_type entity, Func func) const { + for(auto pos = pools.size(); pos; --pos) { + if(const auto &pdata = pools[pos-1]; pdata.pool && pdata.pool->contains(entity)) { + func(pdata.type_id); + } + } + } + + /** + * @brief Visits a registry and returns the types for its components. + * + * The signature of the function should be equivalent to the following: + * + * @code{.cpp} + * void(const id_type); + * @endcode + * + * Returned identifiers are those of the components managed by the registry. + * + * @sa type_info + * + * @warning + * It's not specified whether a component for which a pool is created during + * the visit is returned or not to the caller. + * + * @tparam Func Type of the function object to invoke. + * @param func A valid function object. + */ + template + void visit(Func func) const { + for(auto pos = pools.size(); pos; --pos) { + if(const auto &pdata = pools[pos-1]; pdata.pool) { + func(pdata.type_id); + } + } + } + + /** + * @brief Binds an object to the context of the registry. 
+ * + * If the value already exists it is overwritten, otherwise a new instance + * of the given type is created and initialized with the arguments provided. + * + * @tparam Type Type of object to set. + * @tparam Args Types of arguments to use to construct the object. + * @param args Parameters to use to initialize the value. + * @return A reference to the newly created object. + */ + template + Type & set(Args &&... args) { + /* erase any variable already stored under this type id before appending the new one */ unset(); + vars.push_back(variable_data{type_info::id(), { new Type{std::forward(args)...}, [](void *instance) { delete static_cast(instance); } }}); + return *static_cast(vars.back().value.get()); + } + + /** + * @brief Unsets a context variable if it exists. + * @tparam Type Type of object to unset. + */ + template + void unset() { + vars.erase(std::remove_if(vars.begin(), vars.end(), [](auto &&var) { + return var.type_id == type_info::id(); + }), vars.end()); + } + + /** + * @brief Binds an object to the context of the registry. + * + * In case the context doesn't contain the given object, the parameters + * provided are used to construct it. + * + * @tparam Type Type of object to set. + * @tparam Args Types of arguments to use to construct the object. + * @param args Parameters to use to initialize the object. + * @return A reference to the object in the context of the registry. + */ + template + [[nodiscard]] Type & ctx_or_set(Args &&... args) { + auto *value = try_ctx(); + return value ? *value : set(std::forward(args)...); + } + + /** + * @brief Returns a pointer to an object in the context of the registry. + * @tparam Type Type of object to get. + * @return A pointer to the object if it exists in the context of the + * registry, a null pointer otherwise. + */ + template + [[nodiscard]] const Type * try_ctx() const { + auto it = std::find_if(vars.cbegin(), vars.cend(), [](auto &&var) { return var.type_id == type_info::id(); }); + return it == vars.cend() ? nullptr : static_cast(it->value.get()); + } + + /*! 
@copydoc try_ctx */ + template + [[nodiscard]] Type * try_ctx() { + return const_cast(std::as_const(*this).template try_ctx()); + } + + /** + * @brief Returns a reference to an object in the context of the registry. + * + * @warning + * Attempting to get a context variable that doesn't exist results in + * undefined behavior.
+ * An assertion will abort the execution at runtime in debug mode in case of + * invalid requests. + * + * @tparam Type Type of object to get. + * @return A valid reference to the object in the context of the registry. + */ + template + [[nodiscard]] const Type & ctx() const { + const auto *instance = try_ctx(); + ENTT_ASSERT(instance); + return *instance; + } + + /*! @copydoc ctx */ + template + [[nodiscard]] Type & ctx() { + return const_cast(std::as_const(*this).template ctx()); + } + + /** + * @brief Visits a registry and returns the types for its context variables. + * + * The signature of the function should be equivalent to the following: + * + * @code{.cpp} + * void(const id_type); + * @endcode + * + * Returned identifiers are those of the context variables currently set. + * + * @sa type_info + * + * @warning + * It's not specified whether a context variable created during the visit is + * returned or not to the caller. + * + * @tparam Func Type of the function object to invoke. + * @param func A valid function object. + */ + template + void ctx(Func func) const { + for(auto pos = vars.size(); pos; --pos) { + func(vars[pos-1].type_id); + } + } + +private: + std::vector groups{}; + mutable std::vector pools{}; + std::vector entities{}; + std::vector vars{}; + entity_type destroyed{null}; +}; + + +} + + +#endif + +// #include "entity.hpp" + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @brief Dedicated to those who aren't confident with the + * entity-component-system architecture. + * + * Tiny wrapper around a registry, for all those users that aren't confident + * with entity-component-system architecture and prefer to iterate objects + * directly. + * + * @tparam Entity A valid entity type (see entt_traits for more details). + */ +template +struct [[deprecated("Consider using the handle class instead")]] basic_actor { + /*! @brief Type of registry used internally. */ + using registry_type = basic_registry; + /*! 
@brief Underlying entity identifier. */ + using entity_type = Entity; + + /*! @brief Constructs an actor that refers to the null entity and is bound to no registry. */ basic_actor() ENTT_NOEXCEPT + : entt{null}, reg{nullptr} + {} + + /** + * @brief Move constructor. + * + * After actor move construction, instances that have been moved from are + * placed in a valid but unspecified state. It's highly discouraged to + * continue using them. + * + * @param other The instance to move from. + */ + basic_actor(basic_actor &&other) ENTT_NOEXCEPT + : entt{other.entt}, reg{other.reg} + { + other.entt = null; + } + + /** + * @brief Constructs an actor from a given registry. + * @param ref An instance of the registry class. + */ + explicit basic_actor(registry_type &ref) + : entt{ref.create()}, reg{&ref} + {} + + /** + * @brief Constructs an actor from a given entity. + * @param entity A valid entity identifier. + * @param ref An instance of the registry class. + */ + explicit basic_actor(entity_type entity, registry_type &ref) ENTT_NOEXCEPT + : entt{entity}, reg{&ref} + { + ENTT_ASSERT(ref.valid(entity)); + } + + /*! @brief Destroys the underlying entity if the actor still refers to a valid one. */ + virtual ~basic_actor() { + if(*this) { + reg->destroy(entt); + } + } + + /** + * @brief Move assignment operator. + * + * After actor move assignment, instances that have been moved from are + * placed in a valid but unspecified state. It's highly discouraged to + * continue using them. + * + * @param other The instance to move from. + * @return This actor. + */ + basic_actor & operator=(basic_actor &&other) ENTT_NOEXCEPT { + if(this != &other) { + /* move into a temporary and swap: the state previously held by this actor ends up in tmp, whose destructor runs at the end of this scope */ auto tmp{std::move(other)}; + std::swap(reg, tmp.reg); + std::swap(entt, tmp.entt); + } + + return *this; + } + + /** + * @brief Assigns the given component to an actor. + * + * A new instance of the given component is created and initialized with the + * arguments provided (the component must have a proper constructor or be of + * aggregate type). Then the component is assigned to the actor.
+ * In case the actor already has a component of the given type, it's + * replaced with the new one. + * + * @tparam Component Type of the component to create. + * @tparam Args Types of arguments to use to construct the component. + * @param args Parameters to use to initialize the component. + * @return A reference to the newly created component. + */ + template + decltype(auto) assign(Args &&... args) { + return reg->template emplace_or_replace(entt, std::forward(args)...); + } + + /** + * @brief Removes the given component from an actor. + * @tparam Component Type of the component to remove. + */ + template + void remove() { + reg->template remove(entt); + } + + /** + * @brief Checks if an actor has the given components. + * @tparam Component Components for which to perform the check. + * @return True if the actor has all the components, false otherwise. + */ + template + [[nodiscard]] bool has() const { + return reg->template has(entt); + } + + /** + * @brief Returns references to the given components for an actor. + * @tparam Component Types of components to get. + * @return References to the components owned by the actor. + */ + template + [[nodiscard]] decltype(auto) get() const { + return std::as_const(*reg).template get(entt); + } + + /*! @copydoc get */ + template + [[nodiscard]] decltype(auto) get() { + return reg->template get(entt); + } + + /** + * @brief Returns pointers to the given components for an actor. + * @tparam Component Types of components to get. + * @return Pointers to the components owned by the actor. + */ + template + [[nodiscard]] auto try_get() const { + return std::as_const(*reg).template try_get(entt); + } + + /*! @copydoc try_get */ + template + [[nodiscard]] auto try_get() { + return reg->template try_get(entt); + } + + /** + * @brief Returns a reference to the underlying registry. + * @return A reference to the underlying registry. 
+ */ + [[nodiscard]] const registry_type & backend() const ENTT_NOEXCEPT { + return *reg; + } + + /*! @copydoc backend */ + [[nodiscard]] registry_type & backend() ENTT_NOEXCEPT { + return const_cast(std::as_const(*this).backend()); + } + + /** + * @brief Returns the entity associated with an actor. + * @return The entity associated with the actor. + */ + [[nodiscard]] entity_type entity() const ENTT_NOEXCEPT { + return entt; + } + + /** + * @brief Checks if an actor refers to a valid entity or not. + * @return True if the actor refers to a valid entity, false otherwise. + */ + [[nodiscard]] explicit operator bool() const { + return reg && reg->valid(entt); + } + +private: + entity_type entt; + registry_type *reg; +}; + + +} + + +#endif + +// #include "entity/entity.hpp" + +// #include "entity/group.hpp" + +// #include "entity/handle.hpp" +#ifndef ENTT_ENTITY_HANDLE_HPP +#define ENTT_ENTITY_HANDLE_HPP + + +// #include "registry.hpp" + + + +namespace entt { + + +/** + * @brief Non-owning handle to an entity. + * + * Tiny wrapper around a registry and an entity. + * + * @tparam Entity A valid entity type (see entt_traits for more details). + */ +template +struct basic_handle { + /*! @brief Underlying entity identifier. */ + using entity_type = std::remove_const_t; + + /*! @brief Type of registry accepted by the handle. */ + using registry_type = std::conditional_t< + std::is_const_v, + const basic_registry, + basic_registry + >; + + /** + * @brief Constructs a handle from a given registry and entity. + * @param ref An instance of the registry class. + * @param value An entity identifier. + */ + basic_handle(registry_type &ref, entity_type value = null) ENTT_NOEXCEPT + : reg{&ref}, entt{value} + {} + + /** + * @brief Assigns an entity to a handle. + * @param value An entity identifier. + * @return This handle. 
+ */ + basic_handle & operator=(const entity_type value) ENTT_NOEXCEPT { + entt = value; + return *this; + } + + /** + * @brief Assigns the null object to a handle. + * @return This handle. + */ + basic_handle & operator=(null_t) ENTT_NOEXCEPT { + return (*this = static_cast(null)); + } + + /** + * @brief Constructs a const handle from a non-const one. + * @return A const handle referring to the same entity. + */ + [[nodiscard]] operator basic_handle() const ENTT_NOEXCEPT { + return {*reg, entt}; + } + + /** + * @brief Converts a handle to its underlying entity. + * @return An entity identifier. + */ + [[nodiscard]] operator entity_type() const ENTT_NOEXCEPT { + return entity(); + } + + /** + * @brief Checks if a handle refers to a valid entity or not. + * @return True if the handle refers to a valid entity, false otherwise. + */ + [[nodiscard]] explicit operator bool() const { + return reg->valid(entt); + } + + /** + * @brief Returns a reference to the underlying registry. + * @return A reference to the underlying registry. + */ + [[nodiscard]] registry_type & registry() const ENTT_NOEXCEPT { + return *reg; + } + + /** + * @brief Returns the entity associated with a handle. + * @return The entity associated with the handle. + */ + [[nodiscard]] entity_type entity() const ENTT_NOEXCEPT { + return entt; + } + + /** + * @brief Assigns the given component to a handle. + * @sa basic_registry::emplace + * @tparam Component Type of component to create. + * @tparam Args Types of arguments to use to construct the component. + * @param args Parameters to use to initialize the component. + * @return A reference to the newly created component. + */ + template + decltype(auto) emplace(Args &&... args) const { + return reg->template emplace(entt, std::forward(args)...); + } + + /** + * @brief Assigns or replaces the given component for a handle. + * @sa basic_registry::emplace_or_replace + * @tparam Component Type of component to assign or replace. 
+ * @tparam Args Types of arguments to use to construct the component. + * @param args Parameters to use to initialize the component. + * @return A reference to the newly created component. + */ + template + decltype(auto) emplace_or_replace(Args &&... args) const { + return reg->template emplace_or_replace(entt, std::forward(args)...); + } + + /** + * @brief Patches the given component for a handle. + * @sa basic_registry::patch + * @tparam Component Type of component to patch. + * @tparam Func Types of the function objects to invoke. + * @param func Valid function objects. + * @return A reference to the patched component. + */ + template + decltype(auto) patch(Func &&... func) const { + return reg->template patch(entt, std::forward(func)...); + } + + /** + * @brief Replaces the given component for a handle. + * @sa basic_registry::replace + * @tparam Component Type of component to replace. + * @tparam Args Types of arguments to use to construct the component. + * @param args Parameters to use to initialize the component. + * @return A reference to the component being replaced. + */ + template + decltype(auto) replace(Args &&... args) const { + return reg->template replace(entt, std::forward(args)...); + } + + /** + * @brief Removes the given components from a handle. + * @sa basic_registry::remove + * @tparam Component Types of components to remove. + */ + template + void remove() const { + reg->template remove(entt); + } + + /** + * @brief Removes the given components from a handle. + * @sa basic_registry::remove_if_exists + * @tparam Component Types of components to remove. + * @return The number of components actually removed. + */ + template + decltype(auto) remove_if_exists() const { + return reg->template remove_if_exists(entt); + } + + /** + * @brief Removes all the components from a handle and makes it orphaned. 
+ * @sa basic_registry::remove_all + */ + void remove_all() const { + reg->remove_all(entt); + } + + /** + * @brief Checks if a handle has all the given components. + * @sa basic_registry::has + * @tparam Component Components for which to perform the check. + * @return True if the handle has all the components, false otherwise. + */ + template + [[nodiscard]] decltype(auto) has() const { + return reg->template has(entt); + } + + /** + * @brief Checks if a handle has at least one of the given components. + * @sa basic_registry::any + * @tparam Component Components for which to perform the check. + * @return True if the handle has at least one of the given components, + * false otherwise. + */ + template + [[nodiscard]] decltype(auto) any() const { + return reg->template any(entt); + } + + /** + * @brief Returns references to the given components for a handle. + * @sa basic_registry::get + * @tparam Component Types of components to get. + * @return References to the components owned by the handle. + */ + template + [[nodiscard]] decltype(auto) get() const { + return reg->template get(entt); + } + + /** + * @brief Returns a reference to the given component for a handle. + * @sa basic_registry::get_or_emplace + * @tparam Component Type of component to get. + * @tparam Args Types of arguments to use to construct the component. + * @param args Parameters to use to initialize the component. + * @return Reference to the component owned by the handle. + */ + template + [[nodiscard]] decltype(auto) get_or_emplace(Args &&... args) const { + return reg->template get_or_emplace(entt, std::forward(args)...); + } + + /** + * @brief Returns pointers to the given components for a handle. + * @sa basic_registry::try_get + * @tparam Component Types of components to get. + * @return Pointers to the components owned by the handle. 
+ */ + template + [[nodiscard]] decltype(auto) try_get() const { + return reg->template try_get(entt); + } + + /** + * @brief Checks if a handle has components assigned. + * @return True if the handle has no components assigned, false otherwise. + */ + [[nodiscard]] bool orphan() const { + return reg->orphan(entt); + } + + /** + * @brief Visits a handle and returns the types for its components. + * @sa basic_registry::visit + * @tparam Func Type of the function object to invoke. + * @param func A valid function object. + */ + template + void visit(Func &&func) const { + reg->visit(entt, std::forward(func)); + } + +private: + registry_type *reg; + entity_type entt; +}; + + +/** + * @brief Deduction guide. + * @tparam Entity A valid entity type (see entt_traits for more details). + */ +template +basic_handle(basic_registry &, Entity) -> basic_handle; + + +/** + * @brief Deduction guide. + * @tparam Entity A valid entity type (see entt_traits for more details). + */ +template +basic_handle(const basic_registry &, Entity) -> basic_handle; + + +} + + +#endif + +// #include "entity/helper.hpp" +#ifndef ENTT_ENTITY_HELPER_HPP +#define ENTT_ENTITY_HELPER_HPP + + +#include +// #include "../config/config.h" + +// #include "../core/type_traits.hpp" + +// #include "../signal/delegate.hpp" + +// #include "registry.hpp" + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @brief Converts a registry to a view. + * @tparam Const Constness of the accepted registry. + * @tparam Entity A valid entity type (see entt_traits for more details). + */ +template +struct as_view { + /*! @brief Type of registry to convert. */ + using registry_type = std::conditional_t, basic_registry>; + + /** + * @brief Constructs a converter for a given registry. + * @param source A valid reference to a registry. + */ + as_view(registry_type &source) ENTT_NOEXCEPT: reg{source} {} + + /** + * @brief Conversion function from a registry to a view. 
+ * @tparam Exclude Types of components used to filter the view. + * @tparam Component Type of components used to construct the view. + * @return A newly created view. + */ + template + operator basic_view() const { + return reg.template view(Exclude{}); + } + +private: + registry_type ® +}; + + +/** + * @brief Deduction guide. + * + * It allows to deduce the constness of a registry directly from the instance + * provided to the constructor. + * + * @tparam Entity A valid entity type (see entt_traits for more details). + */ +template +as_view(basic_registry &) ENTT_NOEXCEPT -> as_view; + + +/*! @copydoc as_view */ +template +as_view(const basic_registry &) ENTT_NOEXCEPT -> as_view; + + +/** + * @brief Converts a registry to a group. + * @tparam Const Constness of the accepted registry. + * @tparam Entity A valid entity type (see entt_traits for more details). + */ +template +struct as_group { + /*! @brief Type of registry to convert. */ + using registry_type = std::conditional_t, basic_registry>; + + /** + * @brief Constructs a converter for a given registry. + * @param source A valid reference to a registry. + */ + as_group(registry_type &source) ENTT_NOEXCEPT: reg{source} {} + + /** + * @brief Conversion function from a registry to a group. + * @tparam Exclude Types of components used to filter the group. + * @tparam Get Types of components observed by the group. + * @tparam Owned Types of components owned by the group. + * @return A newly created group. + */ + template + operator basic_group() const { + return reg.template group(Get{}, Exclude{}); + } + +private: + registry_type ® +}; + + +/** + * @brief Deduction guide. + * + * It allows to deduce the constness of a registry directly from the instance + * provided to the constructor. + * + * @tparam Entity A valid entity type (see entt_traits for more details). + */ +template +as_group(basic_registry &) ENTT_NOEXCEPT -> as_group; + + +/*! 
@copydoc as_group */ +template +as_group(const basic_registry &) ENTT_NOEXCEPT -> as_group; + + + +/** + * @brief Helper to create a listener that directly invokes a member function. + * @tparam Member Member function to invoke on a component of the given type. + * @tparam Entity A valid entity type (see entt_traits for more details). + * @param reg A registry that contains the given entity and its components. + * @param entt Entity from which to get the component. + */ +template +void invoke(basic_registry ®, const Entity entt) { + static_assert(std::is_member_function_pointer_v, "Invalid pointer to non-static member function"); + delegate &, const Entity)> func; + func.template connect(reg.template get>(entt)); + func(reg, entt); +} + + +/** + * @brief Returns the entity associated with a given component. + * @tparam Entity A valid entity type (see entt_traits for more details). + * @tparam Component Type of component. + * @param reg A registry that contains the given entity and its components. + * @param component A valid component instance. + * @return The entity associated with the given component. + */ +template +Entity to_entity(const basic_registry ®, const Component &component) { + return *(reg.template data() + (&component - reg.template raw())); +} + + +} + + +#endif + +// #include "entity/observer.hpp" +#ifndef ENTT_ENTITY_OBSERVER_HPP +#define ENTT_ENTITY_OBSERVER_HPP + + +#include +#include +#include +#include +#include +// #include "../config/config.h" + +// #include "../core/type_traits.hpp" + +// #include "registry.hpp" + +// #include "storage.hpp" + +// #include "utility.hpp" + +// #include "entity.hpp" + +// #include "fwd.hpp" + + + +namespace entt { + + +/*! @brief Grouping matcher. */ +template +struct matcher {}; + + +/** + * @brief Collector. + * + * Primary template isn't defined on purpose. All the specializations give a + * compile-time error, but for a few reasonable cases. 
+ */ +template +struct basic_collector; + + +/** + * @brief Collector. + * + * A collector contains a set of rules (literally, matchers) to use to track + * entities.
+ * Its main purpose is to generate a descriptor that allows an observer to know + * how to connect to a registry. + */ +template<> +struct basic_collector<> { + /** + * @brief Adds a grouping matcher to the collector. + * @tparam AllOf Types of components tracked by the matcher. + * @tparam NoneOf Types of components used to filter out entities. + * @return The updated collector. + */ + template + static constexpr auto group(exclude_t = {}) ENTT_NOEXCEPT { + return basic_collector, type_list<>, type_list, AllOf...>>{}; + } + + /** + * @brief Adds an observing matcher to the collector. + * @tparam AnyOf Type of component for which changes should be detected. + * @return The updated collector. + */ + template + static constexpr auto update() ENTT_NOEXCEPT { + return basic_collector, type_list<>, AnyOf>>{}; + } +}; + +/** + * @brief Collector. + * @copydetails basic_collector<> + * @tparam Reject Untracked types used to filter out entities. + * @tparam Require Untracked types required by the matcher. + * @tparam Rule Specific details of the current matcher. + * @tparam Other Other matchers. + */ +template +struct basic_collector, type_list, Rule...>, Other...> { + /*! @brief Current matcher. */ + using current_type = matcher, type_list, Rule...>; + + /** + * @brief Adds a grouping matcher to the collector. + * @tparam AllOf Types of components tracked by the matcher. + * @tparam NoneOf Types of components used to filter out entities. + * @return The updated collector. + */ + template + static constexpr auto group(exclude_t = {}) ENTT_NOEXCEPT { + return basic_collector, type_list<>, type_list, AllOf...>, current_type, Other...>{}; + } + + /** + * @brief Adds an observing matcher to the collector. + * @tparam AnyOf Type of component for which changes should be detected. + * @return The updated collector. 
+ */ + template + static constexpr auto update() ENTT_NOEXCEPT { + return basic_collector, type_list<>, AnyOf>, current_type, Other...>{}; + } + + /** + * @brief Updates the filter of the last added matcher. + * @tparam AllOf Types of components required by the matcher. + * @tparam NoneOf Types of components used to filter out entities. + * @return The updated collector. + */ + template + static constexpr auto where(exclude_t = {}) ENTT_NOEXCEPT { + using extended_type = matcher, type_list, Rule...>; + return basic_collector{}; + } +}; + + +/*! @brief Variable template used to ease the definition of collectors. */ +inline constexpr basic_collector<> collector{}; + + +/** + * @brief Observer. + * + * An observer returns all the entities and only the entities that fit the + * requirements of at least one matcher. Moreover, it's guaranteed that the + * entity list is tightly packed in memory for fast iterations.
+ * In general, observers don't stay true to the order of any set of components. + * + * Observers work mainly with two types of matchers, provided through a + * collector: + * + * * Observing matcher: an observer will return at least all the living entities + * for which one or more of the given components have been updated and not yet + * destroyed. + * * Grouping matcher: an observer will return at least all the living entities + * that would have entered the given group if it existed and that would not + * yet have left it. + * + * If an entity satisfies the requirements of multiple matchers, it will be + * returned once and only once by the observer in any case. + * + * Matchers also support filtering by means of a _where_ clause that accepts + * both a list of types and an exclusion list.
+ * Whenever a matcher finds that an entity matches its requirements, the + * condition of the filter is verified before registering the entity itself. + * Moreover, a registered entity isn't returned by the observer if the condition + * set by the filter is broken in the meantime. + * + * @b Important + * + * Iterators aren't invalidated if: + * + * * New instances of the given components are created and assigned to entities. + * * The entity currently pointed to is modified (as an example, if one of the + * given components is removed from the entity to which the iterator points). + * * The entity currently pointed to is destroyed. + * + * In all the other cases, modifying the pools of the given components in any + * way invalidates all the iterators and using them results in undefined + * behavior. + * + * @warning + * The lifetime of an observer doesn't necessarily have to exceed that of the + * registry to which it is connected. However, the observer must be disconnected + * from the registry before being destroyed to avoid crashes due to dangling + * pointers. + * + * @tparam Entity A valid entity type (see entt_traits for more details).
+ */ +template +class basic_observer { + using payload_type = std::uint32_t; + + template + struct matcher_handler; + + template + struct matcher_handler, type_list, AnyOf>> { + template + static void maybe_valid_if(basic_observer &obs, const basic_registry ®, const Entity entt) { + if(reg.template has(entt) && !reg.template any(entt)) { + if(auto *comp = obs.view.try_get(entt); !comp) { + obs.view.emplace(entt); + } + + obs.view.get(entt) |= (1 << Index); + } + } + + template + static void discard_if(basic_observer &obs, const basic_registry &, const Entity entt) { + if(auto *value = obs.view.try_get(entt); value && !(*value &= (~(1 << Index)))) { + obs.view.erase(entt); + } + } + + template + static void connect(basic_observer &obs, basic_registry ®) { + (reg.template on_destroy().template connect<&discard_if>(obs), ...); + (reg.template on_construct().template connect<&discard_if>(obs), ...); + reg.template on_update().template connect<&maybe_valid_if>(obs); + reg.template on_destroy().template connect<&discard_if>(obs); + } + + static void disconnect(basic_observer &obs, basic_registry ®) { + (reg.template on_destroy().disconnect(obs), ...); + (reg.template on_construct().disconnect(obs), ...); + reg.template on_update().disconnect(obs); + reg.template on_destroy().disconnect(obs); + } + }; + + template + struct matcher_handler, type_list, type_list, AllOf...>> { + template + static void maybe_valid_if(basic_observer &obs, const basic_registry ®, const Entity entt) { + if(reg.template has(entt) && !reg.template any(entt)) { + if(auto *comp = obs.view.try_get(entt); !comp) { + obs.view.emplace(entt); + } + + obs.view.get(entt) |= (1 << Index); + } + } + + template + static void discard_if(basic_observer &obs, const basic_registry &, const Entity entt) { + if(auto *value = obs.view.try_get(entt); value && !(*value &= (~(1 << Index)))) { + obs.view.erase(entt); + } + } + + template + static void connect(basic_observer &obs, basic_registry ®) { + (reg.template 
on_destroy().template connect<&discard_if>(obs), ...); + (reg.template on_construct().template connect<&discard_if>(obs), ...); + (reg.template on_construct().template connect<&maybe_valid_if>(obs), ...); + (reg.template on_destroy().template connect<&maybe_valid_if>(obs), ...); + (reg.template on_destroy().template connect<&discard_if>(obs), ...); + (reg.template on_construct().template connect<&discard_if>(obs), ...); + } + + static void disconnect(basic_observer &obs, basic_registry ®) { + (reg.template on_destroy().disconnect(obs), ...); + (reg.template on_construct().disconnect(obs), ...); + (reg.template on_construct().disconnect(obs), ...); + (reg.template on_destroy().disconnect(obs), ...); + (reg.template on_destroy().disconnect(obs), ...); + (reg.template on_construct().disconnect(obs), ...); + } + }; + + template + static void disconnect(basic_observer &obs, basic_registry ®) { + (matcher_handler::disconnect(obs, reg), ...); + } + + template + void connect(basic_registry ®, std::index_sequence) { + static_assert(sizeof...(Matcher) < std::numeric_limits::digits, "Too many matchers"); + (matcher_handler::template connect(*this, reg), ...); + release = &basic_observer::disconnect; + } + +public: + /*! @brief Underlying entity identifier. */ + using entity_type = Entity; + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + /*! @brief Random access iterator type. */ + using iterator = typename sparse_set::iterator; + + /*! @brief Default constructor. */ + basic_observer() + : target{}, release{}, view{} + {} + + /*! @brief Default copy constructor, deleted on purpose. */ + basic_observer(const basic_observer &) = delete; + /*! @brief Default move constructor, deleted on purpose. */ + basic_observer(basic_observer &&) = delete; + + /** + * @brief Creates an observer and connects it to a given registry. + * @tparam Matcher Types of matchers to use to initialize the observer. + * @param reg A valid reference to a registry. 
+ */ + template + basic_observer(basic_registry ®, basic_collector) + : target{®}, + release{}, + view{} + { + connect(reg, std::index_sequence_for{}); + } + + /*! @brief Default destructor. */ + ~basic_observer() = default; + + /** + * @brief Default copy assignment operator, deleted on purpose. + * @return This observer. + */ + basic_observer & operator=(const basic_observer &) = delete; + + /** + * @brief Default move assignment operator, deleted on purpose. + * @return This observer. + */ + basic_observer & operator=(basic_observer &&) = delete; + + /** + * @brief Connects an observer to a given registry. + * @tparam Matcher Types of matchers to use to initialize the observer. + * @param reg A valid reference to a registry. + */ + template + void connect(basic_registry ®, basic_collector) { + disconnect(); + connect(reg, std::index_sequence_for{}); + target = ® + view.clear(); + } + + /*! @brief Disconnects an observer from the registry it keeps track of. */ + void disconnect() { + if(release) { + release(*this, *target); + release = nullptr; + } + } + + /** + * @brief Returns the number of elements in an observer. + * @return Number of elements. + */ + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return view.size(); + } + + /** + * @brief Checks whether an observer is empty. + * @return True if the observer is empty, false otherwise. + */ + [[nodiscard]] bool empty() const ENTT_NOEXCEPT { + return view.empty(); + } + + /** + * @brief Direct access to the list of entities of the observer. + * + * The returned pointer is such that range `[data(), data() + size()]` is + * always a valid range, even if the container is empty. + * + * @note + * Entities are in the reverse order as returned by the `begin`/`end` + * iterators. + * + * @return A pointer to the array of entities. + */ + [[nodiscard]] const entity_type * data() const ENTT_NOEXCEPT { + return view.data(); + } + + /** + * @brief Returns an iterator to the first entity of the observer. 
+ * + * The returned iterator points to the first entity of the observer. If the + * container is empty, the returned iterator will be equal to `end()`. + * + * @return An iterator to the first entity of the observer. + */ + [[nodiscard]] iterator begin() const ENTT_NOEXCEPT { + return view.sparse_set::begin(); + } + + /** + * @brief Returns an iterator that is past the last entity of the observer. + * + * The returned iterator points to the entity following the last entity of + * the observer. Attempting to dereference the returned iterator results in + * undefined behavior. + * + * @return An iterator to the entity following the last entity of the + * observer. + */ + [[nodiscard]] iterator end() const ENTT_NOEXCEPT { + return view.sparse_set::end(); + } + + /*! @brief Clears the underlying container. */ + void clear() ENTT_NOEXCEPT { + view.clear(); + } + + /** + * @brief Iterates entities and applies the given function object to them. + * + * The function object is invoked for each entity.
+ * The signature of the function must be equivalent to the following form: + * + * @code{.cpp} + * void(const entity_type); + * @endcode + * + * @tparam Func Type of the function object to invoke. + * @param func A valid function object. + */ + template + void each(Func func) const { + for(const auto entity: *this) { + func(entity); + } + } + + /** + * @brief Iterates entities and applies the given function object to them, + * then clears the observer. + * + * @sa each + * + * @tparam Func Type of the function object to invoke. + * @param func A valid function object. + */ + template + void each(Func func) { + std::as_const(*this).each(std::move(func)); + clear(); + } + +private: + basic_registry *target; + void(* release)(basic_observer &, basic_registry &); + storage view; +}; + + +} + + +#endif + +// #include "entity/pool.hpp" + +// #include "entity/registry.hpp" + +// #include "entity/runtime_view.hpp" + +// #include "entity/snapshot.hpp" +#ifndef ENTT_ENTITY_SNAPSHOT_HPP +#define ENTT_ENTITY_SNAPSHOT_HPP + + +#include +#include +#include +#include +#include +#include +#include +// #include "../config/config.h" + +// #include "entity.hpp" + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @brief Utility class to create snapshots from a registry. + * + * A _snapshot_ can be either a dump of the entire registry or a narrower + * selection of components of interest.
+ * This type can be used in both cases if provided with a correctly configured + * output archive. + * + * @tparam Entity A valid entity type (see entt_traits for more details). + */ +template +class basic_snapshot { + /*! @brief A registry is allowed to create snapshots. */ + friend class basic_registry; + + using traits_type = entt_traits; + + template + void get(Archive &archive, std::size_t sz, It first, It last) const { + archive(typename traits_type::entity_type(sz)); + + while(first != last) { + const auto entt = *(first++); + + if(reg->template has(entt)) { + if constexpr(std::is_empty_v) { + archive(entt); + } else { + archive(entt, reg->template get(entt)); + } + } + } + } + + template + void component(Archive &archive, It first, It last, std::index_sequence) const { + std::array size{}; + auto begin = first; + + while(begin != last) { + const auto entt = *(begin++); + ((reg->template has(entt) ? ++size[Indexes] : size[Indexes]), ...); + } + + (get(archive, size[Indexes], first, last), ...); + } + +public: + /*! @brief Underlying entity identifier. */ + using entity_type = Entity; + + /** + * @brief Constructs an instance that is bound to a given registry. + * @param source A valid reference to a registry. + */ + basic_snapshot(const basic_registry &source) ENTT_NOEXCEPT + : reg{&source} + {} + + /*! @brief Default move constructor. */ + basic_snapshot(basic_snapshot &&) = default; + + /*! @brief Default move assignment operator. @return This snapshot. */ + basic_snapshot & operator=(basic_snapshot &&) = default; + + /** + * @brief Puts aside all the entities from the underlying registry. + * + * Entities are serialized along with their versions. Destroyed entities are + * taken in consideration as well by this function. + * + * @tparam Archive Type of output archive. + * @param archive A valid reference to an output archive. + * @return An object of this type to continue creating the snapshot. 
+ */ + template + const basic_snapshot & entities(Archive &archive) const { + const auto sz = reg->size(); + auto first = reg->data(); + const auto last = first + sz; + + archive(typename traits_type::entity_type(sz)); + + while(first != last) { + archive(*(first++)); + } + + return *this; + } + + /** + * @brief Puts aside the given components. + * + * Each instance is serialized together with the entity to which it belongs. + * Entities are serialized along with their versions. + * + * @tparam Component Types of components to serialize. + * @tparam Archive Type of output archive. + * @param archive A valid reference to an output archive. + * @return An object of this type to continue creating the snapshot. + */ + template + const basic_snapshot & component(Archive &archive) const { + (component(archive, reg->template data(), reg->template data() + reg->template size()), ...); + return *this; + } + + /** + * @brief Puts aside the given components for the entities in a range. + * + * Each instance is serialized together with the entity to which it belongs. + * Entities are serialized along with their versions. + * + * @tparam Component Types of components to serialize. + * @tparam Archive Type of output archive. + * @tparam It Type of input iterator. + * @param archive A valid reference to an output archive. + * @param first An iterator to the first element of the range to serialize. + * @param last An iterator past the last element of the range to serialize. + * @return An object of this type to continue creating the snapshot. + */ + template + const basic_snapshot & component(Archive &archive, It first, It last) const { + component(archive, first, last, std::index_sequence_for{}); + return *this; + } + +private: + const basic_registry *reg; +}; + + +/** + * @brief Utility class to restore a snapshot as a whole. 
+ * + * A snapshot loader requires that the destination registry be empty and loads + * all the data at once while keeping intact the identifiers that the entities + * originally had.
+ * An example of use is the implementation of a save/restore utility. + * + * @tparam Entity A valid entity type (see entt_traits for more details). + */ +template +class basic_snapshot_loader { + /*! @brief A registry is allowed to create snapshot loaders. */ + friend class basic_registry; + + using traits_type = entt_traits; + + template + void assign(Archive &archive) const { + typename traits_type::entity_type length{}; + archive(length); + + entity_type entt{}; + + if constexpr(std::is_empty_v) { + while(length--) { + archive(entt); + const auto entity = reg->valid(entt) ? entt : reg->create(entt); + ENTT_ASSERT(entity == entt); + reg->template emplace(entity); + } + } else { + Type instance{}; + + while(length--) { + archive(entt, instance); + const auto entity = reg->valid(entt) ? entt : reg->create(entt); + ENTT_ASSERT(entity == entt); + reg->template emplace(entity, std::move(instance)); + } + } + } + +public: + /*! @brief Underlying entity identifier. */ + using entity_type = Entity; + + /** + * @brief Constructs an instance that is bound to a given registry. + * @param source A valid reference to a registry. + */ + basic_snapshot_loader(basic_registry &source) ENTT_NOEXCEPT + : reg{&source} + { + // restoring a snapshot as a whole requires a clean registry + ENTT_ASSERT(reg->empty()); + } + + /*! @brief Default move constructor. */ + basic_snapshot_loader(basic_snapshot_loader &&) = default; + + /*! @brief Default move assignment operator. @return This loader. */ + basic_snapshot_loader & operator=(basic_snapshot_loader &&) = default; + + /** + * @brief Restores entities that were in use during serialization. + * + * This function restores the entities that were in use during serialization + * and gives them the versions they originally had. + * + * @tparam Archive Type of input archive. + * @param archive A valid reference to an input archive. + * @return A valid loader to continue restoring data. 
+ */ + template + const basic_snapshot_loader & entities(Archive &archive) const { + typename traits_type::entity_type length{}; + + archive(length); + std::vector all(length); + + for(decltype(length) pos{}; pos < length; ++pos) { + archive(all[pos]); + } + + reg->assign(all.cbegin(), all.cend()); + + return *this; + } + + /** + * @brief Restores components and assigns them to the right entities. + * + * The template parameter list must be exactly the same used during + * serialization. In the event that the entity to which the component is + * assigned doesn't exist yet, the loader will take care to create it with + * the version it originally had. + * + * @tparam Component Types of components to restore. + * @tparam Archive Type of input archive. + * @param archive A valid reference to an input archive. + * @return A valid loader to continue restoring data. + */ + template + const basic_snapshot_loader & component(Archive &archive) const { + (assign(archive), ...); + return *this; + } + + /** + * @brief Destroys those entities that have no components. + * + * In case all the entities were serialized but only part of the components + * was saved, it could happen that some of the entities have no components + * once restored.
+ * This functions helps to identify and destroy those entities. + * + * @return A valid loader to continue restoring data. + */ + const basic_snapshot_loader & orphans() const { + reg->orphans([this](const auto entt) { + reg->destroy(entt); + }); + + return *this; + } + +private: + basic_registry *reg; +}; + + +/** + * @brief Utility class for _continuous loading_. + * + * A _continuous loader_ is designed to load data from a source registry to a + * (possibly) non-empty destination. The loader can accommodate in a registry + * more than one snapshot in a sort of _continuous loading_ that updates the + * destination one step at a time.
+ * Identifiers that entities originally had are not transferred to the target. + * Instead, the loader maps remote identifiers to local ones while restoring a + * snapshot.
+ * An example of use is the implementation of a client-server applications with + * the requirement of transferring somehow parts of the representation side to + * side. + * + * @tparam Entity A valid entity type (see entt_traits for more details). + */ +template +class basic_continuous_loader { + using traits_type = entt_traits; + + void destroy(Entity entt) { + const auto it = remloc.find(entt); + + if(it == remloc.cend()) { + const auto local = reg->create(); + remloc.emplace(entt, std::make_pair(local, true)); + reg->destroy(local); + } + } + + void restore(Entity entt) { + const auto it = remloc.find(entt); + + if(it == remloc.cend()) { + const auto local = reg->create(); + remloc.emplace(entt, std::make_pair(local, true)); + } else { + remloc[entt].first = reg->valid(remloc[entt].first) ? remloc[entt].first : reg->create(); + // set the dirty flag + remloc[entt].second = true; + } + } + + template + auto update(int, Container &container) + -> decltype(typename Container::mapped_type{}, void()) { + // map like container + Container other; + + for(auto &&pair: container) { + using first_type = std::remove_const_t::first_type>; + using second_type = typename std::decay_t::second_type; + + if constexpr(std::is_same_v && std::is_same_v) { + other.emplace(map(pair.first), map(pair.second)); + } else if constexpr(std::is_same_v) { + other.emplace(map(pair.first), std::move(pair.second)); + } else { + static_assert(std::is_same_v, "Neither the key nor the value are of entity type"); + other.emplace(std::move(pair.first), map(pair.second)); + } + } + + std::swap(container, other); + } + + template + auto update(char, Container &container) + -> decltype(typename Container::value_type{}, void()) { + // vector like container + static_assert(std::is_same_v, "Invalid value type"); + + for(auto &&entt: container) { + entt = map(entt); + } + } + + template + void update([[maybe_unused]] Other &instance, [[maybe_unused]] Member Type:: *member) { + if 
constexpr(!std::is_same_v) { + return; + } else if constexpr(std::is_same_v) { + instance.*member = map(instance.*member); + } else { + // maybe a container? let's try... + update(0, instance.*member); + } + } + + template + void remove_if_exists() { + for(auto &&ref: remloc) { + const auto local = ref.second.first; + + if(reg->valid(local)) { + reg->template remove_if_exists(local); + } + } + } + + template + void assign(Archive &archive, [[maybe_unused]] Member Type:: *... member) { + typename traits_type::entity_type length{}; + archive(length); + + entity_type entt{}; + + if constexpr(std::is_empty_v) { + while(length--) { + archive(entt); + restore(entt); + reg->template emplace_or_replace(map(entt)); + } + } else { + Other instance{}; + + while(length--) { + archive(entt, instance); + (update(instance, member), ...); + restore(entt); + reg->template emplace_or_replace(map(entt), std::move(instance)); + } + } + } + +public: + /*! @brief Underlying entity identifier. */ + using entity_type = Entity; + + /** + * @brief Constructs an instance that is bound to a given registry. + * @param source A valid reference to a registry. + */ + basic_continuous_loader(basic_registry &source) ENTT_NOEXCEPT + : reg{&source} + {} + + /*! @brief Default move constructor. */ + basic_continuous_loader(basic_continuous_loader &&) = default; + + /*! @brief Default move assignment operator. @return This loader. */ + basic_continuous_loader & operator=(basic_continuous_loader &&) = default; + + /** + * @brief Restores entities that were in use during serialization. + * + * This function restores the entities that were in use during serialization + * and creates local counterparts for them if required. + * + * @tparam Archive Type of input archive. + * @param archive A valid reference to an input archive. + * @return A non-const reference to this loader. 
+ */ + template + basic_continuous_loader & entities(Archive &archive) { + typename traits_type::entity_type length{}; + entity_type entt{}; + + archive(length); + + for(decltype(length) pos{}; pos < length; ++pos) { + archive(entt); + + if(const auto entity = (to_integral(entt) & traits_type::entity_mask); entity == pos) { + restore(entt); + } else { + destroy(entt); + } + } + + return *this; + } + + /** + * @brief Restores components and assigns them to the right entities. + * + * The template parameter list must be exactly the same used during + * serialization. In the event that the entity to which the component is + * assigned doesn't exist yet, the loader will take care to create a local + * counterpart for it.
+ * Members can be either data members of type entity_type or containers of + * entities. In both cases, the loader will visit them and update the + * entities by replacing each one with its local counterpart. + * + * @tparam Component Type of component to restore. + * @tparam Archive Type of input archive. + * @tparam Type Types of components to update with local counterparts. + * @tparam Member Types of members to update with their local counterparts. + * @param archive A valid reference to an input archive. + * @param member Members to update with their local counterparts. + * @return A non-const reference to this loader. + */ + template + basic_continuous_loader & component(Archive &archive, Member Type:: *... member) { + (remove_if_exists(), ...); + (assign(archive, member...), ...); + return *this; + } + + /** + * @brief Helps to purge entities that no longer have a conterpart. + * + * Users should invoke this member function after restoring each snapshot, + * unless they know exactly what they are doing. + * + * @return A non-const reference to this loader. + */ + basic_continuous_loader & shrink() { + auto it = remloc.begin(); + + while(it != remloc.cend()) { + const auto local = it->second.first; + bool &dirty = it->second.second; + + if(dirty) { + dirty = false; + ++it; + } else { + if(reg->valid(local)) { + reg->destroy(local); + } + + it = remloc.erase(it); + } + } + + return *this; + } + + /** + * @brief Destroys those entities that have no components. + * + * In case all the entities were serialized but only part of the components + * was saved, it could happen that some of the entities have no components + * once restored.
+ * This functions helps to identify and destroy those entities. + * + * @return A non-const reference to this loader. + */ + basic_continuous_loader & orphans() { + reg->orphans([this](const auto entt) { + reg->destroy(entt); + }); + + return *this; + } + + /** + * @brief Tests if a loader knows about a given entity. + * @param entt An entity identifier. + * @return True if `entity` is managed by the loader, false otherwise. + */ + [[nodiscard]] bool contains(entity_type entt) const ENTT_NOEXCEPT { + return (remloc.find(entt) != remloc.cend()); + } + + /** + * @brief Returns the identifier to which an entity refers. + * @param entt An entity identifier. + * @return The local identifier if any, the null entity otherwise. + */ + [[nodiscard]] entity_type map(entity_type entt) const ENTT_NOEXCEPT { + const auto it = remloc.find(entt); + entity_type other = null; + + if(it != remloc.cend()) { + other = it->second.first; + } + + return other; + } + +private: + std::unordered_map> remloc; + basic_registry *reg; +}; + + +} + + +#endif + +// #include "entity/sparse_set.hpp" + +// #include "entity/storage.hpp" + +// #include "entity/utility.hpp" + +// #include "entity/view.hpp" + +// #include "locator/locator.hpp" +#ifndef ENTT_LOCATOR_LOCATOR_HPP +#define ENTT_LOCATOR_LOCATOR_HPP + + +#include +#include +// #include "../config/config.h" +#ifndef ENTT_CONFIG_CONFIG_H +#define ENTT_CONFIG_CONFIG_H + + +#ifndef ENTT_NOEXCEPT +# define ENTT_NOEXCEPT noexcept +#endif + + +#ifndef ENTT_HS_SUFFIX +# define ENTT_HS_SUFFIX _hs +#endif + + +#ifndef ENTT_HWS_SUFFIX +# define ENTT_HWS_SUFFIX _hws +#endif + + +#ifndef ENTT_USE_ATOMIC +# define ENTT_MAYBE_ATOMIC(Type) Type +#else +# include +# define ENTT_MAYBE_ATOMIC(Type) std::atomic +#endif + + +#ifndef ENTT_ID_TYPE +# include +# define ENTT_ID_TYPE std::uint32_t +#endif + + +#ifndef ENTT_PAGE_SIZE +# define ENTT_PAGE_SIZE 32768 +#endif + + +#ifndef ENTT_ASSERT +# include +# define ENTT_ASSERT(condition) assert(condition) +#endif + 
+ +#ifndef ENTT_NO_ETO +# include +# define ENTT_IS_EMPTY(Type) std::is_empty +#else +# include +# define ENTT_IS_EMPTY(Type) std::false_type +#endif + + +#ifndef ENTT_STANDARD_CPP +# if defined __clang__ || (defined __GNUC__ && __GNUC__ > 8) +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined __GNUC__ +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined _MSC_VER +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __FUNCSIG__ +# define ENTT_PRETTY_FUNCTION_PREFIX '<' +# define ENTT_PRETTY_FUNCTION_SUFFIX '>' +# endif +#endif + + +#ifndef ENTT_STANDALONE +# define ENTT_FAST_PATH(...) false +#else +# define ENTT_FAST_PATH(Cond) Cond +#endif + + +#endif + + + +namespace entt { + + +/** + * @brief Service locator, nothing more. + * + * A service locator can be used to do what it promises: locate services.
+ * Usually service locators are tightly bound to the services they expose and + * thus it's hard to define a general purpose class to do that. This template + * based implementation tries to fill the gap and to get rid of the burden of + * defining a different specific locator for each application. + * + * @tparam Service Type of service managed by the locator. + */ +template +struct service_locator { + /*! @brief Type of service offered. */ + using service_type = Service; + + /*! @brief Default constructor, deleted on purpose. */ + service_locator() = delete; + /*! @brief Default destructor, deleted on purpose. */ + ~service_locator() = delete; + + /** + * @brief Tests whether the locator is empty, that is, no service implementation is set. + * @return True if no service is set, false otherwise. + */ + [[nodiscard]] static bool empty() ENTT_NOEXCEPT { + return !static_cast(service); + } + + /** + * @brief Returns a weak pointer to a service implementation, if any. + * + * Clients of a service shouldn't retain references to it. The recommended + * way is to retrieve the service implementation currently set each and + * every time the need of using it arises. Otherwise users can incur + * unexpected behaviors. + * + * @return A weak pointer to the service implementation currently set, if any. + */ + [[nodiscard]] static std::weak_ptr get() ENTT_NOEXCEPT { + return service; + } + + /** + * @brief Returns a reference to a service implementation, if any. + * + * Clients of a service shouldn't retain references to it. The recommended + * way is to retrieve the service implementation currently set each and + * every time the need of using it arises. Otherwise users can incur + * unexpected behaviors. + * + * @warning + * In case no service implementation has been set, a call to this function + * results in undefined behavior. + * + * @return A reference to the service implementation currently set, if any. 
+ */ + [[nodiscard]] static Service & ref() ENTT_NOEXCEPT { + return *service; + } + + /** + * @brief Sets or replaces a service, constructing the implementation in place. + * @tparam Impl Type of the new service to use. + * @tparam Args Types of arguments to use to construct the service. + * @param args Parameters to use to construct the service. + */ + template + static void set(Args &&... args) { + service = std::make_shared(std::forward(args)...); + } + + /** + * @brief Sets or replaces a service with an already constructed implementation. + * @param ptr Service to use to replace the current one (asserted to be non-empty by means of `ENTT_ASSERT`). + */ + static void set(std::shared_ptr ptr) { + ENTT_ASSERT(static_cast(ptr)); + service = std::move(ptr); + } + + /** + * @brief Resets a service. + * + * The service is no longer valid after a reset. + */ + static void reset() { + service.reset(); + } + +private: + inline static std::shared_ptr service = nullptr; +}; + + +} + + +#endif + +// #include "meta/container.hpp" +#ifndef ENTT_META_CONTAINER_HPP +#define ENTT_META_CONTAINER_HPP + + +#include +#include +#include +#include +#include +#include +#include +// #include "../config/config.h" +#ifndef ENTT_CONFIG_CONFIG_H +#define ENTT_CONFIG_CONFIG_H + + +#ifndef ENTT_NOEXCEPT +# define ENTT_NOEXCEPT noexcept +#endif + + +#ifndef ENTT_HS_SUFFIX +# define ENTT_HS_SUFFIX _hs +#endif + + +#ifndef ENTT_HWS_SUFFIX +# define ENTT_HWS_SUFFIX _hws +#endif + + +#ifndef ENTT_USE_ATOMIC +# define ENTT_MAYBE_ATOMIC(Type) Type +#else +# include +# define ENTT_MAYBE_ATOMIC(Type) std::atomic +#endif + + +#ifndef ENTT_ID_TYPE +# include +# define ENTT_ID_TYPE std::uint32_t +#endif + + +#ifndef ENTT_PAGE_SIZE +# define ENTT_PAGE_SIZE 32768 +#endif + + +#ifndef ENTT_ASSERT +# include +# define ENTT_ASSERT(condition) assert(condition) +#endif + + +#ifndef ENTT_NO_ETO +# include +# define ENTT_IS_EMPTY(Type) std::is_empty +#else +# include +# define ENTT_IS_EMPTY(Type) std::false_type 
+#endif + + +#ifndef ENTT_STANDARD_CPP +# if defined __clang__ || (defined __GNUC__ && __GNUC__ > 8) +# define 
ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined __GNUC__ +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined _MSC_VER +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __FUNCSIG__ +# define ENTT_PRETTY_FUNCTION_PREFIX '<' +# define ENTT_PRETTY_FUNCTION_SUFFIX '>' +# endif +#endif + + +#ifndef ENTT_STANDALONE +# define ENTT_FAST_PATH(...) false +#else +# define ENTT_FAST_PATH(Cond) Cond +#endif + + +#endif + +// #include "type_traits.hpp" +#ifndef ENTT_META_TYPE_TRAITS_HPP +#define ENTT_META_TYPE_TRAITS_HPP + + +#include + + +namespace entt { + + +/** + * @brief Traits class template to be specialized to enable support for meta + * sequence containers (the primary template is declared only). + */ +template +struct meta_sequence_container_traits; + + +/** + * @brief Traits class template to be specialized to enable support for meta + * associative containers (the primary template is declared only). + */ +template +struct meta_associative_container_traits; + + +/** + * @brief Provides the member constant `value`, which is true if support for meta + * sequence containers is enabled for the given type, false otherwise. + * @tparam Type Potentially sequence container type. + */ +template +struct has_meta_sequence_container_traits: std::false_type {}; + + +/*! @copydoc has_meta_sequence_container_traits */ +template +struct has_meta_sequence_container_traits::value_type>> + : std::true_type +{}; + + +/** + * @brief Helper variable template for `has_meta_sequence_container_traits`. + * @tparam Type Potentially sequence container type. 
+ */ +template +inline constexpr auto has_meta_sequence_container_traits_v = has_meta_sequence_container_traits::value; + + +/** + * @brief Provides the member constant `value`, which is true if support for meta + * associative containers is enabled for the given type, false otherwise. 
+ * @tparam Type Potentially associative container type. + */ +template +struct has_meta_associative_container_traits: std::false_type {}; + + +/*! @copydoc has_meta_associative_container_traits */ +template +struct has_meta_associative_container_traits::key_type>> + : std::true_type +{}; + + +/** + * @brief Helper variable template for `has_meta_associative_container_traits`. + * @tparam Type Potentially associative container type. + */ +template +inline constexpr auto has_meta_associative_container_traits_v = has_meta_associative_container_traits::value; + + +/** + * @brief Provides the member constant `value`, which is true if a meta associative + * container claims to wrap a key-only type, false otherwise. + * @tparam Type Potentially key-only meta associative container type. + */ +template +struct is_key_only_meta_associative_container: std::true_type {}; + + +/*! @copydoc is_key_only_meta_associative_container */ +template +struct is_key_only_meta_associative_container::mapped_type>> + : std::false_type +{}; + + +/** + * @brief Helper variable template for `is_key_only_meta_associative_container`. + * @tparam Type Potentially key-only meta associative container type. + */ +template +inline constexpr auto is_key_only_meta_associative_container_v = is_key_only_meta_associative_container::value; + + +/** + * @brief Provides the member constant `value`, which is true if a given type is a + * pointer-like type from the point of view of the meta system, false otherwise. + * @tparam Type Potentially pointer-like type. + */ +template +struct is_meta_pointer_like: std::false_type {}; + + +/** + * @brief Helper variable template for `is_meta_pointer_like`. + * @tparam Type Potentially pointer-like type. + */ +template +inline constexpr auto is_meta_pointer_like_v = is_meta_pointer_like::value; + + +} + + +#endif + + + +namespace entt { + + +namespace internal { + + +template class... Trait> +struct container_traits: public Trait... 
{}; + + +/** + * @brief Basic STL-compatible container traits. + * @tparam Container The type of the container. + */ +template +struct basic_container { + /*! 
@brief Iterator type of the container. */ + using iterator = typename Container::iterator; + /*! @brief Unsigned integer type. */ + using size_type = typename Container::size_type; + /*! @brief Value type of the container. */ + using value_type = typename Container::value_type; + + /** + * @brief Returns the size of the given container. + * @param cont The container for which to return the size. + * @return The size of the given container. + */ + [[nodiscard]] static size_type size(const Container &cont) ENTT_NOEXCEPT { + return cont.size(); + } + + /** + * @brief Returns an iterator to the first element of the given container. + * @param cont The container for which to return the iterator. + * @return An iterator to the first element of the given container. + */ + [[nodiscard]] static iterator begin(Container &cont) { + return cont.begin(); + } + + /** + * @brief Returns an iterator past the last element of the given container. + * @param cont The container for which to return the iterator. + * @return An iterator past the last element of the given container. + */ + [[nodiscard]] static iterator end(Container &cont) { + return cont.end(); + } +}; + + +/** + * @brief Basic STL-compatible associative container traits. + * @tparam Container The type of the container. + */ +template +struct basic_associative_container { + /*! @brief Key type of the associative container. */ + using key_type = typename Container::key_type; + + /** + * @brief Returns an iterator to the element with key equivalent to the given + * one, if any (the lookup is forwarded to `Container::find`). + * @param cont The container in which to search for the element. + * @param key The key of the element to search. + * @return An iterator to the element with the given key, if any. 
+ */ + [[nodiscard]] static typename Container::iterator find(Container &cont, const key_type &key) { + return cont.find(key); + } +}; + + +/** + * @brief Basic STL-compatible dynamic container traits + * @tparam Container The type of the container. 
+ */ +template +struct basic_dynamic_container { + /** + * @brief Clears the content of the given container. + * @param cont The container for which to clear the content. + * @return Always true, the container is cleared unconditionally. + */ + [[nodiscard]] static bool clear(Container &cont) { + return cont.clear(), true; + } +}; + + +/** + * @brief Basic STL-compatible dynamic associative container traits + * @tparam Container The type of the container. + */ +template +struct basic_dynamic_associative_container { + /** + * @brief Removes the specified element from the given container. + * @param cont The container from which to remove the element. + * @param key The element to remove. + * @return True if at least one element was removed, false otherwise. + */ + [[nodiscard]] static bool erase(Container &cont, const typename Container::key_type &key) { + /* erase(key) yields the number of removed elements, so it must be tested against zero and not against the previous size */ + return cont.erase(key) != 0u; + } +}; + + +/** + * @brief Basic STL-compatible sequence container traits + * @tparam Container The type of the container. + */ +template +struct basic_sequence_container { + /** + * @brief Returns a reference to the element at the specified location of the + * given container (no bounds checking is performed). + * @param cont The container from which to get the element. + * @param pos The position of the element to return. + * @return A reference to the requested element. + */ + [[nodiscard]] static typename Container::value_type & get(Container &cont, typename Container::size_type pos) { + return cont[pos]; + } +}; + + +/** + * @brief STL-compatible dynamic associative key-only container traits + * @tparam Container The type of the container. + */ +template +struct dynamic_associative_key_only_container { + /** + * @brief Inserts an element into the given container. + * @param cont The container in which to insert the element. + * @param key The element to insert. 
+ * @return A bool denoting whether the insertion took place. 
+ */ + [[nodiscard]] static bool insert(Container &cont, const typename Container::key_type &key) { + return cont.insert(key).second; + } +}; + + +/** + * @brief STL-compatible dynamic key-value associative container traits + * @tparam Container The type of the container. + */ +template +struct dynamic_associative_key_value_container { + /** + * @brief Inserts an element (a key/value pair) into the given container. + * @param cont The container in which to insert the element. + * @param key The key of the element to insert. + * @param value The value of the element to insert. + * @return A bool denoting whether the insertion took place. + */ + [[nodiscard]] static bool insert(Container &cont, const typename Container::key_type &key, const typename Container::mapped_type &value) { + return cont.insert(std::make_pair(key, value)).second; + } +}; + + +/** + * @brief STL-compatible dynamic sequence container traits + * @tparam Container The type of the container. + */ +template +struct dynamic_sequence_container { + /** + * @brief Resizes the given container to contain the given number of elements. + * @param cont The container to resize. + * @param sz The new size of the container. + * @return Always true, the request is forwarded to `Container::resize`. + */ + [[nodiscard]] static bool resize(Container &cont, typename Container::size_type sz) { + return (cont.resize(sz), true); + } + + /** + * @brief Inserts an element at the specified location of the given container. + * @param cont The container into which to insert the element. + * @param it Iterator before which the element will be inserted. + * @param value Element value to insert. + * @return A pair consisting of an iterator to the inserted element (in case + * of success) and a bool denoting whether the insertion took place. 
+ */ + [[nodiscard]] static std::pair insert(Container &cont, typename Container::iterator it, const typename Container::value_type &value) { + return { cont.insert(it, value), true }; + } + + /** + * @brief Removes the element at the specified location from the given container. + * @param cont The container from which to remove the element. + * @param it Iterator to the element to remove. + * @return A pair consisting of an iterator following the last removed + * element (in case of success) and a bool denoting whether the removal + * took place. + */ + [[nodiscard]] static std::pair erase(Container &cont, typename Container::iterator it) { + return { cont.erase(it), true }; + } +}; + + +/** + * @brief STL-compatible fixed sequence container traits + * @tparam Container The type of the container. + */ +template +struct fixed_sequence_container { + /** + * @brief Does nothing, a fixed size container cannot be resized. + * @return False to indicate failure in all cases. + */ + [[nodiscard]] static bool resize(const Container &, typename Container::size_type) { + return false; + } + + /** + * @brief Does nothing, a fixed size container cannot be cleared. + * @return False to indicate failure in all cases. + */ + [[nodiscard]] static bool clear(const Container &) { + return false; + } + + /** + * @brief Does nothing, elements cannot be added to a fixed size container. + * @return A pair consisting of a value-initialized iterator and a false value to + * indicate failure in all cases. + */ + [[nodiscard]] static std::pair insert(const Container &, typename Container::iterator, const typename Container::value_type &) { + return { {}, false }; + } + + /** + * @brief Does nothing, elements cannot be removed from a fixed size container. + * @return A pair consisting of a value-initialized iterator and a false value to + * indicate failure in all cases. 
+ */ + [[nodiscard]] static std::pair erase(const Container &, typename Container::iterator) { + return { {}, false }; + } +}; + + +} + + +/** + * @brief Meta sequence container traits for `std::vector`s of any type. + * @tparam Type The type of elements. + * @tparam Args Other arguments. 
+ */ +template +struct meta_sequence_container_traits> + : internal::container_traits< + std::vector, + internal::basic_container, + internal::basic_dynamic_container, + internal::basic_sequence_container, + internal::dynamic_sequence_container + > +{}; + + +/** + * @brief Meta sequence container traits for `std::array`s of any type (fixed size). + * @tparam Type The type of elements. + * @tparam N The number of elements. + */ +template +struct meta_sequence_container_traits> + : internal::container_traits< + std::array, + internal::basic_container, + internal::basic_sequence_container, + internal::fixed_sequence_container + > +{}; + + +/** + * @brief Meta associative container traits for `std::map`s of any type. + * @tparam Key The key type of elements. + * @tparam Value The value type of elements. + * @tparam Args Other arguments. + */ +template +struct meta_associative_container_traits> + : internal::container_traits< + std::map, + internal::basic_container, + internal::basic_associative_container, + internal::basic_dynamic_container, + internal::basic_dynamic_associative_container, + internal::dynamic_associative_key_value_container + > +{ + /*! @brief Mapped type of the associative container. */ + using mapped_type = typename std::map::mapped_type; +}; + + +/** + * @brief Meta associative container traits for `std::unordered_map`s of any + * type. + * @tparam Key The key type of elements. + * @tparam Value The value type of elements. + * @tparam Args Other arguments. + */ +template +struct meta_associative_container_traits> + : internal::container_traits< + std::unordered_map, + internal::basic_container, + internal::basic_associative_container, + internal::basic_dynamic_container, + internal::basic_dynamic_associative_container, + internal::dynamic_associative_key_value_container + > +{ + /*! @brief Mapped type of the associative container. 
*/ + using mapped_type = typename std::unordered_map::mapped_type; +}; + + +/** + * @brief Meta associative container traits for `std::set`s of any type (key-only). + * @tparam Key The type of elements. + * @tparam Args Other arguments. + */ +template +struct meta_associative_container_traits> + : internal::container_traits< + std::set, + internal::basic_container, + internal::basic_associative_container, + internal::basic_dynamic_container, + internal::basic_dynamic_associative_container, + internal::dynamic_associative_key_only_container + > +{}; + + +/** + * @brief Meta associative container traits for `std::unordered_set`s of any + * type (key-only). + * @tparam Key The type of elements. + * @tparam Args Other arguments. + */ +template +struct meta_associative_container_traits> + : internal::container_traits< + std::unordered_set, + internal::basic_container, + internal::basic_associative_container, + internal::basic_dynamic_container, + internal::basic_dynamic_associative_container, + internal::dynamic_associative_key_only_container + > +{}; + + +} + + +#endif + +// #include "meta/ctx.hpp" +#ifndef ENTT_META_CTX_HPP +#define ENTT_META_CTX_HPP + + +// #include "../core/attribute.h" +#ifndef ENTT_CORE_ATTRIBUTE_H +#define ENTT_CORE_ATTRIBUTE_H + + +#ifndef ENTT_EXPORT +# if defined _WIN32 || defined __CYGWIN__ || defined _MSC_VER +# define ENTT_EXPORT __declspec(dllexport) +# define ENTT_IMPORT __declspec(dllimport) +# define ENTT_HIDDEN +# elif defined __GNUC__ && __GNUC__ >= 4 +# define ENTT_EXPORT __attribute__((visibility("default"))) +# define ENTT_IMPORT __attribute__((visibility("default"))) +# define ENTT_HIDDEN __attribute__((visibility("hidden"))) +# else /* Unsupported compiler: all the macros expand to nothing */ +# define ENTT_EXPORT +# define ENTT_IMPORT +# define ENTT_HIDDEN +# endif +#endif + + +#ifndef ENTT_API +# if defined ENTT_API_EXPORT +# define ENTT_API ENTT_EXPORT +# elif defined ENTT_API_IMPORT +# define ENTT_API ENTT_IMPORT +# else /* 
No API: neither export nor import was requested */ +# define ENTT_API +# endif +#endif + + +#endif 
+ +// #include "../config/config.h" + + + +namespace entt { + + +/** + * @cond TURN_OFF_DOXYGEN + * Internal details not to be documented. + */ + + +namespace internal { + + +struct meta_type_node; + + +struct ENTT_API meta_context { + // we could use the lines below but VS2017 fails with an ICE (internal compiler error) if they are combined with ENTT_API, despite the code being valid C++ + // inline static meta_type_node *local = nullptr; + // inline static meta_type_node **global = &local; + + [[nodiscard]] static meta_type_node * & local() ENTT_NOEXCEPT { + static meta_type_node *chain = nullptr; + return chain; + } + + [[nodiscard]] static meta_type_node ** & global() ENTT_NOEXCEPT { + static meta_type_node **chain = &local(); + return chain; + } +}; + + +} + + +/** + * Internal details not to be documented. + * @endcond + */ + + +/*! @brief Opaque container for a meta context (by default, it refers to the local chain). */ +struct meta_ctx { + /** + * @brief Binds the meta system to a given context. + * @param other A valid context to which to bind, its chain pointer replaces the global one. 
+ */ + static void bind(meta_ctx other) ENTT_NOEXCEPT { + internal::meta_context::global() = other.ctx; + } + +private: + internal::meta_type_node **ctx{&internal::meta_context::local()}; +}; + + +} + + +#endif + +// #include "meta/factory.hpp" +#ifndef ENTT_META_FACTORY_HPP +#define ENTT_META_FACTORY_HPP + + +#include +#include +#include +#include +#include +#include +// #include "../config/config.h" + +// #include "../core/fwd.hpp" +#ifndef ENTT_CORE_FWD_HPP +#define ENTT_CORE_FWD_HPP + + +// #include "../config/config.h" +#ifndef ENTT_CONFIG_CONFIG_H +#define ENTT_CONFIG_CONFIG_H + + +#ifndef ENTT_NOEXCEPT +# define ENTT_NOEXCEPT noexcept +#endif + + +#ifndef ENTT_HS_SUFFIX +# define ENTT_HS_SUFFIX _hs +#endif + + +#ifndef ENTT_HWS_SUFFIX +# define ENTT_HWS_SUFFIX _hws +#endif + + +#ifndef ENTT_USE_ATOMIC +# define ENTT_MAYBE_ATOMIC(Type) Type +#else +# include +# define ENTT_MAYBE_ATOMIC(Type) std::atomic +#endif + + +#ifndef ENTT_ID_TYPE +# include +# define ENTT_ID_TYPE std::uint32_t 
+#endif + + +#ifndef ENTT_PAGE_SIZE +# define ENTT_PAGE_SIZE 32768 +#endif + + +#ifndef ENTT_ASSERT +# include +# define ENTT_ASSERT(condition) assert(condition) +#endif + + +#ifndef ENTT_NO_ETO +# include +# define ENTT_IS_EMPTY(Type) std::is_empty +#else +# include +# define ENTT_IS_EMPTY(Type) std::false_type +#endif + + +#ifndef ENTT_STANDARD_CPP +# if defined __clang__ || (defined __GNUC__ && __GNUC__ > 8) +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined __GNUC__ +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined _MSC_VER +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __FUNCSIG__ +# define ENTT_PRETTY_FUNCTION_PREFIX '<' +# define ENTT_PRETTY_FUNCTION_SUFFIX '>' +# endif +#endif + + +#ifndef ENTT_STANDALONE +# define ENTT_FAST_PATH(...) false +#else +# define ENTT_FAST_PATH(Cond) Cond +#endif + + +#endif + + + +namespace entt { + + +/*! @brief Alias declaration for type identifiers. 
*/ +using id_type = ENTT_ID_TYPE; + + +} + + +#endif + +// #include "../core/type_info.hpp" +#ifndef ENTT_CORE_TYPE_INFO_HPP +#define ENTT_CORE_TYPE_INFO_HPP + + +#include +// #include "../config/config.h" + +// #include "../core/attribute.h" +#ifndef ENTT_CORE_ATTRIBUTE_H +#define ENTT_CORE_ATTRIBUTE_H + + +#ifndef ENTT_EXPORT +# if defined _WIN32 || defined __CYGWIN__ || defined _MSC_VER +# define ENTT_EXPORT __declspec(dllexport) +# define ENTT_IMPORT __declspec(dllimport) +# define ENTT_HIDDEN +# elif defined __GNUC__ && __GNUC__ >= 4 +# define ENTT_EXPORT __attribute__((visibility("default"))) +# define ENTT_IMPORT __attribute__((visibility("default"))) +# define ENTT_HIDDEN __attribute__((visibility("hidden"))) +# else /* Unsupported compiler */ +# define ENTT_EXPORT +# define ENTT_IMPORT +# define ENTT_HIDDEN +# endif +#endif + + +#ifndef ENTT_API +# if defined ENTT_API_EXPORT +# define ENTT_API ENTT_EXPORT +# elif defined ENTT_API_IMPORT +# define ENTT_API ENTT_IMPORT +# else /* No API */ +# define ENTT_API +# endif +#endif + + +#endif + +// #include "hashed_string.hpp" +#ifndef ENTT_CORE_HASHED_STRING_HPP +#define ENTT_CORE_HASHED_STRING_HPP + + +#include +#include +// #include "../config/config.h" + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @cond TURN_OFF_DOXYGEN + * Internal details not to be documented. 
+ */ + + +namespace internal { + + +template +struct fnv1a_traits; + + +template<> +struct fnv1a_traits { + using type = std::uint32_t; + static constexpr std::uint32_t offset = 2166136261; /* FNV 32-bit offset basis */ + static constexpr std::uint32_t prime = 16777619; /* FNV 32-bit prime */ +}; + + +template<> +struct fnv1a_traits { + using type = std::uint64_t; + static constexpr std::uint64_t offset = 14695981039346656037ull; /* FNV 64-bit offset basis */ + static constexpr std::uint64_t prime = 1099511628211ull; /* FNV 64-bit prime */ +}; + + +} + + +/** + * Internal details not to be documented. + * @endcond + */ + + +/** + * @brief Zero overhead unique identifier. 
+ * + * A hashed string is a compile-time tool that allows users to use + * human-readable identifiers in the codebase while using their numeric + * counterparts at runtime.
+ * Because of that, a hashed string can also be used in constant expressions if + * required. + * + * @tparam Char Character type. + */ +template +class basic_hashed_string { + using traits_type = internal::fnv1a_traits; + + struct const_wrapper { + // non-explicit constructor on purpose + constexpr const_wrapper(const Char *curr) ENTT_NOEXCEPT: str{curr} {} + const Char *str; + }; + + // Fowler–Noll–Vo hash function v. 1a - the good (characters are consumed up to the null terminator) + [[nodiscard]] static constexpr id_type helper(const Char *curr) ENTT_NOEXCEPT { + auto value = traits_type::offset; + + while(*curr != 0) { + value = (value ^ static_cast(*(curr++))) * traits_type::prime; + } + + return value; + } + +public: + /*! @brief Character type. */ + using value_type = Char; + /*! @brief Unsigned integer type. */ + using hash_type = id_type; + + /** + * @brief Returns directly the numeric representation of a string. + * + * Forcing template resolution avoids implicit conversions. A + * human-readable identifier can be anything but a plain, old bunch of + * characters.
+ * Example of use: + * @code{.cpp} + * const auto value = basic_hashed_string::value("my.png"); + * @endcode + * + * @tparam N Number of characters of the identifier. + * @param str Human-readable identifier. + * @return The numeric representation of the string. + */ + template + [[nodiscard]] static constexpr hash_type value(const value_type (&str)[N]) ENTT_NOEXCEPT { + return helper(str); + } + + /** + * @brief Returns directly the numeric representation of a string. + * @param wrapper Helps achieving the purpose by relying on overloading. + * @return The numeric representation of the string. + */ + [[nodiscard]] static hash_type value(const_wrapper wrapper) ENTT_NOEXCEPT { + return helper(wrapper.str); + } + + /** + * @brief Returns directly the numeric representation of a string view. + * @param str Human-readable identifier (exactly `size` characters are read, no terminator is required). + * @param size Length of the string to hash. + * @return The numeric representation of the string. + */ + [[nodiscard]] static hash_type value(const value_type *str, std::size_t size) ENTT_NOEXCEPT { + id_type partial{traits_type::offset}; + while(size--) { partial = (partial^(str++)[0])*traits_type::prime; } + return partial; + } + + /*! @brief Constructs an empty hashed string (null string, value-initialized hash). */ + constexpr basic_hashed_string() ENTT_NOEXCEPT + : str{nullptr}, hash{} + {} + + /** + * @brief Constructs a hashed string from an array of const characters. + * + * Forcing template resolution avoids implicit conversions. A + * human-readable identifier can be anything but a plain, old bunch of + * characters.
+ * Example of use: + * @code{.cpp} + * basic_hashed_string hs{"my.png"}; + * @endcode + * + * @tparam N Number of characters of the identifier. + * @param curr Human-readable identifer. + */ + template + constexpr basic_hashed_string(const value_type (&curr)[N]) ENTT_NOEXCEPT + : str{curr}, hash{helper(curr)} + {} + + /** + * @brief Explicit constructor on purpose to avoid constructing a hashed + * string directly from a `const value_type *`. + * @param wrapper Helps achieving the purpose by relying on overloading. + */ + explicit constexpr basic_hashed_string(const_wrapper wrapper) ENTT_NOEXCEPT + : str{wrapper.str}, hash{helper(wrapper.str)} + {} + + /** + * @brief Returns the human-readable representation of a hashed string. + * @return The string used to initialize the instance (the pointer is stored, the characters are not copied). + */ + [[nodiscard]] constexpr const value_type * data() const ENTT_NOEXCEPT { + return str; + } + + /** + * @brief Returns the numeric representation of a hashed string. + * @return The numeric representation of the instance. + */ + [[nodiscard]] constexpr hash_type value() const ENTT_NOEXCEPT { + return hash; + } + + /*! @copydoc data */ + [[nodiscard]] constexpr operator const value_type *() const ENTT_NOEXCEPT { return data(); } + + /** + * @brief Returns the numeric representation of a hashed string. + * @return The numeric representation of the instance. + */ + [[nodiscard]] constexpr operator hash_type() const ENTT_NOEXCEPT { return value(); } + + /** + * @brief Compares two hashed strings (only the hash values are compared). + * @param other Hashed string with which to compare. + * @return True if the two hashed strings have the same hash value, false otherwise. + */ + [[nodiscard]] constexpr bool operator==(const basic_hashed_string &other) const ENTT_NOEXCEPT { + return hash == other.hash; + } + +private: + const value_type *str; + hash_type hash; +}; + + +/** + * @brief Deduction guide. 
+ * + * It allows to deduce the character type of the hashed string directly from a + * human-readable identifer provided to the constructor. + * + * @tparam Char Character type. + * @tparam N Number of characters of the identifier. + * @param str Human-readable identifer. + */ +template +basic_hashed_string(const Char (&str)[N]) ENTT_NOEXCEPT +-> basic_hashed_string; + + +/** + * @brief Compares two hashed strings for inequality. + * @tparam Char Character type. + * @param lhs A valid hashed string. + * @param rhs A valid hashed string. + * @return True if the two hashed strings differ, false otherwise. + */ +template +[[nodiscard]] constexpr bool operator!=(const basic_hashed_string &lhs, const basic_hashed_string &rhs) ENTT_NOEXCEPT { + return !(lhs == rhs); +} + + +/*! @brief Aliases for common character types. */ +using hashed_string = basic_hashed_string; + + +/*! @brief Aliases for common character types. */ +using hashed_wstring = basic_hashed_string; + + +} + + +/** + * @brief User defined literal for hashed strings. + * @param str The literal without its suffix. + * @return A properly initialized hashed string. + */ +[[nodiscard]] constexpr entt::hashed_string operator"" ENTT_HS_SUFFIX(const char *str, std::size_t) ENTT_NOEXCEPT { + return entt::hashed_string{str}; +} + + +/** + * @brief User defined literal for hashed wstrings. + * @param str The literal without its suffix. + * @return A properly initialized hashed wstring. + */ +[[nodiscard]] constexpr entt::hashed_wstring operator"" ENTT_HWS_SUFFIX(const wchar_t *str, std::size_t) ENTT_NOEXCEPT { + return entt::hashed_wstring{str}; +} + + +#endif + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @cond TURN_OFF_DOXYGEN + * Internal details not to be documented. 
+ */ + + +namespace internal { + + +struct ENTT_API type_index { + [[nodiscard]] static id_type next() ENTT_NOEXCEPT { + static ENTT_MAYBE_ATOMIC(id_type) value{}; + return value++; + } +}; + + +template +[[nodiscard]] constexpr auto type_name() ENTT_NOEXCEPT { +#if defined ENTT_PRETTY_FUNCTION + std::string_view pretty_function{ENTT_PRETTY_FUNCTION}; + auto first = pretty_function.find_first_not_of(' ', pretty_function.find_first_of(ENTT_PRETTY_FUNCTION_PREFIX)+1); + auto value = pretty_function.substr(first, pretty_function.find_last_of(ENTT_PRETTY_FUNCTION_SUFFIX) - first); + return value; +#else + return std::string_view{}; +#endif +} + + +} + + +/** + * Internal details not to be documented. + * @endcond + */ + + +/** + * @brief Type index. + * @tparam Type Type for which to generate a sequential identifier. + */ +template +struct ENTT_API type_index { + /** + * @brief Returns the sequential identifier of a given type. + * @return The sequential identifier of a given type, generated on the first call and then cached. + */ + [[nodiscard]] static id_type value() ENTT_NOEXCEPT { + static const id_type value = internal::type_index::next(); + return value; + } +}; + + +/** + * @brief Provides the member constant `value`, which is true if a given type is + * indexable, false otherwise. + * @tparam Type Potentially indexable type. + */ +template +struct has_type_index: std::false_type {}; + + +/*! @copydoc has_type_index */ +template +struct has_type_index::value())>>: std::true_type {}; + + +/** + * @brief Helper variable template. + * @tparam Type Potentially indexable type. + */ +template +inline constexpr bool has_type_index_v = has_type_index::value; + + +/** + * @brief Type info. + * @tparam Type Type for which to generate information. + */ +template +struct type_info { + /** + * @brief Returns the numeric representation of a given type. + * @return The numeric representation of the given type. 
+ */ +#if defined ENTT_PRETTY_FUNCTION_CONSTEXPR + [[nodiscard]] static constexpr id_type id() ENTT_NOEXCEPT { + constexpr auto value = hashed_string::value(ENTT_PRETTY_FUNCTION); /* hash evaluated at compile time */ + return value; + } +#elif defined ENTT_PRETTY_FUNCTION + [[nodiscard]] static id_type id() ENTT_NOEXCEPT { + static const auto value = hashed_string::value(ENTT_PRETTY_FUNCTION); /* hash computed once and cached in a function-local static */ + return value; + } +#else + [[nodiscard]] static id_type id() ENTT_NOEXCEPT { + return type_index::value(); /* no pretty function macro: fall back to the sequential type index */ + } +#endif + + /** + * @brief Returns the name of a given type. + * + * The name is produced by `internal::type_name`. Depending on which of + * ENTT_PRETTY_FUNCTION_CONSTEXPR and ENTT_PRETTY_FUNCTION is defined, it + * is a compile-time constant, a function-local static initialized on + * first use, or the direct result of the call. + * + * @return The name of the given type. + */ +#if defined ENTT_PRETTY_FUNCTION_CONSTEXPR + [[nodiscard]] static constexpr std::string_view name() ENTT_NOEXCEPT { + constexpr auto value = internal::type_name(); + return value; + } +#elif defined ENTT_PRETTY_FUNCTION + [[nodiscard]] static std::string_view name() ENTT_NOEXCEPT { + static const auto value = internal::type_name(); + return value; + } +#else + [[nodiscard]] static constexpr std::string_view name() ENTT_NOEXCEPT { + return internal::type_name(); + } +#endif +}; + + +} + + +#endif + +// #include "../core/type_traits.hpp" +#ifndef ENTT_CORE_TYPE_TRAITS_HPP +#define ENTT_CORE_TYPE_TRAITS_HPP + + +#include +#include +#include +// #include "../config/config.h" + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @brief Using declaration to be used to _repeat_ the same type a number of + * times equal to the size of a given parameter pack. + * @tparam Type A type to repeat. + */ +template +using unpack_as_t = Type; + + +/** + * @brief Helper variable template to be used to _repeat_ the same value a + * number of times equal to the size of a given parameter pack. + * @tparam Value A value to repeat. + */ +template +inline constexpr auto unpack_as_v = Value; + + +/** + * @brief Wraps a static constant. + * @tparam Value A static constant. + */ +template +using integral_constant = std::integral_constant; + + +/** + * @brief Alias template to ease the creation of named values.
+ * @tparam Value A constant value at least convertible to `id_type`. + */ +template +using tag = integral_constant; + + +/** + * @brief Utility class to disambiguate overloaded functions. + * @tparam N Number of choices available. + */ +template +struct choice_t + // Unfortunately, doxygen cannot parse such a construct. + /*! @cond TURN_OFF_DOXYGEN */ + : choice_t + /*! @endcond */ +{}; + + +/*! @brief Specialization of choice_t for `0`, which has no base class. */ +template<> +struct choice_t<0> {}; + + +/** + * @brief Variable template for the choice trick. + * @tparam N Number of choices available. + */ +template +inline constexpr choice_t choice{}; + + +/*! @brief A class to use to push around lists of types, nothing more. */ +template +struct type_list {}; + + +/*! @brief Primary template isn't defined on purpose. */ +template +struct type_list_size; + + +/** + * @brief Compile-time number of elements in a type list. + * @tparam Type Types provided by the type list. + */ +template +struct type_list_size> + : std::integral_constant +{}; + + +/** + * @brief Helper variable template. + * @tparam List Type list. + */ +template +inline constexpr auto type_list_size_v = type_list_size::value; + + +/*! @brief Primary template isn't defined on purpose. */ +template +struct type_list_cat; + + +/*! @brief Concatenates multiple type lists (specialization for no lists at all). */ +template<> +struct type_list_cat<> { + /*! @brief An empty type list. */ + using type = type_list<>; +}; + + +/** + * @brief Concatenates multiple type lists. + * @tparam Type Types provided by the first type list. + * @tparam Other Types provided by the second type list. + * @tparam List Other type lists, if any. + */ +template +struct type_list_cat, type_list, List...> { + /*! @brief A type list composed of the types of all the type lists. */ + using type = typename type_list_cat, List...>::type; +}; + + +/** + * @brief Concatenates multiple type lists. + * @tparam Type Types provided by the type list.
+ */ +template +struct type_list_cat> { + /*! @brief A type list composed of the types of all the type lists. */ + using type = type_list; +}; + + +/** + * @brief Helper type. + * @tparam List Type lists to concatenate. + */ +template +using type_list_cat_t = typename type_list_cat::type; + + +/*! @brief Primary template isn't defined on purpose. */ +template +struct type_list_unique; + + +/** + * @brief Removes duplicate types from a type list. + * @tparam Type One of the types provided by the given type list. + * @tparam Other The other types provided by the given type list. + */ +template +struct type_list_unique> { + /*! @brief A type list without duplicate types. */ + using type = std::conditional_t< + std::disjunction_v...>, + typename type_list_unique>::type, + type_list_cat_t, typename type_list_unique>::type> + >; +}; + + +/*! @brief Removes duplicate types from a type list (specialization for an empty list). */ +template<> +struct type_list_unique> { + /*! @brief An empty type list. */ + using type = type_list<>; +}; + + +/** + * @brief Helper type. + * @tparam Type A type list. + */ +template +using type_list_unique_t = typename type_list_unique::type; + + +/** + * @brief Provides the member constant `value` to true if a given type is + * equality comparable, false otherwise. + * @tparam Type Potentially equality comparable type. + */ +template> +struct is_equality_comparable: std::false_type {}; + + +/*! @copydoc is_equality_comparable */ +template +struct is_equality_comparable() == std::declval())>> + : std::true_type +{}; + + +/** + * @brief Helper variable template. + * @tparam Type Potentially equality comparable type. + */ +template +inline constexpr auto is_equality_comparable_v = is_equality_comparable::value; + + +/** + * @brief Provides the member constant `value` to true if a given type is empty + * and the empty type optimization is enabled, false otherwise. + * @tparam Type Potential empty type.
+ */ +template +struct is_eto_eligible + : ENTT_IS_EMPTY(Type) +{}; + + +/** + * @brief Helper variable template. + * @tparam Type Potential empty type. + */ +template +inline constexpr auto is_eto_eligible_v = is_eto_eligible::value; + + +/** + * @brief Extracts the class of a non-static member object or function. + * @tparam Member A pointer to a non-static member object or function. + */ +template +class member_class { + static_assert(std::is_member_pointer_v, "Invalid pointer type to non-static member object or function"); /* The `clazz` overloads below are only declared: they map a pointer to a member function (const or not) or to a member object to a pointer to its class. */ + + template + static Class * clazz(Ret(Class:: *)(Args...)); + + template + static Class * clazz(Ret(Class:: *)(Args...) const); + + template + static Class * clazz(Type Class:: *); + +public: + /*! @brief The class of the given non-static member object or function. */ + using type = std::remove_pointer_t()))>; +}; + + +/** + * @brief Helper type. + * @tparam Member A pointer to a non-static member object or function. + */ +template +using member_class_t = typename member_class::type; + + +} + + +#endif + +// #include "internal.hpp" +#ifndef ENTT_META_INTERNAL_HPP +#define ENTT_META_INTERNAL_HPP + + +#include +#include +#include +#include +#include +// #include "../core/attribute.h" + +// #include "../config/config.h" + +// #include "../core/fwd.hpp" + +// #include "../core/type_info.hpp" + +// #include "../core/type_traits.hpp" + +// #include "type_traits.hpp" + + + +namespace entt { + + +class meta_any; +struct meta_handle; + + +/** + * @cond TURN_OFF_DOXYGEN + * Internal details not to be documented. + */ + + +namespace internal { + + +/*! @brief Type-erased storage for a single object, together with the functions needed to destroy, copy and steal it. */ class meta_storage { + using storage_type = std::aligned_storage_t; + using copy_fn_type = void(meta_storage &, const meta_storage &); + using steal_fn_type = void(meta_storage &, meta_storage &); + using destroy_fn_type = void(meta_storage &); + + /* Traits that create the object with `new` and also write the resulting pointer into `storage`. */ template> + struct type_traits { + template + static void instance(meta_storage &buffer, Args &&... 
args) { + buffer.instance = new Type{std::forward(args)...}; + new (&buffer.storage) Type *{static_cast(buffer.instance)}; + } + + static void destroy(meta_storage &buffer) { + delete static_cast(buffer.instance); + } + + static void copy(meta_storage &to, const meta_storage &from) { + to.instance = new Type{*static_cast(from.instance)}; + new (&to.storage) Type *{static_cast(to.instance)}; + } + + static void steal(meta_storage &to, meta_storage &from) { + new (&to.storage) Type *{static_cast(from.instance)}; + to.instance = from.instance; + } + }; + + /* Traits that construct the object in place inside `storage`. */ template + struct type_traits>> { + template + static void instance(meta_storage &buffer, Args &&... args) { + buffer.instance = new (&buffer.storage) Type{std::forward(args)...}; + } + + static void destroy(meta_storage &buffer) { + static_cast(buffer.instance)->~Type(); + } + + static void copy(meta_storage &to, const meta_storage &from) { + to.instance = new (&to.storage) Type{*static_cast(from.instance)}; + } + + static void steal(meta_storage &to, meta_storage &from) { + to.instance = new (&to.storage) Type{std::move(*static_cast(from.instance))}; + destroy(from); + } + }; + +public: + /*! @brief Default constructor: null instance and no destroy, copy or steal function. */ + meta_storage() ENTT_NOEXCEPT + : storage{}, + instance{}, + destroy_fn{}, + copy_fn{}, + steal_fn{} + {} + + /* In-place construction: for a non-void type, the instance is created through the traits and their destroy/copy/steal functions are recorded; for void the storage stays empty. */ template + explicit meta_storage(std::in_place_type_t, [[maybe_unused]] Args &&... args) + : meta_storage{} + { + if constexpr(!std::is_void_v) { + type_traits::instance(*this, std::forward(args)...); + destroy_fn = &type_traits::destroy; + copy_fn = &type_traits::copy; + steal_fn = &type_traits::steal; + } + } + + /* Non-owning: only the address of the referenced object is stored and no destroy/copy/steal function is set. */ template + meta_storage(std::reference_wrapper value) + : meta_storage{} + { + instance = &value.get(); + } + + template>, meta_storage>>> + meta_storage(Type &&value) + : meta_storage{std::in_place_type>>, std::forward(value)} + {} + + meta_storage(const meta_storage &other) + : meta_storage{} + { /* Without a copy function (no owned object), only the instance pointer is copied. */ + (other.copy_fn ? 
other.copy_fn : [](auto &to, const auto &from) { to.instance = from.instance; })(*this, other); + destroy_fn = other.destroy_fn; + copy_fn = other.copy_fn; + steal_fn = other.steal_fn; + } + + meta_storage(meta_storage &&other) + : meta_storage{} + { + swap(*this, other); + } + + ~meta_storage() { + if(destroy_fn) { + destroy_fn(*this); + } + } + + meta_storage & operator=(meta_storage other) { + swap(other, *this); + return *this; + } + + [[nodiscard]] const void * data() const ENTT_NOEXCEPT { + return instance; + } + + [[nodiscard]] void * data() ENTT_NOEXCEPT { + return const_cast(std::as_const(*this).data()); + } + + template + void emplace(Args &&... args) { + *this = meta_storage{std::in_place_type, std::forward(args)...}; + } + + /* Returns a storage that points to the same instance without owning it (no function is set on the result). */ [[nodiscard]] meta_storage ref() const ENTT_NOEXCEPT { + meta_storage other{}; + other.instance = instance; + return other; + } + + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return !(instance == nullptr); + } + + /* Each side's steal function moves its content into the other storage; a temporary buffer is used when both sides have one. The function pointers are swapped afterwards. */ friend void swap(meta_storage &lhs, meta_storage &rhs) { + using std::swap; + + if(lhs.steal_fn && rhs.steal_fn) { + meta_storage buffer{}; + lhs.steal_fn(buffer, lhs); + rhs.steal_fn(lhs, rhs); + lhs.steal_fn(rhs, buffer); + } else if(lhs.steal_fn) { + lhs.steal_fn(rhs, lhs); + } else if(rhs.steal_fn) { + rhs.steal_fn(lhs, rhs); + } else { + swap(lhs.instance, rhs.instance); + } + + swap(lhs.destroy_fn, rhs.destroy_fn); + swap(lhs.copy_fn, rhs.copy_fn); + swap(lhs.steal_fn, rhs.steal_fn); + } + +private: + storage_type storage; + void *instance; + destroy_fn_type *destroy_fn; + copy_fn_type *copy_fn; + steal_fn_type *steal_fn; +}; + + +struct meta_type_node; + + +/* The node types below are elements of singly linked lists (see their `next` members) that describe the properties, bases, conversions, constructors, data members and functions attached to a meta type. */ struct meta_prop_node { + meta_prop_node * next; + meta_any(* const key)(); + meta_any(* const value)(); +}; + + +struct meta_base_node { + meta_type_node * const parent; + meta_base_node * next; + meta_type_node *(* const type)() ENTT_NOEXCEPT; + const void *(* const cast)(const void *) ENTT_NOEXCEPT; +}; + + +struct meta_conv_node { + 
meta_type_node * const parent; + meta_conv_node * next; + meta_type_node *(* const type)() ENTT_NOEXCEPT; + meta_any(* const conv)(const void *); +}; + + +struct meta_ctor_node { + using size_type = std::size_t; + meta_type_node * const parent; + meta_ctor_node * next; + meta_prop_node * prop; + const size_type size; + meta_type_node *(* const arg)(size_type) ENTT_NOEXCEPT; + meta_any(* const invoke)(meta_any * const); +}; + + +struct meta_data_node { + id_type id; + meta_type_node * const parent; + meta_data_node * next; + meta_prop_node * prop; + const bool is_static; + meta_type_node *(* const type)() ENTT_NOEXCEPT; + bool(* const set)(meta_handle, meta_any); + meta_any(* const get)(meta_handle); +}; + + +struct meta_func_node { + using size_type = std::size_t; + id_type id; + meta_type_node * const parent; + meta_func_node * next; + meta_prop_node * prop; + const size_type size; + const bool is_const; + const bool is_static; + meta_type_node *(* const ret)() ENTT_NOEXCEPT; + meta_type_node *(* const arg)(size_type) ENTT_NOEXCEPT; + meta_any(* const invoke)(meta_handle, meta_any *); +}; + + +struct meta_type_node { + using size_type = std::size_t; + const id_type type_id; + id_type id; + meta_type_node * next; + meta_prop_node * prop; + const bool is_void; + const bool is_integral; + const bool is_floating_point; + const bool is_array; + const bool is_enum; + const bool is_union; + const bool is_class; + const bool is_pointer; + const bool is_function_pointer; + const bool is_member_object_pointer; + const bool is_member_function_pointer; + const bool is_pointer_like; + const bool is_sequence_container; + const bool is_associative_container; + const size_type rank; + size_type(* const extent)(size_type); + bool(* const compare)(const void *, const void *); + meta_type_node *(* const remove_pointer)() ENTT_NOEXCEPT; + meta_type_node *(* const remove_extent)() ENTT_NOEXCEPT; + meta_base_node *base{nullptr}; + meta_conv_node *conv{nullptr}; + meta_ctor_node 
*ctor{nullptr}; + meta_data_node *data{nullptr}; + meta_func_node *func{nullptr}; + void(* dtor)(void *){nullptr}; +}; + + +/*! @brief Forward range over a linked list of nodes: iteration follows the `next` pointers and ends at a null node. */ template +class meta_range { + struct range_iterator { + using difference_type = std::ptrdiff_t; + using value_type = Node; + using pointer = value_type *; + using reference = value_type &; + using iterator_category = std::forward_iterator_tag; + + range_iterator() ENTT_NOEXCEPT = default; + + range_iterator(Node *head) ENTT_NOEXCEPT + : node{head} + {} + + range_iterator & operator++() ENTT_NOEXCEPT { + return node = node->next, *this; + } + + range_iterator operator++(int) ENTT_NOEXCEPT { + range_iterator orig = *this; + return ++(*this), orig; + } + + [[nodiscard]] bool operator==(const range_iterator &other) const ENTT_NOEXCEPT { + return other.node == node; + } + + [[nodiscard]] bool operator!=(const range_iterator &other) const ENTT_NOEXCEPT { + return !(*this == other); + } + + [[nodiscard]] pointer operator->() const ENTT_NOEXCEPT { + return node; + } + + [[nodiscard]] reference operator*() const ENTT_NOEXCEPT { + return *operator->(); + } + + private: + Node *node{nullptr}; + }; + +public: + using iterator = range_iterator; + + meta_range() ENTT_NOEXCEPT = default; + + meta_range(Node *head) + : node{head} + {} + + [[nodiscard]] iterator begin() const ENTT_NOEXCEPT { + return iterator{node}; + } + + [[nodiscard]] iterator end() const ENTT_NOEXCEPT { + return iterator{}; + } + +private: + Node *node{nullptr}; +}; + + +/*! @brief Searches the list selected by `Member` on the given node for the first element accepted by `op`; if none is found, repeats the search on the type nodes of the bases, recursively. Returns null when nothing matches. */ template +auto find_if(const Op &op, const meta_type_node *node) +-> std::decay_t*Member)> { + std::decay_t*Member)> ret = nullptr; + + for(auto &&curr: meta_range{node->*Member}) { + if(op(&curr)) { + ret = &curr; + break; + } + } + + if(!ret) { + for(auto &&curr: meta_range{node->base}) { + if(ret = find_if(op, curr.type()); ret) { + break; + } + } + } + + return ret; +} + + +template +class ENTT_API meta_node { + static_assert(std::is_same_v>>, "Invalid type"); + + /* Compares by value when the `if constexpr` condition holds (not a function type and equality comparable), by address otherwise. */ [[nodiscard]] static bool compare(const void *lhs, 
const void *rhs) { + if constexpr(!std::is_function_v && is_equality_comparable_v) { + return *static_cast(lhs) == *static_cast(rhs); + } else { + return lhs == rhs; + } + } + + /* Returns the extent that corresponds to the requested dimension, or zero if `dim` matches none of the indexes. */ template + [[nodiscard]] static auto extent(meta_type_node::size_type dim, std::index_sequence) { + meta_type_node::size_type ext{}; + ((ext = (dim == Index ? std::extent_v : ext)), ...); + return ext; + } + +public: + /* Returns the address of a function-local static node, initialized on first use. */ [[nodiscard]] static meta_type_node * resolve() ENTT_NOEXCEPT { + static meta_type_node node{ + type_info::id(), + {}, + nullptr, + nullptr, + std::is_void_v, + std::is_integral_v, + std::is_floating_point_v, + std::is_array_v, + std::is_enum_v, + std::is_union_v, + std::is_class_v, + std::is_pointer_v, + std::is_pointer_v && std::is_function_v>, + std::is_member_object_pointer_v, + std::is_member_function_pointer_v, + is_meta_pointer_like_v, + has_meta_sequence_container_traits_v, + has_meta_associative_container_traits_v, + std::rank_v, + [](meta_type_node::size_type dim) { + return extent(dim, std::make_index_sequence>{}); + }, + &compare, // workaround for an issue with VS2017 + &meta_node>>::resolve, + &meta_node>>::resolve + }; + + return &node; + } +}; + + +template +struct meta_info: meta_node>...> {}; + + +} + + +/** + * Internal details not to be documented. + * @endcond + */ + + +} + + +#endif + +// #include "meta.hpp" +#ifndef ENTT_META_META_HPP +#define ENTT_META_META_HPP + + +#include +#include +#include +#include +#include +#include +#include +// #include "../config/config.h" + +// #include "../core/fwd.hpp" + +// #include "../core/utility.hpp" +#ifndef ENTT_CORE_UTILITY_HPP +#define ENTT_CORE_UTILITY_HPP + + +#include +// #include "../config/config.h" + + + +namespace entt { + + +/*! @brief Identity function object (waiting for C++20). */ +struct identity { + /** + * @brief Returns its argument unchanged. + * @tparam Type Type of the argument. + * @param value The actual argument. + * @return The submitted value as-is.
+ */ + template + [[nodiscard]] constexpr Type && operator()(Type &&value) const ENTT_NOEXCEPT { + return std::forward(value); + } +}; + + +/** + * @brief Constant utility to disambiguate overloaded members of a class. + * @tparam Type Type of the desired overload. + * @tparam Class Type of class to which the member belongs. + * @param member A valid pointer to a member. + * @return Pointer to the member. + */ +template +[[nodiscard]] constexpr auto overload(Type Class:: *member) ENTT_NOEXCEPT { return member; } + + +/** + * @brief Constant utility to disambiguate overloaded functions. + * @tparam Func Function type of the desired overload. + * @param func A valid pointer to a function. + * @return Pointer to the function. + */ +template +[[nodiscard]] constexpr auto overload(Func *func) ENTT_NOEXCEPT { return func; } + + +/** + * @brief Helper type for visitors. + * + * It inherits from all the given function objects and brings their call + * operators into scope, so that a single object exposes all the overloads. + * + * @tparam Func Types of function objects. + */ +template +struct overloaded: Func... { + using Func::operator()...; +}; + + +/** + * @brief Deduction guide. + * @tparam Func Types of function objects. + */ +template +overloaded(Func...) -> overloaded; + + +/** + * @brief Basic implementation of a y-combinator. + * @tparam Func Type of a potentially recursive function. + */ +template +struct y_combinator { + /** + * @brief Constructs a y-combinator from a given function. + * @param recursive A potentially recursive function. + */ + y_combinator(Func recursive): + func{std::move(recursive)} + {} + + /** + * @brief Invokes a y-combinator and therefore its underlying function. + * + * The combinator itself is passed to the underlying function as the first + * argument, followed by the forwarded arguments. + * + * @tparam Args Types of arguments to use to invoke the underlying function. + * @param args Parameters to use to invoke the underlying function. + * @return Return value of the underlying function, if any. + */ + template + decltype(auto) operator()(Args &&... args) const { + return func(*this, std::forward(args)...); + } + + /*! @copydoc operator()() */ + template + decltype(auto) operator()(Args &&... 
args) { + return func(*this, std::forward(args)...); + } + +private: + Func func; +}; + + +} + + +#endif + +// #include "ctx.hpp" + +// #include "internal.hpp" + +// #include "range.hpp" +#ifndef ENTT_META_RANGE_HPP +#define ENTT_META_RANGE_HPP + + +// #include "internal.hpp" + + + +namespace entt { + + +/** + * @brief Iterable range to use to iterate all types of meta objects. + * @tparam Type Type of meta objects iterated. + */ +template +class meta_range { + /* Input iterator that wraps an iterator of the internal node range and returns meta objects by value, each built from the current node pointer. */ struct range_iterator { + using difference_type = std::ptrdiff_t; + using value_type = Type; + using pointer = void; + using reference = value_type; + using iterator_category = std::input_iterator_tag; + using node_type = typename Type::node_type; + + range_iterator() ENTT_NOEXCEPT = default; + + range_iterator(node_type *head) ENTT_NOEXCEPT + : it{head} + {} + + range_iterator & operator++() ENTT_NOEXCEPT { + return ++it, *this; + } + + range_iterator operator++(int) ENTT_NOEXCEPT { + range_iterator orig = *this; + return ++(*this), orig; + } + + [[nodiscard]] reference operator*() const ENTT_NOEXCEPT { + return it.operator->(); + } + + [[nodiscard]] bool operator==(const range_iterator &other) const ENTT_NOEXCEPT { + return other.it == it; + } + + [[nodiscard]] bool operator!=(const range_iterator &other) const ENTT_NOEXCEPT { + return !(*this == other); + } + + private: + typename internal::meta_range::iterator it{}; + }; + +public: + /*! @brief Node type. */ + using node_type = typename Type::node_type; + /*! @brief Input iterator type. */ + using iterator = range_iterator; + + /*! @brief Default constructor. */ + meta_range() ENTT_NOEXCEPT = default; + + /** + * @brief Constructs a meta range from a given node. + * @param head The underlying node with which to construct the range. + */ + meta_range(node_type *head) + : node{head} + {} + + /** + * @brief Returns an iterator to the beginning. + * @return An iterator to the first meta object of the range.
+ */ + [[nodiscard]] iterator begin() const ENTT_NOEXCEPT { + return iterator{node}; + } + + /** + * @brief Returns an iterator to the end. + * + * The end of the range is represented by a default constructed iterator. + * + * @return An iterator to the element following the last meta object of the + * range. + */ + [[nodiscard]] iterator end() const ENTT_NOEXCEPT { + return iterator{}; + } + +private: + node_type *node{nullptr}; +}; + + +} + + +#endif + +// #include "type_traits.hpp" + + + +namespace entt { + + +class meta_type; +class meta_any; + + +/*! @brief Proxy object for sequence containers. */ +class meta_sequence_container { + template + struct meta_sequence_container_proxy; + + class meta_iterator; + +public: + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + /*! @brief Meta iterator type. */ + using iterator = meta_iterator; + + /*! @brief Default constructor: creates a proxy with a null instance pointer. */ + meta_sequence_container() ENTT_NOEXCEPT + : instance{nullptr} + {} + + /** + * @brief Construct a proxy object for sequence containers. + * @tparam Type Type of container to wrap. + * @param container The container to wrap.
+ */ + template + meta_sequence_container(Type *container) ENTT_NOEXCEPT + : value_type_fn{&meta_sequence_container_proxy::value_type}, + size_fn{&meta_sequence_container_proxy::size}, + resize_fn{&meta_sequence_container_proxy::resize}, + clear_fn{&meta_sequence_container_proxy::clear}, + begin_fn{&meta_sequence_container_proxy::begin}, + end_fn{&meta_sequence_container_proxy::end}, + insert_fn{&meta_sequence_container_proxy::insert}, + erase_fn{&meta_sequence_container_proxy::erase}, + get_fn{&meta_sequence_container_proxy::get}, + instance{container} + {} + + /* The member functions below are only declared here; their definitions are not part of this class body. */ [[nodiscard]] inline meta_type value_type() const ENTT_NOEXCEPT; + [[nodiscard]] inline size_type size() const ENTT_NOEXCEPT; + inline bool resize(size_type) const; + inline bool clear(); + [[nodiscard]] inline iterator begin(); + [[nodiscard]] inline iterator end(); + inline std::pair insert(iterator, meta_any); + inline std::pair erase(iterator); + [[nodiscard]] inline meta_any operator[](size_type); + [[nodiscard]] inline explicit operator bool() const ENTT_NOEXCEPT; + +private: /* Type-erased operations: the templated constructor binds them to the static functions of the proxy for the wrapped container type; `instance` is the opaque pointer to the wrapped container. */ + meta_type(* value_type_fn)() ENTT_NOEXCEPT; + size_type(* size_fn)(const void *) ENTT_NOEXCEPT; + bool(* resize_fn)(void *, size_type); + bool(* clear_fn)(void *); + iterator(* begin_fn)(void *); + iterator(* end_fn)(void *); + std::pair(* insert_fn)(void *, iterator, meta_any); + std::pair(* erase_fn)(void *, iterator); + meta_any(* get_fn)(void *, size_type); + void *instance; +}; + + +/*! @brief Proxy object for associative containers. */ +class meta_associative_container { + template + struct meta_associative_container_proxy; + + class meta_iterator; + +public: + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + /*! @brief Meta iterator type. */ + using iterator = meta_iterator; + + /*! @brief Default constructor: creates a proxy with a null instance pointer. */ + meta_associative_container() ENTT_NOEXCEPT + : instance{nullptr} + {} + + /** + * @brief Construct a proxy object for associative containers. + * @tparam Type Type of container to wrap.
+ * @param container The container to wrap. + */ + template + meta_associative_container(Type *container) ENTT_NOEXCEPT + : key_only_container{is_key_only_meta_associative_container_v}, + key_type_fn{&meta_associative_container_proxy::key_type}, + mapped_type_fn{&meta_associative_container_proxy::mapped_type}, + value_type_fn{&meta_associative_container_proxy::value_type}, + size_fn{&meta_associative_container_proxy::size}, + clear_fn{&meta_associative_container_proxy::clear}, + begin_fn{&meta_associative_container_proxy::begin}, + end_fn{&meta_associative_container_proxy::end}, + insert_fn{&meta_associative_container_proxy::insert}, + erase_fn{&meta_associative_container_proxy::erase}, + find_fn{&meta_associative_container_proxy::find}, + instance{container} + {} + + /* The member functions below are only declared here; their definitions are not part of this class body. */ [[nodiscard]] inline bool key_only() const ENTT_NOEXCEPT; + [[nodiscard]] inline meta_type key_type() const ENTT_NOEXCEPT; + [[nodiscard]] inline meta_type mapped_type() const ENTT_NOEXCEPT; + [[nodiscard]] inline meta_type value_type() const ENTT_NOEXCEPT; + [[nodiscard]] inline size_type size() const ENTT_NOEXCEPT; + inline bool clear(); + [[nodiscard]] inline iterator begin(); + [[nodiscard]] inline iterator end(); + inline bool insert(meta_any, meta_any); + inline bool erase(meta_any); + [[nodiscard]] inline iterator find(meta_any); + [[nodiscard]] inline explicit operator bool() const ENTT_NOEXCEPT; + +private: /* `key_only_container` caches the key-only trait of the wrapped type; the function pointers are type-erased operations bound by the templated constructor; `instance` is the opaque pointer to the wrapped container. */ + bool key_only_container; + meta_type(* key_type_fn)() ENTT_NOEXCEPT; + meta_type(* mapped_type_fn)() ENTT_NOEXCEPT; + meta_type(* value_type_fn)() ENTT_NOEXCEPT; + size_type(* size_fn)(const void *) ENTT_NOEXCEPT; + bool(* clear_fn)(void *); + iterator(* begin_fn)(void *); + iterator(* end_fn)(void *); + bool(* insert_fn)(void *, meta_any, meta_any); + bool(* erase_fn)(void *, meta_any); + iterator(* find_fn)(void *, meta_any); + void *instance; +}; + + +/** + * @brief Opaque wrapper for values of any type.
+ * + * This class uses a technique called small buffer optimization (SBO) to get rid + * of memory allocations if possible. This should improve overall performance. + */ +class meta_any { + using dereference_operator_type = meta_any(meta_any &); + + /* Dereferences the wrapped object if its type is pointer-like: the pointee is wrapped by value when it is const, by reference (`std::ref`) otherwise. Returns an empty meta any for types that are not pointer-like. */ template + [[nodiscard]] static meta_any dereference_operator(meta_any &any) { + if constexpr(is_meta_pointer_like_v) { + if constexpr(std::is_const_v())>>) { + return *any.cast(); + } else { + return std::ref(*any.cast()); + } + } else { + return {}; + } + } + + /* Builds a sequence container proxy from an opaque pointer if the type has sequence container traits, an empty proxy otherwise. */ template + [[nodiscard]] static meta_sequence_container meta_sequence_container_factory([[maybe_unused]] void *container) ENTT_NOEXCEPT { + if constexpr(has_meta_sequence_container_traits_v) { + return static_cast(container); + } else { + return {}; + } + } + + /* Builds an associative container proxy from an opaque pointer if the type has associative container traits, an empty proxy otherwise. */ template + [[nodiscard]] static meta_associative_container meta_associative_container_factory([[maybe_unused]] void *container) ENTT_NOEXCEPT { + if constexpr(has_meta_associative_container_traits_v) { + return static_cast(container); + } else { + return {}; + } + } + +public: + /*! @brief Default constructor: creates an empty wrapper (null node and null helper functions). */ + meta_any() ENTT_NOEXCEPT + : storage{}, + node{}, + deref{nullptr}, + seq_factory{nullptr}, + assoc_factory{nullptr} + {} + + /** + * @brief Constructs a meta any by directly initializing the new object. + * + * Besides the storage, the wrapper records the meta type node resolved for + * the type and the functions used to dereference the object and to create + * container proxies for it. + * + * @tparam Type Type of object to use to initialize the wrapper. + * @tparam Args Types of arguments to use to construct the new instance. + * @param args Parameters to use to construct the instance. + */ + template + explicit meta_any(std::in_place_type_t, [[maybe_unused]] Args &&... args) + : storage(std::in_place_type, std::forward(args)...), + node{internal::meta_info::resolve()}, + deref{&dereference_operator}, + seq_factory{&meta_sequence_container_factory}, + assoc_factory{&meta_associative_container_factory} + {} + + /** + * @brief Constructs a meta any that holds an unmanaged object. + * @tparam Type Type of object to use to initialize the wrapper.
+ * @param value An instance of an object to use to initialize the wrapper. + */ + template + meta_any(std::reference_wrapper value) + : storage{value}, + node{internal::meta_info::resolve()}, + deref{&dereference_operator}, + seq_factory{&meta_sequence_container_factory}, + assoc_factory{&meta_associative_container_factory} + {} + + /** + * @brief Constructs a meta any from a given value. + * @tparam Type Type of object to use to initialize the wrapper. + * @param value An instance of an object to use to initialize the wrapper. + */ + template>, meta_any>>> + meta_any(Type &&value) + : meta_any{std::in_place_type>>, std::forward(value)} + {} + + /** + * @brief Copy constructor. + * @param other The instance to copy from. + */ + meta_any(const meta_any &other) = default; + + /** + * @brief Move constructor. + * + * The new object is default constructed and then swapped with `other`. + * + * @param other The instance to move from. + */ + meta_any(meta_any &&other) + : meta_any{} + { + swap(*this, other); + } + + /** + * @brief Destructor. + * + * If the wrapper isn't empty and its type node has a `dtor` function, that + * function is invoked on the contained instance. + */ + ~meta_any() { + if(node && node->dtor) { + node->dtor(storage.data()); + } + } + + /** + * @brief Assignment operator. + * + * The argument is taken by value and swapped with this object. + * + * @param other The instance to assign from. + * @return This meta any object. + */ + meta_any & operator=(meta_any other) { + swap(other, *this); + return *this; + } + + /** + * @brief Returns the meta type of the underlying object. + * @return The meta type of the underlying object, if any. + */ + [[nodiscard]] inline meta_type type() const ENTT_NOEXCEPT; + + /** + * @brief Returns an opaque pointer to the contained instance. + * @return An opaque pointer to the contained instance, if any. + */ + [[nodiscard]] const void * data() const ENTT_NOEXCEPT { + return storage.data(); + } + + /*! @copydoc data */ + [[nodiscard]] void * data() ENTT_NOEXCEPT { + return storage.data(); + } + + /** + * @brief Tries to cast an instance to a given type. + * @tparam Type Type to which to cast the instance. + * @return A (possibly null) pointer to the contained instance.
+ */ + template + [[nodiscard]] const Type * try_cast() const { + if(node) { + /* exact match: the identifier of the stored type equals the requested one */ if(const auto type_id = internal::meta_info::resolve()->type_id; node->type_id == type_id) { + return static_cast(storage.data()); + /* otherwise look for a base with the requested identifier and go through its cast function */ } else if(const auto *base = internal::find_if<&internal::meta_type_node::base>([type_id](const auto *curr) { return curr->type()->type_id == type_id; }, node); base) { + return static_cast(base->cast(storage.data())); + } + } + + return nullptr; + } + + /*! @copydoc try_cast */ + template + [[nodiscard]] Type * try_cast() { + return const_cast(std::as_const(*this).try_cast()); + } + + /** + * @brief Tries to cast an instance to a given type. + * + * The type of the instance must be such that the cast is possible. + * + * @warning + * Attempting to perform a cast that isn't viable results in undefined + * behavior.
+ * An assertion will abort the execution at runtime in debug mode in case + * the cast is not feasible. + * + * @tparam Type Type to which to cast the instance. + * @return A reference to the contained instance. + */ + template + [[nodiscard]] const Type & cast() const { + auto * const actual = try_cast(); + ENTT_ASSERT(actual); + return *actual; + } + + /*! @copydoc cast */ + template + [[nodiscard]] Type & cast() { + return const_cast(std::as_const(*this).cast()); + } + + /** + * @brief Tries to convert an instance to a given type and returns it. + * @tparam Type Type to which to convert the instance. + * @return A valid meta any object if the conversion is possible, an invalid + * one otherwise. + */ + template + [[nodiscard]] meta_any convert() const { + if(node) { + if(const auto type_id = internal::meta_info::resolve()->type_id; node->type_id == type_id) { + return *this; + } else if(const auto * const conv = internal::find_if<&internal::meta_type_node::conv>([type_id](const auto *curr) { return curr->type()->type_id == type_id; }, node); conv) { + return conv->conv(storage.data()); + } + } + + return {}; + } + + /** + * @brief Tries to convert an instance to a given type. + * @tparam Type Type to which to convert the instance. + * @return True if the conversion is possible, false otherwise. + */ + template + bool convert() { + bool valid = (node && node->type_id == internal::meta_info::resolve()->type_id); + + if(!valid) { + if(auto any = std::as_const(*this).convert(); any) { + swap(any, *this); + valid = true; + } + } + + return valid; + } + + /** + * @brief Replaces the contained object by creating a new instance directly. + * @tparam Type Type of object to use to initialize the wrapper. + * @tparam Args Types of arguments to use to construct the new instance. + * @param args Parameters to use to construct the instance. + */ + template + void emplace(Args &&... 
args) { + *this = meta_any{std::in_place_type, std::forward(args)...}; + } + + /** + * @brief Returns a meta any that refers to the contained object. + * @return A meta any that shares a reference to an unmanaged object. + */ + [[nodiscard]] meta_any ref() const ENTT_NOEXCEPT { + meta_any other{}; + other.node = node; + other.storage = storage.ref(); + other.deref = deref; + other.seq_factory = seq_factory; + other.assoc_factory = assoc_factory; + return other; + } + + /** + * @brief Returns a sequence container proxy. + * @return A sequence container proxy for the underlying object. + */ + [[nodiscard]] meta_sequence_container as_sequence_container() ENTT_NOEXCEPT { + return seq_factory(storage.data()); + } + + /** + * @brief Returns an associative container proxy. + * @return An associative container proxy for the underlying object. + */ + [[nodiscard]] meta_associative_container as_associative_container() ENTT_NOEXCEPT { + return assoc_factory(storage.data()); + } + + /** + * @brief Indirection operator for dereferencing opaque objects. + * @return A meta any that shares a reference to an unmanaged object if the + * wrapped element is dereferenceable, an invalid meta any otherwise. + */ + [[nodiscard]] meta_any operator*() ENTT_NOEXCEPT { + return deref(*this); + } + + /** + * @brief Returns false if a wrapper is empty, true otherwise. + * @return False if the wrapper is empty, true otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return !(node == nullptr); + } + + /** + * @brief Checks if two wrappers are equal in their content. + * @param other Wrapper with which to compare. + * @return True if both wrappers are empty or if they contain objects of the + * same type for which the comparison function of that type returns true, + * false otherwise. 
+ */ + [[nodiscard]] bool operator==(const meta_any &other) const { + return (!node && !other.node) || (node && other.node && node->type_id == other.node->type_id && node->compare(storage.data(), other.storage.data())); + } + + /** + * @brief Swaps two meta any objects. + * @param lhs A valid meta any object. 
+ * @param rhs A valid meta any object. + */ + friend void swap(meta_any &lhs, meta_any &rhs) { + using std::swap; + swap(lhs.storage, rhs.storage); + swap(lhs.node, rhs.node); + swap(lhs.deref, rhs.deref); + swap(lhs.seq_factory, rhs.seq_factory); + swap(lhs.assoc_factory, rhs.assoc_factory); + } + +private: + internal::meta_storage storage; + internal::meta_type_node *node; + dereference_operator_type *deref; + meta_sequence_container(* seq_factory)(void *); + meta_associative_container(* assoc_factory)(void *); +}; + + +/** + * @brief Checks if two wrappers differ in their content. + * @param lhs A meta any object, either empty or not. + * @param rhs A meta any object, either empty or not. + * @return True if the two wrappers differ in their content, false otherwise. + */ +[[nodiscard]] inline bool operator!=(const meta_any &lhs, const meta_any &rhs) ENTT_NOEXCEPT { + return !(lhs == rhs); +} + + +/** + * @brief Opaque pointers to instances of any type. + * + * A handle doesn't perform copies and isn't responsible for the contained + * object. It doesn't prolong the lifetime of the pointed instance.
+ * Handles are used to generate meta references to actual objects when needed. + */ +struct meta_handle { + /*! @brief Default constructor. */ + meta_handle() = default; + + /** + * @brief Creates a handle that points to an unmanaged object. + * @tparam Type Type of object to use to initialize the handle. + * @param value An instance of an object to use to initialize the handle. + */ + template>, meta_handle>>> + meta_handle(Type &&value) ENTT_NOEXCEPT + : meta_handle{} + { + if constexpr(std::is_same_v>, meta_any>) { + any = value.ref(); + } else { + static_assert(std::is_lvalue_reference_v, "Lvalue reference required"); + any = std::ref(value); + } + } + + /** + * @brief Dereference operator for accessing the contained opaque object. + * @return A meta any that shares a reference to an unmanaged object. + */ + [[nodiscard]] meta_any operator*() const { + return any; + } + + /** + * @brief Access operator for accessing the contained opaque object. + * @return A pointer to the meta any held by the handle. + */ + [[nodiscard]] meta_any * operator->() { + return &any; + } + +private: + meta_any any; +}; + + +/*! @brief Opaque wrapper for meta properties of any type. */ +struct meta_prop { + /*! @brief Node type. */ + using node_type = internal::meta_prop_node; + + /** + * @brief Constructs an instance from a given node. + * @param curr The underlying node with which to construct the instance. + */ + meta_prop(const node_type *curr = nullptr) ENTT_NOEXCEPT + : node{curr} + {} + + /** + * @brief Returns the stored key. + * @return A meta any containing the key stored with the property. + */ + [[nodiscard]] meta_any key() const { + return node->key(); + } + + /** + * @brief Returns the stored value. + * @return A meta any containing the value stored with the property. + */ + [[nodiscard]] meta_any value() const { + return node->value(); + } + + /** + * @brief Returns true if a meta object is valid, false otherwise. 
+ * @return True if the meta object is valid, false otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return !(node == nullptr); + } + +private: + const node_type *node; +}; + + +/*! @brief Opaque wrapper for meta base classes. */ +struct meta_base { + /*! @brief Node type. */ + using node_type = internal::meta_base_node; + + /*! @copydoc meta_prop::meta_prop */ + meta_base(const node_type *curr = nullptr) ENTT_NOEXCEPT + : node{curr} + {} + + /** + * @brief Returns the meta type to which a meta object belongs. + * @return The meta type to which the meta object belongs. + */ + [[nodiscard]] inline meta_type parent() const ENTT_NOEXCEPT; + + /*! @copydoc meta_any::type */ + [[nodiscard]] inline meta_type type() const ENTT_NOEXCEPT; + + /** + * @brief Casts an instance from a parent type to a base type. + * @param instance The instance to cast. + * @return An opaque pointer to the base type. + */ + [[nodiscard]] const void * cast(const void *instance) const ENTT_NOEXCEPT { + return node->cast(instance); + } + + /** + * @brief Returns true if a meta object is valid, false otherwise. + * @return True if the meta object is valid, false otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return !(node == nullptr); + } + +private: + const node_type *node; +}; + + +/*! @brief Opaque wrapper for meta conversion functions. */ +struct meta_conv { + /*! @brief Node type. */ + using node_type = internal::meta_conv_node; + + /*! @copydoc meta_prop::meta_prop */ + meta_conv(const node_type *curr = nullptr) ENTT_NOEXCEPT + : node{curr} + {} + + /*! @copydoc meta_base::parent */ + [[nodiscard]] inline meta_type parent() const ENTT_NOEXCEPT; + + /*! @copydoc meta_any::type */ + [[nodiscard]] inline meta_type type() const ENTT_NOEXCEPT; + + /** + * @brief Converts an instance to the underlying type. + * @param instance The instance to convert. 
+ * @return A meta any produced by the conversion function for the given + * instance. 
+ */ + [[nodiscard]] meta_any convert(const void *instance) const { + return node->conv(instance); + } + + /** + * @brief Returns true if a meta object is valid, false otherwise. + * @return True if the meta object is valid, false otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return !(node == nullptr); + } + +private: + const node_type *node; +}; + + +/*! @brief Opaque wrapper for meta constructors. */ +struct meta_ctor { + /*! @brief Node type. */ + using node_type = internal::meta_ctor_node; + /*! @brief Unsigned integer type. */ + using size_type = typename node_type::size_type; + + /*! @copydoc meta_prop::meta_prop */ + meta_ctor(const node_type *curr = nullptr) ENTT_NOEXCEPT + : node{curr} + {} + + /*! @copydoc meta_base::parent */ + [[nodiscard]] inline meta_type parent() const ENTT_NOEXCEPT; + + /** + * @brief Returns the number of arguments accepted by a meta constructor. + * @return The number of arguments accepted by the meta constructor. + */ + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return node->size; + } + + /** + * @brief Returns the meta type of the i-th argument of a meta constructor. + * @param index The index of the argument of which to return the meta type. + * @return The meta type of the i-th argument of a meta constructor, if any. + */ + [[nodiscard]] meta_type arg(size_type index) const ENTT_NOEXCEPT; + + /** + * @brief Creates an instance of the underlying type, if possible. + * + * To create a valid instance, the parameters must be such that a cast or + * conversion to the required types is possible. Otherwise, an empty and + * thus invalid wrapper is returned. + * An empty wrapper is also returned if the number of parameters does not + * match the number of arguments accepted by the meta constructor. + * + * @param args Parameters to use to construct the instance. + * @param sz Number of parameters to use to construct the instance. + * @return A meta any containing the new instance, if any. 
+ */ + [[nodiscard]] meta_any invoke(meta_any * const args, const std::size_t sz) const { + return sz == size() ? 
node->invoke(args) : meta_any{}; + } + + /** + * @copybrief invoke + * + * @sa invoke + * + * @tparam Args Types of arguments to use to construct the instance. + * @param args Parameters to use to construct the instance. + * @return A meta any containing the new instance, if any. + */ + template + [[nodiscard]] meta_any invoke([[maybe_unused]] Args &&... args) const { + std::array arguments{std::forward(args)...}; + return invoke(arguments.data(), sizeof...(Args)); + } + + /** + * @brief Returns a range to use to visit all meta properties. + * @return An iterable range to use to visit all meta properties. + */ + [[nodiscard]] meta_range prop() const ENTT_NOEXCEPT { + return node->prop; + } + + /** + * @brief Returns the property associated with a given key. + * @param key The key to use to search for a property. + * @return The property associated with the given key, if any. + */ + [[nodiscard]] meta_prop prop(meta_any key) const { + internal::meta_range range{node->prop}; + return std::find_if(range.begin(), range.end(), [&key](const auto &curr) { return curr.key() == key; }).operator->(); + } + + /** + * @brief Returns true if a meta object is valid, false otherwise. + * @return True if the meta object is valid, false otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return !(node == nullptr); + } + +private: + const node_type *node; +}; + + +/*! @brief Opaque wrapper for meta data. */ +struct meta_data { + /*! @brief Node type. */ + using node_type = internal::meta_data_node; + + /*! @copydoc meta_prop::meta_prop */ + meta_data(const node_type *curr = nullptr) ENTT_NOEXCEPT + : node{curr} + {} + + /*! @copydoc meta_type::id */ + [[nodiscard]] id_type id() const ENTT_NOEXCEPT { + return node->id; + } + + /*! @copydoc meta_base::parent */ + [[nodiscard]] inline meta_type parent() const ENTT_NOEXCEPT; + + /** + * @brief Indicates whether a meta data is constant or not. + * @return True if the meta data is constant, false otherwise. 
+ */ + [[nodiscard]] bool is_const() const ENTT_NOEXCEPT { + return (node->set == nullptr); + } + + /** + * @brief Indicates whether a meta data is static or not. + * @return True if the meta data is static, false otherwise. + */ + [[nodiscard]] bool is_static() const ENTT_NOEXCEPT { + return node->is_static; + } + + /*! @copydoc meta_any::type */ + [[nodiscard]] inline meta_type type() const ENTT_NOEXCEPT; + + /** + * @brief Sets the value of a given variable. + * + * It must be possible to cast the instance to the parent type of the meta + * data. Otherwise, invoking the setter results in an undefined + * behavior.
+ * The type of the value must be such that a cast or conversion to the type + * of the variable is possible. Otherwise, invoking the setter does nothing. + * + * @tparam Type Type of value to assign. + * @param instance An opaque instance of the underlying type. + * @param value Parameter to use to set the underlying variable. + * @return True in case of success, false otherwise. + */ + template + bool set(meta_handle instance, Type &&value) const { + return node->set && node->set(std::move(instance), std::forward(value)); + } + + /** + * @brief Gets the value of a given variable. + * + * It must be possible to cast the instance to the parent type of the meta + * data. Otherwise, invoking the getter results in an undefined behavior. + * + * @param instance An opaque instance of the underlying type. + * @return A meta any containing the value of the underlying variable. + */ + [[nodiscard]] meta_any get(meta_handle instance) const { + return node->get(std::move(instance)); + } + + /*! @copydoc meta_ctor::prop */ + [[nodiscard]] meta_range prop() const ENTT_NOEXCEPT { + return node->prop; + } + + /** + * @brief Returns the property associated with a given key. + * @param key The key to use to search for a property. + * @return The property associated with the given key, if any. + */ + [[nodiscard]] meta_prop prop(meta_any key) const { + internal::meta_range range{node->prop}; + return std::find_if(range.begin(), range.end(), [&key](const auto &curr) { return curr.key() == key; }).operator->(); + } + + /** + * @brief Returns true if a meta object is valid, false otherwise. + * @return True if the meta object is valid, false otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return !(node == nullptr); + } + +private: + const node_type *node; +}; + + +/*! @brief Opaque wrapper for meta functions. */ +struct meta_func { + /*! @brief Node type. */ + using node_type = internal::meta_func_node; + /*! @brief Unsigned integer type. 
*/ + using size_type = typename node_type::size_type; + + /*! @copydoc meta_prop::meta_prop */ + meta_func(const node_type *curr = nullptr) ENTT_NOEXCEPT + : node{curr} + {} + + /*! @copydoc meta_type::id */ + [[nodiscard]] id_type id() const ENTT_NOEXCEPT { + return node->id; + } + + /*! @copydoc meta_base::parent */ + [[nodiscard]] inline meta_type parent() const ENTT_NOEXCEPT; + + /** + * @brief Returns the number of arguments accepted by a meta function. + * @return The number of arguments accepted by the meta function. + */ + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return node->size; + } + + /** + * @brief Indicates whether a meta function is constant or not. + * @return True if the meta function is constant, false otherwise. + */ + [[nodiscard]] bool is_const() const ENTT_NOEXCEPT { + return node->is_const; + } + + /** + * @brief Indicates whether a meta function is static or not. + * @return True if the meta function is static, false otherwise. + */ + [[nodiscard]] bool is_static() const ENTT_NOEXCEPT { + return node->is_static; + } + + /** + * @brief Returns the meta type of the return type of a meta function. + * @return The meta type of the return type of the meta function. + */ + [[nodiscard]] inline meta_type ret() const ENTT_NOEXCEPT; + + /** + * @brief Returns the meta type of the i-th argument of a meta function. + * @param index The index of the argument of which to return the meta type. + * @return The meta type of the i-th argument of a meta function, if any. + */ + [[nodiscard]] inline meta_type arg(size_type index) const ENTT_NOEXCEPT; + + /** + * @brief Invokes the underlying function, if possible. + * + * To invoke a meta function, the parameters must be such that a cast or + * conversion to the required types is possible. Otherwise, an empty and + * thus invalid wrapper is returned.
+ * It must be possible to cast the instance to the parent type of the meta + * function. Otherwise, invoking the underlying function results in an + * undefined behavior. + * + * @param instance An opaque instance of the underlying type. + * @param args Parameters to use to invoke the function. + * @param sz Number of parameters to use to invoke the function. + * @return A meta any containing the returned value, if any. + */ + [[nodiscard]] meta_any invoke(meta_handle instance, meta_any * const args, const std::size_t sz) const { + return sz == size() ? node->invoke(instance, args) : meta_any{}; + } + + /** + * @copybrief invoke + * + * @sa invoke + * + * @tparam Args Types of arguments to use to invoke the function. + * @param instance An opaque instance of the underlying type. + * @param args Parameters to use to invoke the function. + * @return A meta any containing the returned value, if any. + */ + template + meta_any invoke(meta_handle instance, Args &&... args) const { + std::array arguments{std::forward(args)...}; + return invoke(instance, arguments.data(), sizeof...(Args)); + } + + /*! @copydoc meta_ctor::prop */ + [[nodiscard]] meta_range prop() const ENTT_NOEXCEPT { + return node->prop; + } + + /** + * @brief Returns the property associated with a given key. + * @param key The key to use to search for a property. + * @return The property associated with the given key, if any. + */ + [[nodiscard]] meta_prop prop(meta_any key) const { + internal::meta_range range{node->prop}; + return std::find_if(range.begin(), range.end(), [&key](const auto &curr) { return curr.key() == key; }).operator->(); + } + + /** + * @brief Returns true if a meta object is valid, false otherwise. + * @return True if the meta object is valid, false otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return !(node == nullptr); + } + +private: + const node_type *node; +}; + + +/*! @brief Opaque wrapper for meta types. 
*/ +class meta_type { + template + [[nodiscard]] auto ctor(std::index_sequence) const { + internal::meta_range range{node->ctor}; + + return std::find_if(range.begin(), range.end(), [](const auto &candidate) { + return candidate.size == sizeof...(Args) && ([](auto *from, auto *to) { + return (from->type_id == to->type_id) + || internal::find_if<&node_type::base>([to](const auto *curr) { return curr->type()->type_id == to->type_id; }, from) + || internal::find_if<&node_type::conv>([to](const auto *curr) { return curr->type()->type_id == to->type_id; }, from); + }(internal::meta_info::resolve(), candidate.arg(Indexes)) && ...); + }).operator->(); + } + +public: + /*! @brief Node type. */ + using node_type = internal::meta_type_node; + /*! @brief Unsigned integer type. */ + using size_type = typename node_type::size_type; + + /*! @copydoc meta_prop::meta_prop */ + meta_type(node_type *curr = nullptr) ENTT_NOEXCEPT + : node{curr} + {} + + /** + * @brief Returns the type id of the underlying type. + * @return The type id of the underlying type. + */ + [[nodiscard]] id_type type_id() const ENTT_NOEXCEPT { + return node->type_id; + } + + /** + * @brief Returns the identifier assigned to a meta object. + * @return The identifier assigned to the meta object. + */ + [[nodiscard]] id_type id() const ENTT_NOEXCEPT { + return node->id; + } + + /** + * @brief Checks whether a type refers to void or not. + * @return True if the underlying type is void, false otherwise. + */ + [[nodiscard]] bool is_void() const ENTT_NOEXCEPT { + return node->is_void; + } + + /** + * @brief Checks whether a type refers to an integral type or not. + * @return True if the underlying type is an integral type, false otherwise. + */ + [[nodiscard]] bool is_integral() const ENTT_NOEXCEPT { + return node->is_integral; + } + + /** + * @brief Checks whether a type refers to a floating-point type or not. + * @return True if the underlying type is a floating-point type, false + * otherwise. 
+ */ + [[nodiscard]] bool is_floating_point() const ENTT_NOEXCEPT { + return node->is_floating_point; + } + + /** + * @brief Checks whether a type refers to an array type or not. + * @return True if the underlying type is an array type, false otherwise. + */ + [[nodiscard]] bool is_array() const ENTT_NOEXCEPT { + return node->is_array; + } + + /** + * @brief Checks whether a type refers to an enum or not. + * @return True if the underlying type is an enum, false otherwise. + */ + [[nodiscard]] bool is_enum() const ENTT_NOEXCEPT { + return node->is_enum; + } + + /** + * @brief Checks whether a type refers to a union or not. + * @return True if the underlying type is a union, false otherwise. + */ + [[nodiscard]] bool is_union() const ENTT_NOEXCEPT { + return node->is_union; + } + + /** + * @brief Checks whether a type refers to a class or not. + * @return True if the underlying type is a class, false otherwise. + */ + [[nodiscard]] bool is_class() const ENTT_NOEXCEPT { + return node->is_class; + } + + /** + * @brief Checks whether a type refers to a pointer or not. + * @return True if the underlying type is a pointer, false otherwise. + */ + [[nodiscard]] bool is_pointer() const ENTT_NOEXCEPT { + return node->is_pointer; + } + + /** + * @brief Checks whether a type refers to a function pointer or not. + * @return True if the underlying type is a function pointer, false + * otherwise. + */ + [[nodiscard]] bool is_function_pointer() const ENTT_NOEXCEPT { + return node->is_function_pointer; + } + + /** + * @brief Checks whether a type refers to a pointer to data member or not. + * @return True if the underlying type is a pointer to data member, false + * otherwise. + */ + [[nodiscard]] bool is_member_object_pointer() const ENTT_NOEXCEPT { + return node->is_member_object_pointer; + } + + /** + * @brief Checks whether a type refers to a pointer to member function or + * not. + * @return True if the underlying type is a pointer to member function, + * false otherwise. 
+ */ + [[nodiscard]] bool is_member_function_pointer() const ENTT_NOEXCEPT { + return node->is_member_function_pointer; + } + + /** + * @brief Checks whether a type is a pointer-like type or not. + * @return True if the underlying type is a pointer-like one, false + * otherwise. + */ + [[nodiscard]] bool is_pointer_like() const ENTT_NOEXCEPT { + return node->is_pointer_like; + } + + /** + * @brief Checks whether a type refers to a sequence container or not. + * @return True if the underlying type is a sequence container, false + * otherwise. + */ + [[nodiscard]] bool is_sequence_container() const ENTT_NOEXCEPT { + return node->is_sequence_container; + } + + /** + * @brief Checks whether a type refers to an associative container or not. + * @return True if the underlying type is an associative container, false + * otherwise. + */ + [[nodiscard]] bool is_associative_container() const ENTT_NOEXCEPT { + return node->is_associative_container; + } + + /** + * @brief If a type refers to an array type, provides the number of + * dimensions of the array. + * @return The number of dimensions of the array if the underlying type is + * an array type, 0 otherwise. + */ + [[nodiscard]] size_type rank() const ENTT_NOEXCEPT { + return node->rank; + } + + /** + * @brief If a type refers to an array type, provides the number of elements + * along the given dimension of the array. + * @param dim The dimension of which to return the number of elements. + * @return The number of elements along the given dimension of the array if + * the underlying type is an array type, 0 otherwise. + */ + [[nodiscard]] size_type extent(size_type dim = {}) const ENTT_NOEXCEPT { + return node->extent(dim); + } + + /** + * @brief Provides the meta type for which the pointer is defined. + * @return The meta type for which the pointer is defined or this meta type + * if it doesn't refer to a pointer type. 
+ */ + [[nodiscard]] meta_type remove_pointer() const ENTT_NOEXCEPT { + return node->remove_pointer(); + } + + /** + * @brief Provides the meta type for which the array is defined. + * @return The meta type for which the array is defined or this meta type + * if it doesn't refer to an array type. + */ + [[nodiscard]] meta_type remove_extent() const ENTT_NOEXCEPT { + return node->remove_extent(); + } + + /** + * @brief Returns a range to use to visit top-level meta bases. + * @return An iterable range to use to visit top-level meta bases. + */ + [[nodiscard]] meta_range base() const ENTT_NOEXCEPT { + return node->base; + } + + /** + * @brief Returns the meta base associated with a given identifier. + * @param id Unique identifier. + * @return The meta base associated with the given identifier, if any. + */ + [[nodiscard]] meta_base base(const id_type id) const { + return internal::find_if<&node_type::base>([id](const auto *curr) { + return curr->type()->id == id; + }, node); + } + + /** + * @brief Returns a range to use to visit top-level meta conversion + * functions. + * @return An iterable range to use to visit top-level meta conversion + * functions. + */ + [[nodiscard]] meta_range conv() const ENTT_NOEXCEPT { + return node->conv; + } + + /** + * @brief Returns the meta conversion function associated with a given type. + * @tparam Type The type to use to search for a meta conversion function. + * @return The meta conversion function associated with the given type, if + * any. + */ + template + [[nodiscard]] meta_conv conv() const { + return internal::find_if<&node_type::conv>([type_id = internal::meta_info::resolve()->type_id](const auto *curr) { + return curr->type()->type_id == type_id; + }, node); + } + + /** + * @brief Returns a range to use to visit top-level meta constructors. + * @return An iterable range to use to visit top-level meta constructors. 
+ */ + [[nodiscard]] meta_range ctor() const ENTT_NOEXCEPT { + return node->ctor; + } + + /** + * @brief Returns the meta constructor that accepts a given list of types of + * arguments. + * @return The requested meta constructor, if any. + */ + template + [[nodiscard]] meta_ctor ctor() const { + return ctor(std::index_sequence_for{}); + } + + /** + * @brief Returns a range to use to visit top-level meta data. + * @return An iterable range to use to visit top-level meta data. + */ + [[nodiscard]] meta_range data() const ENTT_NOEXCEPT { + return node->data; + } + + /** + * @brief Returns the meta data associated with a given identifier. + * + * The meta data of the base classes will also be visited, if any. + * + * @param id Unique identifier. + * @return The meta data associated with the given identifier, if any. + */ + [[nodiscard]] meta_data data(const id_type id) const { + return internal::find_if<&node_type::data>([id](const auto *curr) { + return curr->id == id; + }, node); + } + + /** + * @brief Returns a range to use to visit top-level meta functions. + * @return An iterable range to use to visit top-level meta functions. + */ + [[nodiscard]] meta_range func() const ENTT_NOEXCEPT { + return node->func; + } + + /** + * @brief Returns the meta function associated with a given identifier. + * + * The meta functions of the base classes will also be visited, if any. + * + * @param id Unique identifier. + * @return The meta function associated with the given identifier, if any. + */ + [[nodiscard]] meta_func func(const id_type id) const { + return internal::find_if<&node_type::func>([id](const auto *curr) { + return curr->id == id; + }, node); + } + + /** + * @brief Creates an instance of the underlying type, if possible. + * + * To create a valid instance, the parameters must be such that a cast or + * conversion to the required types is possible. Otherwise, an empty and + * thus invalid wrapper is returned. 
+ * + * @param args Parameters to use to construct the instance. + * @param sz Number of parameters to use to construct the instance. + * @return A meta any containing the new instance, if any. + */ + [[nodiscard]] meta_any construct(meta_any * const args, const std::size_t sz) const { + meta_any any{}; + + internal::find_if<&node_type::ctor>([args, sz, &any](const auto *curr) { + return (curr->size == sz) && (any = curr->invoke(args)); + }, node); + + return any; + } + + /** + * @copybrief construct + * + * @sa construct + * + * @tparam Args Types of arguments to use to construct the instance. + * @param args Parameters to use to construct the instance. + * @return A meta any containing the new instance, if any. + */ + template + [[nodiscard]] meta_any construct(Args &&... args) const { + std::array arguments{std::forward(args)...}; + return construct(arguments.data(), sizeof...(Args)); + } + + /** + * @brief Returns a range to use to visit top-level meta properties. + * @return An iterable range to use to visit top-level meta properties. + */ + [[nodiscard]] meta_range prop() const ENTT_NOEXCEPT { + return node->prop; + } + + /** + * @brief Returns the property associated with a given key. + * + * Properties of the base classes will also be visited, if any. + * + * @param key The key to use to search for a property. + * @return The property associated with the given key, if any. + */ + [[nodiscard]] meta_prop prop(meta_any key) const { + return internal::find_if<&node_type::prop>([key = std::move(key)](const auto *curr) { + return curr->key() == key; + }, node); + } + + /** + * @brief Returns true if a meta object is valid, false otherwise. + * @return True if the meta object is valid, false otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return !(node == nullptr); + } + + /** + * @brief Checks if two meta objects refer to the same type. + * @param other The meta object with which to compare. 
+ * @return True if the two meta objects refer to the same type, false + * otherwise. + */ + [[nodiscard]] bool operator==(const meta_type &other) const ENTT_NOEXCEPT { + return (!node && !other.node) || (node && other.node && node->type_id == other.node->type_id); + } + + /** + * @brief Resets a meta type and all its parts. + * + * This function resets a meta type and all its data members, member + * functions and properties, as well as its constructors, destructors and + * conversion functions if any.
+ * Base classes aren't reset but the link between the two types is removed. + * + * The meta type is also removed from the list of searchable types. + */ + void reset() ENTT_NOEXCEPT { + auto** it = internal::meta_context::global(); + + while (*it && *it != node) { + it = &(*it)->next; + } + + if(*it) { + *it = (*it)->next; + } + + const auto unregister_all = y_combinator{ + [](auto &&self, auto **curr, auto... member) { + while(*curr) { + auto *prev = *curr; + (self(&(prev->*member)), ...); + *curr = prev->next; + prev->next = nullptr; + } + } + }; + + unregister_all(&node->prop); + unregister_all(&node->base); + unregister_all(&node->conv); + unregister_all(&node->ctor, &internal::meta_ctor_node::prop); + unregister_all(&node->data, &internal::meta_data_node::prop); + unregister_all(&node->func, &internal::meta_func_node::prop); + + node->id = {}; + node->dtor = nullptr; + } + +private: + node_type *node; +}; + + +/** + * @brief Checks if two meta objects refer to different types. + * @param lhs A meta object, either valid or not. + * @param rhs A meta object, either valid or not. + * @return False if the two meta objects refer to the same type, true otherwise. 
+ */ +[[nodiscard]] inline bool operator!=(const meta_type &lhs, const meta_type &rhs) ENTT_NOEXCEPT { + return !(lhs == rhs); +} + + +[[nodiscard]] inline meta_type meta_any::type() const ENTT_NOEXCEPT { + return node; +} + + +[[nodiscard]] inline meta_type meta_base::parent() const ENTT_NOEXCEPT { + return node->parent; +} + + +[[nodiscard]] inline meta_type meta_base::type() const ENTT_NOEXCEPT { + return node->type(); +} + + +[[nodiscard]] inline meta_type meta_conv::parent() const ENTT_NOEXCEPT { + return node->parent; +} + + +[[nodiscard]] inline meta_type meta_conv::type() const ENTT_NOEXCEPT { + return node->type(); +} + + +[[nodiscard]] inline meta_type meta_ctor::parent() const ENTT_NOEXCEPT { + return node->parent; +} + + +[[nodiscard]] inline meta_type meta_ctor::arg(size_type index) const ENTT_NOEXCEPT { + return index < size() ? node->arg(index) : nullptr; +} + + +[[nodiscard]] inline meta_type meta_data::parent() const ENTT_NOEXCEPT { + return node->parent; +} + + +[[nodiscard]] inline meta_type meta_data::type() const ENTT_NOEXCEPT { + return node->type(); +} + + +[[nodiscard]] inline meta_type meta_func::parent() const ENTT_NOEXCEPT { + return node->parent; +} + + +[[nodiscard]] inline meta_type meta_func::ret() const ENTT_NOEXCEPT { + return node->ret(); +} + + +[[nodiscard]] inline meta_type meta_func::arg(size_type index) const ENTT_NOEXCEPT { + return index < size() ? node->arg(index) : nullptr; +} + + +/*! @brief Opaque iterator for meta sequence containers. */ +class meta_sequence_container::meta_iterator { + /*! @brief A meta sequence container can access the underlying iterator. */ + friend class meta_sequence_container; + + template + static void incr(meta_any any) { + ++any.cast(); + } + + template + [[nodiscard]] static meta_any deref(meta_any any) { + if constexpr(std::is_const_v())>>) { + return *any.cast(); + } else { + return std::ref(*any.cast()); + } + } + +public: + /*! @brief Signed integer type. 
*/ + using difference_type = std::ptrdiff_t; + /*! @brief Type of elements returned by the iterator. */ + using value_type = meta_any; + /*! @brief Pointer type, `void` on purpose. */ + using pointer = void; + /*! @brief Reference type, it is **not** an actual reference. */ + using reference = value_type; + /*! @brief Iterator category. */ + using iterator_category = std::input_iterator_tag; + + /*! @brief Default constructor. */ + meta_iterator() ENTT_NOEXCEPT = default; + + /** + * @brief Constructs a meta iterator from a given iterator. + * @tparam It Type of actual iterator with which to build the meta iterator. + * @param iter The actual iterator with which to build the meta iterator. + */ + template + meta_iterator(It iter) + : next_fn{&incr}, + get_fn{&deref}, + handle{std::move(iter)} + {} + + /*! @brief Pre-increment operator. @return This iterator. */ + meta_iterator & operator++() ENTT_NOEXCEPT { + return next_fn(handle.ref()), *this; + } + + /*! @brief Post-increment operator. @return This iterator. */ + meta_iterator operator++(int) ENTT_NOEXCEPT { + meta_iterator orig = *this; + return ++(*this), orig; + } + + /** + * @brief Checks if two meta iterators refer to the same element. + * @param other The meta iterator with which to compare. + * @return True if the two meta iterators refer to the same element, false + * otherwise. + */ + [[nodiscard]] bool operator==(const meta_iterator &other) const ENTT_NOEXCEPT { + return handle == other.handle; + } + + /** + * @brief Checks if two meta iterators refer to the same element. + * @param other The meta iterator with which to compare. + * @return False if the two meta iterators refer to the same element, true + * otherwise. + */ + [[nodiscard]] bool operator!=(const meta_iterator &other) const ENTT_NOEXCEPT { + return !(*this == other); + } + + /** + * @brief Indirection operator. + * @return The element to which the meta iterator points. 
+ */ + [[nodiscard]] reference operator*() const { + return get_fn(handle.ref()); + } + + /** + * @brief Returns false if an iterator is invalid, true otherwise. + * @return False if the iterator is invalid, true otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return static_cast(handle); + } + +private: + void(* next_fn)(meta_any); + meta_any(* get_fn)(meta_any); + meta_any handle; +}; + + +template +struct meta_sequence_container::meta_sequence_container_proxy { + using traits_type = meta_sequence_container_traits; + + [[nodiscard]] static meta_type value_type() ENTT_NOEXCEPT { + return internal::meta_info::resolve(); + } + + [[nodiscard]] static size_type size(const void *container) ENTT_NOEXCEPT { + return traits_type::size(*static_cast(container)); + } + + [[nodiscard]] static bool resize(void *container, size_type sz) { + return traits_type::resize(*static_cast(container), sz); + } + + [[nodiscard]] static bool clear(void *container) { + return traits_type::clear(*static_cast(container)); + } + + [[nodiscard]] static iterator begin(void *container) { + return iterator{traits_type::begin(*static_cast(container))}; + } + + [[nodiscard]] static iterator end(void *container) { + return iterator{traits_type::end(*static_cast(container))}; + } + + [[nodiscard]] static std::pair insert(void *container, iterator it, meta_any value) { + if(const auto *v_ptr = value.try_cast(); v_ptr || value.convert()) { + auto ret = traits_type::insert(*static_cast(container), it.handle.cast(), v_ptr ? 
*v_ptr : value.cast()); + return {iterator{std::move(ret.first)}, ret.second}; + } + + return {}; + } + + [[nodiscard]] static std::pair erase(void *container, iterator it) { + auto ret = traits_type::erase(*static_cast(container), it.handle.cast()); + return {iterator{std::move(ret.first)}, ret.second}; + } + + [[nodiscard]] static meta_any get(void *container, size_type pos) { + return std::ref(traits_type::get(*static_cast(container), pos)); + } +}; + + +/** + * @brief Returns the value meta type of the wrapped container type. + * @return The value meta type of the wrapped container type. + */ +[[nodiscard]] inline meta_type meta_sequence_container::value_type() const ENTT_NOEXCEPT { + return value_type_fn(); +} + + +/** + * @brief Returns the size of the wrapped container. + * @return The size of the wrapped container. + */ +[[nodiscard]] inline meta_sequence_container::size_type meta_sequence_container::size() const ENTT_NOEXCEPT { + return size_fn(instance); +} + + +/** + * @brief Resizes the wrapped container to contain a given number of elements. + * @param sz The new size of the container. + * @return True in case of success, false otherwise. + */ +inline bool meta_sequence_container::resize(size_type sz) const { + return resize_fn(instance, sz); +} + + +/** + * @brief Clears the content of the wrapped container. + * @return True in case of success, false otherwise. + */ +inline bool meta_sequence_container::clear() { + return clear_fn(instance); +} + + +/** + * @brief Returns a meta iterator to the first element of the wrapped container. + * @return A meta iterator to the first element of the wrapped container. + */ +[[nodiscard]] inline meta_sequence_container::iterator meta_sequence_container::begin() { + return begin_fn(instance); +} + + +/** + * @brief Returns a meta iterator that is past the last element of the wrapped + * container. + * @return A meta iterator that is past the last element of the wrapped + * container. 
+ */ +[[nodiscard]] inline meta_sequence_container::iterator meta_sequence_container::end() { + return end_fn(instance); +} + + +/** + * @brief Inserts an element at a specified location of the wrapped container. + * @param it Meta iterator before which the element will be inserted. + * @param value Element value to insert. + * @return A pair consisting of a meta iterator to the inserted element (in + * case of success) and a bool denoting whether the insertion took place. + */ +inline std::pair meta_sequence_container::insert(iterator it, meta_any value) { + return insert_fn(instance, it, value.ref()); +} + + +/** + * @brief Removes the specified element from the wrapped container. + * @param it Meta iterator to the element to remove. + * @return A pair consisting of a meta iterator following the last removed + * element (in case of success) and a bool denoting whether the removal + * took place. + */ +inline std::pair meta_sequence_container::erase(iterator it) { + return erase_fn(instance, it); +} + + +/** + * @brief Returns a reference to the element at a specified location of the + * wrapped container (no bounds checking is performed). + * @param pos The position of the element to return. + * @return A reference to the requested element properly wrapped. + */ +[[nodiscard]] inline meta_any meta_sequence_container::operator[](size_type pos) { + return get_fn(instance, pos); +} + + +/** + * @brief Returns false if a proxy is invalid, true otherwise. + * @return False if the proxy is invalid, true otherwise. + */ +[[nodiscard]] inline meta_sequence_container::operator bool() const ENTT_NOEXCEPT { + return (instance != nullptr); +} + + +/*! @brief Opaque iterator for meta associative containers. 
*/ +class meta_associative_container::meta_iterator { + template + static void incr(meta_any any) { + ++any.cast(); + } + + template + [[nodiscard]] static meta_any key(meta_any any) { + if constexpr(KeyOnly) { + return *any.cast(); + } else { + return any.cast()->first; + } + } + + template + [[nodiscard]] static meta_any value([[maybe_unused]] meta_any any) { + if constexpr(KeyOnly) { + return meta_any{}; + } else { + return std::ref(any.cast()->second); + } + } + +public: + /*! @brief Signed integer type. */ + using difference_type = std::ptrdiff_t; + /*! @brief Type of elements returned by the iterator. */ + using value_type = std::pair; + /*! @brief Pointer type, `void` on purpose. */ + using pointer = void; + /*! @brief Reference type, it is **not** an actual reference. */ + using reference = value_type; + /*! @brief Iterator category. */ + using iterator_category = std::input_iterator_tag; + + /*! @brief Default constructor. */ + meta_iterator() ENTT_NOEXCEPT = default; + + /** + * @brief Constructs a meta iterator from a given iterator. + * @tparam KeyOnly True if the associative container is also key-only, false + * otherwise. + * @tparam It Type of actual iterator with which to build the meta iterator. + * @param iter The actual iterator with which to build the meta iterator. + */ + template + meta_iterator(std::integral_constant, It iter) + : next_fn{&incr}, + key_fn{&key}, + value_fn{&value}, + handle{std::move(iter)} + {} + + /*! @brief Pre-increment operator. @return This iterator. */ + meta_iterator & operator++() ENTT_NOEXCEPT { + return next_fn(handle.ref()), *this; + } + + /*! @brief Post-increment operator. @return This iterator. */ + meta_iterator operator++(int) ENTT_NOEXCEPT { + meta_iterator orig = *this; + return ++(*this), orig; + } + + /** + * @brief Checks if two meta iterators refer to the same element. + * @param other The meta iterator with which to compare. 
+ * @return True if the two meta iterators refer to the same element, false + * otherwise. + */ + [[nodiscard]] bool operator==(const meta_iterator &other) const ENTT_NOEXCEPT { + return handle == other.handle; + } + + /** + * @brief Checks if two meta iterators refer to the same element. + * @param other The meta iterator with which to compare. + * @return False if the two meta iterators refer to the same element, true + * otherwise. + */ + [[nodiscard]] bool operator!=(const meta_iterator &other) const ENTT_NOEXCEPT { + return !(*this == other); + } + + /** + * @brief Indirection operator. + * @return The element to which the meta iterator points. + */ + [[nodiscard]] reference operator*() const { + return { key_fn(handle.ref()), value_fn(handle.ref()) }; + } + + /** + * @brief Returns false if an iterator is invalid, true otherwise. + * @return False if the iterator is invalid, true otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return static_cast(handle); + } + +private: + void(* next_fn)(meta_any); + meta_any(* key_fn)(meta_any); + meta_any(* value_fn)(meta_any); + meta_any handle; +}; + + +template +struct meta_associative_container::meta_associative_container_proxy { + using traits_type = meta_associative_container_traits; + + [[nodiscard]] static meta_type key_type() ENTT_NOEXCEPT { + return internal::meta_info::resolve(); + } + + [[nodiscard]] static meta_type mapped_type() ENTT_NOEXCEPT { + if constexpr(is_key_only_meta_associative_container_v) { + return meta_type{}; + } else { + return internal::meta_info::resolve(); + } + } + + [[nodiscard]] static meta_type value_type() ENTT_NOEXCEPT { + return internal::meta_info::resolve(); + } + + [[nodiscard]] static size_type size(const void *container) ENTT_NOEXCEPT { + return traits_type::size(*static_cast(container)); + } + + [[nodiscard]] static bool clear(void *container) { + return traits_type::clear(*static_cast(container)); + } + + [[nodiscard]] static iterator begin(void 
*container) { + return iterator{is_key_only_meta_associative_container{}, traits_type::begin(*static_cast(container))}; + } + + [[nodiscard]] static iterator end(void *container) { + return iterator{is_key_only_meta_associative_container{}, traits_type::end(*static_cast(container))}; + } + + [[nodiscard]] static bool insert(void *container, meta_any key, meta_any value) { + if(const auto *k_ptr = key.try_cast(); k_ptr || key.convert()) { + if constexpr(is_key_only_meta_associative_container_v) { + return traits_type::insert(*static_cast(container), k_ptr ? *k_ptr : key.cast()); + } else { + if(auto *m_ptr = value.try_cast(); m_ptr || value.convert()) { + return traits_type::insert(*static_cast(container), k_ptr ? *k_ptr : key.cast(), m_ptr ? *m_ptr : value.cast()); + } + } + } + + return false; + } + + [[nodiscard]] static bool erase(void *container, meta_any key) { + if(const auto *k_ptr = key.try_cast(); k_ptr || key.convert()) { + return traits_type::erase(*static_cast(container), k_ptr ? *k_ptr : key.cast()); + } + + return false; + } + + [[nodiscard]] static iterator find(void *container, meta_any key) { + if(const auto *k_ptr = key.try_cast(); k_ptr || key.convert()) { + return iterator{is_key_only_meta_associative_container{}, traits_type::find(*static_cast(container), k_ptr ? *k_ptr : key.cast())}; + } + + return {}; + } +}; + + +/** + * @brief Returns true if the associative container is also key-only, false + * otherwise. + * @return True if the associative container is also key-only, false otherwise. + */ +[[nodiscard]] inline bool meta_associative_container::key_only() const ENTT_NOEXCEPT { + return key_only_container; +} + + +/** + * @brief Returns the key meta type of the wrapped container type. + * @return The key meta type of the wrapped container type. 
+ */ +[[nodiscard]] inline meta_type meta_associative_container::key_type() const ENTT_NOEXCEPT { + return key_type_fn(); +} + + +/** + * @brief Returns the mapped meta type of the wrapped container type. + * @return The mapped meta type of the wrapped container type. + */ +[[nodiscard]] inline meta_type meta_associative_container::mapped_type() const ENTT_NOEXCEPT { + return mapped_type_fn(); +} + + +/*! @copydoc meta_sequence_container::value_type */ +[[nodiscard]] inline meta_type meta_associative_container::value_type() const ENTT_NOEXCEPT { + return value_type_fn(); +} + + +/*! @copydoc meta_sequence_container::size */ +[[nodiscard]] inline meta_associative_container::size_type meta_associative_container::size() const ENTT_NOEXCEPT { + return size_fn(instance); +} + + +/*! @copydoc meta_sequence_container::clear */ +inline bool meta_associative_container::clear() { + return clear_fn(instance); +} + + +/*! @copydoc meta_sequence_container::begin */ +[[nodiscard]] inline meta_associative_container::iterator meta_associative_container::begin() { + return begin_fn(instance); +} + + +/*! @copydoc meta_sequence_container::end */ +[[nodiscard]] inline meta_associative_container::iterator meta_associative_container::end() { + return end_fn(instance); +} + + +/** + * @brief Inserts an element (a key/value pair) into the wrapped container. + * @param key The key of the element to insert. + * @param value The value of the element to insert. + * @return A bool denoting whether the insertion took place. + */ +inline bool meta_associative_container::insert(meta_any key, meta_any value = {}) { + return insert_fn(instance, key.ref(), value.ref()); +} + + +/** + * @brief Removes the specified element from the wrapped container. + * @param key The key of the element to remove. + * @return A bool denoting whether the removal took place. 
+ */ +inline bool meta_associative_container::erase(meta_any key) { + return erase_fn(instance, key.ref()); +} + + +/** + * @brief Returns an iterator to the element with key equivalent to a given + * one, if any. + * @param key The key of the element to search. + * @return An iterator to the element with the given key, if any. + */ +[[nodiscard]] inline meta_associative_container::iterator meta_associative_container::find(meta_any key) { + return find_fn(instance, key.ref()); +} + + +/** + * @brief Returns false if a proxy is invalid, true otherwise. + * @return False if the proxy is invalid, true otherwise. + */ +[[nodiscard]] inline meta_associative_container::operator bool() const ENTT_NOEXCEPT { + return (instance != nullptr); +} + + +} + + +#endif + +// #include "policy.hpp" +#ifndef ENTT_META_POLICY_HPP +#define ENTT_META_POLICY_HPP + + +namespace entt { + + +/*! @brief Empty class type used to request the _as ref_ policy. */ +struct as_ref_t {}; + + +/*! @brief Disambiguation tag. */ +inline constexpr as_ref_t as_ref; + + +/*! @brief Empty class type used to request the _as-is_ policy. */ +struct as_is_t {}; + + +/*! @brief Empty class type used to request the _as void_ policy. */ +struct as_void_t {}; + + +} + + +#endif + + + +namespace entt { + + +/** + * @cond TURN_OFF_DOXYGEN + * Internal details not to be documented. + */ + + +namespace internal { + + +template +struct meta_function_helper; + + +template +struct meta_function_helper { + using return_type = std::remove_cv_t>; + using args_type = std::tuple>...>; + + static constexpr auto is_const = Const; + + [[nodiscard]] static auto arg(typename internal::meta_func_node::size_type index) ENTT_NOEXCEPT { + return std::array{{meta_info::resolve()...}}[index]; + } +}; + + +template +constexpr meta_function_helper +to_meta_function_helper(Ret(Class:: *)(Args...) 
const); + + +template +constexpr meta_function_helper +to_meta_function_helper(Ret(Class:: *)(Args...)); + + +template +constexpr meta_function_helper +to_meta_function_helper(Ret(*)(Args...)); + + +constexpr void to_meta_function_helper(...); + + +template +using meta_function_helper_t = decltype(to_meta_function_helper(std::declval())); + + +template +[[nodiscard]] meta_any construct(meta_any * const args, std::index_sequence) { + [[maybe_unused]] auto direct = std::make_tuple((args+Indexes)->try_cast()...); + return ((std::get(direct) || (args+Indexes)->convert()) && ...) + ? Type{(std::get(direct) ? *std::get(direct) : (args+Indexes)->cast())...} + : meta_any{}; +} + + +template +[[nodiscard]] bool setter([[maybe_unused]] meta_handle instance, [[maybe_unused]] meta_any value) { + bool accepted = false; + + if constexpr(std::is_function_v>> || std::is_member_function_pointer_v) { + using helper_type = meta_function_helper_t; + using data_type = std::tuple_element_t, typename helper_type::args_type>; + + if(auto * const clazz = instance->try_cast(); clazz) { + if(auto * const direct = value.try_cast(); direct || value.convert()) { + std::invoke(Data, *clazz, direct ? *direct : value.cast()); + accepted = true; + } + } + } else if constexpr(std::is_member_object_pointer_v) { + using data_type = std::remove_cv_t().*Data)>>; + + if constexpr(!std::is_array_v) { + if(auto * const clazz = instance->try_cast(); clazz) { + if(auto * const direct = value.try_cast(); direct || value.convert()) { + std::invoke(Data, clazz) = (direct ? *direct : value.cast()); + accepted = true; + } + } + } + } else { + using data_type = std::remove_cv_t>; + + if constexpr(!std::is_array_v) { + if(auto * const direct = value.try_cast(); direct || value.convert()) { + *Data = (direct ? 
*direct : value.cast()); + accepted = true; + } + } + } + + return accepted; +} + + +template +[[nodiscard]] meta_any getter([[maybe_unused]] meta_handle instance) { + [[maybe_unused]] auto dispatch = [](auto &&value) { + if constexpr(std::is_same_v) { + return meta_any{std::in_place_type, std::forward(value)}; + } else if constexpr(std::is_same_v) { + return meta_any{std::ref(std::forward(value))}; + } else { + static_assert(std::is_same_v, "Policy not supported"); + return meta_any{std::forward(value)}; + } + }; + + if constexpr(std::is_function_v>> || std::is_member_function_pointer_v) { + auto * const clazz = instance->try_cast(); + return clazz ? dispatch(std::invoke(Data, *clazz)) : meta_any{}; + } else if constexpr(std::is_member_object_pointer_v) { + if constexpr(std::is_array_v().*Data)>>>) { + return meta_any{}; + } else { + auto * const clazz = instance->try_cast(); + return clazz ? dispatch(std::invoke(Data, clazz)) : meta_any{}; + } + } else if constexpr(std::is_pointer_v>) { + if constexpr(std::is_array_v>) { + return meta_any{}; + } else { + return dispatch(*Data); + } + } else { + return dispatch(Data); + } +} + + +template +[[nodiscard]] meta_any invoke([[maybe_unused]] meta_handle instance, meta_any *args, std::index_sequence) { + using helper_type = meta_function_helper_t; + + auto dispatch = [](auto *... 
params) { + if constexpr(std::is_void_v || std::is_same_v) { + std::invoke(Candidate, *params...); + return meta_any{std::in_place_type}; + } else if constexpr(std::is_same_v) { + return meta_any{std::ref(std::invoke(Candidate, *params...))}; + } else { + static_assert(std::is_same_v, "Policy not supported"); + return meta_any{std::invoke(Candidate, *params...)}; + } + }; + + [[maybe_unused]] const auto direct = std::make_tuple([](meta_any *any, auto *value) { + using arg_type = std::remove_reference_t; + + if(!value && any->convert()) { + value = any->try_cast(); + } + + return value; + }(args+Indexes, (args+Indexes)->try_cast>())...); + + if constexpr(std::is_function_v>>) { + return (std::get(direct) && ...) ? dispatch(std::get(direct)...) : meta_any{}; + } else { + auto * const clazz = instance->try_cast(); + return (clazz && (std::get(direct) && ...)) ? dispatch(clazz, std::get(direct)...) : meta_any{}; + } +} + + +} + + +/** + * Internal details not to be documented. + * @endcond + */ + + +/** + * @brief Meta factory to be used for reflection purposes. + * + * The meta factory is a utility class used to reflect types, data members and + * functions of all sorts. This class ensures that the underlying web of types + * is built correctly and performs some checks in debug mode to ensure that + * there are no subtle errors at runtime. + */ +template +class meta_factory; + + +/** + * @brief Extended meta factory to be used for reflection purposes. + * @tparam Type Reflected type for which the factory was created. + * @tparam Spec Property specialization pack used to disambiguate overloads. + */ +template +class meta_factory: public meta_factory { + [[nodiscard]] bool exists(const meta_any &key, const internal::meta_prop_node *node) ENTT_NOEXCEPT { + return node && (node->key() == key || exists(key, node->next)); + } + + template + void unpack(std::index_sequence, std::tuple property, Other &&... 
other) { + unroll(choice<3>, std::move(std::get(property))..., std::forward(other)...); + } + + template + void unroll(choice_t<3>, std::tuple property, Other &&... other) { + unpack(std::index_sequence_for{}, std::move(property), std::forward(other)...); + } + + template + void unroll(choice_t<2>, std::pair property, Other &&... other) { + assign(std::move(property.first), std::move(property.second)); + unroll(choice<3>, std::forward(other)...); + } + + template + std::enable_if_t> + unroll(choice_t<1>, Property &&property, Other &&... other) { + assign(std::forward(property)); + unroll(choice<3>, std::forward(other)...); + } + + template + void unroll(choice_t<0>, Func &&invocable, Other &&... other) { + unroll(choice<3>, std::forward(invocable)(), std::forward(other)...); + } + + template + void unroll(choice_t<0>) {} + + template + void assign(Key &&key, Value &&... value) { + static const auto property{std::make_tuple(std::forward(key), std::forward(value)...)}; + + static internal::meta_prop_node node{ + nullptr, + []() -> meta_any { + return std::get<0>(property); + }, + []() -> meta_any { + if constexpr(sizeof...(Value) == 0) { + return {}; + } else { + return std::get<1>(property); + } + } + }; + + ENTT_ASSERT(!exists(node.key(), *curr)); + node.next = *curr; + *curr = &node; + } + +public: + /** + * @brief Constructs an extended factory from a given node. + * @param target The underlying node to which to assign the properties. + */ + meta_factory(internal::meta_prop_node **target) ENTT_NOEXCEPT + : curr{target} + {} + + /** + * @brief Assigns a property to the last meta object created. + * + * Both the key and the value (if any) must be at least copy constructible. + * + * @tparam PropertyOrKey Type of the property or property key. + * @tparam Value Optional type of the property value. + * @param property_or_key Property or property key. + * @param value Optional property value. + * @return A meta factory for the parent type. 
+ */ + template + auto prop(PropertyOrKey &&property_or_key, Value &&... value) && { + if constexpr(sizeof...(Value) == 0) { + unroll(choice<3>, std::forward(property_or_key)); + } else { + assign(std::forward(property_or_key), std::forward(value)...); + } + + return meta_factory{curr}; + } + + /** + * @brief Assigns properties to the last meta object created. + * + * Both the keys and the values (if any) must be at least copy + * constructible. + * + * @tparam Property Types of the properties. + * @param property Properties to assign to the last meta object created. + * @return A meta factory for the parent type. + */ + template + auto props(Property... property) && { + unroll(choice<3>, std::forward(property)...); + return meta_factory{curr}; + } + +private: + internal::meta_prop_node **curr; +}; + + +/** + * @brief Basic meta factory to be used for reflection purposes. + * @tparam Type Reflected type for which the factory was created. + */ +template +class meta_factory { + template + bool exists(const Node *candidate, const Node *node) ENTT_NOEXCEPT { + return node && (node == candidate || exists(candidate, node->next)); + } + + template + bool exists(const id_type id, const Node *node) ENTT_NOEXCEPT { + return node && (node->id == id || exists(id, node->next)); + } + +public: + /** + * @brief Makes a meta type _searchable_. + * @param id Optional unique identifier. + * @return An extended meta factory for the given type. + */ + auto type(const id_type id = type_info::id()) { + auto * const node = internal::meta_info::resolve(); + + ENTT_ASSERT(!exists(id, *internal::meta_context::global())); + ENTT_ASSERT(!exists(node, *internal::meta_context::global())); + node->id = id; + node->next = *internal::meta_context::global(); + *internal::meta_context::global() = node; + + return meta_factory{&node->prop}; + } + + /** + * @brief Assigns a meta base to a meta type. + * + * A reflected base class must be a real base class of the reflected type. 
+ * + * @tparam Base Type of the base class to assign to the meta type. + * @return A meta factory for the parent type. + */ + template + auto base() ENTT_NOEXCEPT { + static_assert(std::is_base_of_v, "Invalid base type"); + auto * const type = internal::meta_info::resolve(); + + static internal::meta_base_node node{ + type, + nullptr, + &internal::meta_info::resolve, + [](const void *instance) ENTT_NOEXCEPT -> const void * { + return static_cast(static_cast(instance)); + } + }; + + ENTT_ASSERT(!exists(&node, type->base)); + node.next = type->base; + type->base = &node; + + return meta_factory{}; + } + + /** + * @brief Assigns a meta conversion function to a meta type. + * + * The given type must be such that an instance of the reflected type can be + * converted to it. + * + * @tparam To Type of the conversion function to assign to the meta type. + * @return A meta factory for the parent type. + */ + template + auto conv() ENTT_NOEXCEPT { + static_assert(std::is_convertible_v, "Could not convert to the required type"); + auto * const type = internal::meta_info::resolve(); + + static internal::meta_conv_node node{ + type, + nullptr, + &internal::meta_info::resolve, + [](const void *instance) -> meta_any { + return static_cast(*static_cast(instance)); + } + }; + + ENTT_ASSERT(!exists(&node, type->conv)); + node.next = type->conv; + type->conv = &node; + + return meta_factory{}; + } + + /** + * @brief Assigns a meta conversion function to a meta type. + * + * Conversion functions can be either free functions or member + * functions.
+ * In case of free functions, they must accept a const reference to an + * instance of the parent type as an argument. In case of member functions, + * they should have no arguments at all. + * + * @tparam Candidate The actual function to use for the conversion. + * @return A meta factory for the parent type. + */ + template + auto conv() ENTT_NOEXCEPT { + using conv_type = std::invoke_result_t; + auto * const type = internal::meta_info::resolve(); + + static internal::meta_conv_node node{ + type, + nullptr, + &internal::meta_info::resolve, + [](const void *instance) -> meta_any { + return std::invoke(Candidate, *static_cast(instance)); + } + }; + + ENTT_ASSERT(!exists(&node, type->conv)); + node.next = type->conv; + type->conv = &node; + + return meta_factory{}; + } + + /** + * @brief Assigns a meta constructor to a meta type. + * + * Free functions can be assigned to meta types in the role of constructors. + * All that is required is that they return an instance of the underlying + * type.
+ * From a client's point of view, nothing changes if a constructor of a meta + * type is a built-in one or a free function. + * + * @tparam Func The actual function to use as a constructor. + * @tparam Policy Optional policy (no policy set by default). + * @return An extended meta factory for the parent type. + */ + template + auto ctor() ENTT_NOEXCEPT { + using helper_type = internal::meta_function_helper_t; + static_assert(std::is_same_v, "The function doesn't return an object of the required type"); + auto * const type = internal::meta_info::resolve(); + + static internal::meta_ctor_node node{ + type, + nullptr, + nullptr, + std::tuple_size_v, + &helper_type::arg, + [](meta_any * const any) { + return internal::invoke({}, any, std::make_index_sequence>{}); + } + }; + + ENTT_ASSERT(!exists(&node, type->ctor)); + node.next = type->ctor; + type->ctor = &node; + + return meta_factory>{&node.prop}; + } + + /** + * @brief Assigns a meta constructor to a meta type. + * + * A meta constructor is uniquely identified by the types of its arguments + * and is such that there exists an actual constructor of the underlying + * type that can be invoked with parameters whose types are those given. + * + * @tparam Args Types of arguments to use to construct an instance. + * @return An extended meta factory for the parent type. + */ + template + auto ctor() ENTT_NOEXCEPT { + using helper_type = internal::meta_function_helper_t; + auto * const type = internal::meta_info::resolve(); + + static internal::meta_ctor_node node{ + type, + nullptr, + nullptr, + std::tuple_size_v, + &helper_type::arg, + [](meta_any * const any) { + return internal::construct>...>(any, std::make_index_sequence>{}); + } + }; + + ENTT_ASSERT(!exists(&node, type->ctor)); + node.next = type->ctor; + type->ctor = &node; + + return meta_factory{&node.prop}; + } + + /** + * @brief Assigns a meta destructor to a meta type. + * + * Free functions can be assigned to meta types in the role of destructors. 
+ * The signature of the function should identical to the following: + * + * @code{.cpp} + * void(Type &); + * @endcode + * + * The purpose is to give users the ability to free up resources that + * require special treatment before an object is actually destroyed. + * + * @tparam Func The actual function to use as a destructor. + * @return A meta factory for the parent type. + */ + template + auto dtor() ENTT_NOEXCEPT { + static_assert(std::is_invocable_v, "The function doesn't accept an object of the type provided"); + auto * const type = internal::meta_info::resolve(); + + ENTT_ASSERT(!type->dtor); + + type->dtor = [](void *instance) { + if(instance) { + std::invoke(Func, *static_cast(instance)); + } + }; + + return meta_factory{}; + } + + /** + * @brief Assigns a meta data to a meta type. + * + * Both data members and static and global variables, as well as constants + * of any kind, can be assigned to a meta type.
+ * From a client's point of view, all the variables associated with the + * reflected object will appear as if they were part of the type itself. + * + * @tparam Data The actual variable to attach to the meta type. + * @tparam Policy Optional policy (no policy set by default). + * @param id Unique identifier. + * @return An extended meta factory for the parent type. + */ + template + auto data(const id_type id) ENTT_NOEXCEPT { + if constexpr(std::is_member_object_pointer_v) { + return data(id); + } else { + using data_type = std::remove_pointer_t>; + auto * const type = internal::meta_info::resolve(); + + static internal::meta_data_node node{ + {}, + type, + nullptr, + nullptr, + true, + &internal::meta_info::resolve, + []() -> std::remove_const_t { + if constexpr(std::is_same_v || std::is_const_v) { + return nullptr; + } else { + return &internal::setter; + } + }(), + &internal::getter + }; + + ENTT_ASSERT(!exists(id, type->data)); + ENTT_ASSERT(!exists(&node, type->data)); + node.id = id; + node.next = type->data; + type->data = &node; + + return meta_factory>{&node.prop}; + } + } + + /** + * @brief Assigns a meta data to a meta type by means of its setter and + * getter. + * + * Setters and getters can be either free functions, member functions or a + * mix of them.
+ * In case of free functions, setters and getters must accept a reference to + * an instance of the parent type as their first argument. A setter then has + * an extra argument of a type convertible to that of the parameter to + * set.
+ * In case of member functions, getters have no arguments at all, while + * setters have an argument of a type convertible to that of the parameter to + * set. + * + * @tparam Setter The actual function to use as a setter. + * @tparam Getter The actual function to use as a getter. + * @tparam Policy Optional policy (no policy set by default). + * @param id Unique identifier. + * @return An extended meta factory for the parent type. + */ + template + auto data(const id_type id) ENTT_NOEXCEPT { + using underlying_type = std::remove_reference_t>; + auto * const type = internal::meta_info::resolve(); + + static internal::meta_data_node node{ + {}, + type, + nullptr, + nullptr, + false, + &internal::meta_info::resolve, + []() -> std::remove_const_t { + if constexpr(std::is_same_v || (std::is_member_object_pointer_v && std::is_const_v)) { + return nullptr; + } else { + return &internal::setter; + } + }(), + &internal::getter + }; + + ENTT_ASSERT(!exists(id, type->data)); + ENTT_ASSERT(!exists(&node, type->data)); + node.id = id; + node.next = type->data; + type->data = &node; + + return meta_factory, std::integral_constant>{&node.prop}; + } + + /** + * @brief Assigns a meta function to a meta type. + * + * Both member functions and free functions can be assigned to a meta + * type.
+ * From a client's point of view, all the functions associated with the + * reflected object will appear as if they were part of the type itself. + * + * @tparam Candidate The actual function to attach to the meta type. + * @tparam Policy Optional policy (no policy set by default). + * @param id Unique identifier. + * @return An extended meta factory for the parent type. + */ + template + auto func(const id_type id) ENTT_NOEXCEPT { + using helper_type = internal::meta_function_helper_t; + auto * const type = internal::meta_info::resolve(); + + static internal::meta_func_node node{ + {}, + type, + nullptr, + nullptr, + std::tuple_size_v, + helper_type::is_const, + !std::is_member_function_pointer_v, + &internal::meta_info, void, typename helper_type::return_type>>::resolve, + &helper_type::arg, + [](meta_handle instance, meta_any *args) { + return internal::invoke(*instance, args, std::make_index_sequence>{}); + } + }; + + ENTT_ASSERT(!exists(id, type->func)); + ENTT_ASSERT(!exists(&node, type->func)); + node.id = id; + node.next = type->func; + type->func = &node; + + return meta_factory>{&node.prop}; + } +}; + + +/** + * @brief Utility function to use for reflection. + * + * This is the point from which everything starts.
+ * By invoking this function with a type that is not yet reflected, a meta type + * is created to which it will be possible to attach meta objects through a + * dedicated factory. + * + * @tparam Type Type to reflect. + * @return A meta factory for the given type. + */ +template +[[nodiscard]] auto meta() ENTT_NOEXCEPT { + auto * const node = internal::meta_info::resolve(); + // extended meta factory to allow assigning properties to opaque meta types + return meta_factory{&node->prop}; +} + + +} + + +#endif + +// #include "meta/internal.hpp" + +// #include "meta/meta.hpp" + +// #include "meta/pointer.hpp" +#ifndef ENTT_META_POINTER_HPP +#define ENTT_META_POINTER_HPP + +#include +#include +// #include "type_traits.hpp" + + + +namespace entt { + + +/** + * @brief Makes plain pointers pointer-like types for the meta system. + * @tparam Type Element type. + */ +template +struct is_meta_pointer_like + : std::true_type +{}; + + +/** + * @brief Makes `std::shared_ptr`s of any type pointer-like types for the meta + * system. + * @tparam Type Element type. + */ +template +struct is_meta_pointer_like> + : std::true_type +{}; + + +/** + * @brief Makes `std::unique_ptr`s of any type pointer-like types for the meta + * system. + * @tparam Type Element type. + * @tparam Args Other arguments. + */ +template +struct is_meta_pointer_like> + : std::true_type +{}; + + +} + + +#endif + +// #include "meta/policy.hpp" + +// #include "meta/range.hpp" + +// #include "meta/resolve.hpp" +#ifndef ENTT_META_RESOLVE_HPP +#define ENTT_META_RESOLVE_HPP + + +#include +// #include "ctx.hpp" + +// #include "meta.hpp" + +// #include "range.hpp" + + + +namespace entt { + + +/** + * @brief Returns the meta type associated with a given type. + * @tparam Type Type to use to search for a meta type. + * @return The meta type associated with the given type, if any. 
+ */ +template +[[nodiscard]] meta_type resolve() ENTT_NOEXCEPT { + return internal::meta_info::resolve(); +} + + +/** + * @brief Returns a range to use to visit all meta types. + * @return An iterable range to use to visit all meta types. + */ +[[nodiscard]] inline meta_range resolve() { + return *internal::meta_context::global(); +} + + +/** + * @brief Returns the meta type associated with a given identifier, if any. + * @param id Unique identifier. + * @return The meta type associated with the given identifier, if any. + */ +[[nodiscard]] inline meta_type resolve_id(const id_type id) ENTT_NOEXCEPT { + internal::meta_range range{*internal::meta_context::global()}; + return std::find_if(range.begin(), range.end(), [id](const auto &curr) { return curr.id == id; }).operator->(); +} + + +/** + * @brief Returns the meta type associated with a given type id, if any. + * @param id Unique identifier. + * @return The meta type associated with the given type id, if any. + */ +[[nodiscard]] inline meta_type resolve_type(const id_type id) ENTT_NOEXCEPT { + internal::meta_range range{*internal::meta_context::global()}; + return std::find_if(range.begin(), range.end(), [id](const auto &curr) { return curr.type_id == id; }).operator->(); +} + + +} + + +#endif + +// #include "meta/type_traits.hpp" + +// #include "platform/android-ndk-r17.hpp" +#ifndef ENTT_PLATFORM_ANDROID_NDK_R17_HPP +#define ENTT_PLATFORM_ANDROID_NDK_R17_HPP + + +/** + * @cond TURN_OFF_DOXYGEN + * Internal details not to be documented. 
+ */ + + +#ifdef __ANDROID__ +#include +#if __NDK_MAJOR__ == 17 + + +#include +#include +#include + + +namespace std { + + +namespace internal { + + +template +constexpr auto is_invocable(int) -> decltype(std::invoke(std::declval(), std::declval()...), std::true_type{}); + + +template +constexpr std::false_type is_invocable(...); + + +template +constexpr auto is_invocable_r(int) +-> std::enable_if_t(), std::declval()...)), Ret>, std::true_type>; + + +template +constexpr std::false_type is_invocable_r(...); + + +} + + +template +struct is_invocable: decltype(internal::is_invocable(0)) {}; + + +template +inline constexpr bool is_invocable_v = std::is_invocable::value; + + +template +struct is_invocable_r: decltype(internal::is_invocable_r(0)) {}; + + +template +inline constexpr bool is_invocable_r_v = std::is_invocable_r::value; + + +template +struct invoke_result { + using type = decltype(std::invoke(std::declval(), std::declval()...)); +}; + + +template +using invoke_result_t = typename std::invoke_result::type; + + +} + + +#endif +#endif + + +/** + * Internal details not to be documented. 
+ * @endcond + */ + + +#endif + +// #include "process/process.hpp" +#ifndef ENTT_PROCESS_PROCESS_HPP +#define ENTT_PROCESS_PROCESS_HPP + + +#include +#include +// #include "../config/config.h" +#ifndef ENTT_CONFIG_CONFIG_H +#define ENTT_CONFIG_CONFIG_H + + +#ifndef ENTT_NOEXCEPT +# define ENTT_NOEXCEPT noexcept +#endif + + +#ifndef ENTT_HS_SUFFIX +# define ENTT_HS_SUFFIX _hs +#endif + + +#ifndef ENTT_HWS_SUFFIX +# define ENTT_HWS_SUFFIX _hws +#endif + + +#ifndef ENTT_USE_ATOMIC +# define ENTT_MAYBE_ATOMIC(Type) Type +#else +# include +# define ENTT_MAYBE_ATOMIC(Type) std::atomic +#endif + + +#ifndef ENTT_ID_TYPE +# include +# define ENTT_ID_TYPE std::uint32_t +#endif + + +#ifndef ENTT_PAGE_SIZE +# define ENTT_PAGE_SIZE 32768 +#endif + + +#ifndef ENTT_ASSERT +# include +# define ENTT_ASSERT(condition) assert(condition) +#endif + + +#ifndef ENTT_NO_ETO +# include +# define ENTT_IS_EMPTY(Type) std::is_empty +#else +# include +# define ENTT_IS_EMPTY(Type) std::false_type +#endif + + +#ifndef ENTT_STANDARD_CPP +# if defined __clang__ || (defined __GNUC__ && __GNUC__ > 8) +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined __GNUC__ +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined _MSC_VER +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __FUNCSIG__ +# define ENTT_PRETTY_FUNCTION_PREFIX '<' +# define ENTT_PRETTY_FUNCTION_SUFFIX '>' +# endif +#endif + + +#ifndef ENTT_STANDALONE +# define ENTT_FAST_PATH(...) 
false +#else +# define ENTT_FAST_PATH(Cond) Cond +#endif + + +#endif + +// #include "../core/type_traits.hpp" +#ifndef ENTT_CORE_TYPE_TRAITS_HPP +#define ENTT_CORE_TYPE_TRAITS_HPP + + +#include +#include +#include +// #include "../config/config.h" +#ifndef ENTT_CONFIG_CONFIG_H +#define ENTT_CONFIG_CONFIG_H + + +#ifndef ENTT_NOEXCEPT +# define ENTT_NOEXCEPT noexcept +#endif + + +#ifndef ENTT_HS_SUFFIX +# define ENTT_HS_SUFFIX _hs +#endif + + +#ifndef ENTT_HWS_SUFFIX +# define ENTT_HWS_SUFFIX _hws +#endif + + +#ifndef ENTT_USE_ATOMIC +# define ENTT_MAYBE_ATOMIC(Type) Type +#else +# include +# define ENTT_MAYBE_ATOMIC(Type) std::atomic +#endif + + +#ifndef ENTT_ID_TYPE +# include +# define ENTT_ID_TYPE std::uint32_t +#endif + + +#ifndef ENTT_PAGE_SIZE +# define ENTT_PAGE_SIZE 32768 +#endif + + +#ifndef ENTT_ASSERT +# include +# define ENTT_ASSERT(condition) assert(condition) +#endif + + +#ifndef ENTT_NO_ETO +# include +# define ENTT_IS_EMPTY(Type) std::is_empty +#else +# include +# define ENTT_IS_EMPTY(Type) std::false_type +#endif + + +#ifndef ENTT_STANDARD_CPP +# if defined __clang__ || (defined __GNUC__ && __GNUC__ > 8) +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined __GNUC__ +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined _MSC_VER +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __FUNCSIG__ +# define ENTT_PRETTY_FUNCTION_PREFIX '<' +# define ENTT_PRETTY_FUNCTION_SUFFIX '>' +# endif +#endif + + +#ifndef ENTT_STANDALONE +# define ENTT_FAST_PATH(...) false +#else +# define ENTT_FAST_PATH(Cond) Cond +#endif + + +#endif + +// #include "fwd.hpp" +#ifndef ENTT_CORE_FWD_HPP +#define ENTT_CORE_FWD_HPP + + +// #include "../config/config.h" + + + +namespace entt { + + +/*! 
@brief Alias declaration for type identifiers. */ +using id_type = ENTT_ID_TYPE; + + +} + + +#endif + + + +namespace entt { + + +/** + * @brief Using declaration to be used to _repeat_ the same type a number of + * times equal to the size of a given parameter pack. + * @tparam Type A type to repeat. + */ +template +using unpack_as_t = Type; + + +/** + * @brief Helper variable template to be used to _repeat_ the same value a + * number of times equal to the size of a given parameter pack. + * @tparam Value A value to repeat. + */ +template +inline constexpr auto unpack_as_v = Value; + + +/** + * @brief Wraps a static constant. + * @tparam Value A static constant. + */ +template +using integral_constant = std::integral_constant; + + +/** + * @brief Alias template to ease the creation of named values. + * @tparam Value A constant value at least convertible to `id_type`. + */ +template +using tag = integral_constant; + + +/** + * @brief Utility class to disambiguate overloaded functions. + * @tparam N Number of choices available. + */ +template +struct choice_t + // Unfortunately, doxygen cannot parse such a construct. + /*! @cond TURN_OFF_DOXYGEN */ + : choice_t + /*! @endcond */ +{}; + + +/*! @copybrief choice_t */ +template<> +struct choice_t<0> {}; + + +/** + * @brief Variable template for the choice trick. + * @tparam N Number of choices available. + */ +template +inline constexpr choice_t choice{}; + + +/*! @brief A class to use to push around lists of types, nothing more. */ +template +struct type_list {}; + + +/*! @brief Primary template isn't defined on purpose. */ +template +struct type_list_size; + + +/** + * @brief Compile-time number of elements in a type list. + * @tparam Type Types provided by the type list. + */ +template +struct type_list_size> + : std::integral_constant +{}; + + +/** + * @brief Helper variable template. + * @tparam List Type list. + */ +template +inline constexpr auto type_list_size_v = type_list_size::value; + + +/*! 
@brief Primary template isn't defined on purpose. */ +template +struct type_list_cat; + + +/*! @brief Concatenates multiple type lists. */ +template<> +struct type_list_cat<> { + /*! @brief A type list composed by the types of all the type lists. */ + using type = type_list<>; +}; + + +/** + * @brief Concatenates multiple type lists. + * @tparam Type Types provided by the first type list. + * @tparam Other Types provided by the second type list. + * @tparam List Other type lists, if any. + */ +template +struct type_list_cat, type_list, List...> { + /*! @brief A type list composed by the types of all the type lists. */ + using type = typename type_list_cat, List...>::type; +}; + + +/** + * @brief Concatenates multiple type lists. + * @tparam Type Types provided by the type list. + */ +template +struct type_list_cat> { + /*! @brief A type list composed by the types of all the type lists. */ + using type = type_list; +}; + + +/** + * @brief Helper type. + * @tparam List Type lists to concatenate. + */ +template +using type_list_cat_t = typename type_list_cat::type; + + +/*! @brief Primary template isn't defined on purpose. */ +template +struct type_list_unique; + + +/** + * @brief Removes duplicates types from a type list. + * @tparam Type One of the types provided by the given type list. + * @tparam Other The other types provided by the given type list. + */ +template +struct type_list_unique> { + /*! @brief A type list without duplicate types. */ + using type = std::conditional_t< + std::disjunction_v...>, + typename type_list_unique>::type, + type_list_cat_t, typename type_list_unique>::type> + >; +}; + + +/*! @brief Removes duplicates types from a type list. */ +template<> +struct type_list_unique> { + /*! @brief A type list without duplicate types. */ + using type = type_list<>; +}; + + +/** + * @brief Helper type. + * @tparam Type A type list. 
+ */ +template +using type_list_unique_t = typename type_list_unique::type; + + +/** + * @brief Provides the member constant `value` to true if a given type is + * equality comparable, false otherwise. + * @tparam Type Potentially equality comparable type. + */ +template> +struct is_equality_comparable: std::false_type {}; + + +/*! @copydoc is_equality_comparable */ +template +struct is_equality_comparable() == std::declval())>> + : std::true_type +{}; + + +/** + * @brief Helper variable template. + * @tparam Type Potentially equality comparable type. + */ +template +inline constexpr auto is_equality_comparable_v = is_equality_comparable::value; + + +/** + * @brief Provides the member constant `value` to true if a given type is empty + * and the empty type optimization is enabled, false otherwise. + * @tparam Type Potential empty type. + */ +template +struct is_eto_eligible + : ENTT_IS_EMPTY(Type) +{}; + + +/** + * @brief Helper variable template. + * @tparam Type Potential empty type. + */ +template +inline constexpr auto is_eto_eligible_v = is_eto_eligible::value; + + +/** + * @brief Extracts the class of a non-static member object or function. + * @tparam Member A pointer to a non-static member object or function. + */ +template +class member_class { + static_assert(std::is_member_pointer_v, "Invalid pointer type to non-static member object or function"); + + template + static Class * clazz(Ret(Class:: *)(Args...)); + + template + static Class * clazz(Ret(Class:: *)(Args...) const); + + template + static Class * clazz(Type Class:: *); + +public: + /*! @brief The class of the given non-static member object or function. */ + using type = std::remove_pointer_t()))>; +}; + + +/** + * @brief Helper type. + * @tparam Member A pointer to a non-static member object or function. + */ +template +using member_class_t = typename member_class::type; + + +} + + +#endif + + + +namespace entt { + + +/** + * @brief Base class for processes. 
+ * + * This class stays true to the CRTP idiom. Derived classes must specify what's + * the intended type for elapsed times.
+ * A process should expose publicly the following member functions when + * required: + * + * * @code{.cpp} + * void update(Delta, void *); + * @endcode + * + * It's invoked once per tick until a process is explicitly aborted or it + * terminates either with or without errors. Even though it's not mandatory to + * declare this member function, as a rule of thumb each process should at + * least define it to work properly. The `void *` parameter is an opaque + * pointer to user data (if any) forwarded directly to the process during an + * update. + * + * * @code{.cpp} + * void init(); + * @endcode + * + * It's invoked when the process joins the running queue of a scheduler. This + * happens as soon as it's attached to the scheduler if the process is a top + * level one, otherwise when it replaces its parent if the process is a + * continuation. + * + * * @code{.cpp} + * void succeeded(); + * @endcode + * + * It's invoked in case of success, immediately after an update and during the + * same tick. + * + * * @code{.cpp} + * void failed(); + * @endcode + * + * It's invoked in case of errors, immediately after an update and during the + * same tick. + * + * * @code{.cpp} + * void aborted(); + * @endcode + * + * It's invoked only if a process is explicitly aborted. There is no guarantee + * that it executes in the same tick; this depends solely on whether the + * process is aborted immediately or not. + * + * Derived classes can change the internal state of a process by invoking the + * `succeed` and `fail` protected member functions and even pause or unpause the + * process itself. + * + * @sa scheduler + * + * @tparam Derived Actual type of process that extends the class template. + * @tparam Delta Type to use to provide elapsed time.
+ */ +template +class process { + enum class state: unsigned int { + UNINITIALIZED = 0, + RUNNING, + PAUSED, + SUCCEEDED, + FAILED, + ABORTED, + FINISHED + }; + + template + auto next(integral_constant) + -> decltype(std::declval().init(), void()) { + static_cast(this)->init(); + } + + template + auto next(integral_constant, Delta delta, void *data) + -> decltype(std::declval().update(delta, data), void()) { + static_cast(this)->update(delta, data); + } + + template + auto next(integral_constant) + -> decltype(std::declval().succeeded(), void()) { + static_cast(this)->succeeded(); + } + + template + auto next(integral_constant) + -> decltype(std::declval().failed(), void()) { + static_cast(this)->failed(); + } + + template + auto next(integral_constant) + -> decltype(std::declval().aborted(), void()) { + static_cast(this)->aborted(); + } + + void next(...) const ENTT_NOEXCEPT {} + +protected: + /** + * @brief Terminates a process with success if it's still alive. + * + * The function is idempotent and it does nothing if the process isn't + * alive. + */ + void succeed() ENTT_NOEXCEPT { + if(alive()) { + current = state::SUCCEEDED; + } + } + + /** + * @brief Terminates a process with errors if it's still alive. + * + * The function is idempotent and it does nothing if the process isn't + * alive. + */ + void fail() ENTT_NOEXCEPT { + if(alive()) { + current = state::FAILED; + } + } + + /** + * @brief Stops a process if it's in a running state. + * + * The function is idempotent and it does nothing if the process isn't + * running. + */ + void pause() ENTT_NOEXCEPT { + if(current == state::RUNNING) { + current = state::PAUSED; + } + } + + /** + * @brief Restarts a process if it's paused. + * + * The function is idempotent and it does nothing if the process isn't + * paused. + */ + void unpause() ENTT_NOEXCEPT { + if(current == state::PAUSED) { + current = state::RUNNING; + } + } + +public: + /*! @brief Type used to provide elapsed time. 
*/ + using delta_type = Delta; + + /*! @brief Default destructor. */ + virtual ~process() { + static_assert(std::is_base_of_v, "Incorrect use of the class template"); + } + + /** + * @brief Aborts a process if it's still alive. + * + * The function is idempotent and it does nothing if the process isn't + * alive. + * + * @param immediately Requests an immediate operation. + */ + void abort(const bool immediately = false) { + if(alive()) { + current = state::ABORTED; + + if(immediately) { + tick({}); + } + } + } + + /** + * @brief Returns true if a process is either running or paused. + * @return True if the process is still alive, false otherwise. + */ + [[nodiscard]] bool alive() const ENTT_NOEXCEPT { + return current == state::RUNNING || current == state::PAUSED; + } + + /** + * @brief Returns true if a process is already terminated. + * @return True if the process is terminated, false otherwise. + */ + [[nodiscard]] bool dead() const ENTT_NOEXCEPT { + return current == state::FINISHED; + } + + /** + * @brief Returns true if a process is currently paused. + * @return True if the process is paused, false otherwise. + */ + [[nodiscard]] bool paused() const ENTT_NOEXCEPT { + return current == state::PAUSED; + } + + /** + * @brief Returns true if a process terminated with errors. + * @return True if the process terminated with errors, false otherwise. + */ + [[nodiscard]] bool rejected() const ENTT_NOEXCEPT { + return stopped; + } + + /** + * @brief Updates a process and its internal state if required. + * @param delta Elapsed time. + * @param data Optional data. 
+ */ + void tick(const Delta delta, void *data = nullptr) { + switch (current) { + case state::UNINITIALIZED: + next(integral_constant{}); + current = state::RUNNING; + break; + case state::RUNNING: + next(integral_constant{}, delta, data); + break; + default: + // suppress warnings + break; + } + + // if it's dead, it must be notified and removed immediately + switch(current) { + case state::SUCCEEDED: + next(integral_constant{}); + current = state::FINISHED; + break; + case state::FAILED: + next(integral_constant{}); + current = state::FINISHED; + stopped = true; + break; + case state::ABORTED: + next(integral_constant{}); + current = state::FINISHED; + stopped = true; + break; + default: + // suppress warnings + break; + } + } + +private: + state current{state::UNINITIALIZED}; + bool stopped{false}; +}; + + +/** + * @brief Adaptor for lambdas and functors to turn them into processes. + * + * Lambdas and functors can't be used directly with a scheduler for they are not + * properly defined processes with managed life cycles.
+ * This class helps in filling the gap and turning lambdas and functors into + * full featured processes usable by a scheduler. + * + * The signature of the function call operator should be equivalent to the + * following: + * + * @code{.cpp} + * void(Delta delta, void *data, auto succeed, auto fail); + * @endcode + * + * Where: + * + * * `delta` is the elapsed time. + * * `data` is an opaque pointer to user data if any, `nullptr` otherwise. + * * `succeed` is a function to call when a process terminates with success. + * * `fail` is a function to call when a process terminates with errors. + * + * The signature of the function call operator of both `succeed` and `fail` + * is equivalent to the following: + * + * @code{.cpp} + * void(); + * @endcode + * + * Usually users shouldn't worry about creating adaptors. A scheduler will + * create them internally each and every time a lambda or a functor is used as + * a process. + * + * @sa process + * @sa scheduler + * + * @tparam Func Actual type of process. + * @tparam Delta Type to use to provide elapsed time. + */ +template +struct process_adaptor: process, Delta>, private Func { + /** + * @brief Constructs a process adaptor from a lambda or a functor. + * @tparam Args Types of arguments to use to initialize the actual process. + * @param args Parameters to use to initialize the actual process. + */ + template + process_adaptor(Args &&... args) + : Func{std::forward(args)...} + {} + + /** + * @brief Updates a process and its internal state if required. + * @param delta Elapsed time. + * @param data Optional data.
+ */ + void update(const Delta delta, void *data) { + Func::operator()(delta, data, [this]() { this->succeed(); }, [this]() { this->fail(); }); + } +}; + + +} + + +#endif + +// #include "process/scheduler.hpp" +#ifndef ENTT_PROCESS_SCHEDULER_HPP +#define ENTT_PROCESS_SCHEDULER_HPP + + +#include +#include +#include +#include +#include +// #include "../config/config.h" + +// #include "process.hpp" + + + +namespace entt { + + +/** + * @brief Cooperative scheduler for processes. + * + * A cooperative scheduler runs processes and helps managing their life cycles. + * + * Each process is invoked once per tick. If a process terminates, it's + * removed automatically from the scheduler and it's never invoked again.
+ * A process can also have a child. In this case, the process is replaced with + * its child when it terminates if it returns with success. In case of errors, + * both the process and its child are discarded. + * + * Example of use (pseudocode): + * + * @code{.cpp} + * scheduler.attach([](auto delta, void *, auto succeed, auto fail) { + * // code + * }).then(arguments...); + * @endcode + * + * In order to invoke all scheduled processes, call the `update` member function + * passing it the elapsed time to forward to the tasks. + * + * @sa process + * + * @tparam Delta Type to use to provide elapsed time. + */ +template +class scheduler { + struct process_handler { + using instance_type = std::unique_ptr; + using update_fn_type = bool(process_handler &, Delta, void *); + using abort_fn_type = void(process_handler &, bool); + using next_type = std::unique_ptr; + + instance_type instance; + update_fn_type *update; + abort_fn_type *abort; + next_type next; + }; + + struct continuation { + continuation(process_handler *ref) + : handler{ref} + { + ENTT_ASSERT(handler); + } + + template + continuation then(Args &&... 
args) { + static_assert(std::is_base_of_v, Proc>, "Invalid process type"); + auto proc = typename process_handler::instance_type{new Proc{std::forward(args)...}, &scheduler::deleter}; + handler->next.reset(new process_handler{std::move(proc), &scheduler::update, &scheduler::abort, nullptr}); + handler = handler->next.get(); + return *this; + } + + template + continuation then(Func &&func) { + return then, Delta>>(std::forward(func)); + } + + private: + process_handler *handler; + }; + + template + [[nodiscard]] static bool update(process_handler &handler, const Delta delta, void *data) { + auto *process = static_cast(handler.instance.get()); + process->tick(delta, data); + + auto dead = process->dead(); + + if(dead) { + if(handler.next && !process->rejected()) { + handler = std::move(*handler.next); + // forces the process to exit the uninitialized state + dead = handler.update(handler, {}, nullptr); + } else { + handler.instance.reset(); + } + } + + return dead; + } + + template + static void abort(process_handler &handler, const bool immediately) { + static_cast(handler.instance.get())->abort(immediately); + } + + template + static void deleter(void *proc) { + delete static_cast(proc); + } + +public: + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + + /*! @brief Default constructor. */ + scheduler() = default; + + /*! @brief Default move constructor. */ + scheduler(scheduler &&) = default; + + /*! @brief Default move assignment operator. @return This scheduler. */ + scheduler & operator=(scheduler &&) = default; + + /** + * @brief Number of processes currently scheduled. + * @return Number of processes currently scheduled. + */ + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return handlers.size(); + } + + /** + * @brief Returns true if at least a process is currently scheduled. + * @return True if there are scheduled processes, false otherwise. 
+ */ + [[nodiscard]] bool empty() const ENTT_NOEXCEPT { + return handlers.empty(); + } + + /** + * @brief Discards all scheduled processes. + * + * Processes aren't aborted. They are discarded along with their children + * and never executed again. + */ + void clear() { + handlers.clear(); + } + + /** + * @brief Schedules a process for the next tick. + * + * Returned value is an opaque object that can be used to attach a child to + * the given process. The child is automatically scheduled when the process + * terminates and only if the process returns with success. + * + * Example of use (pseudocode): + * + * @code{.cpp} + * // schedules a task in the form of a process class + * scheduler.attach(arguments...) + * // appends a child in the form of a lambda function + * .then([](auto delta, void *, auto succeed, auto fail) { + * // code + * }) + * // appends a child in the form of another process class + * .then(); + * @endcode + * + * @tparam Proc Type of process to schedule. + * @tparam Args Types of arguments to use to initialize the process. + * @param args Parameters to use to initialize the process. + * @return An opaque object to use to concatenate processes. + */ + template + auto attach(Args &&... args) { + static_assert(std::is_base_of_v, Proc>, "Invalid process type"); + auto proc = typename process_handler::instance_type{new Proc{std::forward(args)...}, &scheduler::deleter}; + process_handler handler{std::move(proc), &scheduler::update, &scheduler::abort, nullptr}; + // forces the process to exit the uninitialized state + handler.update(handler, {}, nullptr); + return continuation{&handlers.emplace_back(std::move(handler))}; + } + + /** + * @brief Schedules a process for the next tick. + * + * A process can be either a lambda or a functor. The scheduler wraps both + * of them in a process adaptor internally.
+ * The signature of the function call operator should be equivalent to the + * following: + * + * @code{.cpp} + * void(Delta delta, void *data, auto succeed, auto fail); + * @endcode + * + * Where: + * + * * `delta` is the elapsed time. + * * `data` is an opaque pointer to user data if any, `nullptr` otherwise. + * * `succeed` is a function to call when a process terminates with success. + * * `fail` is a function to call when a process terminates with errors. + * + * The signature of the function call operator of both `succeed` and `fail` + * is equivalent to the following: + * + * @code{.cpp} + * void(); + * @endcode + * + * Returned value is an opaque object that can be used to attach a child to + * the given process. The child is automatically scheduled when the process + * terminates and only if the process returns with success. + * + * Example of use (pseudocode): + * + * @code{.cpp} + * // schedules a task in the form of a lambda function + * scheduler.attach([](auto delta, void *, auto succeed, auto fail) { + * // code + * }) + * // appends a child in the form of another lambda function + * .then([](auto delta, void *, auto succeed, auto fail) { + * // code + * }) + * // appends a child in the form of a process class + * .then(arguments...); + * @endcode + * + * @sa process_adaptor + * + * @tparam Func Type of process to schedule. + * @param func Either a lambda or a functor to use as a process. + * @return An opaque object to use to concatenate processes. + */ + template + auto attach(Func &&func) { + using Proc = process_adaptor, Delta>; + return attach(std::forward(func)); + } + + /** + * @brief Updates all scheduled processes. + * + * All scheduled processes are executed in no specific order.
+ * If a process terminates with success, it's replaced with its child, if + * any. Otherwise, if a process terminates with an error, it's removed along + * with its child. + * + * @param delta Elapsed time. + * @param data Optional data. + */ + void update(const Delta delta, void *data = nullptr) { + bool clean = false; + + for(auto pos = handlers.size(); pos; --pos) { + auto &handler = handlers[pos-1]; + const bool dead = handler.update(handler, delta, data); + clean = clean || dead; + } + + if(clean) { + handlers.erase(std::remove_if(handlers.begin(), handlers.end(), [](auto &handler) { + return !handler.instance; + }), handlers.end()); + } + } + + /** + * @brief Aborts all scheduled processes. + * + * Unless an immediate operation is requested, the abort is scheduled for + * the next tick. Processes won't be executed anymore in any case.
+ * Once a process is fully aborted and thus finished, it's discarded along + * with its child, if any. + * + * @param immediately Requests an immediate operation. + */ + void abort(const bool immediately = false) { + decltype(handlers) exec; + exec.swap(handlers); + + for(auto &&handler: exec) { + handler.abort(handler, immediately); + } + + std::move(handlers.begin(), handlers.end(), std::back_inserter(exec)); + handlers.swap(exec); + } + +private: + std::vector handlers{}; +}; + + +} + + +#endif + +// #include "resource/cache.hpp" +#ifndef ENTT_RESOURCE_CACHE_HPP +#define ENTT_RESOURCE_CACHE_HPP + + +#include +#include +#include +#include +// #include "../config/config.h" +#ifndef ENTT_CONFIG_CONFIG_H +#define ENTT_CONFIG_CONFIG_H + + +#ifndef ENTT_NOEXCEPT +# define ENTT_NOEXCEPT noexcept +#endif + + +#ifndef ENTT_HS_SUFFIX +# define ENTT_HS_SUFFIX _hs +#endif + + +#ifndef ENTT_HWS_SUFFIX +# define ENTT_HWS_SUFFIX _hws +#endif + + +#ifndef ENTT_USE_ATOMIC +# define ENTT_MAYBE_ATOMIC(Type) Type +#else +# include +# define ENTT_MAYBE_ATOMIC(Type) std::atomic +#endif + + +#ifndef ENTT_ID_TYPE +# include +# define ENTT_ID_TYPE std::uint32_t +#endif + + +#ifndef ENTT_PAGE_SIZE +# define ENTT_PAGE_SIZE 32768 +#endif + + +#ifndef ENTT_ASSERT +# include +# define ENTT_ASSERT(condition) assert(condition) +#endif + + +#ifndef ENTT_NO_ETO +# include +# define ENTT_IS_EMPTY(Type) std::is_empty +#else +# include +# define ENTT_IS_EMPTY(Type) std::false_type +#endif + + +#ifndef ENTT_STANDARD_CPP +# if defined __clang__ || (defined __GNUC__ && __GNUC__ > 8) +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined __GNUC__ +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined _MSC_VER +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define 
ENTT_PRETTY_FUNCTION __FUNCSIG__ +# define ENTT_PRETTY_FUNCTION_PREFIX '<' +# define ENTT_PRETTY_FUNCTION_SUFFIX '>' +# endif +#endif + + +#ifndef ENTT_STANDALONE +# define ENTT_FAST_PATH(...) false +#else +# define ENTT_FAST_PATH(Cond) Cond +#endif + + +#endif + +// #include "../core/fwd.hpp" +#ifndef ENTT_CORE_FWD_HPP +#define ENTT_CORE_FWD_HPP + + +// #include "../config/config.h" +#ifndef ENTT_CONFIG_CONFIG_H +#define ENTT_CONFIG_CONFIG_H + + +#ifndef ENTT_NOEXCEPT +# define ENTT_NOEXCEPT noexcept +#endif + + +#ifndef ENTT_HS_SUFFIX +# define ENTT_HS_SUFFIX _hs +#endif + + +#ifndef ENTT_HWS_SUFFIX +# define ENTT_HWS_SUFFIX _hws +#endif + + +#ifndef ENTT_USE_ATOMIC +# define ENTT_MAYBE_ATOMIC(Type) Type +#else +# include +# define ENTT_MAYBE_ATOMIC(Type) std::atomic +#endif + + +#ifndef ENTT_ID_TYPE +# include +# define ENTT_ID_TYPE std::uint32_t +#endif + + +#ifndef ENTT_PAGE_SIZE +# define ENTT_PAGE_SIZE 32768 +#endif + + +#ifndef ENTT_ASSERT +# include +# define ENTT_ASSERT(condition) assert(condition) +#endif + + +#ifndef ENTT_NO_ETO +# include +# define ENTT_IS_EMPTY(Type) std::is_empty +#else +# include +# define ENTT_IS_EMPTY(Type) std::false_type +#endif + + +#ifndef ENTT_STANDARD_CPP +# if defined __clang__ || (defined __GNUC__ && __GNUC__ > 8) +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined __GNUC__ +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined _MSC_VER +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __FUNCSIG__ +# define ENTT_PRETTY_FUNCTION_PREFIX '<' +# define ENTT_PRETTY_FUNCTION_SUFFIX '>' +# endif +#endif + + +#ifndef ENTT_STANDALONE +# define ENTT_FAST_PATH(...) false +#else +# define ENTT_FAST_PATH(Cond) Cond +#endif + + +#endif + + + +namespace entt { + + +/*! 
@brief Alias declaration for type identifiers. */ +using id_type = ENTT_ID_TYPE; + + +} + + +#endif + +// #include "handle.hpp" +#ifndef ENTT_RESOURCE_HANDLE_HPP +#define ENTT_RESOURCE_HANDLE_HPP + + +#include +#include +// #include "../config/config.h" + +// #include "fwd.hpp" +#ifndef ENTT_RESOURCE_FWD_HPP +#define ENTT_RESOURCE_FWD_HPP + + +namespace entt { + + +template +struct resource_cache; + + +template +class resource_handle; + + +template +class resource_loader; + + +} + + +#endif + + + +namespace entt { + + +/** + * @brief Shared resource handle. + * + * A shared resource handle is a small class that wraps a resource and keeps it + * alive even if it's deleted from the cache. It can be either copied or + * moved. A handle shares a reference to the same resource with all the other + * handles constructed for the same identifier.
+ * As a rule of thumb, resources should never be copied nor moved. Handles are + * the way to go to keep references to them. + * + * @tparam Resource Type of resource managed by a handle. + */ +template +class resource_handle { + /*! @brief Resource handles are friends of their caches. */ + friend struct resource_cache; + + resource_handle(std::shared_ptr res) ENTT_NOEXCEPT + : resource{std::move(res)} + {} + +public: + /*! @brief Default constructor. */ + resource_handle() ENTT_NOEXCEPT = default; + + /** + * @brief Gets a reference to the managed resource. + * + * @warning + * The behavior is undefined if the handle doesn't contain a resource.
+ * An assertion will abort the execution at runtime in debug mode if the + * handle is empty. + * + * @return A reference to the managed resource. + */ + [[nodiscard]] const Resource & get() const ENTT_NOEXCEPT { + ENTT_ASSERT(static_cast(resource)); + return *resource; + } + + /*! @copydoc get */ + [[nodiscard]] Resource & get() ENTT_NOEXCEPT { + return const_cast(std::as_const(*this).get()); + } + + /*! @copydoc get */ + [[nodiscard]] operator const Resource & () const ENTT_NOEXCEPT { + return get(); + } + + /*! @copydoc get */ + [[nodiscard]] operator Resource & () ENTT_NOEXCEPT { + return get(); + } + + /*! @copydoc get */ + [[nodiscard]] const Resource & operator *() const ENTT_NOEXCEPT { + return get(); + } + + /*! @copydoc get */ + [[nodiscard]] Resource & operator *() ENTT_NOEXCEPT { + return get(); + } + + /** + * @brief Gets a pointer to the managed resource. + * + * @warning + * The behavior is undefined if the handle doesn't contain a resource.
+ * An assertion will abort the execution at runtime in debug mode if the + * handle is empty. + * + * @return A pointer to the managed resource or `nullptr` if the handle + * contains no resource at all. + */ + [[nodiscard]] const Resource * operator->() const ENTT_NOEXCEPT { + ENTT_ASSERT(static_cast(resource)); + return resource.get(); + } + + /*! @copydoc operator-> */ + [[nodiscard]] Resource * operator->() ENTT_NOEXCEPT { + return const_cast(std::as_const(*this).operator->()); + } + + /** + * @brief Returns true if a handle contains a resource, false otherwise. + * @return True if the handle contains a resource, false otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return static_cast(resource); + } + +private: + std::shared_ptr resource; +}; + + +} + + +#endif + +// #include "loader.hpp" +#ifndef ENTT_RESOURCE_LOADER_HPP +#define ENTT_RESOURCE_LOADER_HPP + + +#include +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @brief Base class for resource loaders. + * + * Resource loaders must inherit from this class and stay true to the CRTP + * idiom. Moreover, a resource loader must expose a public, const member + * function named `load` that accepts a variable number of arguments and returns + * a shared pointer to the resource just created.
+ * As an example: + * + * @code{.cpp} + * struct my_resource {}; + * + * struct my_loader: entt::resource_loader { + * std::shared_ptr load(int) const { + * // use the integer value somehow + * return std::make_shared(); + * } + * }; + * @endcode + * + * In general, resource loaders should not have a state or retain data of any + * type. They should let the cache manage their resources instead. + * + * @note + * Base class and CRTP idiom aren't strictly required with the current + * implementation. One could argue that a cache can easily work with loaders of + * any type. However, future changes won't be breaking ones by forcing the use + * of a base class today and that's why the model is already in its place. + * + * @tparam Loader Type of the derived class. + * @tparam Resource Type of resource for which to use the loader. + */ +template +class resource_loader { + /*! @brief Resource loaders are friends of their caches. */ + friend struct resource_cache; + + /** + * @brief Loads the resource and returns it. + * @tparam Args Types of arguments for the loader. + * @param args Arguments for the loader. + * @return The resource just loaded or an empty pointer in case of errors. + */ + template + [[nodiscard]] std::shared_ptr get(Args &&... args) const { + return static_cast(this)->load(std::forward(args)...); + } +}; + + +} + + +#endif + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @brief Simple cache for resources of a given type. + * + * Minimal implementation of a cache for resources of a given type. It doesn't + * offer much functionalities but it's suitable for small or medium sized + * applications and can be freely inherited to add targeted functionalities for + * large sized applications. + * + * @tparam Resource Type of resources managed by a cache. + */ +template +struct resource_cache { + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + /*! @brief Type of resources managed by a cache. 
*/ + using resource_type = Resource; + + /*! @brief Default constructor. */ + resource_cache() = default; + + /*! @brief Default move constructor. */ + resource_cache(resource_cache &&) = default; + + /*! @brief Default move assignment operator. @return This cache. */ + resource_cache & operator=(resource_cache &&) = default; + + /** + * @brief Number of resources managed by a cache. + * @return Number of resources currently stored. + */ + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return resources.size(); + } + + /** + * @brief Returns true if a cache contains no resources, false otherwise. + * @return True if the cache contains no resources, false otherwise. + */ + [[nodiscard]] bool empty() const ENTT_NOEXCEPT { + return resources.empty(); + } + + /** + * @brief Clears a cache and discards all its resources. + * + * Handles are not invalidated and the memory used by a resource isn't + * freed as long as at least a handle keeps the resource itself alive. + */ + void clear() ENTT_NOEXCEPT { + resources.clear(); + } + + /** + * @brief Loads the resource that corresponds to a given identifier. + * + * In case an identifier isn't already present in the cache, it loads its + * resource and stores it aside for future uses. Arguments are forwarded + * directly to the loader in order to construct properly the requested + * resource. + * + * @note + * If the identifier is already present in the cache, this function does + * nothing and the arguments are simply discarded. + * + * @warning + * If the resource cannot be loaded correctly, the returned handle will be + * invalid and any use of it will result in undefined behavior. + * + * @tparam Loader Type of loader to use to load the resource if required. + * @tparam Args Types of arguments to use to load the resource if required. + * @param id Unique resource identifier. + * @param args Arguments to use to load the resource if required. + * @return A handle for the given resource. 
+ */ + template + resource_handle load(const id_type id, Args &&... args) { + static_assert(std::is_base_of_v, Loader>, "Invalid loader type"); + resource_handle resource{}; + + if(auto it = resources.find(id); it == resources.cend()) { + if(auto instance = Loader{}.get(std::forward(args)...); instance) { + resources[id] = instance; + resource = std::move(instance); + } + } else { + resource = it->second; + } + + return resource; + } + + /** + * @brief Reloads a resource or loads it for the first time if not present. + * + * Equivalent to the following snippet (pseudocode): + * + * @code{.cpp} + * cache.discard(id); + * cache.load(id, args...); + * @endcode + * + * Arguments are forwarded directly to the loader in order to construct + * properly the requested resource. + * + * @warning + * If the resource cannot be loaded correctly, the returned handle will be + * invalid and any use of it will result in undefined behavior. + * + * @tparam Loader Type of loader to use to load the resource. + * @tparam Args Types of arguments to use to load the resource. + * @param id Unique resource identifier. + * @param args Arguments to use to load the resource. + * @return A handle for the given resource. + */ + template + resource_handle reload(const id_type id, Args &&... args) { + return (discard(id), load(id, std::forward(args)...)); + } + + /** + * @brief Creates a temporary handle for a resource. + * + * Arguments are forwarded directly to the loader in order to construct + * properly the requested resource. The handle isn't stored aside and the + * cache isn't in charge of the lifetime of the resource itself. + * + * @tparam Loader Type of loader to use to load the resource. + * @tparam Args Types of arguments to use to load the resource. + * @param args Arguments to use to load the resource. + * @return A handle for the given resource. + */ + template + [[nodiscard]] resource_handle temp(Args &&... args) const { + return { Loader{}.get(std::forward(args)...) 
}; + } + + /** + * @brief Creates a handle for a given resource identifier. + * + * A resource handle can be in either a valid or an invalid state. In other + * terms, a resource handle is properly initialized with a resource if the + * cache contains the resource itself. Otherwise the returned handle is + * uninitialized and accessing it results in undefined behavior. + * + * @sa resource_handle + * + * @param id Unique resource identifier. + * @return A handle for the given resource. + */ + [[nodiscard]] resource_handle handle(const id_type id) const { + auto it = resources.find(id); + return { it == resources.end() ? nullptr : it->second }; + } + + /** + * @brief Checks if a cache contains a given identifier. + * @param id Unique resource identifier. + * @return True if the cache contains the resource, false otherwise. + */ + [[nodiscard]] bool contains(const id_type id) const { + return (resources.find(id) != resources.cend()); + } + + /** + * @brief Discards the resource that corresponds to a given identifier. + * + * Handles are not invalidated and the memory used by the resource isn't + * freed as long as at least one handle keeps the resource itself alive. + * + * @param id Unique resource identifier. + */ + void discard(const id_type id) { + if(auto it = resources.find(id); it != resources.end()) { + resources.erase(it); + } + } + + /** + * @brief Iterates all resources. + * + * The function object is invoked for each element. It is provided with + * either the resource identifier, the resource handle or both of them.
+ * The signature of the function must be equivalent to one of the following + * forms: + * + * @code{.cpp} + * void(const entt::id_type); + * void(entt::resource_handle); + * void(const entt::id_type, entt::resource_handle); + * @endcode + * + * @tparam Func Type of the function object to invoke. + * @param func A valid function object. + */ + template + void each(Func func) const { + auto begin = resources.begin(); + auto end = resources.end(); + + while(begin != end) { + auto curr = begin++; + + if constexpr(std::is_invocable_v) { + func(curr->first); + } else if constexpr(std::is_invocable_v>) { + func(resource_handle{ curr->second }); + } else { + func(curr->first, resource_handle{ curr->second }); + } + } + } + +private: + std::unordered_map> resources; +}; + + +} + + +#endif + +// #include "resource/handle.hpp" + +// #include "resource/loader.hpp" + +// #include "signal/delegate.hpp" +#ifndef ENTT_SIGNAL_DELEGATE_HPP +#define ENTT_SIGNAL_DELEGATE_HPP + + +#include +#include +#include +#include +#include +// #include "../config/config.h" +#ifndef ENTT_CONFIG_CONFIG_H +#define ENTT_CONFIG_CONFIG_H + + +#ifndef ENTT_NOEXCEPT +# define ENTT_NOEXCEPT noexcept +#endif + + +#ifndef ENTT_HS_SUFFIX +# define ENTT_HS_SUFFIX _hs +#endif + + +#ifndef ENTT_HWS_SUFFIX +# define ENTT_HWS_SUFFIX _hws +#endif + + +#ifndef ENTT_USE_ATOMIC +# define ENTT_MAYBE_ATOMIC(Type) Type +#else +# include +# define ENTT_MAYBE_ATOMIC(Type) std::atomic +#endif + + +#ifndef ENTT_ID_TYPE +# include +# define ENTT_ID_TYPE std::uint32_t +#endif + + +#ifndef ENTT_PAGE_SIZE +# define ENTT_PAGE_SIZE 32768 +#endif + + +#ifndef ENTT_ASSERT +# include +# define ENTT_ASSERT(condition) assert(condition) +#endif + + +#ifndef ENTT_NO_ETO +# include +# define ENTT_IS_EMPTY(Type) std::is_empty +#else +# include +# define ENTT_IS_EMPTY(Type) std::false_type +#endif + + +#ifndef ENTT_STANDARD_CPP +# if defined __clang__ || (defined __GNUC__ && __GNUC__ > 8) +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# 
define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined __GNUC__ +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined _MSC_VER +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __FUNCSIG__ +# define ENTT_PRETTY_FUNCTION_PREFIX '<' +# define ENTT_PRETTY_FUNCTION_SUFFIX '>' +# endif +#endif + + +#ifndef ENTT_STANDALONE +# define ENTT_FAST_PATH(...) false +#else +# define ENTT_FAST_PATH(Cond) Cond +#endif + + +#endif + + + +namespace entt { + + +/** + * @cond TURN_OFF_DOXYGEN + * Internal details not to be documented. + */ + + +namespace internal { + + +template +auto function_pointer(Ret(*)(Args...)) -> Ret(*)(Args...); + + +template +auto function_pointer(Ret(*)(Type, Args...), Other &&) -> Ret(*)(Args...); + + +template +auto function_pointer(Ret(Class:: *)(Args...), Other &&...) -> Ret(*)(Args...); + + +template +auto function_pointer(Ret(Class:: *)(Args...) const, Other &&...) -> Ret(*)(Args...); + + +template +auto function_pointer(Type Class:: *, Other &&...) -> Type(*)(); + + +template +using function_pointer_t = decltype(internal::function_pointer(std::declval()...)); + + +template +[[nodiscard]] constexpr auto index_sequence_for(Ret(*)(Args...)) { + return std::index_sequence_for{}; +} + + +} + + +/** + * Internal details not to be documented. + * @endcond + */ + + +/*! @brief Used to wrap a function or a member of a specified type. */ +template +struct connect_arg_t {}; + + +/*! @brief Constant of type connect_arg_t used to disambiguate calls. */ +template +inline constexpr connect_arg_t connect_arg{}; + + +/** + * @brief Basic delegate implementation. + * + * Primary template isn't defined on purpose. All the specializations give a + * compile-time error unless the template parameter is a function type. 
+ */ +template +class delegate; + + +/** + * @brief Utility class to use to send around functions and members. + * + * Unmanaged delegate for function pointers and members. Users of this class are + * in charge of disconnecting instances before deleting them. + * + * A delegate can be used as a general purpose invoker without memory overhead + * for free functions possibly with payloads and bound or unbound members. + * + * @tparam Ret Return type of a function type. + * @tparam Args Types of arguments of a function type. + */ +template +class delegate { + template + [[nodiscard]] auto wrap(std::index_sequence) ENTT_NOEXCEPT { + return [](const void *, Args... args) -> Ret { + [[maybe_unused]] const auto arguments = std::forward_as_tuple(std::forward(args)...); + return Ret(std::invoke(Candidate, std::forward>>(std::get(arguments))...)); + }; + } + + template + [[nodiscard]] auto wrap(Type &, std::index_sequence) ENTT_NOEXCEPT { + return [](const void *payload, Args... args) -> Ret { + [[maybe_unused]] const auto arguments = std::forward_as_tuple(std::forward(args)...); + Type *curr = static_cast(const_cast, const void *, void *>>(payload)); + return Ret(std::invoke(Candidate, *curr, std::forward>>(std::get(arguments))...)); + }; + } + + template + [[nodiscard]] auto wrap(Type *, std::index_sequence) ENTT_NOEXCEPT { + return [](const void *payload, Args... args) -> Ret { + [[maybe_unused]] const auto arguments = std::forward_as_tuple(std::forward(args)...); + Type *curr = static_cast(const_cast, const void *, void *>>(payload)); + return Ret(std::invoke(Candidate, curr, std::forward>>(std::get(arguments))...)); + }; + } + +public: + /*! @brief Function type of the contained target. */ + using function_type = Ret(const void *, Args...); + /*! @brief Function type of the delegate. */ + using type = Ret(Args...); + /*! @brief Return type of the delegate. */ + using result_type = Ret; + + /*! @brief Default constructor. 
*/ + delegate() ENTT_NOEXCEPT + : fn{nullptr}, data{nullptr} + {} + + /** + * @brief Constructs a delegate and connects a free function or an unbound + * member. + * @tparam Candidate Function or member to connect to the delegate. + */ + template + delegate(connect_arg_t) ENTT_NOEXCEPT { + connect(); + } + + /** + * @brief Constructs a delegate and connects a free function with payload or + * a bound member. + * @tparam Candidate Function or member to connect to the delegate. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. + */ + template + delegate(connect_arg_t, Type &&value_or_instance) ENTT_NOEXCEPT { + connect(std::forward(value_or_instance)); + } + + /** + * @brief Constructs a delegate and connects an user defined function with + * optional payload. + * @param function Function to connect to the delegate. + * @param payload User defined arbitrary data. + */ + delegate(function_type *function, const void *payload = nullptr) ENTT_NOEXCEPT { + connect(function, payload); + } + + /** + * @brief Connects a free function or an unbound member to a delegate. + * @tparam Candidate Function or member to connect to the delegate. + */ + template + void connect() ENTT_NOEXCEPT { + data = nullptr; + + if constexpr(std::is_invocable_r_v) { + fn = [](const void *, Args... args) -> Ret { + return Ret(std::invoke(Candidate, std::forward(args)...)); + }; + } else if constexpr(std::is_member_pointer_v) { + fn = wrap(internal::index_sequence_for>>(internal::function_pointer_t{})); + } else { + fn = wrap(internal::index_sequence_for(internal::function_pointer_t{})); + } + } + + /** + * @brief Connects a free function with payload or a bound member to a + * delegate. + * + * The delegate isn't responsible for the connected object or the payload. + * Users must always guarantee that the lifetime of the instance overcomes + * the one of the delegate.
+ * When used to connect a free function with payload, its signature must be + * such that the instance is the first argument before the ones used to + * define the delegate itself. + * + * @tparam Candidate Function or member to connect to the delegate. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid reference that fits the purpose. + */ + template + void connect(Type &value_or_instance) ENTT_NOEXCEPT { + data = &value_or_instance; + + if constexpr(std::is_invocable_r_v) { + fn = [](const void *payload, Args... args) -> Ret { + Type *curr = static_cast(const_cast, const void *, void *>>(payload)); + return Ret(std::invoke(Candidate, *curr, std::forward(args)...)); + }; + } else { + fn = wrap(value_or_instance, internal::index_sequence_for(internal::function_pointer_t{})); + } + } + + /** + * @brief Connects a free function with payload or a bound member to a + * delegate. + * + * @sa connect(Type &) + * + * @tparam Candidate Function or member to connect to the delegate. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid pointer that fits the purpose. + */ + template + void connect(Type *value_or_instance) ENTT_NOEXCEPT { + data = value_or_instance; + + if constexpr(std::is_invocable_r_v) { + fn = [](const void *payload, Args... args) -> Ret { + Type *curr = static_cast(const_cast, const void *, void *>>(payload)); + return Ret(std::invoke(Candidate, curr, std::forward(args)...)); + }; + } else { + fn = wrap(value_or_instance, internal::index_sequence_for(internal::function_pointer_t{})); + } + } + + /** + * @brief Connects an user defined function with optional payload to a + * delegate. + * + * The delegate isn't responsible for the connected object or the payload. + * Users must always guarantee that the lifetime of an instance overcomes + * the one of the delegate.
+ * The payload is returned as the first argument to the target function in + * all cases. + * + * @param function Function to connect to the delegate. + * @param payload User defined arbitrary data. + */ + void connect(function_type *function, const void *payload = nullptr) ENTT_NOEXCEPT { + fn = function; + data = payload; + } + + /** + * @brief Resets a delegate. + * + * After a reset, a delegate cannot be invoked anymore. + */ + void reset() ENTT_NOEXCEPT { + fn = nullptr; + data = nullptr; + } + + /** + * @brief Returns the instance or the payload linked to a delegate, if any. + * @return An opaque pointer to the underlying data. + */ + [[nodiscard]] const void * instance() const ENTT_NOEXCEPT { + return data; + } + + /** + * @brief Triggers a delegate. + * + * The delegate invokes the underlying function and returns the result. + * + * @warning + * Attempting to trigger an invalid delegate results in undefined + * behavior.
+ * An assertion will abort the execution at runtime in debug mode if the + * delegate has not yet been set. + * + * @param args Arguments to use to invoke the underlying function. + * @return The value returned by the underlying function. + */ + Ret operator()(Args... args) const { + ENTT_ASSERT(fn); + return fn(data, std::forward(args)...); + } + + /** + * @brief Checks whether a delegate actually stores a listener. + * @return False if the delegate is empty, true otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + // no need to test also data + return !(fn == nullptr); + } + + /** + * @brief Compares the contents of two delegates. + * @param other Delegate with which to compare. + * @return False if the two contents differ, true otherwise. + */ + [[nodiscard]] bool operator==(const delegate &other) const ENTT_NOEXCEPT { + return fn == other.fn && data == other.data; + } + +private: + function_type *fn; + const void *data; +}; + + +/** + * @brief Compares the contents of two delegates. + * @tparam Ret Return type of a function type. + * @tparam Args Types of arguments of a function type. + * @param lhs A valid delegate object. + * @param rhs A valid delegate object. + * @return True if the two contents differ, false otherwise. + */ +template +[[nodiscard]] bool operator!=(const delegate &lhs, const delegate &rhs) ENTT_NOEXCEPT { + return !(lhs == rhs); +} + + +/** + * @brief Deduction guide. + * @tparam Candidate Function or member to connect to the delegate. + */ +template +delegate(connect_arg_t) ENTT_NOEXCEPT +-> delegate>>; + + +/** + * @brief Deduction guide. + * @tparam Candidate Function or member to connect to the delegate. + * @tparam Type Type of class or type of payload. + */ +template +delegate(connect_arg_t, Type &&) ENTT_NOEXCEPT +-> delegate>>; + + +/*! @brief Deduction guide. 
*/ +template +delegate(Ret(*)(const void *, Args...), const void * = nullptr) ENTT_NOEXCEPT +-> delegate; + + +} + + +#endif + +// #include "signal/dispatcher.hpp" +#ifndef ENTT_SIGNAL_DISPATCHER_HPP +#define ENTT_SIGNAL_DISPATCHER_HPP + + +#include +#include +#include +#include +#include +// #include "../config/config.h" + +// #include "../core/fwd.hpp" +#ifndef ENTT_CORE_FWD_HPP +#define ENTT_CORE_FWD_HPP + + +// #include "../config/config.h" +#ifndef ENTT_CONFIG_CONFIG_H +#define ENTT_CONFIG_CONFIG_H + + +#ifndef ENTT_NOEXCEPT +# define ENTT_NOEXCEPT noexcept +#endif + + +#ifndef ENTT_HS_SUFFIX +# define ENTT_HS_SUFFIX _hs +#endif + + +#ifndef ENTT_HWS_SUFFIX +# define ENTT_HWS_SUFFIX _hws +#endif + + +#ifndef ENTT_USE_ATOMIC +# define ENTT_MAYBE_ATOMIC(Type) Type +#else +# include +# define ENTT_MAYBE_ATOMIC(Type) std::atomic +#endif + + +#ifndef ENTT_ID_TYPE +# include +# define ENTT_ID_TYPE std::uint32_t +#endif + + +#ifndef ENTT_PAGE_SIZE +# define ENTT_PAGE_SIZE 32768 +#endif + + +#ifndef ENTT_ASSERT +# include +# define ENTT_ASSERT(condition) assert(condition) +#endif + + +#ifndef ENTT_NO_ETO +# include +# define ENTT_IS_EMPTY(Type) std::is_empty +#else +# include +# define ENTT_IS_EMPTY(Type) std::false_type +#endif + + +#ifndef ENTT_STANDARD_CPP +# if defined __clang__ || (defined __GNUC__ && __GNUC__ > 8) +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined __GNUC__ +# define ENTT_PRETTY_FUNCTION __PRETTY_FUNCTION__ +# define ENTT_PRETTY_FUNCTION_PREFIX '=' +# define ENTT_PRETTY_FUNCTION_SUFFIX ']' +# elif defined _MSC_VER +# define ENTT_PRETTY_FUNCTION_CONSTEXPR +# define ENTT_PRETTY_FUNCTION __FUNCSIG__ +# define ENTT_PRETTY_FUNCTION_PREFIX '<' +# define ENTT_PRETTY_FUNCTION_SUFFIX '>' +# endif +#endif + + +#ifndef ENTT_STANDALONE +# define ENTT_FAST_PATH(...) 
false +#else +# define ENTT_FAST_PATH(Cond) Cond +#endif + + +#endif + + + +namespace entt { + + +/*! @brief Alias declaration for type identifiers. */ +using id_type = ENTT_ID_TYPE; + + +} + + +#endif + +// #include "../core/type_info.hpp" +#ifndef ENTT_CORE_TYPE_INFO_HPP +#define ENTT_CORE_TYPE_INFO_HPP + + +#include +// #include "../config/config.h" + +// #include "../core/attribute.h" +#ifndef ENTT_CORE_ATTRIBUTE_H +#define ENTT_CORE_ATTRIBUTE_H + + +#ifndef ENTT_EXPORT +# if defined _WIN32 || defined __CYGWIN__ || defined _MSC_VER +# define ENTT_EXPORT __declspec(dllexport) +# define ENTT_IMPORT __declspec(dllimport) +# define ENTT_HIDDEN +# elif defined __GNUC__ && __GNUC__ >= 4 +# define ENTT_EXPORT __attribute__((visibility("default"))) +# define ENTT_IMPORT __attribute__((visibility("default"))) +# define ENTT_HIDDEN __attribute__((visibility("hidden"))) +# else /* Unsupported compiler */ +# define ENTT_EXPORT +# define ENTT_IMPORT +# define ENTT_HIDDEN +# endif +#endif + + +#ifndef ENTT_API +# if defined ENTT_API_EXPORT +# define ENTT_API ENTT_EXPORT +# elif defined ENTT_API_IMPORT +# define ENTT_API ENTT_IMPORT +# else /* No API */ +# define ENTT_API +# endif +#endif + + +#endif + +// #include "hashed_string.hpp" +#ifndef ENTT_CORE_HASHED_STRING_HPP +#define ENTT_CORE_HASHED_STRING_HPP + + +#include +#include +// #include "../config/config.h" + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @cond TURN_OFF_DOXYGEN + * Internal details not to be documented. 
+ */ + + +namespace internal { + + +template +struct fnv1a_traits; + + +template<> +struct fnv1a_traits { + using type = std::uint32_t; + static constexpr std::uint32_t offset = 2166136261; + static constexpr std::uint32_t prime = 16777619; +}; + + +template<> +struct fnv1a_traits { + using type = std::uint64_t; + static constexpr std::uint64_t offset = 14695981039346656037ull; + static constexpr std::uint64_t prime = 1099511628211ull; +}; + + +} + + +/** + * Internal details not to be documented. + * @endcond + */ + + +/** + * @brief Zero overhead unique identifier. + * + * A hashed string is a compile-time tool that allows users to use + * human-readable identifiers in the codebase while using their numeric + * counterparts at runtime.
+ * Because of that, a hashed string can also be used in constant expressions if + * required. + * + * @tparam Char Character type. + */ +template +class basic_hashed_string { + using traits_type = internal::fnv1a_traits; + + struct const_wrapper { + // non-explicit constructor on purpose + constexpr const_wrapper(const Char *curr) ENTT_NOEXCEPT: str{curr} {} + const Char *str; + }; + + // Fowler–Noll–Vo hash function v. 1a - the good + [[nodiscard]] static constexpr id_type helper(const Char *curr) ENTT_NOEXCEPT { + auto value = traits_type::offset; + + while(*curr != 0) { + value = (value ^ static_cast(*(curr++))) * traits_type::prime; + } + + return value; + } + +public: + /*! @brief Character type. */ + using value_type = Char; + /*! @brief Unsigned integer type. */ + using hash_type = id_type; + + /** + * @brief Returns directly the numeric representation of a string. + * + * Forcing template resolution avoids implicit conversions. A + * human-readable identifier can be anything but a plain, old bunch of + * characters.
+ * Example of use: + * @code{.cpp} + * const auto value = basic_hashed_string::value("my.png"); + * @endcode + * + * @tparam N Number of characters of the identifier. + * @param str Human-readable identifier. + * @return The numeric representation of the string. + */ + template + [[nodiscard]] static constexpr hash_type value(const value_type (&str)[N]) ENTT_NOEXCEPT { + return helper(str); + } + + /** + * @brief Returns directly the numeric representation of a string. + * @param wrapper Helps achieving the purpose by relying on overloading. + * @return The numeric representation of the string. + */ + [[nodiscard]] static hash_type value(const_wrapper wrapper) ENTT_NOEXCEPT { + return helper(wrapper.str); + } + + /** + * @brief Returns directly the numeric representation of a string view. + * @param str Human-readable identifier. + * @param size Length of the string to hash. + * @return The numeric representation of the string. + */ + [[nodiscard]] static hash_type value(const value_type *str, std::size_t size) ENTT_NOEXCEPT { + id_type partial{traits_type::offset}; + while(size--) { partial = (partial^(str++)[0])*traits_type::prime; } + return partial; + } + + /*! @brief Constructs an empty hashed string. */ + constexpr basic_hashed_string() ENTT_NOEXCEPT + : str{nullptr}, hash{} + {} + + /** + * @brief Constructs a hashed string from an array of const characters. + * + * Forcing template resolution avoids implicit conversions. A + * human-readable identifier can be anything but a plain, old bunch of + * characters.
+ * Example of use: + * @code{.cpp} + * basic_hashed_string hs{"my.png"}; + * @endcode + * + * @tparam N Number of characters of the identifier. + * @param curr Human-readable identifier. + */ + template + constexpr basic_hashed_string(const value_type (&curr)[N]) ENTT_NOEXCEPT + : str{curr}, hash{helper(curr)} + {} + + /** + * @brief Explicit constructor on purpose to avoid constructing a hashed + * string directly from a `const value_type *`. + * @param wrapper Helps achieving the purpose by relying on overloading. + */ + explicit constexpr basic_hashed_string(const_wrapper wrapper) ENTT_NOEXCEPT + : str{wrapper.str}, hash{helper(wrapper.str)} + {} + + /** + * @brief Returns the human-readable representation of a hashed string. + * @return The string used to initialize the instance. + */ + [[nodiscard]] constexpr const value_type * data() const ENTT_NOEXCEPT { + return str; + } + + /** + * @brief Returns the numeric representation of a hashed string. + * @return The numeric representation of the instance. + */ + [[nodiscard]] constexpr hash_type value() const ENTT_NOEXCEPT { + return hash; + } + + /*! @copydoc data */ + [[nodiscard]] constexpr operator const value_type *() const ENTT_NOEXCEPT { return data(); } + + /** + * @brief Returns the numeric representation of a hashed string. + * @return The numeric representation of the instance. + */ + [[nodiscard]] constexpr operator hash_type() const ENTT_NOEXCEPT { return value(); } + + /** + * @brief Compares two hashed strings. + * @param other Hashed string with which to compare. + * @return True if the two hashed strings are identical, false otherwise. + */ + [[nodiscard]] constexpr bool operator==(const basic_hashed_string &other) const ENTT_NOEXCEPT { + return hash == other.hash; + } + +private: + const value_type *str; + hash_type hash; +}; + + +/** + * @brief Deduction guide.
+ * + * It allows to deduce the character type of the hashed string directly from a + * human-readable identifier provided to the constructor. + * + * @tparam Char Character type. + * @tparam N Number of characters of the identifier. + * @param str Human-readable identifier. + */ +template +basic_hashed_string(const Char (&str)[N]) ENTT_NOEXCEPT +-> basic_hashed_string; + + +/** + * @brief Compares two hashed strings. + * @tparam Char Character type. + * @param lhs A valid hashed string. + * @param rhs A valid hashed string. + * @return True if the two hashed strings are identical, false otherwise. + */ +template +[[nodiscard]] constexpr bool operator!=(const basic_hashed_string &lhs, const basic_hashed_string &rhs) ENTT_NOEXCEPT { + return !(lhs == rhs); +} + + +/*! @brief Aliases for common character types. */ +using hashed_string = basic_hashed_string; + + +/*! @brief Aliases for common character types. */ +using hashed_wstring = basic_hashed_string; + + +} + + +/** + * @brief User defined literal for hashed strings. + * @param str The literal without its suffix. + * @return A properly initialized hashed string. + */ +[[nodiscard]] constexpr entt::hashed_string operator"" ENTT_HS_SUFFIX(const char *str, std::size_t) ENTT_NOEXCEPT { + return entt::hashed_string{str}; +} + + +/** + * @brief User defined literal for hashed wstrings. + * @param str The literal without its suffix. + * @return A properly initialized hashed wstring. + */ +[[nodiscard]] constexpr entt::hashed_wstring operator"" ENTT_HWS_SUFFIX(const wchar_t *str, std::size_t) ENTT_NOEXCEPT { + return entt::hashed_wstring{str}; +} + + +#endif + +// #include "fwd.hpp" + + + +namespace entt { + + +/** + * @cond TURN_OFF_DOXYGEN + * Internal details not to be documented.
+ */ + + +namespace internal { + + +struct ENTT_API type_index { + [[nodiscard]] static id_type next() ENTT_NOEXCEPT { + static ENTT_MAYBE_ATOMIC(id_type) value{}; + return value++; + } +}; + + +template +[[nodiscard]] constexpr auto type_name() ENTT_NOEXCEPT { +#if defined ENTT_PRETTY_FUNCTION + std::string_view pretty_function{ENTT_PRETTY_FUNCTION}; + auto first = pretty_function.find_first_not_of(' ', pretty_function.find_first_of(ENTT_PRETTY_FUNCTION_PREFIX)+1); + auto value = pretty_function.substr(first, pretty_function.find_last_of(ENTT_PRETTY_FUNCTION_SUFFIX) - first); + return value; +#else + return std::string_view{}; +#endif +} + + +} + + +/** + * Internal details not to be documented. + * @endcond + */ + + +/** + * @brief Type index. + * @tparam Type Type for which to generate a sequential identifier. + */ +template +struct ENTT_API type_index { + /** + * @brief Returns the sequential identifier of a given type. + * @return The sequential identifier of a given type. + */ + [[nodiscard]] static id_type value() ENTT_NOEXCEPT { + static const id_type value = internal::type_index::next(); + return value; + } +}; + + +/** + * @brief Provides the member constant `value` to true if a given type is + * indexable, false otherwise. + * @tparam Type Potentially indexable type. + */ +template +struct has_type_index: std::false_type {}; + + +/*! @brief has_type_index */ +template +struct has_type_index::value())>>: std::true_type {}; + + +/** + * @brief Helper variable template. + * @tparam Type Potentially indexable type. + */ +template +inline constexpr bool has_type_index_v = has_type_index::value; + + +/** + * @brief Type info. + * @tparam Type Type for which to generate information. + */ +template +struct type_info { + /** + * @brief Returns the numeric representation of a given type. + * @return The numeric representation of the given type. 
+ */ +#if defined ENTT_PRETTY_FUNCTION_CONSTEXPR + [[nodiscard]] static constexpr id_type id() ENTT_NOEXCEPT { + constexpr auto value = hashed_string::value(ENTT_PRETTY_FUNCTION); + return value; + } +#elif defined ENTT_PRETTY_FUNCTION + [[nodiscard]] static id_type id() ENTT_NOEXCEPT { + static const auto value = hashed_string::value(ENTT_PRETTY_FUNCTION); + return value; + } +#else + [[nodiscard]] static id_type id() ENTT_NOEXCEPT { + return type_index::value(); + } +#endif + + /** + * @brief Returns the name of a given type. + * @return The name of the given type. + */ +#if defined ENTT_PRETTY_FUNCTION_CONSTEXPR + [[nodiscard]] static constexpr std::string_view name() ENTT_NOEXCEPT { + constexpr auto value = internal::type_name(); + return value; + } +#elif defined ENTT_PRETTY_FUNCTION + [[nodiscard]] static std::string_view name() ENTT_NOEXCEPT { + static const auto value = internal::type_name(); + return value; + } +#else + [[nodiscard]] static constexpr std::string_view name() ENTT_NOEXCEPT { + return internal::type_name(); + } +#endif +}; + + +} + + +#endif + +// #include "sigh.hpp" +#ifndef ENTT_SIGNAL_SIGH_HPP +#define ENTT_SIGNAL_SIGH_HPP + + +#include +#include +#include +#include +#include +#include +// #include "../config/config.h" + +// #include "delegate.hpp" + +// #include "fwd.hpp" +#ifndef ENTT_SIGNAL_FWD_HPP +#define ENTT_SIGNAL_FWD_HPP + + +namespace entt { + + +template +class delegate; + + +class dispatcher; + + +template +class emitter; + + +class connection; + + +struct scoped_connection; + + +template +class sink; + + +template +class sigh; + + +} + + +#endif + + + +namespace entt { + + +/** + * @brief Sink class. + * + * Primary template isn't defined on purpose. All the specializations give a + * compile-time error unless the template parameter is a function type. + * + * @tparam Function A valid function type. + */ +template +class sink; + + +/** + * @brief Unmanaged signal handler. + * + * Primary template isn't defined on purpose. 
All the specializations give a + * compile-time error unless the template parameter is a function type. + * + * @tparam Function A valid function type. + */ +template +class sigh; + + +/** + * @brief Unmanaged signal handler. + * + * It works directly with references to classes and pointers to member functions + * as well as pointers to free functions. Users of this class are in charge of + * disconnecting instances before deleting them. + * + * This class serves mainly two purposes: + * + * * Creating signals to use later to notify a bunch of listeners. + * * Collecting results from a set of functions like in a voting system. + * + * @tparam Ret Return type of a function type. + * @tparam Args Types of arguments of a function type. + */ +template +class sigh { + /*! @brief A sink is allowed to modify a signal. */ + friend class sink; + +public: + /*! @brief Unsigned integer type. */ + using size_type = std::size_t; + /*! @brief Sink type. */ + using sink_type = sink; + + /** + * @brief Instance type when it comes to connecting member functions. + * @tparam Class Type of class to which the member function belongs. + */ + template + using instance_type = Class *; + + /** + * @brief Number of listeners connected to the signal. + * @return Number of listeners currently connected. + */ + [[nodiscard]] size_type size() const ENTT_NOEXCEPT { + return calls.size(); + } + + /** + * @brief Returns false if at least a listener is connected to the signal. + * @return True if the signal has no listeners connected, false otherwise. + */ + [[nodiscard]] bool empty() const ENTT_NOEXCEPT { + return calls.empty(); + } + + /** + * @brief Triggers a signal. + * + * All the listeners are notified. Order isn't guaranteed. + * + * @param args Arguments to use to invoke listeners. + */ + void publish(Args... args) const { + for(auto &&call: std::as_const(calls)) { + call(args...); + } + } + + /** + * @brief Collects return values from the listeners.
+ * + * The collector must expose a call operator with the following properties: + * + * * The return type is either `void` or such that it's convertible to + * `bool`. In the second case, a true value will stop the iteration. + * * The list of parameters is empty if `Ret` is `void`, otherwise it + * contains a single element such that `Ret` is convertible to it. + * + * @tparam Func Type of collector to use, if any. + * @param func A valid function object. + * @param args Arguments to use to invoke listeners. + */ + template + void collect(Func func, Args... args) const { + for(auto &&call: calls) { + if constexpr(std::is_void_v) { + if constexpr(std::is_invocable_r_v) { + call(args...); + if(func()) { break; } + } else { + call(args...); + func(); + } + } else { + if constexpr(std::is_invocable_r_v) { + if(func(call(args...))) { break; } + } else { + func(call(args...)); + } + } + } + } + +private: + std::vector> calls; +}; + + +/** + * @brief Connection class. + * + * Opaque object the aim of which is to allow users to release an already + * established connection without having to keep a reference to the signal or + * the sink that generated it. + */ +class connection { + /*! @brief A sink is allowed to create connection objects. */ + template + friend class sink; + + connection(delegate fn, void *ref) + : disconnect{fn}, signal{ref} + {} + +public: + /*! @brief Default constructor. */ + connection() = default; + + /** + * @brief Checks whether a connection is properly initialized. + * @return True if the connection is properly initialized, false otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return static_cast(disconnect); + } + + /*! @brief Breaks the connection. */ + void release() { + if(disconnect) { + disconnect(signal); + disconnect.reset(); + } + } + +private: + delegate disconnect; + void *signal{}; +}; + + +/** + * @brief Scoped connection class.
+ * + * Opaque object the aim of which is to allow users to release an already + * established connection without having to keep a reference to the signal or + * the sink that generated it.
+ * A scoped connection automatically breaks the link between the two objects + * when it goes out of scope. + */ +struct scoped_connection { + /*! @brief Default constructor. */ + scoped_connection() = default; + + /** + * @brief Constructs a scoped connection from a basic connection. + * @param other A valid connection object. + */ + scoped_connection(const connection &other) + : conn{other} + {} + + /*! @brief Default copy constructor, deleted on purpose. */ + scoped_connection(const scoped_connection &) = delete; + + /*! @brief Automatically breaks the link on destruction. */ + ~scoped_connection() { + conn.release(); + } + + /** + * @brief Default copy assignment operator, deleted on purpose. + * @return This scoped connection. + */ + scoped_connection & operator=(const scoped_connection &) = delete; + + /** + * @brief Acquires a connection. + * @param other The connection object to acquire. + * @return This scoped connection. + */ + scoped_connection & operator=(connection other) { + conn = std::move(other); + return *this; + } + + /** + * @brief Checks whether a scoped connection is properly initialized. + * @return True if the connection is properly initialized, false otherwise. + */ + [[nodiscard]] explicit operator bool() const ENTT_NOEXCEPT { + return static_cast(conn); + } + + /*! @brief Breaks the connection. */ + void release() { + conn.release(); + } + +private: + connection conn; +}; + + +/** + * @brief Sink class. + * + * A sink is used to connect listeners to signals and to disconnect them.
+ * The function type for a listener is the one of the signal to which it + * belongs. + * + * The clear separation between a signal and a sink permits to store the former + * as private data member without exposing the publish functionality to the + * users of the class. + * + * @warning + * Lifetime of a sink must not overcome that of the signal to which it refers. + * In any other case, attempting to use a sink results in undefined behavior. + * + * @tparam Ret Return type of a function type. + * @tparam Args Types of arguments of a function type. + */ +template +class sink { + using signal_type = sigh; + using difference_type = typename std::iterator_traits::difference_type; + + template + static void release(Type value_or_instance, void *signal) { + sink{*static_cast(signal)}.disconnect(value_or_instance); + } + + template + static void release(void *signal) { + sink{*static_cast(signal)}.disconnect(); + } + +public: + /** + * @brief Constructs a sink that is allowed to modify a given signal. + * @param ref A valid reference to a signal object. + */ + sink(sigh &ref) ENTT_NOEXCEPT + : offset{}, + signal{&ref} + {} + + /** + * @brief Returns false if at least a listener is connected to the sink. + * @return True if the sink has no listeners connected, false otherwise. + */ + [[nodiscard]] bool empty() const ENTT_NOEXCEPT { + return signal->calls.empty(); + } + + /** + * @brief Returns a sink that connects before a given free function or an + * unbound member. + * @tparam Function A valid free function pointer. + * @return A properly initialized sink object. + */ + template + [[nodiscard]] sink before() { + delegate call{}; + call.template connect(); + + const auto &calls = signal->calls; + const auto it = std::find(calls.cbegin(), calls.cend(), std::move(call)); + + sink other{*this}; + other.offset = std::distance(it, calls.cend()); + return other; + } + + /** + * @brief Returns a sink that connects before a free function with payload + * or a bound member. 
+ * @tparam Candidate Member or free function to look for. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. + * @return A properly initialized sink object. + */ + template + [[nodiscard]] sink before(Type &&value_or_instance) { + delegate call{}; + call.template connect(value_or_instance); + + const auto &calls = signal->calls; + const auto it = std::find(calls.cbegin(), calls.cend(), std::move(call)); + + sink other{*this}; + other.offset = std::distance(it, calls.cend()); + return other; + } + + /** + * @brief Returns a sink that connects before a given instance or specific + * payload. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. + * @return A properly initialized sink object. + */ + template + [[nodiscard]] sink before(Type &value_or_instance) { + return before(&value_or_instance); + } + + /** + * @brief Returns a sink that connects before a given instance or specific + * payload. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid pointer that fits the purpose. + * @return A properly initialized sink object. + */ + template + [[nodiscard]] sink before(Type *value_or_instance) { + sink other{*this}; + + if(value_or_instance) { + const auto &calls = signal->calls; + const auto it = std::find_if(calls.cbegin(), calls.cend(), [value_or_instance](const auto &delegate) { + return delegate.instance() == value_or_instance; + }); + + other.offset = std::distance(it, calls.cend()); + } + + return other; + } + + /** + * @brief Returns a sink that connects before anything else. + * @return A properly initialized sink object. + */ + [[nodiscard]] sink before() { + sink other{*this}; + other.offset = signal->calls.size(); + return other; + } + + /** + * @brief Connects a free function or an unbound member to a signal. 
+ * + * The signal handler performs checks to avoid multiple connections for the + * same function. + * + * @tparam Candidate Function or member to connect to the signal. + * @return A properly initialized connection object. + */ + template + connection connect() { + disconnect(); + + delegate call{}; + call.template connect(); + signal->calls.insert(signal->calls.end() - offset, std::move(call)); + + delegate conn{}; + conn.template connect<&release>(); + return { std::move(conn), signal }; + } + + /** + * @brief Connects a free function with payload or a bound member to a + * signal. + * + * The signal isn't responsible for the connected object or the payload. + * Users must always guarantee that the lifetime of the instance overcomes + * the one of the signal. On the other side, the signal handler performs + * checks to avoid multiple connections for the same function.
+ * When used to connect a free function with payload, its signature must be + * such that the instance is the first argument before the ones used to + * define the signal itself. + * + * @tparam Candidate Function or member to connect to the signal. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. + * @return A properly initialized connection object. + */ + template + connection connect(Type &&value_or_instance) { + disconnect(value_or_instance); + + delegate call{}; + call.template connect(value_or_instance); + signal->calls.insert(signal->calls.end() - offset, std::move(call)); + + delegate conn{}; + conn.template connect<&release>(value_or_instance); + return { std::move(conn), signal }; + } + + /** + * @brief Disconnects a free function or an unbound member from a signal. + * @tparam Candidate Function or member to disconnect from the signal. + */ + template + void disconnect() { + auto &calls = signal->calls; + delegate call{}; + call.template connect(); + calls.erase(std::remove(calls.begin(), calls.end(), std::move(call)), calls.end()); + } + + /** + * @brief Disconnects a free function with payload or a bound member from a + * signal. + * @tparam Candidate Function or member to disconnect from the signal. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. + */ + template + void disconnect(Type &&value_or_instance) { + auto &calls = signal->calls; + delegate call{}; + call.template connect(value_or_instance); + calls.erase(std::remove(calls.begin(), calls.end(), std::move(call)), calls.end()); + } + + /** + * @brief Disconnects free functions with payload or bound members from a + * signal. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. 
+ */ + template + void disconnect(Type &value_or_instance) { + disconnect(&value_or_instance); + } + + /** + * @brief Disconnects free functions with payload or bound members from a + * signal. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. + */ + template + void disconnect(Type *value_or_instance) { + if(value_or_instance) { + auto &calls = signal->calls; + calls.erase(std::remove_if(calls.begin(), calls.end(), [value_or_instance](const auto &delegate) { + return delegate.instance() == value_or_instance; + }), calls.end()); + } + } + + /*! @brief Disconnects all the listeners from a signal. */ + void disconnect() { + signal->calls.clear(); + } + +private: + difference_type offset; + signal_type *signal; +}; + + +/** + * @brief Deduction guide. + * + * It allows to deduce the function type of a sink directly from the signal it + * refers to. + * + * @tparam Ret Return type of a function type. + * @tparam Args Types of arguments of a function type. + */ +template +sink(sigh &) ENTT_NOEXCEPT -> sink; + + +} + + +#endif + + + +namespace entt { + + +/** + * @brief Basic dispatcher implementation. + * + * A dispatcher can be used either to trigger an immediate event or to enqueue + * events to be published all together once per tick.
+ * Listeners are provided in the form of member functions. For each event of + * type `Event`, listeners are such that they can be invoked with an argument of + * type `Event &`, no matter what the return type is. + * + * The dispatcher creates instances of the `sigh` class internally. Refer to the + * documentation of the latter for more details. + */ +class dispatcher { + struct basic_pool { + virtual ~basic_pool() = default; + virtual void publish() = 0; + virtual void disconnect(void *) = 0; + virtual void clear() ENTT_NOEXCEPT = 0; + [[nodiscard]] virtual id_type type_id() const ENTT_NOEXCEPT = 0; + }; + + template + struct pool_handler final: basic_pool { + using signal_type = sigh; + using sink_type = typename signal_type::sink_type; + + void publish() override { + const auto length = events.size(); + + for(std::size_t pos{}; pos < length; ++pos) { + signal.publish(events[pos]); + } + + events.erase(events.cbegin(), events.cbegin()+length); + } + + void disconnect(void *instance) override { + sink().disconnect(instance); + } + + void clear() ENTT_NOEXCEPT override { + events.clear(); + } + + [[nodiscard]] sink_type sink() ENTT_NOEXCEPT { + return entt::sink{signal}; + } + + template + void trigger(Args &&... args) { + Event instance{std::forward(args)...}; + signal.publish(instance); + } + + template + void enqueue(Args &&... 
args) { + if constexpr(std::is_aggregate_v) { + events.push_back(Event{std::forward(args)...}); + } else { + events.emplace_back(std::forward(args)...); + } + } + + [[nodiscard]] id_type type_id() const ENTT_NOEXCEPT override { + return type_info::id(); + } + + private: + signal_type signal{}; + std::vector events; + }; + + template + [[nodiscard]] pool_handler & assure() { + static_assert(std::is_same_v>, "Invalid event type"); + + if constexpr(ENTT_FAST_PATH(has_type_index_v)) { + const auto index = type_index::value(); + + if(!(index < pools.size())) { + pools.resize(index+1u); + } + + if(!pools[index]) { + pools[index].reset(new pool_handler{}); + } + + return static_cast &>(*pools[index]); + } else { + auto it = std::find_if(pools.begin(), pools.end(), [id = type_info::id()](const auto &cpool) { return id == cpool->type_id(); }); + return static_cast &>(it == pools.cend() ? *pools.emplace_back(new pool_handler{}) : **it); + } + } + +public: + /** + * @brief Returns a sink object for the given event. + * + * A sink is an opaque object used to connect listeners to events. + * + * The function type for a listener is _compatible_ with: + * @code{.cpp} + * void(Event &); + * @endcode + * + * The order of invocation of the listeners isn't guaranteed. + * + * @sa sink + * + * @tparam Event Type of event of which to get the sink. + * @return A temporary sink object. + */ + template + [[nodiscard]] auto sink() { + return assure().sink(); + } + + /** + * @brief Triggers an immediate event of the given type. + * + * All the listeners registered for the given type are immediately notified. + * The event is discarded after the execution. + * + * @tparam Event Type of event to trigger. + * @tparam Args Types of arguments to use to construct the event. + * @param args Arguments to use to construct the event. + */ + template + void trigger(Args &&... args) { + assure().trigger(std::forward(args)...); + } + + /** + * @brief Triggers an immediate event of the given type. 
+ * + * All the listeners registered for the given type are immediately notified. + * The event is discarded after the execution. + * + * @tparam Event Type of event to trigger. + * @param event An instance of the given type of event. + */ + template + void trigger(Event &&event) { + assure>().trigger(std::forward(event)); + } + + /** + * @brief Enqueues an event of the given type. + * + * An event of the given type is queued. No listener is invoked. Use the + * `update` member function to notify listeners when ready. + * + * @tparam Event Type of event to enqueue. + * @tparam Args Types of arguments to use to construct the event. + * @param args Arguments to use to construct the event. + */ + template + void enqueue(Args &&... args) { + assure().enqueue(std::forward(args)...); + } + + /** + * @brief Enqueues an event of the given type. + * + * An event of the given type is queued. No listener is invoked. Use the + * `update` member function to notify listeners when ready. + * + * @tparam Event Type of event to enqueue. + * @param event An instance of the given type of event. + */ + template + void enqueue(Event &&event) { + assure>().enqueue(std::forward(event)); + } + + /** + * @brief Utility function to disconnect everything related to a given value + * or instance from a dispatcher. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. + */ + template + void disconnect(Type &value_or_instance) { + disconnect(&value_or_instance); + } + + /** + * @brief Utility function to disconnect everything related to a given value + * or instance from a dispatcher. + * @tparam Type Type of class or type of payload. + * @param value_or_instance A valid object that fits the purpose. + */ + template + void disconnect(Type *value_or_instance) { + for(auto &&cpool: pools) { + if(cpool) { + cpool->disconnect(value_or_instance); + } + } + } + + /** + * @brief Discards all the events queued so far. 
+ * + * If no types are provided, the dispatcher will clear all the existing + * pools. + * + * @tparam Event Type of events to discard. + */ + template + void clear() { + if constexpr(sizeof...(Event) == 0) { + for(auto &&cpool: pools) { + if(cpool) { + cpool->clear(); + } + } + } else { + (assure().clear(), ...); + } + } + + /** + * @brief Delivers all the pending events of the given type. + * + * This method is blocking and it doesn't return until all the events are + * delivered to the registered listeners. It's responsibility of the users + * to reduce at a minimum the time spent in the bodies of the listeners. + * + * @tparam Event Type of events to send. + */ + template + void update() { + assure().publish(); + } + + /** + * @brief Delivers all the pending events. + * + * This method is blocking and it doesn't return until all the events are + * delivered to the registered listeners. It's responsibility of the users + * to reduce at a minimum the time spent in the bodies of the listeners. + */ + void update() const { + for(auto pos = pools.size(); pos; --pos) { + if(auto &&cpool = pools[pos-1]; cpool) { + cpool->publish(); + } + } + } + +private: + std::vector> pools; +}; + + +} + + +#endif + +// #include "signal/emitter.hpp" +#ifndef ENTT_SIGNAL_EMITTER_HPP +#define ENTT_SIGNAL_EMITTER_HPP + + +#include +#include +#include +#include +#include +#include +#include +#include +// #include "../config/config.h" + +// #include "../core/fwd.hpp" + +// #include "../core/type_info.hpp" + + + +namespace entt { + + +/** + * @brief General purpose event emitter. + * + * The emitter class template follows the CRTP idiom. To create a custom emitter + * type, derived classes must inherit directly from the base class as: + * + * @code{.cpp} + * struct my_emitter: emitter { + * // ... + * } + * @endcode + * + * Pools for the type of events are created internally on the fly. It's not + * required to specify in advance the full list of accepted types.
+ * Moreover, whenever an event is published, an emitter provides the listeners + * with a reference to itself along with a reference to the event. Therefore + * listeners have an handy way to work with it without incurring in the need of + * capturing a reference to the emitter. + * + * @tparam Derived Actual type of emitter that extends the class template. + */ +template +class emitter { + struct basic_pool { + virtual ~basic_pool() = default; + virtual bool empty() const ENTT_NOEXCEPT = 0; + virtual void clear() ENTT_NOEXCEPT = 0; + virtual id_type type_id() const ENTT_NOEXCEPT = 0; + }; + + template + struct pool_handler final: basic_pool { + using listener_type = std::function; + using element_type = std::pair; + using container_type = std::list; + using connection_type = typename container_type::iterator; + + [[nodiscard]] bool empty() const ENTT_NOEXCEPT override { + auto pred = [](auto &&element) { return element.first; }; + + return std::all_of(once_list.cbegin(), once_list.cend(), pred) && + std::all_of(on_list.cbegin(), on_list.cend(), pred); + } + + void clear() ENTT_NOEXCEPT override { + if(publishing) { + for(auto &&element: once_list) { + element.first = true; + } + + for(auto &&element: on_list) { + element.first = true; + } + } else { + once_list.clear(); + on_list.clear(); + } + } + + connection_type once(listener_type listener) { + return once_list.emplace(once_list.cend(), false, std::move(listener)); + } + + connection_type on(listener_type listener) { + return on_list.emplace(on_list.cend(), false, std::move(listener)); + } + + void erase(connection_type conn) { + conn->first = true; + + if(!publishing) { + auto pred = [](auto &&element) { return element.first; }; + once_list.remove_if(pred); + on_list.remove_if(pred); + } + } + + void publish(Event &event, Derived &ref) { + container_type swap_list; + once_list.swap(swap_list); + + publishing = true; + + for(auto &&element: on_list) { + element.first ? 
void() : element.second(event, ref); + } + + for(auto &&element: swap_list) { + element.first ? void() : element.second(event, ref); + } + + publishing = false; + + on_list.remove_if([](auto &&element) { return element.first; }); + } + + [[nodiscard]] id_type type_id() const ENTT_NOEXCEPT override { + return type_info::id(); + } + + private: + bool publishing{false}; + container_type once_list{}; + container_type on_list{}; + }; + + template + [[nodiscard]] const pool_handler & assure() const { + static_assert(std::is_same_v>, "Invalid event type"); + + if constexpr(ENTT_FAST_PATH(has_type_index_v)) { + const auto index = type_index::value(); + + if(!(index < pools.size())) { + pools.resize(index+1u); + } + + if(!pools[index]) { + pools[index].reset(new pool_handler{}); + } + + return static_cast &>(*pools[index]); + } else { + auto it = std::find_if(pools.begin(), pools.end(), [id = type_info::id()](const auto &cpool) { return id == cpool->type_id(); }); + return static_cast &>(it == pools.cend() ? *pools.emplace_back(new pool_handler{}) : **it); + } + } + + template + [[nodiscard]] pool_handler & assure() { + return const_cast &>(std::as_const(*this).template assure()); + } + +public: + /** @brief Type of listeners accepted for the given event. */ + template + using listener = typename pool_handler::listener_type; + + /** + * @brief Generic connection type for events. + * + * Type of the connection object returned by the event emitter whenever a + * listener for the given type is registered.
+ * It can be used to break connections still in use. + * + * @tparam Event Type of event for which the connection is created. + */ + template + struct connection: private pool_handler::connection_type { + /** @brief Event emitters are friend classes of connections. */ + friend class emitter; + + /*! @brief Default constructor. */ + connection() = default; + + /** + * @brief Creates a connection that wraps its underlying instance. + * @param conn A connection object to wrap. + */ + connection(typename pool_handler::connection_type conn) + : pool_handler::connection_type{std::move(conn)} + {} + }; + + /*! @brief Default constructor. */ + emitter() = default; + + /*! @brief Default destructor. */ + virtual ~emitter() { + static_assert(std::is_base_of_v, Derived>, "Incorrect use of the class template"); + } + + /*! @brief Default move constructor. */ + emitter(emitter &&) = default; + + /*! @brief Default move assignment operator. @return This emitter. */ + emitter & operator=(emitter &&) = default; + + /** + * @brief Emits the given event. + * + * All the listeners registered for the specific event type are invoked with + * the given event. The event type must either have a proper constructor for + * the arguments provided or be an aggregate type. + * + * @tparam Event Type of event to publish. + * @tparam Args Types of arguments to use to construct the event. + * @param args Parameters to use to initialize the event. + */ + template + void publish(Args &&... args) { + Event instance{std::forward(args)...}; + assure().publish(instance, *static_cast(this)); + } + + /** + * @brief Registers a long-lived listener with the event emitter. + * + * This method can be used to register a listener designed to be invoked + * more than once for the given event type.
+ * The connection returned by the method can be freely discarded. It's meant + * to be used later to disconnect the listener if required. + * + * The listener is as a callable object that can be moved and the type of + * which is _compatible_ with `void(Event &, Derived &)`. + * + * @note + * Whenever an event is emitted, the emitter provides the listener with a + * reference to the derived class. Listeners don't have to capture those + * instances for later uses. + * + * @tparam Event Type of event to which to connect the listener. + * @param instance The listener to register. + * @return Connection object that can be used to disconnect the listener. + */ + template + connection on(listener instance) { + return assure().on(std::move(instance)); + } + + /** + * @brief Registers a short-lived listener with the event emitter. + * + * This method can be used to register a listener designed to be invoked + * only once for the given event type.
+ * The connection returned by the method can be freely discarded. It's meant + * to be used later to disconnect the listener if required. + * + * The listener is as a callable object that can be moved and the type of + * which is _compatible_ with `void(Event &, Derived &)`. + * + * @note + * Whenever an event is emitted, the emitter provides the listener with a + * reference to the derived class. Listeners don't have to capture those + * instances for later uses. + * + * @tparam Event Type of event to which to connect the listener. + * @param instance The listener to register. + * @return Connection object that can be used to disconnect the listener. + */ + template + connection once(listener instance) { + return assure().once(std::move(instance)); + } + + /** + * @brief Disconnects a listener from the event emitter. + * + * Do not use twice the same connection to disconnect a listener, it results + * in undefined behavior. Once used, discard the connection object. + * + * @tparam Event Type of event of the connection. + * @param conn A valid connection. + */ + template + void erase(connection conn) { + assure().erase(std::move(conn)); + } + + /** + * @brief Disconnects all the listeners for the given event type. + * + * All the connections previously returned for the given event are + * invalidated. Using them results in undefined behavior. + * + * @tparam Event Type of event to reset. + */ + template + void clear() { + assure().clear(); + } + + /** + * @brief Disconnects all the listeners. + * + * All the connections previously returned are invalidated. Using them + * results in undefined behavior. + */ + void clear() ENTT_NOEXCEPT { + for(auto &&cpool: pools) { + if(cpool) { + cpool->clear(); + } + } + } + + /** + * @brief Checks if there are listeners registered for the specific event. + * @tparam Event Type of event to test. + * @return True if there are no listeners registered, false otherwise. 
+ */ + template + [[nodiscard]] bool empty() const { + return assure().empty(); + } + + /** + * @brief Checks if there are listeners registered with the event emitter. + * @return True if there are no listeners registered, false otherwise. + */ + [[nodiscard]] bool empty() const ENTT_NOEXCEPT { + return std::all_of(pools.cbegin(), pools.cend(), [](auto &&cpool) { + return !cpool || cpool->empty(); + }); + } + +private: + mutable std::vector> pools{}; +}; + + +} + + +#endif + +// #include "signal/sigh.hpp" + diff --git a/Sdk/Include/AI/Public/BehaviorProcessorSystem.h b/Sdk/Include/AI/Public/BehaviorProcessorSystem.h new file mode 100644 index 0000000..a313450 --- /dev/null +++ b/Sdk/Include/AI/Public/BehaviorProcessorSystem.h @@ -0,0 +1,19 @@ +#pragma once + +#include "ECS/Public/ISystem.h" +#include "Behaviors/IBehavior.h" + +namespace ECS +{ + class BehaviorProcessorSystem : public ISystem + { + virtual const char* GetName() override; + virtual int GetPriorityOrder() override; + virtual void Startup() override; + virtual void Update(float delta) override; + virtual void Shutdown() override; + }; + + // TODO: Move to an appropriate location + std::vector CreateMeccBehaviorTrees(std::shared_ptr entity); +} \ No newline at end of file diff --git a/Sdk/Include/AI/Public/Behaviors/BehaviorBase.h b/Sdk/Include/AI/Public/Behaviors/BehaviorBase.h new file mode 100644 index 0000000..e4b5fd6 --- /dev/null +++ b/Sdk/Include/AI/Public/Behaviors/BehaviorBase.h @@ -0,0 +1,45 @@ +#pragma once + +#include "IBehavior.h" +#include "ECS/Public/Entity.h" +#include "GameObject/Public/Core.h" + +namespace AI +{ + template + class BehaviorBase : public IBehavior + { + protected: + inline static std::function CreateLeaf( + ECS::Entity* entity, + std::function processFunction) + { + using namespace std::placeholders; + + Object* object = GetObjFromEntity(entity); + return std::bind(processFunction, entity, object, _1); + } + + inline static std::function CreateLeaf( + ECS::Entity* 
entity, + std::function processFunction) + { + using namespace std::placeholders; + + Object* object = GetObjFromEntity(entity); + return std::bind(processFunction, entity, object, _1); + } + + private: + inline static Object* GetObjFromEntity(ECS::Entity* entity) + { + if (entity) + { + GameObj::ObjectRef& objectRef = entity->GetComponent(); + return objectRef.GetObj(); + } + + return nullptr; + } + }; +} \ No newline at end of file diff --git a/Sdk/Include/AI/Public/Behaviors/CombatBehavior.h b/Sdk/Include/AI/Public/Behaviors/CombatBehavior.h new file mode 100644 index 0000000..ce28755 --- /dev/null +++ b/Sdk/Include/AI/Public/Behaviors/CombatBehavior.h @@ -0,0 +1,18 @@ +#pragma once + +#include "BehaviorBase.h" +#include "AI/Public/Components/CombatBehaviorState.h" + +namespace AI +{ + class CombatBehavior : public BehaviorBase + { + public: + static InnerBehaviorTree Build(ECS::Entity* entity); + + private: + // Nodes + static bool EnemiesNearby(ECS::Entity* entity, Object* object, BehaviorTreeContext& context); + static bool SeekTarget(ECS::Entity* entity, Object* object, BehaviorTreeContext& context); + }; +} \ No newline at end of file diff --git a/Sdk/Include/AI/Public/Behaviors/DodgeBehavior.h b/Sdk/Include/AI/Public/Behaviors/DodgeBehavior.h new file mode 100644 index 0000000..287ff4d --- /dev/null +++ b/Sdk/Include/AI/Public/Behaviors/DodgeBehavior.h @@ -0,0 +1,18 @@ +#pragma once + +#include "BehaviorBase.h" +#include "AI/Public/Components/DodgeBehaviorState.h" + +namespace AI +{ + class DodgeBehavior : public BehaviorBase + { + public: + static InnerBehaviorTree Build(ECS::Entity* entity); + + private: + // Nodes + static bool ShouldStartDodge(ECS::Entity* entity, Object* object, BehaviorTreeContext& context); + static beehive::Status MoveToDodgeLocation(ECS::Entity* entity, Object* object, BehaviorTreeContext& context); + }; +} diff --git a/Sdk/Include/AI/Public/Behaviors/GetEquipmentBehavior.h b/Sdk/Include/AI/Public/Behaviors/GetEquipmentBehavior.h 
new file mode 100644
index 0000000..56cb79a
--- /dev/null
+++ b/Sdk/Include/AI/Public/Behaviors/GetEquipmentBehavior.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "BehaviorBase.h"
+#include "AI/Public/Components/GetEquipmentBehaviorState.h"
+
+namespace AI
+{
+    class GetEquipmentBehavior : public BehaviorBase
+    {
+    public:
+        static InnerBehaviorTree Build(ECS::Entity* entity);
+
+    private:
+        // Nodes
+        static bool NeedsEquipment(ECS::Entity* entity, Object* object, BehaviorTreeContext& context);
+        static bool SetMoveGoals(ECS::Entity* entity, Object* object, BehaviorTreeContext& context);
+        static beehive::Status MoveToShop(ECS::Entity* entity, Object* object, BehaviorTreeContext& context);
+        static beehive::Status EquipSelf(ECS::Entity* entity, Object* object, BehaviorTreeContext& context);
+
+        static void EquipMeccLoadout(ECS::Entity* entity, Object* object, Object* shop);
+        static void EquipReaperLoadout(ECS::Entity* entity, Object* object, Object* shop);
+        static P3D SelectShopRefPt(Object* object, Object* shop);
+    };
+}
\ No newline at end of file
diff --git a/Sdk/Include/AI/Public/Behaviors/IBehavior.h b/Sdk/Include/AI/Public/Behaviors/IBehavior.h
new file mode 100644
index 0000000..5c1ee46
--- /dev/null
+++ b/Sdk/Include/AI/Public/Behaviors/IBehavior.h
@@ -0,0 +1,111 @@
+#pragma once
+
+#include "ECS/Public/Entity.h"
+
+// Behavior tracing is compiled out; change the condition to 1 to route it to DEBUG_INFOC.
+#if 0
+#define TRACE_BEHAVIOR(...) DEBUG_INFOC(__VA_ARGS__)
+#else
+#define TRACE_BEHAVIOR(...) do { } while(0)
+#endif
+
+namespace AI
+{
+    using InnerBehaviorTree = beehive::Tree;
+
+    // Per-tree state handed to the tree when it is processed: the owning entity, the beehive
+    // tree with its current state, and the flag that lets a leaf stop lower-priority trees.
+    struct BehaviorTreeContext
+    {
+        // tree must be non-null: it is dereferenced while the members are initialized.
+        BehaviorTreeContext(std::shared_ptr entity, beehive::Tree* tree)
+            : entity(entity),
+            innerTree(tree),
+            innerTreeState(tree->make_state())
+        {
+            assert(entity);
+        }
+
+        ECS::Entity* GetEntity() const
+        {
+            return entity.get();
+        }
+
+        // Replaces the tree state with a fresh one obtained from make_state().
+        void ResetResumeIndex()
+        {
+            innerTreeState = innerTree->make_state();
+        }
+
+        bool GetChainingEnabled() const { return chainingEnabled; }
+
+        void SetChainingEnabled(bool chainingEnabled)
+        {
+            this->chainingEnabled = chainingEnabled;
+        }
+
+    private:
+        beehive::TreeState& GetBeehiveTreeState()
+        {
+            return innerTreeState;
+        }
+
+        std::shared_ptr entity;
+        beehive::Tree* innerTree;
+        beehive::TreeState innerTreeState;
+        bool chainingEnabled = true; // True if we allow behavior trees further down in the processing order to execute
+
+        friend class BehaviorTree;
+    };
+
+    using BehaviorTreeBuilder = beehive::Builder;
+
+    // A named beehive tree bundled with the context it is processed against.
+    class BehaviorTree
+    {
+    public:
+        BehaviorTree(
+            const char* name,
+            std::shared_ptr entity,
+            std::shared_ptr innerTree)
+            // Listed in declaration order, which is the order members are really initialized in:
+            // m_treeContext reads m_innerTree, so m_innerTree has to come first.
+            : m_innerTree(innerTree),
+            m_treeContext(entity, m_innerTree.get()),
+            m_name(name)
+        {
+        }
+
+        // Processes the tree once and returns whether trees further down the priority order may run.
+        bool Process()
+        {
+            beehive::TreeState& beehiveTreeState = m_treeContext.GetBeehiveTreeState();
+
+            m_innerTree->process(beehiveTreeState, m_treeContext);
+
+            // Check to see if we should allow any trees further down the priority order to execute:
+            bool continueExecution = m_treeContext.GetChainingEnabled();
+            m_treeContext.SetChainingEnabled(true); // Always re-enable for next frame
+
+            return continueExecution;
+        }
+
+        const char* GetName() const
+        {
+            return m_name;
+        }
+
+    private:
+        std::shared_ptr m_innerTree;
+        BehaviorTreeContext m_treeContext;
+        const char* m_name;
+    };
+
+    struct IBehavior
+    {
+        // Virtual so that destroying a behavior through an IBehavior pointer is well defined.
+        virtual ~IBehavior() = default;
+
+        virtual InnerBehaviorTree Build() = 0;
+    };
+}
\ No newline at end of file
diff --git a/Sdk/Include/AI/Public/Behaviors/PatrolBehavior.h
b/Sdk/Include/AI/Public/Behaviors/PatrolBehavior.h
new file mode 100644
index 0000000..99e5430
--- /dev/null
+++ b/Sdk/Include/AI/Public/Behaviors/PatrolBehavior.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "BehaviorBase.h"
+#include "AI/Public/Components/PatrolBehaviorState.h"
+
+namespace AI
+{
+    class PatrolBehavior : public BehaviorBase
+    {
+    public:
+        static InnerBehaviorTree Build(ECS::Entity* entity);
+
+    private:
+        // Nodes
+        static bool ShouldPatrol(ECS::Entity* entity, Object* object, BehaviorTreeContext& context);
+        static beehive::Status SetMoveGoals(ECS::Entity* entity, Object* object, BehaviorTreeContext& context);
+        static beehive::Status MoveToLocation(ECS::Entity* entity, Object* object, BehaviorTreeContext& context);
+    };
+}
\ No newline at end of file
diff --git a/Sdk/Include/AI/Public/Command.h b/Sdk/Include/AI/Public/Command.h
new file mode 100644
index 0000000..8528955
--- /dev/null
+++ b/Sdk/Include/AI/Public/Command.h
@@ -0,0 +1,33 @@
+#pragma once
+
+namespace AI
+{
+    enum class CommandType
+    {
+        Move = 0,
+        FireWeapon,
+
+        NumTypes
+    };
+
+    // Polymorphic base for commands. Type is value-initialized, so it starts out as
+    // CommandType::Move (0) until a derived command or the caller sets it.
+    struct Command
+    {
+        virtual ~Command() = default;
+
+        CommandType Type{};
+    };
+
+    struct MoveCommand : Command
+    {
+        SBYTE Turn{}; // Positive is left (counterclockwise)
+        SBYTE LookUpDown{};
+        SBYTE Run{}; // Positive is forward
+        SBYTE Side{}; // Positive is left
+        SBYTE Thrust{};
+        DWORD Flags{};
+        float TurnGoal{};
+        P3D MoveGoal{};
+    };
+}
diff --git a/Sdk/Include/AI/Public/Components/BehaviorProcessor.h b/Sdk/Include/AI/Public/Components/BehaviorProcessor.h
new file mode 100644
index 0000000..d67ae03
--- /dev/null
+++ b/Sdk/Include/AI/Public/Components/BehaviorProcessor.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "AI/Public/Behaviors/IBehavior.h"
+
+namespace AI
+{
+    struct BehaviorProcessor
+    {
+        std::vector Trees;
+    };
+}
\ No newline at end of file
diff --git a/Sdk/Include/AI/Public/Components/CombatBehaviorState.h b/Sdk/Include/AI/Public/Components/CombatBehaviorState.h
new file mode 100644
index
0000000..643ac3f
--- /dev/null
+++ b/Sdk/Include/AI/Public/Components/CombatBehaviorState.h
@@ -0,0 +1,9 @@
+#pragma once
+
+namespace AI
+{
+    struct CombatBehaviorState
+    {
+        std::weak_ptr CurrentTarget{};
+    };
+}
\ No newline at end of file
diff --git a/Sdk/Include/AI/Public/Components/DodgeBehavior.cpp b/Sdk/Include/AI/Public/Components/DodgeBehavior.cpp
new file mode 100644
index 0000000..b3a7e28
--- /dev/null
+++ b/Sdk/Include/AI/Public/Components/DodgeBehavior.cpp
@@ -0,0 +1,205 @@
+#include "AI/Public/Behaviors/DodgeBehavior.h"
+#include "AI/Public/Components/Senses.h"
+#include "AI/Public/Components/MoveEnactor.h"
+#include "AI/Public/Components/PhysicsView.h"
+#include "GameObject/Public/Core.h"
+#include "Navigation/Public/Core.h"
+#include "projectile.h"
+
+#include <limits>
+
+namespace AI
+{
+    using namespace beehive;
+    using namespace ECS;
+    using namespace GameObj;
+    using namespace Nav;
+
+    // TODO: Parameterize
+    const float DodgeStartTime = 5.0f;
+    const float DodgeSafeRadius = 10.0f;
+
+    // A dodge target is usable while it is set and its projectile has not expired.
+    static bool DodgeTargetValid(const std::optional& target)
+    {
+        return target && !target->Projectile.expired();
+    }
+
+    // Distance from the projectile to avoidLocation divided by the projectile's speed.
+    // A projectile with no speed never arrives: report an infinite time rather than
+    // dividing by zero, so callers comparing against a threshold simply skip it.
+    static float GetTimeToImpact(const P3D& avoidLocation, const PhysicsView& projectilePhysics)
+    {
+        const float speed = projectilePhysics.Velocity.Length();
+        if (!(speed > 0.0f))
+        {
+            return std::numeric_limits<float>::infinity();
+        }
+
+        return avoidLocation.Distance3D(projectilePhysics.Location) / speed;
+    }
+
+    // Drops the current dodge target and resets the tree's resume state.
+    static void AbortDodge(DodgeBehaviorState& state, BehaviorTreeContext& context)
+    {
+        state.DodgeTarget = std::nullopt;
+        context.ResetResumeIndex();
+    }
+
+    // Returns the nearest known enemy projectile whose ground impact is at most
+    // DodgeStartTime away and whose impact radius (def->fardist) covers the AI's
+    // projected position, or std::nullopt if there is none.
+    // Side effect: sorts senses.KnownEnemyProjectiles by distance to the AI.
+    static std::optional ChooseDodgeTarget(Entity* entity, Senses& senses, const PhysicsView& objectPhysics)
+    {
+        Object* aiObject = entity->GetComponent().GetObj();
+
+        std::sort(senses.KnownEnemyProjectiles.begin(), senses.KnownEnemyProjectiles.end(), [&objectPhysics](const auto& e1, const auto& e2)
+        {
+            const PhysicsView& entity1Physics = e1->GetComponent();
+            const PhysicsView& entity2Physics = e2->GetComponent();
+
+            return entity1Physics.Location.DistanceSquared3D(objectPhysics.Location) < entity2Physics.Location.DistanceSquared3D(objectPhysics.Location);
+        });
+
+        for (const auto& projectile : senses.KnownEnemyProjectiles)
+        {
+            Object* projectileObject = projectile->GetComponent().GetObj();
+            const PhysicsView& projectilePhysics = projectile->GetComponent();
+            ObjSpecProjectile* spec = ObjSpecProjectile::Cast(projectileObject);
+
+            // NOTE(review): assumes Cast() yields null when the object is not a projectile -- confirm.
+            if (!spec || !spec->ground_collision)
+            {
+                TRACE_BEHAVIOR("Projectile is not expected to collide with ground");
+                continue;
+            }
+
+            const PROJ_Def* def = PROJ_DefGet(spec->projectile_index);
+            if (!def)
+            {
+                // Without a definition there is no impact radius to test against.
+                continue;
+            }
+
+            float timeToImpact = GetTimeToImpact(spec->ground_collision_position, projectilePhysics);
+            if (timeToImpact > DodgeStartTime)
+            {
+                TRACE_BEHAVIOR("Projectile time to impact of {0} is above threshold", timeToImpact);
+                continue;
+            }
+
+            P3D aiProjectedLocation = aiObject->location + (aiObject->velocity * timeToImpact);
+            if (aiProjectedLocation.DistanceSquared3D(spec->ground_collision_position) < Squared(def->fardist))
+            {
+                return DodgeTarget{ projectile, def->fardist, spec->ground_collision_position };
+            }
+        }
+
+        return std::nullopt;
+    }
+
+    // Leaf: picks a dodge target, paths to a point outside its impact radius and disables
+    // chaining for this frame. Returns false if a dodge is already in progress or none is needed.
+    bool DodgeBehavior::ShouldStartDodge(Entity* entity, Object* object, BehaviorTreeContext& context)
+    {
+        if (!entity->HasComponent())
+            return false;
+
+        if (!World->navMesh)
+            return false;
+
+        auto& state = entity->GetComponent();
+        if (!DodgeTargetValid(state.DodgeTarget))
+        {
+            // Make sure invalid targets have been cleared out
+            state.DodgeTarget = std::nullopt;
+        }
+        else
+        {
+            // Already have dodge in progress
+            return false;
+        }
+
+        auto [senses, physicsView] = entity->GetComponent();
+
+        state.DodgeTarget = ChooseDodgeTarget(entity, senses, physicsView);
+        if (!state.DodgeTarget)
+        {
+            // No nearby threats
+            return false;
+        }
+
+        float angleToCenter = dirfcalcp3d(&state.DodgeTarget->AvoidLocation, &physicsView.Location);
+
+        float sa, ca;
+        calc_sincosd(dirfadjust(angleToCenter), &sa, &ca);
+        P3D wantLoc = state.DodgeTarget->AvoidLocation;
+        wantLoc.x += (state.DodgeTarget->ImpactRadius + DodgeSafeRadius) * ca;
+        wantLoc.y += (state.DodgeTarget->ImpactRadius + DodgeSafeRadius) * sa;
+
+        MoveGoalParams params;
+        params.Speed = MoveGoalSpeed::Fast;
+        std::shared_ptr path = PathUtil::GetPath(entity, World->navMesh.get(), physicsView.Location, wantLoc, &params);
+        if (!path)
+        {
+            TRACE_BEHAVIOR("Couldn't path away from projectile");
+            return false;
+        }
+
+        TRACE_BEHAVIOR("Dodging to location {0} {1} {2}, est. impact time {3}",
+            wantLoc.x,
+            wantLoc.y,
+            wantLoc.z,
+            GetTimeToImpact(state.DodgeTarget->AvoidLocation, state.DodgeTarget->Projectile.lock()->GetComponent()));
+
+        PathUtil::SetPath(entity, path);
+
+        context.SetChainingEnabled(false);
+        return true;
+    }
+
+    // Leaf: reports RUNNING until the dodge target becomes invalid, the path is gone, or the AI is
+    // farther than ImpactRadius + DodgeSafeRadius from the impact location; then aborts with SUCCESS.
+    Status DodgeBehavior::MoveToDodgeLocation(Entity* entity, Object* object, BehaviorTreeContext& context)
+    {
+        auto& state = entity->GetComponent();
+        const auto& moveEnactor = entity->GetComponent();
+
+        if (!DodgeTargetValid(state.DodgeTarget)
+            || !moveEnactor.Path)
+        {
+            TRACE_BEHAVIOR("Dodge target no longer valid; canceling");
+            AbortDodge(state, context);
+            return Status::SUCCESS;
+        }
+
+        const auto& aiPhysics = entity->GetComponent();
+
+        float distanceFromImpactSq = state.DodgeTarget->AvoidLocation.DistanceSquared3D(aiPhysics.Location);
+        if (distanceFromImpactSq > Squared(state.DodgeTarget->ImpactRadius + DodgeSafeRadius))
+        {
+            // Got far enough away somehow, we can reset
+            TRACE_BEHAVIOR("AI is {0} units away from impact location with still active dodge behavior; canceling", sqrt(distanceFromImpactSq));
+            AbortDodge(state, context);
+            return Status::SUCCESS;
+        }
+
+        return Status::RUNNING;
+    }
+
+    // Adds the dodge state component to the entity and builds the two-leaf sequence.
+    InnerBehaviorTree DodgeBehavior::Build(ECS::Entity* entity)
+    {
+        assert(entity);
+
+        DodgeBehaviorState& state = entity->AddComponent();
+
+        return BehaviorTreeBuilder()
+            .sequence()
+            .leaf(CreateLeaf(entity, ShouldStartDodge))
+            .leaf(CreateLeaf(entity, MoveToDodgeLocation))
+            .end()
+            .build();
+    }
+}
\ No newline at end of file
diff --git a/Sdk/Include/AI/Public/Components/DodgeBehaviorState.h b/Sdk/Include/AI/Public/Components/DodgeBehaviorState.h
new file mode 100644
index 0000000..d071076
--- /dev/null
+++ b/Sdk/Include/AI/Public/Components/DodgeBehaviorState.h
@@ -0,0 +1,16 @@
+#pragma once
+
+namespace AI
+{
+    struct DodgeTarget
+    {
+        std::weak_ptr Projectile{};
+        float ImpactRadius{};
+        P3D AvoidLocation{};
+    };
+
+    struct DodgeBehaviorState
+    {
+        std::optional DodgeTarget{};
+    };
+}
\ No newline at end of file
diff --git a/Sdk/Include/AI/Public/Components/GetEquipmentBehaviorState.h b/Sdk/Include/AI/Public/Components/GetEquipmentBehaviorState.h
new file mode 100644
index 0000000..d95ab38
--- /dev/null
+++ b/Sdk/Include/AI/Public/Components/GetEquipmentBehaviorState.h
@@ -0,0 +1,10 @@
+#pragma once
+
+namespace AI
+{
+    struct GetEquipmentBehaviorState
+    {
+        std::optional ShopWantLocation{};
+        std::optional StuckTimer{};
+    };
+}
\ No newline at end of file
diff --git a/Sdk/Include/AI/Public/Components/Loadout.h b/Sdk/Include/AI/Public/Components/Loadout.h
new file mode 100644
index 0000000..bce04c6
--- /dev/null
+++ b/Sdk/Include/AI/Public/Components/Loadout.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include "GameObject/Public/Components/Inventory.h"
+
+namespace AI
+{
+    struct PlayerLoadout
+    {
+        PlayerLoadout(int primaryCapacity, int otherCapacity)
+            : DesiredInventory(primaryCapacity, otherCapacity)
+        {
+        }
+
+        static PlayerLoadout CreateMeccLoadout()
+        {
+            return PlayerLoadout(GameObj::MeccNumPrimaryWeapons, GameObj::MeccNumOther);
+        }
+
+        static PlayerLoadout CreateReaperLoadout()
+        {
+            return PlayerLoadout(GameObj::ReaperNumPrimaryWeapons, GameObj::ReaperNumOther);
+        }
+
+        GameObj::InventoryState DesiredInventory;
+    };
+}
\ No newline at end of file
diff --git a/Sdk/Include/AI/Public/Components/MoveEnactor.h b/Sdk/Include/AI/Public/Components/MoveEnactor.h
new file mode 100644
index
0000000..518e288 --- /dev/null +++ b/Sdk/Include/AI/Public/Components/MoveEnactor.h @@ -0,0 +1,41 @@ +#pragma once + +#include "AI/Public/Goal.h" +#include "Navigation/Public/Path.h" +#include "ECS/Public/Component.h" +#include "Core/Private/EventDispatcher.h" + +class Object; + +namespace AI +{ + const float DefaultBackwardsMaximumDistance = 30.0f; + const float DefaultBackwardsMaximumAngle = 40.0f; + const float DefaultSideMaximumDistance = 50.f; + const float DefaultSideMaximumAngle = 40.0f; + const float DefaultSlowDistance = 6.0f; + + struct MoveEnactor + { + MoveEnactor(Object* object); + + Object* Object; + + std::shared_ptr Path; + + float GetRunMaxSpeed() const { return RunMaxSpeed; } + float GetSlowRunSpeed() const { return SlowRunSpeed; } + float GetTurnMaxSpeed() const { return TurnMaxSpeed; } + + float BackwardsMaximumDistance = DefaultBackwardsMaximumDistance; // If distance to goal is below threshold, we'll walk/run backwards + float BackwardsMaximumAngle = DefaultBackwardsMaximumAngle; // If angle to goal is below threshold, we'll walk/run backwards + float SideMaximumDistance = DefaultSideMaximumDistance; // If distance to goal is below threshold, we'll sidestep + float SideMaximumAngle = DefaultSideMaximumAngle; // If angle to goal is below threshold, we'll sidestep + float SlowDistance = DefaultSlowDistance; // Distance at which we'll use the slow run speed instead of max (1.0f) + + private: + float RunMaxSpeed; + float SlowRunSpeed; + float TurnMaxSpeed; + }; +} \ No newline at end of file diff --git a/Sdk/Include/AI/Public/Components/MoveEnactorDebug.h b/Sdk/Include/AI/Public/Components/MoveEnactorDebug.h new file mode 100644 index 0000000..7b7d8d1 --- /dev/null +++ b/Sdk/Include/AI/Public/Components/MoveEnactorDebug.h @@ -0,0 +1,51 @@ +#pragma once + +#include "MoveEnactor.h" +#include "../MoveUtil.h" + +namespace ECS +{ +#ifdef ENABLE_DEBUG + template<> + inline void EntityComponentEditor(AI::MoveEnactor& component) + { + if 
(ImGui::TreeNode("MoveEnactor")) + { + if (component.Path) + { + for (size_t i = 0; i < component.Path->MoveGoals.size(); i++) + { + const auto& goal = component.Path->MoveGoals[i]; + + ImGui::Text("%sMoveGoal: %.2f %.2f %.2f", i == component.Path->CurrentGoal ? "(Active) " : "", goal.loc.x, goal.loc.y, goal.loc.z); + } + } + + if (ImGui::Button("Move to Player")) + { + /* + MoveUtil::ClearAllGoals(component); + + AI::MoveGoal goal; + goal.loc = PlayerObj->location; + FlagSet(goal.gflags, AI::GoalFlag::Is3D | AI::GoalFlag::Flyer); + + component.Path.MoveGoals.push_back(goal); + */ + } + + if (ImGui::Button("Clear Goals")) + { + AI::MoveUtil::ClearAllGoals(component); + } + + if (ImGui::Button("Move To Shop")) + { + + } + + ImGui::TreePop(); + } + } +#endif +} diff --git a/Sdk/Include/AI/Public/Components/PatrolBehaviorState.h b/Sdk/Include/AI/Public/Components/PatrolBehaviorState.h new file mode 100644 index 0000000..5a17674 --- /dev/null +++ b/Sdk/Include/AI/Public/Components/PatrolBehaviorState.h @@ -0,0 +1,9 @@ +#pragma once + +namespace AI +{ + struct PatrolBehaviorState + { + P3D TargetLocation{}; + }; +} diff --git a/Sdk/Include/AI/Public/Components/PhysicsView.h b/Sdk/Include/AI/Public/Components/PhysicsView.h new file mode 100644 index 0000000..6b126f4 --- /dev/null +++ b/Sdk/Include/AI/Public/Components/PhysicsView.h @@ -0,0 +1,16 @@ +#pragma once + +namespace AI +{ + struct PhysicsView + { + P3D Location{}; + P3D LastLocation{}; + P3D Velocity{}; + P3D LastVelocity{}; + float Facing{}; + float Speed{}; + float ForwardSpeed{}; + P3D Force{}; + }; +} \ No newline at end of file diff --git a/Sdk/Include/AI/Public/Components/Senses.h b/Sdk/Include/AI/Public/Components/Senses.h new file mode 100644 index 0000000..635249e --- /dev/null +++ b/Sdk/Include/AI/Public/Components/Senses.h @@ -0,0 +1,24 @@ +#pragma once + +#include "ECS/Public/Entity.h" + +namespace AI +{ + const float DefaultSightRange = 600.0f; + const float DefaultProjectileHearingRange = 300.0f; + 
const float DefaultMortarHearingRange = DefaultProjectileHearingRange * 2.0f; + + struct Senses + { + float EnemySightRange = DefaultSightRange; + float EnemyProjectileSightRange = DefaultSightRange; + float EnemyProjectileHearingRange = DefaultProjectileHearingRange; + float EnemyMortarHearingRange = DefaultMortarHearingRange; + + bool TrackEnemies = true; + bool TrackEnemyProjectiles = true; + + std::vector> KnownEnemies; + std::vector> KnownEnemyProjectiles; + }; +} diff --git a/Sdk/Include/AI/Public/Core.h b/Sdk/Include/AI/Public/Core.h new file mode 100644 index 0000000..cc4292d --- /dev/null +++ b/Sdk/Include/AI/Public/Core.h @@ -0,0 +1,27 @@ +#pragma once + +// Common components +#include "Command.h" +#include "Goal.h" +#include "Components/BehaviorProcessor.h" +#include "Components/Loadout.h" +#include "Components/MoveEnactor.h" +#include "Components/PhysicsView.h" +#include "Components/Senses.h" + +// Behaviors +#include "Behaviors/BehaviorBase.h" +#include "Behaviors/CombatBehavior.h" +#include "Behaviors/DodgeBehavior.h" +#include "Behaviors/GetEquipmentBehavior.h" +#include "Behaviors/PatrolBehavior.h" + +// Core utilities +#include "JetpackUtil.h" +#include "InputUtil.h" +#include "MoveUtil.h" + +// System +#include "BehaviorProcessorSystem.h" +#include "MoveEnactorSystem.h" +#include "SensesSystem.h" \ No newline at end of file diff --git a/Sdk/Include/AI/Public/Goal.h b/Sdk/Include/AI/Public/Goal.h new file mode 100644 index 0000000..3d77f28 --- /dev/null +++ b/Sdk/Include/AI/Public/Goal.h @@ -0,0 +1,53 @@ +#pragma once + +namespace AI +{ + enum class GoalFlag : unsigned int + { + None = 0x00000000, + Intermediate = 0x00000004, // This goal is an intermediate goal towards the final goal + Is3D = 0x00000008, // Goal complete when within specified radius of X, Y, and Z location + DirOnly = 0x00000020, // We care only about the direction part of the goal + GridOK = 0x00000040, // Goal is complete when in same grid (usually intermediate) + DirMove = 
0x00000400, // Move in direction for specified amount of time; no specific target location + ClimbOK = 0x00000800, // Goal is complete if can climb on this grid + }; + + enum class MoveGoalSpeed + { + Normal = 0, + Fast = 1 + }; + + struct Goal + { + float timer{}; + GoalFlag gflags{}; + }; + + const float DefaultGoalCompleteDistance = 2.0f; + + struct MoveGoalParams + { + float GoalCompleteDistance = DefaultGoalCompleteDistance; + MoveGoalSpeed Speed = MoveGoalSpeed::Normal; + }; + + struct MoveGoal : Goal + { + MoveGoal() + { + } + + MoveGoal(MoveGoalParams& params) + : Params(params) + { + } + + P3D loc{}; + P3D sloc{}; + float dir{}; + + MoveGoalParams Params; + }; +} \ No newline at end of file diff --git a/Sdk/Include/AI/Public/InputSystem.h b/Sdk/Include/AI/Public/InputSystem.h new file mode 100644 index 0000000..5520020 --- /dev/null +++ b/Sdk/Include/AI/Public/InputSystem.h @@ -0,0 +1,15 @@ +#pragma once + +#include "ECS/Public/ISystem.h" + +namespace AI +{ + class InputSystem : public ECS::ISystem + { + virtual const char* GetName() override; + virtual int GetPriorityOrder() override; + virtual void Startup() override; + virtual void Update(float delta) override; + virtual void Shutdown() override; + }; +} \ No newline at end of file diff --git a/Sdk/Include/AI/Public/InputUtil.h b/Sdk/Include/AI/Public/InputUtil.h new file mode 100644 index 0000000..eaf6229 --- /dev/null +++ b/Sdk/Include/AI/Public/InputUtil.h @@ -0,0 +1,14 @@ +#pragma once + +#include "AI/Public/Command.h" +#include "ECS/Public/Entity.h" + +namespace AI +{ + class InputUtil + { + public: + static void AddCommand(ECS::Entity* entity, std::unique_ptr command); + static void StopMovement(ECS::Entity* entity); + }; +} \ No newline at end of file diff --git a/Sdk/Include/AI/Public/JetpackUtil.h b/Sdk/Include/AI/Public/JetpackUtil.h new file mode 100644 index 0000000..c6726fe --- /dev/null +++ b/Sdk/Include/AI/Public/JetpackUtil.h @@ -0,0 +1,12 @@ +#pragma once + +#include 
"GameObject/Public/Components/Jetpack.h" + +namespace AI +{ + class JetpackUtil + { + public: + static SBYTE CalcThrustForHeight(GameObj::Jetpack& component, Object* obj, float minHeight, float height); + }; +} diff --git a/Sdk/Include/AI/Public/MoveEnactorSystem.h b/Sdk/Include/AI/Public/MoveEnactorSystem.h new file mode 100644 index 0000000..773cb4f --- /dev/null +++ b/Sdk/Include/AI/Public/MoveEnactorSystem.h @@ -0,0 +1,15 @@ +#pragma once + +#include "ECS/Public/ISystem.h" + +namespace AI +{ + class MoveEnactorSystem : public ECS::ISystem + { + virtual const char* GetName() override; + virtual int GetPriorityOrder() override; + virtual void Startup() override; + virtual void Update(float delta) override; + virtual void Shutdown() override; + }; +} \ No newline at end of file diff --git a/Sdk/Include/AI/Public/MoveUtil.h b/Sdk/Include/AI/Public/MoveUtil.h new file mode 100644 index 0000000..cb2a386 --- /dev/null +++ b/Sdk/Include/AI/Public/MoveUtil.h @@ -0,0 +1,21 @@ +#pragma once + +#include "AI/Public/Goal.h" +#include "Components/MoveEnactor.h" +#include "Components/MoveEnactor.h" +#include "AI/Public/Command.h" +#include "Components/PhysicsView.h" + +namespace AI +{ + class MoveUtil + { + public: + static void ClearAllGoals(MoveEnactor& moveEnactor); + static void RemoveActiveGoal(MoveEnactor& moveEnactor); + static SBYTE GetTurnForAngle(float currentFacing, float newFacing); + static bool IsActiveGoalComplete(MoveEnactor& moveEnactor); + static bool ChaseAngle(ECS::Entity* entity, MoveEnactor& moveEnactor, PhysicsView& physicsView); + static bool ChaseLocation(ECS::Entity* entity, MoveEnactor& moveEnactor, PhysicsView& physicsView); + }; +} \ No newline at end of file diff --git a/Sdk/Include/AI/Public/SensesSystem.h b/Sdk/Include/AI/Public/SensesSystem.h new file mode 100644 index 0000000..b890c4c --- /dev/null +++ b/Sdk/Include/AI/Public/SensesSystem.h @@ -0,0 +1,16 @@ +#pragma once + +#include "ECS/Public/ISystem.h" + +namespace AI +{ + class 
SensesSystem : public ECS::ISystem + { + // Inherited via ISystem + virtual const char* GetName() override; + virtual int GetPriorityOrder() override; + virtual void Startup() override; + virtual void Update(float delta) override; + virtual void Shutdown() override; + }; +} \ No newline at end of file diff --git a/Sdk/Include/ComponentBase.h b/Sdk/Include/ComponentBase.h deleted file mode 100644 index 896fc0f..0000000 --- a/Sdk/Include/ComponentBase.h +++ /dev/null @@ -1,119 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include -#include - -#pragma warning (disable:26487) // Disable LIFETIMES_FUNCTION_POSTCONDITION_VIOLATION: not COM-aware - -template using NthTypeOf = -typename std::tuple_element>::type; - -///

-// Base class for game components, automatically implements IUnknown. -// On construction, registers self with the component container (which is guaranteed to exist -// for the lifetime of the game process, hence the naked pointer stored in this type). -// On final release, the component is removed from the container. -// -// Note: following C#'s example here, we don't allow multiple inheritance. -// The first interface is the "primary" interface that we inherit from. The additional interfaces -// expanded from the parameter pack are registered with the container, but not inherited. -/// -/// The interfaces that this component can be located by. -template -struct ComponentBase : public NthTypeOf<0, TInterfaces...> -{ - static constexpr std::array ImplementedIids = { { __uuidof(TInterfaces)... } }; - ComponentBase(IComponentContainer* container) - { - m_pContainer = container; - - AddInterfaces(); - } - - virtual ~ComponentBase() - { - RemoveInterfaces(); - } - - HRESULT STDMETHODCALLTYPE QueryInterface( - const GUID& riid, - _COM_Outptr_ void __RPC_FAR* __RPC_FAR* ppvObject) override - { - if (!ppvObject) - { - return E_INVALIDARG; - } - - *ppvObject = nullptr; - - if (riid == IID_IUnknown - || riid == IID_IComponent - || IsExplicitlyImplementedInterface(riid)) - { - *ppvObject = this; - return S_OK; - } - - return E_NOINTERFACE; - } - - unsigned long STDMETHODCALLTYPE AddRef() override - { - InterlockedIncrement(&m_refs); - return m_refs; - } - - unsigned long STDMETHODCALLTYPE Release() override - { - const unsigned long refCount = InterlockedDecrement(&m_refs); - assert(refCount >= 0); - if (refCount == 0) - { - delete this; - } - - return refCount; - } - -protected: - IComponentContainer* m_pContainer = nullptr; - -private: - void AddInterfaces() - { - for (const auto& implementedIid : ImplementedIids) - { - m_pContainer->Add(implementedIid, this); - } - } - - bool IsExplicitlyImplementedInterface(const GUID& iid) noexcept - { - for (const auto& implementedIid 
: ImplementedIids) - { - if (IsEqualGUID(implementedIid, iid)) - { - return true; - } - } - - return false; - } - - void RemoveInterfaces() noexcept - { - for (const auto& implementedIid : ImplementedIids) - { - m_pContainer->Remove(implementedIid); - } - } - - unsigned long m_refs = 0; -}; - -#pragma warning (default:26487) \ No newline at end of file diff --git a/Sdk/Include/Core/Public/BBox.h b/Sdk/Include/Core/Public/BBox.h new file mode 100644 index 0000000..22119b2 --- /dev/null +++ b/Sdk/Include/Core/Public/BBox.h @@ -0,0 +1,8 @@ +#pragma once + +// Bounding box definition +struct BBOX +{ + P3D min; + P3D max; +}; \ No newline at end of file diff --git a/Sdk/Include/ConfigConstants.h b/Sdk/Include/Core/Public/ConfigConstants.h similarity index 100% rename from Sdk/Include/ConfigConstants.h rename to Sdk/Include/Core/Public/ConfigConstants.h diff --git a/Sdk/Include/ConfigEvents.h b/Sdk/Include/Core/Public/ConfigEvents.h similarity index 100% rename from Sdk/Include/ConfigEvents.h rename to Sdk/Include/Core/Public/ConfigEvents.h diff --git a/Sdk/Include/Core/Public/Core.h b/Sdk/Include/Core/Public/Core.h new file mode 100644 index 0000000..cd9a84f --- /dev/null +++ b/Sdk/Include/Core/Public/Core.h @@ -0,0 +1,19 @@ +#pragma once + +// Core data structures +#include "DataTypes.h" +#include "BBox.h" +#include "MinMax.h" +#include "Plane.h" +#include "Quaternion.h" + +// Services +#include "IGameService.h" +#include "IGameServiceProvider.h" + +// Events +#include "IEventSource.h" + +// Config +#include "ConfigConstants.h" +#include "ConfigEvents.h" \ No newline at end of file diff --git a/Sdk/Include/Core/Public/DataTypes.h b/Sdk/Include/Core/Public/DataTypes.h new file mode 100644 index 0000000..b97299d --- /dev/null +++ b/Sdk/Include/Core/Public/DataTypes.h @@ -0,0 +1,284 @@ +#pragma once + +#include +#include +#include +#include +#include + +////////////////////////////////////////////////////////////////////////////////////// +// Basic game data types and 
macros + +#define ADJUSTABLE 1 + +#define TRUE 1 +#define FALSE 0 + +typedef unsigned int uint; +typedef unsigned char UBYTE; +typedef signed char SBYTE; +typedef unsigned short UWORD; +typedef int BOOL; +typedef unsigned long ULONG; +typedef unsigned long DWORD; +typedef std::int64_t int64; +typedef std::uint64_t uint64; +#ifdef UNICODE +typedef std::wstring tstring; +typedef std::wstring_view tstring_view; +#else +typedef std::string tstring; +typedef std::string_view tstring_view; +#endif + +#define countof(array) (sizeof((array)) / sizeof((array)[0])) + +#define FlagSet(b, f) ((b) |= (f)) +#define FlagClear(b, f) ((b) &= ~(f)) +#define FlagIsClear(b, f) (!FlagIsSet(b, f)) +#define FlagIsClearE(b, f) (!FlagIsSetE(b, f)) +#define FlagFlip(b, f) ((b) ^= (f)) +#define FlagIsSet(b, f) (((b) & (f)) != 0) +#define FlagIsSetE(b, f) (((b) & (f)) == (f)) /* true only when every bit of f is set in b */ + +#define PI (3.14159265358979f) + +////////////////////////////////////////////////////////////////////////////////////// +// Vectors + +struct P4D +{ + P4D() noexcept { } + explicit P4D(float x, float y, float z, float w) noexcept : x(x), y(y), z(z), w(w) { } + + float x, y, z, w; +}; + +struct P3D +{ + float x; + float y; + float z; + + inline const P3D& operator -= (const P3D& other) + { + x -= other.x; + y -= other.y; + z -= other.z; + + return *this; + } + + inline const P3D& operator += (const P3D& other) + { + x += other.x; + y += other.y; + z += other.z; + + return *this; + } + + inline const P3D& operator *= (float scale) + { + x *= scale; + y *= scale; + z *= scale; + + return *this; + } + + inline P3D operator * (float scale) const + { + return Scale(scale); + } + + inline P3D operator - (const P3D& other) const + { + P3D result; + result.x = x - other.x; + result.y = y - other.y; + result.z = z - other.z; + + return result; + } + + inline P3D operator + (const P3D& other) const + { + P3D result; + result.x = x + other.x; + result.y = y + other.y; + result.z = z + other.z; + + return result; + } + + inline P3D 
Cross(const P3D& other) const + { + P3D result; + + result.x = y * other.z - z * other.y; + result.y = z * other.x - x * other.z; + result.z = x * other.y - y * other.x; + + return result; + } + + inline float Dot(const P3D& v2) const + { + return x * v2.x + y * v2.y + z * v2.z; + } + + inline float Length() const + { + return (float)(sqrt(x * x + y * y + z * z)); + } + + inline P3D Scale(float scale) const + { + P3D scaled; + + scaled.x = x * scale; + scaled.y = y * scale; + scaled.z = z * scale; + + return scaled; + } + + inline P3D Normalize() const + { + const float length = Length(); + P3D normalized = *this; + + float factor = 0.0f; + if (length) + { + factor = 1 / length; + } + else + { + factor = 1.0f; + } + + normalized.x *= factor; + normalized.y *= factor; + normalized.z *= factor; + + return normalized; + } + + inline bool IsNaN() const + { + return (_isnan(x) || _isnan(y) || _isnan(z)); + } + + inline bool Finite() const + { + return (_finite(x) && _finite(y) && _finite(z)); + } + + inline float Distance2D(const P3D& other) const + { + return ((float)sqrt((other.x - this->x) * (other.x - this->x) + (other.y - this->y) * (other.y - this->y))); + } + + inline float DistanceSquared2D(const P3D& other) const + { + return ((float)((other.x - this->x) * (other.x - this->x) + (other.y - this->y) * (other.y - this->y))); + } + + inline float Distance3D(const P3D& other) const + { + return ((float)sqrt((other.x - this->x) * (other.x - this->x) + (other.y - this->y) * (other.y - this->y) + (other.z - this->z) * (other.z - this->z))); + } + + inline float DistanceSquared3D(const P3D& other) const + { + return ((float)((other.x - this->x) * (other.x - this->x) + (other.y - this->y) * (other.y - this->y) + (other.z - this->z) * (other.z - this->z))); + } + + inline bool Empty() const + { + return this->x == 0.0f && this->y == 0.0f && this->z == 0.0f; + } + + inline P4D AsP4D() const + { + P4D temp; + temp.x = this->x; + temp.y = this->y; + temp.z = this->z; + 
temp.w = 0.0f; + + return temp; + } +}; + +#pragma pack (push, 1) +// Optimized 3D vector for network packets. +struct NetP3D +{ + short x, y, z; +}; +#pragma pack (pop) + +////////////////////////////////////////////////////////////////////////////////////// + +// Matrix + +struct M4X4 +{ + union + { + struct + { + float _11, _12, _13, _14; + float _21, _22, _23, _24; + float _31, _32, _33, _34; + float _41, _42, _43, _44; + }; + float m[4][4]; + }; + + inline M4X4 operator*(const M4X4& pm2) const /* matrix product (*this) * pm2; neither operand is modified */ + { + M4X4 out; + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 4; j++) + { + out.m[i][j] = m[i][0] * pm2.m[0][j] + m[i][1] * pm2.m[1][j] + m[i][2] * pm2.m[2][j] + m[i][3] * pm2.m[3][j]; + } + } + + return out; + } + + bool operator==(const M4X4& other) const + { + return memcmp(this, &other, sizeof(*this)) == 0; + } + + bool operator!=(const M4X4& other) const + { + return !(*this == other); + } +}; + +struct RGBFloat +{ + float r{}; + float g{}; + float b{}; +}; + +struct VertRGB +{ + unsigned char r; + unsigned char g; + unsigned char b; +}; + +struct UV +{ + float u, v; +}; \ No newline at end of file diff --git a/Sdk/Include/Core/Public/GLUtil.h b/Sdk/Include/Core/Public/GLUtil.h new file mode 100644 index 0000000..60a2f3a --- /dev/null +++ b/Sdk/Include/Core/Public/GLUtil.h @@ -0,0 +1,22 @@ +#pragma once + +// Return a new point in the coordinate system typically used by OpenGL applications. +inline P3D GameToGLPoint(const P3D& gamePoint) +{ + P3D newPoint; + newPoint.x = gamePoint.x * -1.0f; + newPoint.y = gamePoint.z; + newPoint.z = gamePoint.y; + return newPoint; +} + +// Return a new point in the game's coordinate system from an OpenGL point. 
+inline static P3D GLToGamePoint(const P3D& glPoint) +{ + P3D newPoint; + newPoint.x = glPoint.x * -1.0f; + newPoint.z = glPoint.y; + newPoint.y = glPoint.z; + + return newPoint; +} diff --git a/Sdk/Include/Core/Public/IConfig.h b/Sdk/Include/Core/Public/IConfig.h new file mode 100644 index 0000000..a3a5767 --- /dev/null +++ b/Sdk/Include/Core/Public/IConfig.h @@ -0,0 +1,36 @@ +#pragma once + +#include "Core/Public/Core.h" + +DEFINE_SERVICE_MULTI("{599E6624-694C-41B6-B354-62EEA1132041}", IConfig, IEventSource) +{ + virtual ~IConfig() = default; + + virtual void Read() = 0; + + virtual void Save() = 0; + + virtual float GetFloat(const tstring_view& section, const tstring_view& setting) const = 0; + + virtual std::vector GetFloatArray(const tstring_view& section, const tstring_view& setting) const = 0; + + virtual int GetInteger(const tstring_view& section, const tstring_view& setting) const = 0; + + virtual std::vector GetIntegerArray(const tstring_view& section, const tstring_view& setting) const = 0; + + virtual tstring GetString(const tstring_view& section, const tstring_view& setting) const = 0; + + virtual std::vector GetStringArray(const tstring_view& section, const tstring_view& setting) const = 0; + + virtual void SetFloat(const tstring_view& section, const tstring_view& setting, float value) = 0; + + virtual void SetFloatArray(const tstring_view& section, const tstring_view& setting, std::vector&& values) = 0; + + virtual void SetInteger(const tstring_view& section, const tstring_view& setting, int value) = 0; + + virtual void SetIntegerArray(const tstring_view& section, const tstring_view& setting, std::vector&& values) = 0; + + virtual void SetString(const tstring_view& section, const tstring_view& setting, tstring_view value) = 0; + + virtual void SetStringArray(const tstring_view& section, const tstring_view& setting, std::vector&& values) = 0; +}; \ No newline at end of file diff --git a/Sdk/Include/Core/Public/IEventSource.h 
b/Sdk/Include/Core/Public/IEventSource.h new file mode 100644 index 0000000..a30ea7c --- /dev/null +++ b/Sdk/Include/Core/Public/IEventSource.h @@ -0,0 +1,13 @@ +#pragma once + +#include +#include + +template +struct IEventSource +{ + virtual ~IEventSource() = default; + + virtual GUID Listen(TEventType event, std::function function) noexcept = 0; + virtual void Unlisten(TEventType event, GUID uuid) noexcept = 0; +}; \ No newline at end of file diff --git a/Sdk/Include/Core/Public/IExceptionHandler.h b/Sdk/Include/Core/Public/IExceptionHandler.h new file mode 100644 index 0000000..a701a82 --- /dev/null +++ b/Sdk/Include/Core/Public/IExceptionHandler.h @@ -0,0 +1,14 @@ +#pragma once + +#include "IGameService.h" + +DEFINE_SERVICE("{9C4C8F9C-D4C1-4749-A073-D710548D3154}", IExceptionHandler) +{ + virtual ~IExceptionHandler() = default; + + virtual void AttachToCurrentThread() = 0; + virtual void DetachFromCurrentThread() = 0; + virtual void Initialize() = 0; + virtual void PostLoad() = 0; + virtual void Shutdown() = 0; +}; \ No newline at end of file diff --git a/Sdk/Include/Core/Public/IGameService.h b/Sdk/Include/Core/Public/IGameService.h new file mode 100644 index 0000000..50039b7 --- /dev/null +++ b/Sdk/Include/Core/Public/IGameService.h @@ -0,0 +1,24 @@ +#pragma once + +#ifndef DECLSPEC_UUID +#if (_MSC_VER >= 1100) && defined(__cplusplus) +#define DECLSPEC_UUID(x) __declspec(uuid(x)) +#else +#define DECLSPEC_UUID(x) +#endif +#endif + +#define DEFINE_SERVICE(iid, iface) \ +struct DECLSPEC_UUID(iid) iface; \ +struct iface : IGameService \ + +#define DEFINE_SERVICE_MULTI(iid, iface, ...) \ +struct DECLSPEC_UUID(iid) iface; \ +struct iface : IGameService, __VA_ARGS__ \ + +/// +/// Base interface for a game service. 
+/// +struct IGameService +{ +}; diff --git a/Sdk/Include/Core/Public/IGameServiceProvider.h b/Sdk/Include/Core/Public/IGameServiceProvider.h new file mode 100644 index 0000000..8acd60a --- /dev/null +++ b/Sdk/Include/Core/Public/IGameServiceProvider.h @@ -0,0 +1,63 @@ +#pragma once + +#include +#include + +#include "IGameService.h" + +template +struct TypedGet +{ + template + std::shared_ptr Get() + { + std::shared_ptr pComponent = ((T*)this)->Get(__uuidof(TGet)); + if (pComponent) + { + return std::static_pointer_cast(pComponent); + } + + return nullptr; + } +}; + +template +struct TypedAdd +{ + template + void Add(std::shared_ptr component) + { + static constexpr std::array InterfaceUuids = { { __uuidof(TInterfaces)... } }; + + for (const auto& uuid : InterfaceUuids) + { + ((T*)this)->Add(uuid, component); + } + } +}; + +/// +/// Container for game components. +/// Facilitates resource acquisition across module boundaries, and interop with .NET code. +/// +struct IGameServiceProvider : public TypedGet, public TypedAdd +{ + virtual ~IGameServiceProvider() { } + + virtual void Add(const IID& iid, std::shared_ptr component) = 0; + virtual std::shared_ptr Get(const IID& iid) noexcept = 0; + virtual void Remove(const IID& iid) noexcept = 0; + virtual void ReleaseAll() = 0; + + template + void Add(std::shared_ptr component) + { + return TypedAdd::Add(component); + } + + template + std::shared_ptr Get() + { + return TypedGet::Get(); + } +}; \ No newline at end of file diff --git a/Sdk/Include/ITextLookupService.h b/Sdk/Include/Core/Public/ITextLookupService.h similarity index 79% rename from Sdk/Include/ITextLookupService.h rename to Sdk/Include/Core/Public/ITextLookupService.h index 12f05a4..47b40b2 100644 --- a/Sdk/Include/ITextLookupService.h +++ b/Sdk/Include/Core/Public/ITextLookupService.h @@ -1,7 +1,7 @@ #pragma once -#include -#include +#include "DataTypes.h" +#include "IGameService.h" #include enum class NetPlayerState; @@ -13,7 +13,7 @@ inline const GUID 
IID_ITextLookupService = { 0x770debd3, 0x165d, 0x4340, 0x82, 0 /// /// Service providing localization of text placeholders and friendly-name mappings of common enums. /// -struct ITextLookupService : public IComponent +DEFINE_SERVICE("{770DEBD3-165D-4340-829D-5262F473FBE3}", ITextLookupService) { virtual std::string STDMETHODCALLTYPE GetLocalized(tstring_view lookup) = 0; @@ -22,6 +22,4 @@ struct ITextLookupService : public IComponent virtual std::string STDMETHODCALLTYPE GetGameTeamName(GameTeam team) = 0; virtual std::string STDMETHODCALLTYPE GetPlayerTeamName(int teamIndex) = 0; -}; - -struct DECLSPEC_UUID("{770DEBD3-165D-4340-829D-5262F473FBE3}") ITextLookupService; +}; \ No newline at end of file diff --git a/Sdk/Include/Core/Public/MinMax.h b/Sdk/Include/Core/Public/MinMax.h new file mode 100644 index 0000000..685de87 --- /dev/null +++ b/Sdk/Include/Core/Public/MinMax.h @@ -0,0 +1,11 @@ +#pragma once + +struct MinMaxInt +{ + int min, max; +}; + +struct MinMaxFloat +{ + float min, max; +}; \ No newline at end of file diff --git a/Sdk/Include/Core/Public/Plane.h b/Sdk/Include/Core/Public/Plane.h new file mode 100644 index 0000000..a19e7ce --- /dev/null +++ b/Sdk/Include/Core/Public/Plane.h @@ -0,0 +1,6 @@ +#pragma once + +struct PLANE3D +{ + float a, b, c, d; +}; // plane equation diff --git a/Sdk/Include/Core/Public/Quaternion.h b/Sdk/Include/Core/Public/Quaternion.h new file mode 100644 index 0000000..e85fb6a --- /dev/null +++ b/Sdk/Include/Core/Public/Quaternion.h @@ -0,0 +1,6 @@ +#pragma once + +struct QUAT +{ + float x, y, z, w; +}; diff --git a/Sdk/Include/Version.h b/Sdk/Include/Core/Public/Version.h similarity index 100% rename from Sdk/Include/Version.h rename to Sdk/Include/Core/Public/Version.h diff --git a/Sdk/Include/Core/Public/VersionUtil.h b/Sdk/Include/Core/Public/VersionUtil.h new file mode 100644 index 0000000..a881ffe --- /dev/null +++ b/Sdk/Include/Core/Public/VersionUtil.h @@ -0,0 +1,8 @@ +#pragma once + +#include "Version.h" + +const 
tstring GetAppName(); +const Version& GetAppVersion(); +int VersionToInt(const Version& version); +void GameVersionRender(); \ No newline at end of file diff --git a/Sdk/Include/DataTypes.h b/Sdk/Include/DataTypes.h deleted file mode 100644 index 638b576..0000000 --- a/Sdk/Include/DataTypes.h +++ /dev/null @@ -1,165 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -////////////////////////////////////////////////////////////////////////////////////// -// Basic game data types and macros - -typedef unsigned int uint; -typedef unsigned char UBYTE; -typedef signed char SBYTE; -typedef unsigned short UWORD; -typedef int BOOL; -typedef unsigned long ULONG; -typedef unsigned long DWORD; -typedef std::int64_t int64; -typedef std::uint64_t uint64; -#ifdef UNICODE -typedef std::wstring tstring; -typedef std::wstring_view tstring_view; -#else -typedef std::string tstring; -typedef std::string_view tstring_view; -#endif - -#define countof(array) (sizeof((array)) / sizeof((array)[0])) - -#define FlagSet(b, f) ((b) |= (f)) -#define FlagClear(b, f) ((b) &= ~(f)) -#define FlagIsClear(b, f) (!FlagIsSet(b, f)) -#define FlagFlip(b, f) ((b) ^= (f)) -#define FlagIsSet(b, f) (((b) & (f)) != 0) - -////////////////////////////////////////////////////////////////////////////////////// -// Vectors - -struct P3D -{ - float x; - float y; - float z; - - inline const P3D& operator -= (const P3D& rhs) - { - x -= rhs.x; - y -= rhs.y; - z -= rhs.z; - - return *this; - } - - inline const P3D& operator += (const P3D& rhs) - { - x += rhs.x; - y += rhs.y; - z += rhs.z; - - return *this; - } - - inline P3D operator - (const P3D& rhs) - { - P3D result; - - result.x = x - rhs.x; - result.y = y - rhs.y; - result.z = z - rhs.z; - - return result; - } - - inline P3D Cross(const P3D& v2) - { - P3D result; - - result.x = y * v2.z - z * v2.y; - result.y = z * v2.x - x * v2.z; - result.z = x * v2.y - y * v2.x; - - return result; - } - - inline float Dot(const P3D& v2) - { - return x 
* v2.x + y * v2.y + z * v2.z; - } - - inline float Length() - { - return (float)(sqrt(x * x + y * y + z * z)); - } - - inline P3D Scale(float scale) - { - x *= scale; - y *= scale; - z *= scale; - - return *this; - } - - inline P3D Normalize() - { - const float length = Length(); - P3D normalized = *this; - - float factor = 0.0f; - if (length) - { - factor = 1 / length; - } - else - { - factor = 1.0f; - } - - normalized.x *= factor; - normalized.y *= factor; - normalized.z *= factor; - - return normalized; - } - - bool IsNaN() const - { - return (_isnan(x) || _isnan(y) || _isnan(z)); - } - - bool Finite() const - { - return (_finite(x) && _finite(y) && _finite(z)); - } -}; - -#pragma pack (push, 1) -// Optimized 3D vector for network packets. -struct NetP3D -{ - short x, y, z; -}; -#pragma pack (pop) - -////////////////////////////////////////////////////////////////////////////////////// - -struct RGBFloat -{ - float r{}; - float g{}; - float b{}; -}; - -struct VertRGB -{ - unsigned char r; - unsigned char g; - unsigned char b; -}; - -struct UV -{ - float u, v; -}; \ No newline at end of file diff --git a/Sdk/Include/ECS/Public/Component.h b/Sdk/Include/ECS/Public/Component.h new file mode 100644 index 0000000..d160915 --- /dev/null +++ b/Sdk/Include/ECS/Public/Component.h @@ -0,0 +1,57 @@ +#pragma once + +namespace ECS +{ +#ifdef ENABLE_DEBUG + template + void EntityComponentEditor(TComponent& component) + { + static_assert(false, "No specialization has been defined for the component."); + } + + template + inline T EntityValueParser(const std::string& inputValue) + { + static_assert(false, "No parser is defined for the property type."); + } + + template<> + inline float EntityValueParser(const std::string& inputValue) + { + return std::stof(inputValue); + } + + template<> + inline int EntityValueParser(const std::string& inputValue) + { + return std::stoi(inputValue); + } + + template<> + inline SBYTE EntityValueParser(const std::string& inputValue) + { + return 
std::stoi(inputValue); + } + + template + inline void EntityPropertyEditor(const char* propertyName, T& value) + { + std::string valueAsString = fmt::format("{0}", value); + if (ImGui::InputText(propertyName, &valueAsString)) + { + if (!valueAsString.empty()) + { + try + { + value = EntityValueParser(valueAsString); + } + catch (const std::exception&) + { + // Ignore + } + } + } + } + +#endif +} \ No newline at end of file diff --git a/Sdk/Include/ECS/Public/Core.h b/Sdk/Include/ECS/Public/Core.h new file mode 100644 index 0000000..3e2c8a4 --- /dev/null +++ b/Sdk/Include/ECS/Public/Core.h @@ -0,0 +1,10 @@ +#pragma once + +// Common components +#include "Component.h" +#include "Entity.h" + +// Systems +#include "ISystem.h" +#include "SystemManager.h" +#include "SystemPriority.h" \ No newline at end of file diff --git a/Sdk/Include/ECS/Public/Entity.h b/Sdk/Include/ECS/Public/Entity.h new file mode 100644 index 0000000..366081a --- /dev/null +++ b/Sdk/Include/ECS/Public/Entity.h @@ -0,0 +1,77 @@ +#pragma once + +#include "Core/Public/Core.h" +#include + +namespace ECS +{ + class Entity + { + public: + Entity(entt::entity entity, entt::registry& registry) + : m_registry(registry) + { + m_entity = entity; + } + + Entity(entt::registry& registry) + : m_registry(registry) + { + m_entity = m_registry.create(); + } + + void Destroy() + { + m_registry.destroy(m_entity); + } + + template + T& AddComponent(Args&&... args) + { + assert(!HasComponent()); + return m_registry.emplace(m_entity, std::forward(args)...); + } + + template + void AddOrReplaceComponent(Args&&... args) + { + m_registry.emplace_or_replace(m_entity, std::forward(args)...); + } + + template + decltype(auto) GetComponent() + { + return m_registry.get(m_entity); + } + + template + bool HasComponent() + { + return m_registry.has(m_entity); + } + + template + T& RemoveComponent(Args&&... 
args) + { + assert(HasComponent()); + return m_registry.remove(m_entity, std::forward(args)...); + } + + template + std::size_t RemoveComponentIfExists(Args&&... args) + { + return m_registry.remove_if_exists(m_entity, std::forward(args)...); + } + + template + void PatchComponent(Func &&... func) + { + assert(HasComponent()); + m_registry.patch(m_entity, std::forward(func)...); + } + + private: + entt::entity m_entity; + entt::registry& m_registry; + }; +} diff --git a/Sdk/Include/ECS/Public/EntityRegistry.h b/Sdk/Include/ECS/Public/EntityRegistry.h new file mode 100644 index 0000000..e739c61 --- /dev/null +++ b/Sdk/Include/ECS/Public/EntityRegistry.h @@ -0,0 +1,8 @@ +#pragma once + +#include "entt/entt.hpp" + +void EntityRegistryCreate(); +void EntityRegistryDestroy(); + +extern std::unique_ptr Registry; \ No newline at end of file diff --git a/Sdk/Include/ECS/Public/ISystem.h b/Sdk/Include/ECS/Public/ISystem.h new file mode 100644 index 0000000..a5a434e --- /dev/null +++ b/Sdk/Include/ECS/Public/ISystem.h @@ -0,0 +1,15 @@ +#pragma once + +namespace ECS +{ + struct ISystem + { + virtual ~ISystem() { } + + virtual const char* GetName() = 0; + virtual int GetPriorityOrder() = 0; + virtual void Startup() = 0; + virtual void Update(float delta) = 0; + virtual void Shutdown() = 0; + }; +} \ No newline at end of file diff --git a/Sdk/Include/ECS/Public/SystemManager.h b/Sdk/Include/ECS/Public/SystemManager.h new file mode 100644 index 0000000..e3faf66 --- /dev/null +++ b/Sdk/Include/ECS/Public/SystemManager.h @@ -0,0 +1,23 @@ +#pragma once + +#include "ECS/Public/ISystem.h" + +namespace ECS +{ + class SystemManager + { + public: + template + void AddSystem() + { + m_systems.push_back(std::make_unique()); + } + + void StartSystems(); + void UpdateSystems(float delta); + void ShutdownSystems(); + + private: + std::vector> m_systems; + }; +} \ No newline at end of file diff --git a/Sdk/Include/ECS/Public/SystemPriority.h b/Sdk/Include/ECS/Public/SystemPriority.h new file 
mode 100644 index 0000000..48cf1a9 --- /dev/null +++ b/Sdk/Include/ECS/Public/SystemPriority.h @@ -0,0 +1,13 @@ +#pragma once + +namespace ECS +{ + // TODO: DAG with constraints. This will work for now. + enum class SystemPriority : int + { + Senses = 0, + BehaviorProcessor = 1, + MoveEnactor = 2, + Input = 3, + }; +} \ No newline at end of file diff --git a/Sdk/Include/GameObject/Public/Components/Inventory.h b/Sdk/Include/GameObject/Public/Components/Inventory.h new file mode 100644 index 0000000..f5aeefa --- /dev/null +++ b/Sdk/Include/GameObject/Public/Components/Inventory.h @@ -0,0 +1,57 @@ +#pragma once + +namespace GameObj +{ + const int MeccNumPrimaryWeapons = 4; + const int MeccNumOther = 3; + + const int ReaperNumPrimaryWeapons = 4; + const int ReaperMaxSpells = 5; + const int ReaperNumOther = 2; + + struct InventoryIcon + { + int IconId{}; + int Count{}; + }; + + struct SpellInventoryIcon + { + int IconId{}; + float ManaCost{}; + float Energy{}; + }; + + struct InventoryState + { + InventoryState(int primaryCapacity, int otherCapacity) + { + PrimaryIcons.resize(primaryCapacity); + OtherIcons.resize(otherCapacity); + } + + static InventoryState CreateMeccInventory() + { + return InventoryState(MeccNumPrimaryWeapons, MeccNumOther); + } + + static InventoryState CreateReaperInventory() + { + return InventoryState(ReaperNumPrimaryWeapons, ReaperNumOther); + } + + std::vector PrimaryIcons; + std::vector OtherIcons; + InventoryIcon SpecialIcon; + }; + + struct SpellInventoryState + { + SpellInventoryState(int numSpells = ReaperMaxSpells) + { + SpellIcons.reserve(numSpells); + } + + std::vector SpellIcons; + }; +} \ No newline at end of file diff --git a/Sdk/Include/GameObject/Public/Components/Jetpack.h b/Sdk/Include/GameObject/Public/Components/Jetpack.h new file mode 100644 index 0000000..7734817 --- /dev/null +++ b/Sdk/Include/GameObject/Public/Components/Jetpack.h @@ -0,0 +1,27 @@ +#pragma once + +namespace GameObj +{ + struct ThrustParameters + { + float 
ThrustMaxHeight{}; + float ThrustPower{}; + float ThrustLowPct{}; + float ThrustHighPct{}; + float ThrustPowerMax{}; + float ThrustFwdSlowMax{}; + float ThrustDrowning{}; + }; + + struct Jetpack + { + Jetpack( + const ThrustParameters& thrustParameters) + : ThrustParameters(thrustParameters) + { + } + + ThrustParameters ThrustParameters; + SBYTE ThrustControl{}; // Thrust control state + }; +} \ No newline at end of file diff --git a/Sdk/Include/GameObject/Public/Components/JetpackDebug.h b/Sdk/Include/GameObject/Public/Components/JetpackDebug.h new file mode 100644 index 0000000..d0b5f15 --- /dev/null +++ b/Sdk/Include/GameObject/Public/Components/JetpackDebug.h @@ -0,0 +1,26 @@ +#pragma once + +#include "Jetpack.h" + +namespace ECS +{ +#ifdef ENABLE_DEBUG + template<> + inline void EntityComponentEditor(GameObj::Jetpack& component) + { + if (ImGui::TreeNode("Jetpack")) + { + EntityPropertyEditor("ThrustMaxHeight", component.ThrustParameters.ThrustMaxHeight); + EntityPropertyEditor("ThrustPower", component.ThrustParameters.ThrustPower); + EntityPropertyEditor("ThrustLowPct", component.ThrustParameters.ThrustLowPct); + EntityPropertyEditor("ThrustHighPct", component.ThrustParameters.ThrustHighPct); + EntityPropertyEditor("ThrustPowerMax", component.ThrustParameters.ThrustPowerMax); + EntityPropertyEditor("ThrustFwdSlowMax", component.ThrustParameters.ThrustFwdSlowMax); + EntityPropertyEditor("ThrustDrowning", component.ThrustParameters.ThrustDrowning); + EntityPropertyEditor("ThrustMode", component.ThrustControl); + + ImGui::TreePop(); + } + } +#endif +} \ No newline at end of file diff --git a/Sdk/Include/GameObject/Public/Components/ObjectDead.h b/Sdk/Include/GameObject/Public/Components/ObjectDead.h new file mode 100644 index 0000000..b3de1a8 --- /dev/null +++ b/Sdk/Include/GameObject/Public/Components/ObjectDead.h @@ -0,0 +1,9 @@ +#pragma once + +namespace GameObj +{ + struct ObjectDead + { + + }; +} diff --git
a/Sdk/Include/GameObject/Public/Components/ObjectRef.h b/Sdk/Include/GameObject/Public/Components/ObjectRef.h new file mode 100644 index 0000000..f649788 --- /dev/null +++ b/Sdk/Include/GameObject/Public/Components/ObjectRef.h @@ -0,0 +1,17 @@ +#pragma once + +class Object; + +namespace GameObj +{ + // Interface back to legacy Object pointer from ECS land. + struct ObjectRef + { + ObjectRef(Object* object) : Object(object) { } + + [[nodiscard]] Object* GetObj() const { return this->Object; } + + private: + Object* Object; + }; +} \ No newline at end of file diff --git a/Sdk/Include/GameObject/Public/Core.h b/Sdk/Include/GameObject/Public/Core.h new file mode 100644 index 0000000..69b1725 --- /dev/null +++ b/Sdk/Include/GameObject/Public/Core.h @@ -0,0 +1,11 @@ +#pragma once + +// Components +#include "Components/Inventory.h" +#include "Components/ObjectDead.h" +#include "Components/ObjectRef.h" +#include "Components/Jetpack.h" + +// Core utilities +#include "InventoryUtil.h" +#include "ObjectUtil.h" \ No newline at end of file diff --git a/Sdk/Include/GameObject/Public/InventoryUtil.h b/Sdk/Include/GameObject/Public/InventoryUtil.h new file mode 100644 index 0000000..52a692b --- /dev/null +++ b/Sdk/Include/GameObject/Public/InventoryUtil.h @@ -0,0 +1,13 @@ +#pragma once + +#include "ECS/Public/Entity.h" + +namespace GameObj +{ + class InventoryUtil + { + public: + static void ClearInventory(ECS::Entity* entity); + static void SetPrimaryWeapon(ECS::Entity* entity, IconId icon, int slot, int ammo); + }; +} diff --git a/Sdk/Include/GameObject/Public/ObjectUtil.h b/Sdk/Include/GameObject/Public/ObjectUtil.h new file mode 100644 index 0000000..b096de7 --- /dev/null +++ b/Sdk/Include/GameObject/Public/ObjectUtil.h @@ -0,0 +1,11 @@ +#pragma once + +namespace GameObj +{ + class ObjectUtil + { + public: + static void NotifyDead(ECS::Entity* entity); + static void NotifyRespawned(ECS::Entity* entity); + }; +} \ No newline at end of file diff --git a/Sdk/Include/IComponent.h 
b/Sdk/Include/IComponent.h deleted file mode 100644 index bde23d8..0000000 --- a/Sdk/Include/IComponent.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include - -// {779CF758-3E3F-4FEE-9513-60106522686A} -inline const GUID IID_IComponent = { 0x779cf758, 0x3e3f, 0x4fee, 0x95, 0x13, 0x60, 0x10, 0x65, 0x22, 0x68, 0x6a }; - -/// -/// Base interface for a game COM component. -/// -struct IComponent : public IUnknown -{ -}; - -struct DECLSPEC_UUID("{779CF758-3E3F-4FEE-9513-60106522686A}") IComponent; \ No newline at end of file diff --git a/Sdk/Include/IComponentContainer.h b/Sdk/Include/IComponentContainer.h deleted file mode 100644 index 7e616b0..0000000 --- a/Sdk/Include/IComponentContainer.h +++ /dev/null @@ -1,61 +0,0 @@ -#pragma once - -#include -#include - -template -using ComPtr = Microsoft::WRL::ComPtr; - -// {C942AA9B-C576-4D3F-A54F-B135B500E611} -inline const GUID IID_IComponentContainer = { 0xc942aa9b, 0xc576, 0x4d3f, 0xa5, 0x4f, 0xb1, 0x35, 0xb5, 0x0, 0xe6, 0x11 }; - -template -struct TypedGet -{ - template - ComPtr Get() - { - ComPtr temp; - ComPtr pComponent = ((T*)this)->Get(__uuidof(TGet)); - if (pComponent) - { - HRESULT hr = pComponent.As(&temp); - if (FAILED(hr)) - { - if (hr == E_NOINTERFACE) - { - throw std::invalid_argument("The interface is not supported."); - } - - throw std::invalid_argument(fmt::format("Unknown exception {0:x} querying interface.", hr)); - } - - pComponent.Detach(); - return temp; - } - - return ComPtr(); // Null - } -}; - -/// -/// Container for game components. -/// Facilitates resource acquisition across module boundaries, and interop with .NET code. 
-/// -struct IComponentContainer : public TypedGet -{ - virtual ~IComponentContainer() { } - - virtual void Add(const IID& iid, IComponent* component) = 0; - virtual ComPtr Get(const IID& iid) noexcept = 0; - virtual void Remove(const IID& iid) noexcept = 0; - virtual void ReleaseAll() = 0; - - template - ComPtr Get() - { - return TypedGet::Get(); - } -}; - -struct DECLSPEC_UUID("{C942AA9B-C576-4D3F-A54F-B135B500E611}") IComponentContainer; \ No newline at end of file diff --git a/Sdk/Include/IConfig.h b/Sdk/Include/IConfig.h deleted file mode 100644 index e5d05e6..0000000 --- a/Sdk/Include/IConfig.h +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -// {599E6624-694C-41B6-B354-62EEA1132041} -inline const GUID IID_IConfig = { 0x599e6624, 0x694c, 0x41b6, 0xb3, 0x54, 0x62, 0xee, 0xa1, 0x13, 0x20, 0x41 }; - -struct IConfig : IComponent, IEventSource -{ - virtual ~IConfig() = default; - - virtual void STDMETHODCALLTYPE Read() = 0; - - virtual void STDMETHODCALLTYPE Save() = 0; - - virtual float STDMETHODCALLTYPE GetFloat(const tstring_view& section, const tstring_view& setting) const = 0; - - virtual std::vector STDMETHODCALLTYPE GetFloatArray(const tstring_view& section, const tstring_view& setting) const = 0; - - virtual int STDMETHODCALLTYPE GetInteger(const tstring_view& section, const tstring_view& setting) const = 0; - - virtual std::vector STDMETHODCALLTYPE GetIntegerArray(const tstring_view& section, const tstring_view& setting) const = 0; - - virtual tstring STDMETHODCALLTYPE GetString(const tstring_view& section, const tstring_view& setting) const = 0; - - virtual std::vector STDMETHODCALLTYPE GetStringArray(const tstring_view& section, const tstring_view& setting) const = 0; - - virtual void STDMETHODCALLTYPE SetFloat(const tstring_view& section, const tstring_view& setting, float value) = 0; - - virtual void STDMETHODCALLTYPE SetFloatArray(const tstring_view& section, const tstring_view& setting, std::vector&& values) 
= 0; - - virtual void STDMETHODCALLTYPE SetInteger(const tstring_view& section, const tstring_view& setting, int value) = 0; - - virtual void STDMETHODCALLTYPE SetIntegerArray(const tstring_view& section, const tstring_view& setting, std::vector&& values) = 0; - - virtual void STDMETHODCALLTYPE SetString(const tstring_view& section, const tstring_view& setting, tstring_view value) = 0; - - virtual void STDMETHODCALLTYPE SetStringArray(const tstring_view& section, const tstring_view& setting, std::vector&& values) = 0; -}; - -struct DECLSPEC_UUID("{599E6624-694C-41B6-B354-62EEA1132041}") IConfig; \ No newline at end of file diff --git a/Sdk/Include/IEventSource.h b/Sdk/Include/IEventSource.h deleted file mode 100644 index 5951663..0000000 --- a/Sdk/Include/IEventSource.h +++ /dev/null @@ -1,10 +0,0 @@ -#pragma once - -template -struct IEventSource -{ - virtual ~IEventSource() = default; - - virtual UUID STDMETHODCALLTYPE Listen(TEventType event, std::function function) noexcept = 0; - virtual void STDMETHODCALLTYPE Unlisten(TEventType event, UUID uuid) noexcept = 0; -}; \ No newline at end of file diff --git a/Sdk/Include/IExceptionHandler.h b/Sdk/Include/IExceptionHandler.h deleted file mode 100644 index 0d7de6e..0000000 --- a/Sdk/Include/IExceptionHandler.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include - -// {9C4C8F9C-D4C1-4749-A073-D710548D3154} -inline const GUID IID_IExceptionHandler = { 0x9c4c8f9c, 0xd4c1, 0x4749, 0xa0, 0x73, 0xd7, 0x10, 0x54, 0x8d, 0x31, 0x54 }; -struct IExceptionHandler : IComponent -{ - virtual ~IExceptionHandler() = default; - - virtual void STDMETHODCALLTYPE AttachToCurrentThread() = 0; - virtual void STDMETHODCALLTYPE DetachFromCurrentThread() = 0; - virtual void STDMETHODCALLTYPE Initialize() = 0; - virtual void STDMETHODCALLTYPE PostLoad() = 0; - virtual void STDMETHODCALLTYPE Shutdown() = 0; -}; - -struct DECLSPEC_UUID("{9C4C8F9C-D4C1-4749-A073-D710548D3154}") IExceptionHandler; \ No newline at end of file diff --git 
a/Sdk/Include/IGameServerConsole.h b/Sdk/Include/IGameServerConsole.h deleted file mode 100644 index be2f128..0000000 --- a/Sdk/Include/IGameServerConsole.h +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include -#include -#include - -// {3B2D43AC-2557-4C28-991D-A456B59D76CB} -inline const GUID IID_IGameServerConsole = { 0x3b2d43ac, 0x2557, 0x4c28, 0x99, 0x1d, 0xa4, 0x56, 0xb5, 0x9d, 0x76, 0xcb }; - -/// -/// Interface for dedicated server consoles. -/// -struct IGameServerConsole : IComponent -{ - ~IGameServerConsole() = default; - - virtual void STDMETHODCALLTYPE CloseDialog() = 0; - virtual void STDMETHODCALLTYPE ShowDialog() = 0; - - static const int ApiVersion = 1; -}; - -struct DECLSPEC_UUID("{3B2D43AC-2557-4C28-991D-A456B59D76CB}") IGameServerConsole; \ No newline at end of file diff --git a/Sdk/Include/ImGui/Public/IImGuiLayer.h b/Sdk/Include/ImGui/Public/IImGuiLayer.h new file mode 100644 index 0000000..8c907a2 --- /dev/null +++ b/Sdk/Include/ImGui/Public/IImGuiLayer.h @@ -0,0 +1,16 @@ +#pragma once + +struct IImGuiLayer +{ + virtual ~IImGuiLayer() { } + + virtual void PreBeginFrame() = 0; + + virtual void BeginFrame() = 0; + + virtual void EndFrame() = 0; + + virtual bool WantsControlFocus() const = 0; + + virtual bool IsActive() const = 0; +}; \ No newline at end of file diff --git a/Sdk/Include/ImGui/Public/IImGuiService.h b/Sdk/Include/ImGui/Public/IImGuiService.h new file mode 100644 index 0000000..948f140 --- /dev/null +++ b/Sdk/Include/ImGui/Public/IImGuiService.h @@ -0,0 +1,24 @@ +#pragma once + +#include "Core/Public/IGameService.h" + +struct IImGuiLayer; + +DEFINE_SERVICE("{B2D9DF30-25ED-4312-9DC2-343DAE156182}", IImGuiService) +{ + virtual bool PreBeginFrame() = 0; + + virtual void BeginFrame() = 0; + + virtual void EndFrame() = 0; + + virtual void* GetContext() const = 0; + + virtual bool HasControlFocus() = 0; + + virtual void SetControlFocus(bool focused) = 0; + + virtual void RegisterLayer(std::shared_ptr layer) = 0; +}; + +void 
ImGuiServiceCreate(); \ No newline at end of file diff --git a/Sdk/Include/ImGui/Public/ImGuiInit.h b/Sdk/Include/ImGui/Public/ImGuiInit.h new file mode 100644 index 0000000..de06431 --- /dev/null +++ b/Sdk/Include/ImGui/Public/ImGuiInit.h @@ -0,0 +1,3 @@ +#pragma once + +void ImGuiLayersCreate(); diff --git a/Sdk/Include/MasterServer/IGiantsApiClient.h b/Sdk/Include/MasterServer/IGiantsApiClient.h deleted file mode 100644 index 071c5d3..0000000 --- a/Sdk/Include/MasterServer/IGiantsApiClient.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include - -#include -#include - -using njson = nlohmann::json; - -typedef std::future> ServerInfoFuture; - -// {EE129A81-0A86-49C4-8D23-A771A7350952} -inline const GUID IID_IGiantsApiClient = { 0xee129a81, 0xa86, 0x49c4, 0x8d, 0x23, 0xa7, 0x71, 0xa7, 0x35, 0x9, 0x52 }; - -struct IGiantsApiClient : IComponent -{ - virtual ~IGiantsApiClient() = default; - - virtual void STDMETHODCALLTYPE DeleteServerInformationAsync() = 0; - virtual ServerInfoFuture STDMETHODCALLTYPE GetServerInformationAsync() = 0; - virtual void STDMETHODCALLTYPE PostServerInformationAsync(const njson& requestBody) = 0; -}; - -struct DECLSPEC_UUID("{EE129A81-0A86-49C4-8D23-A771A7350952}") IGiantsApiClient; \ No newline at end of file diff --git a/Sdk/Include/Navigation/Public/Core.h b/Sdk/Include/Navigation/Public/Core.h new file mode 100644 index 0000000..ffce290 --- /dev/null +++ b/Sdk/Include/Navigation/Public/Core.h @@ -0,0 +1,7 @@ +#pragma once + +// Common components +#include "Path.h" + +// Utilities +#include "PathUtil.h" \ No newline at end of file diff --git a/Sdk/Include/Navigation/Public/NavMesh.h b/Sdk/Include/Navigation/Public/NavMesh.h new file mode 100644 index 0000000..326ba8b --- /dev/null +++ b/Sdk/Include/Navigation/Public/NavMesh.h @@ -0,0 +1,17 @@ +#pragma once + +#include "DetourNavMeshQuery.h" + +namespace Nav +{ + class NavMesh + { + public: + static std::shared_ptr Load(const std::filesystem::path& path); + std::shared_ptr GetQuery() 
const; + + private: + std::shared_ptr m_navMesh; + std::shared_ptr m_navMeshQuery; + }; +} \ No newline at end of file diff --git a/Sdk/Include/Navigation/Public/Path.h b/Sdk/Include/Navigation/Public/Path.h new file mode 100644 index 0000000..a3c2315 --- /dev/null +++ b/Sdk/Include/Navigation/Public/Path.h @@ -0,0 +1,96 @@ +#pragma once + +#include "AI/Public/Goal.h" + +namespace Nav +{ + // Forward declarations + namespace Private + { + struct DetourPath; + } + + enum class PathFlags + { + None = 0x0, + IsPartial = 0x01, + }; + + const int InvalidGoalIndex = -1; + + struct Path + { + Path( + const P3D& startPos, + const P3D& endPos, + PathFlags flags, + std::shared_ptr<Private::DetourPath> detourPath, + std::vector<AI::MoveGoal>&& moveGoals) + : MoveGoals(std::move(moveGoals)), + Flags(flags), + DetourPath(std::move(detourPath)) + { + if (MoveGoals.size() > 0) + CurrentGoal = 0; + } + + bool CompleteGoal() + { + if (IsValid()) + { + if (++CurrentGoal >= (int)MoveGoals.size()) + { + CurrentGoal = InvalidGoalIndex; + return false; + } + + return true; + } + + return false; + } + + const AI::MoveGoal* GetGoal() const + { + if (IsValid()) + { + return &MoveGoals[CurrentGoal]; + } + + return nullptr; + } + + const AI::MoveGoal* GetNextGoal() const + { + if (CurrentGoal > InvalidGoalIndex && CurrentGoal + 1 < (int)MoveGoals.size()) + { + return &MoveGoals[CurrentGoal + 1]; + } + + return nullptr; + } + + const AI::MoveGoal* GetFinalGoal() const + { + if (IsValid()) + { + return &MoveGoals.back(); + } + + return nullptr; + } + + bool IsValid() const { return !MoveGoals.empty() && CurrentGoal > InvalidGoalIndex; } + bool IsPartial() const { return FlagIsSetE(Flags, PathFlags::IsPartial); } + + int CurrentGoal = InvalidGoalIndex; + std::vector<AI::MoveGoal> MoveGoals; + PathFlags Flags{}; + + private: + std::shared_ptr<Private::DetourPath> DetourPath; + + friend class PathUtil; + friend class MoveEnactorSystem; + }; +} diff --git a/Sdk/Include/Navigation/Public/PathDebugDraw.h b/Sdk/Include/Navigation/Public/PathDebugDraw.h new file mode 100644 index
0000000..6e43c50 --- /dev/null +++ b/Sdk/Include/Navigation/Public/PathDebugDraw.h @@ -0,0 +1,55 @@ +#pragma once + +#include "DebugDraw.h" +#include "DetourDebugDraw.h" + +namespace Nav +{ + class PathDebugDraw : public duDebugDraw + { + public: + ////////////////////////////////////////////////// + // Recast interface implementation + virtual void depthMask(bool state) override { } + + virtual void texture(bool state) override { } + + virtual void begin(duDebugDrawPrimitives prim, float size = 1.0f) override + { + m_primitiveType = prim; + } + + virtual void vertex(const float* pos, unsigned int color) override + { + vertex(pos[0], pos[1], pos[2], color, 0.0f, 0.0f); + } + + virtual void vertex(const float x, const float y, const float z, unsigned int color) override + { + vertex(x, y, z, color, 0.0f, 0.0f); + } + + virtual void vertex(const float* pos, unsigned int color, const float* uv) override + { + vertex(pos[0], pos[1], pos[2], color, uv[0], uv[1]); + } + + virtual void vertex(const float x, const float y, const float z, unsigned int color, const float u, const float v) override; + + virtual void end() override; + + ////////////////////////////////////////////////// + // Giants-specific code + + void StartFrame(); + void EndFrame(); + + private: + std::vector m_lineVertices; + std::vector m_lineColors; + + duDebugDrawPrimitives m_primitiveType = DU_DRAW_LINES; + }; + + extern PathDebugDraw g_PathDebugDraw; +} \ No newline at end of file diff --git a/Sdk/Include/Navigation/Public/PathUtil.h b/Sdk/Include/Navigation/Public/PathUtil.h new file mode 100644 index 0000000..83de8ae --- /dev/null +++ b/Sdk/Include/Navigation/Public/PathUtil.h @@ -0,0 +1,17 @@ +#pragma once + +#include "AI/Public/Goal.h" +#include "Navigation/Public/NavMesh.h" +#include "Navigation/Public/Path.h" + +const int DT_INVALID_POLY = 0; + +namespace Nav +{ + class PathUtil + { + public: + static std::shared_ptr GetPath(ECS::Entity* entity, Nav::NavMesh* navMesh, const P3D& startPos, const P3D& endPos,
AI::MoveGoalParams* params = nullptr); + static void SetPath(ECS::Entity* entity, std::shared_ptr path); + }; +} \ No newline at end of file diff --git a/Sdk/Include/GameServerEvents.h b/Sdk/Include/Network/Public/GameServerEvents.h similarity index 96% rename from Sdk/Include/GameServerEvents.h rename to Sdk/Include/Network/Public/GameServerEvents.h index 44e2c2a..823d62f 100644 --- a/Sdk/Include/GameServerEvents.h +++ b/Sdk/Include/Network/Public/GameServerEvents.h @@ -1,7 +1,6 @@ #pragma once -#include - +#include "Core/Public/DataTypes.h" #include "NetCommon.h" /// diff --git a/Sdk/Include/IGameServer.h b/Sdk/Include/Network/Public/IGameServer.h similarity index 68% rename from Sdk/Include/IGameServer.h rename to Sdk/Include/Network/Public/IGameServer.h index 33c8792..fc6fe73 100644 --- a/Sdk/Include/IGameServer.h +++ b/Sdk/Include/Network/Public/IGameServer.h @@ -4,10 +4,7 @@ #include #include -#include -#include -#include - +#include "Core/Public/Core.h" #include "GameServerEvents.h" #include "NetCommon.h" @@ -17,7 +14,7 @@ inline const GUID IID_IGameServer = { 0xb2d67ee7, 0x8063, 0x488f, 0xb3, 0xb9, 0x /// /// Defines an API for communicating with the game server. /// -struct IGameServer : IComponent, IEventSource +DEFINE_SERVICE_MULTI("{B2D67EE7-8063-488F-B3B9-E7DA675CB752}", IGameServer, IEventSource) { virtual ~IGameServer() = default; @@ -28,14 +25,14 @@ struct IGameServer : IComponent, IEventSourceText color. /// Flags for the message. /// The index to send the message to. If 0, it will be sent to all players. - virtual void STDMETHODCALLTYPE SendChatMessage(const tstring_view& message, ChatColor color, int flags, PlayerIndex indexTo) = 0; + virtual void SendChatMessage(const tstring_view& message, ChatColor color, ChatFlag flags, PlayerIndex indexTo) = 0; /// /// Bans the player at the specified index. /// /// The player index. 
/// - virtual void STDMETHODCALLTYPE BanPlayer(int index) = 0; + virtual void BanPlayer(int index) = 0; /// /// Kicks the player at the specified index. @@ -43,19 +40,19 @@ struct IGameServer : IComponent, IEventSourceThe player index. /// The reason for kicking the player. /// - virtual void STDMETHODCALLTYPE KickPlayer(int index, KickReason reason) = 0; + virtual void KickPlayer(int index, KickReason reason) = 0; /// /// Gets player data for the specified index. /// /// The zero-based player index. /// std::out_of_range - virtual const std::shared_ptr STDMETHODCALLTYPE GetPlayer(int index) const = 0; + virtual const std::shared_ptr GetPlayer(int index) const = 0; /// /// Gets data for all players in the current game. /// - virtual std::vector> STDMETHODCALLTYPE GetPlayers() const = 0; + virtual std::vector> GetPlayers() const = 0; /// /// Toggles or increments the specified game option. @@ -63,18 +60,18 @@ struct IGameServer : IComponent, IEventSource /// /// - virtual void STDMETHODCALLTYPE ChangeGameOption(GameOption option) = 0; + virtual void ChangeGameOption(GameOption option) = 0; /// /// Gets details for the current game. /// - virtual const std::shared_ptr STDMETHODCALLTYPE GetGameDetails() const = 0; + virtual const std::shared_ptr GetGameDetails() const = 0; /// /// Modifies the settings for the current game. /// /// The game details. - virtual void STDMETHODCALLTYPE ChangeGameDetails(const NetGameDetails& gameDetails) = 0; + virtual void ChangeGameDetails(const NetGameDetails& gameDetails) = 0; }; struct DECLSPEC_UUID("{B2D67EE7-8063-488F-B3B9-E7DA675CB752}") IGameServer; \ No newline at end of file diff --git a/Sdk/Include/Network/Public/IGameServerConsole.h b/Sdk/Include/Network/Public/IGameServerConsole.h new file mode 100644 index 0000000..20fa98d --- /dev/null +++ b/Sdk/Include/Network/Public/IGameServerConsole.h @@ -0,0 +1,16 @@ +#pragma once + +#include "IGameServer.h" + +/// +/// Interface for dedicated server consoles. 
+/// +DEFINE_SERVICE("{3B2D43AC-2557-4C28-991D-A456B59D76CB}", IGameServerConsole) +{ + virtual ~IGameServerConsole() = default; + + virtual void CloseDialog() = 0; + virtual void ShowDialog() = 0; + + static const int ApiVersion = 1; +}; \ No newline at end of file diff --git a/Sdk/Include/Network/Public/IGiantsApiClient.h b/Sdk/Include/Network/Public/IGiantsApiClient.h new file mode 100644 index 0000000..37c1bef --- /dev/null +++ b/Sdk/Include/Network/Public/IGiantsApiClient.h @@ -0,0 +1,22 @@ +#pragma once + +#include + +#include "PlayerInfoResponse.h" +#include "ServerInfoResponse.h" + +typedef std::future> ServerInfoFuture; + +// {EE129A81-0A86-49C4-8D23-A771A7350952} +inline const GUID IID_IGiantsApiClient = { 0xee129a81, 0xa86, 0x49c4, 0x8d, 0x23, 0xa7, 0x71, 0xa7, 0x35, 0x9, 0x52 }; + +DEFINE_SERVICE("{EE129A81-0A86-49C4-8D23-A771A7350952}", IGiantsApiClient) +{ + virtual ~IGiantsApiClient() = default; + + virtual void DeleteServerInformationAsync(tstring_view gameName, int hostPort) = 0; + virtual ServerInfoFuture GetServerInformationAsync() = 0; + virtual void PostServerInformationAsync(const nlohmann::json& requestBody) = 0; +}; + +struct DECLSPEC_UUID("{EE129A81-0A86-49C4-8D23-A771A7350952}") IGiantsApiClient; \ No newline at end of file diff --git a/Sdk/Include/NetCommon.h b/Sdk/Include/Network/Public/NetCommon.h similarity index 96% rename from Sdk/Include/NetCommon.h rename to Sdk/Include/Network/Public/NetCommon.h index 6fa358d..921e656 100644 --- a/Sdk/Include/NetCommon.h +++ b/Sdk/Include/Network/Public/NetCommon.h @@ -17,6 +17,14 @@ enum class ChatColor : int Mission = 6 }; +enum class ChatFlag +{ + None = 0, + + InsertName = 1 << 6, + Team = 1 << 5 +}; + enum class GameOption : unsigned char { DamageTeammates, // Toggle damage teammates on/off diff --git a/Sdk/Include/MasterServer/PlayerInfoResponse.h b/Sdk/Include/Network/Public/PlayerInfoResponse.h similarity index 100% rename from Sdk/Include/MasterServer/PlayerInfoResponse.h rename to
Sdk/Include/Network/Public/PlayerInfoResponse.h diff --git a/Sdk/Include/MasterServer/ServerInfoResponse.h b/Sdk/Include/Network/Public/ServerInfoResponse.h similarity index 97% rename from Sdk/Include/MasterServer/ServerInfoResponse.h rename to Sdk/Include/Network/Public/ServerInfoResponse.h index 6924833..36cac67 100644 --- a/Sdk/Include/MasterServer/ServerInfoResponse.h +++ b/Sdk/Include/Network/Public/ServerInfoResponse.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "Core/Public/Version.h" using njson = nlohmann::json; diff --git a/ServerConsoleExample/ServerConsoleApp.cpp b/ServerConsoleExample/ServerConsoleApp.cpp index 3df4ec7..4fc5f39 100644 --- a/ServerConsoleExample/ServerConsoleApp.cpp +++ b/ServerConsoleExample/ServerConsoleApp.cpp @@ -32,19 +32,21 @@ BOOL ServerConsoleApp::ExitInstance() return CWinApp::ExitInstance(); } -IGameServerConsole* ServerConsoleApp::InitializeDialog(IComponentContainer* container) +void ServerConsoleApp::InitializeDialog(IGameServiceProvider* serviceProvider) { // Create the server console window. // As this is also a Component, Giants will clean up this object automatically once // it is no longer needed (i.e, there is no need to call delete). 
- auto* dialog = new ServerDialog(container); - m_pMainWnd = dialog; - return dialog; + auto dialog = std::make_shared(serviceProvider); + m_pMainWnd = dialog.get(); + + serviceProvider->Add(dialog); } __declspec(dllexport) void CreateServerConsole( int apiVersion, - IComponentContainer* container) + HWND hWnd, + IGameServiceProvider* serviceProvider) { if (apiVersion > 1) { @@ -52,5 +54,5 @@ __declspec(dllexport) void CreateServerConsole( } AFX_MANAGE_STATE(AfxGetStaticModuleState()); - ConsoleApp.InitializeDialog(container); + ConsoleApp.InitializeDialog(serviceProvider); } \ No newline at end of file diff --git a/ServerConsoleExample/ServerConsoleApp.h b/ServerConsoleExample/ServerConsoleApp.h index 1386dc1..58d28e1 100644 --- a/ServerConsoleExample/ServerConsoleApp.h +++ b/ServerConsoleExample/ServerConsoleApp.h @@ -6,8 +6,7 @@ #include "resource.h" // main symbols #include "ServerDialog.h" -#include -#include +#include "Network/Public/IGameServerConsole.h" class ServerConsoleApp : public CWinApp { @@ -18,7 +17,7 @@ public: BOOL InitInstance() override; BOOL ExitInstance() override; - IGameServerConsole* InitializeDialog(IComponentContainer* container); + void InitializeDialog(IGameServiceProvider* serviceProvider); DECLARE_MESSAGE_MAP() }; diff --git a/ServerConsoleExample/ServerDialog.cpp b/ServerConsoleExample/ServerDialog.cpp index 2305716..45e5cb1 100644 --- a/ServerConsoleExample/ServerDialog.cpp +++ b/ServerConsoleExample/ServerDialog.cpp @@ -6,11 +6,11 @@ IMPLEMENT_DYNAMIC(ServerDialog, CDialogEx) -ServerDialog::ServerDialog(IComponentContainer* container, CWnd* parent) - : ComponentBase(container), - CDialogEx(IDD_SERVER, parent) +ServerDialog::ServerDialog(IGameServiceProvider* serviceProvider, CWnd* parent) + : CDialogEx(IDD_SERVER, parent), + m_serviceProvider(serviceProvider) { - const auto& pGameServer = m_pContainer->Get(); + const auto& pGameServer = m_serviceProvider->Get(); using namespace std::placeholders; m_playerConnectedEventHandle = 
pGameServer->Listen(GameServerEventType::PlayerConnected, std::bind(&ServerDialog::HandlePlayerConnected, this, _1)); @@ -23,7 +23,7 @@ ServerDialog::~ServerDialog() { try { - const auto& pGameServer = m_pContainer->Get(); + const auto& pGameServer = m_serviceProvider->Get(); pGameServer->Unlisten(GameServerEventType::PlayerConnected, m_playerConnectedEventHandle); pGameServer->Unlisten(GameServerEventType::PlayerDisconnected, m_playerDisconnectedEventHandle); @@ -128,8 +128,8 @@ void ServerDialog::RefreshPlayers() PlayersListCtrl.DeleteAllItems(); - const auto& pTextLookupService = m_pContainer->Get(); - const auto& pGameServer = m_pContainer->Get(); + const auto& pTextLookupService = m_serviceProvider->Get(); + const auto& pGameServer = m_serviceProvider->Get(); for (const auto& player : pGameServer->GetPlayers()) { if (player->host) @@ -179,7 +179,7 @@ void ServerDialog::HandleWorldLoaded(const GameServerEvent& event) { AFX_MANAGE_STATE(AfxGetStaticModuleState()); - const auto& pGameServer = m_pContainer->Get(); + const auto& pGameServer = m_serviceProvider->Get(); auto details = pGameServer->GetGameDetails(); @@ -206,7 +206,7 @@ void ServerDialog::OnBnClickedBan() const PlayerIndex playerIndex = (PlayerIndex)PlayersListCtrl.GetItemData(selection); if (playerIndex > 0) { - const auto& pGameServer = m_pContainer->Get(); + const auto& pGameServer = m_serviceProvider->Get(); pGameServer->BanPlayer(playerIndex); } } @@ -222,7 +222,7 @@ void ServerDialog::OnBnClickedKick() const PlayerIndex playerIndex = (PlayerIndex)PlayersListCtrl.GetItemData(selection); if (playerIndex > 0) { - const auto& pGameServer = m_pContainer->Get(); + const auto& pGameServer = m_serviceProvider->Get(); pGameServer->KickPlayer(playerIndex, KickReason::Removed); } } diff --git a/ServerConsoleExample/ServerDialog.h b/ServerConsoleExample/ServerDialog.h index 2e992c6..9a9d911 100644 --- a/ServerConsoleExample/ServerDialog.h +++ b/ServerConsoleExample/ServerDialog.h @@ -1,27 +1,26 @@ #pragma 
once -#include -#include -#include -#include -#include +#include "Network/Public/IGameServer.h" +#include "Network/Public/IGameServerConsole.h" +#include "Core/Public/IGameServiceProvider.h" +#include "Core/Public/ITextLookupService.h" // ServerDialog dialog -class ServerDialog : public CDialogEx, public ComponentBase +class ServerDialog : public CDialogEx, public IGameServerConsole { DECLARE_DYNAMIC(ServerDialog) public: ~ServerDialog(); - ServerDialog(IComponentContainer* container, CWnd* parent = nullptr); + ServerDialog(IGameServiceProvider* serviceProvider, CWnd* parent = nullptr); void CreateColumns(); void RefreshPlayers(); static void STDMETHODCALLTYPE TimerCallback(HWND hwnd, UINT uMsg, UINT idEvent, DWORD dwTime); - void STDMETHODCALLTYPE CloseDialog() override; - void STDMETHODCALLTYPE ShowDialog() override; + void CloseDialog() override; + void ShowDialog() override; void HandlePlayerConnected(const GameServerEvent& event); void HandlePlayerDisconnected(const GameServerEvent& event); @@ -57,6 +56,7 @@ private: UUID m_playerDisconnectedEventHandle{}; UUID m_playerChatMessageHandle{}; UUID m_worldLoadedHandle{}; + IGameServiceProvider* m_serviceProvider{}; const int NumColumns = 5; };