GiantsTools/Sdk/External/DirectXMath/SHMath/DirectXSHD3D11.cpp

//-------------------------------------------------------------------------------------
// DirectXSHD3D11.cpp -- C++ Spherical Harmonics Math Library
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/p/?LinkId=262885
//-------------------------------------------------------------------------------------

#pragma warning( disable : 4616 4619 4061 4265 4626 5039 )
// C4616/C4619 #pragma warning warnings
// C4061 numerator 'identifier' in switch of enum 'enumeration' is not explicitly handled by a case label
// C4265 class has virtual functions, but destructor is not virtual
// C4626 assignment operator was implicitly defined as deleted
// C5039 pointer or reference to potentially throwing function passed to extern C function under - EHc

#pragma warning(push)
#pragma warning(disable: 4365)
#include <d3d11_1.h>
#pragma warning(pop)

#include "DirectXSH.h"

#include <DirectXPackedVector.h>

#include <cassert>
#include <memory>
#include <malloc.h>

#include <wrl/client.h>

#ifdef __clang__
#pragma clang diagnostic ignored "-Wcovered-switch-default"
#pragma clang diagnostic ignored "-Wswitch-enum"
#endif

using namespace DirectX;

using Microsoft::WRL::ComPtr;

namespace
{
    struct aligned_deleter { void operator()(void* p) { _aligned_free(p); } };

    using ScopedAlignedArrayXMVECTOR = std::unique_ptr<DirectX::XMVECTOR, aligned_deleter>;

    //-------------------------------------------------------------------------------------
    // This code is lifted from DirectXTex http://go.microsoft.com/fwlink/?LinkId=248926
    // If you need additional DXGI format support, see DirectXTexConvert.cpp
    //-------------------------------------------------------------------------------------
#define LOAD_SCANLINE( type, func )\
        if ( size >= sizeof(type) )\
        {\
            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
            {\
                if ( dPtr >= ePtr ) break;\
                *(dPtr++) = func( sPtr++ );\
            }\
            return true;\
        }\
        return false;

#define LOAD_SCANLINE3( type, func, defvec )\
        if ( size >= sizeof(type) )\
        {\
            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
            {\
                XMVECTOR v = func( sPtr++ );\
                if ( dPtr >= ePtr ) break;\
                *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1110 );\
            }\
            return true;\
        }\
        return false;

#define LOAD_SCANLINE2( type, func, defvec )\
        if ( size >= sizeof(type) )\
        {\
            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
            {\
                XMVECTOR v = func( sPtr++ );\
                if ( dPtr >= ePtr ) break;\
                *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1100 );\
            }\
            return true;\
        }\
        return false;

#pragma warning(push)
#pragma warning(disable : 6101)
    _Success_(return)
        bool _LoadScanline(
            _Out_writes_(count) DirectX::XMVECTOR* pDestination,
            size_t count,
            _In_reads_bytes_(size) LPCVOID pSource,
            size_t size,
            DXGI_FORMAT format)
    {
        assert(pDestination && count > 0 && ((reinterpret_cast<uintptr_t>(pDestination) & 0xF) == 0));
        assert(pSource && size > 0);

        using namespace DirectX::PackedVector;

        XMVECTOR* __restrict dPtr = pDestination;
        if (!dPtr)
            return false;

        const XMVECTOR* ePtr = pDestination + count;

        switch (format)
        {
        case DXGI_FORMAT_R32G32B32A32_FLOAT:
        {
            size_t msize = (size > (sizeof(XMVECTOR)*count)) ? (sizeof(XMVECTOR)*count) : size;
            memcpy_s(dPtr, sizeof(XMVECTOR)*count, pSource, msize);
        }
        return true;

        case DXGI_FORMAT_R32G32B32_FLOAT:
            LOAD_SCANLINE3(XMFLOAT3, XMLoadFloat3, g_XMIdentityR3)

        case DXGI_FORMAT_R16G16B16A16_FLOAT:
            LOAD_SCANLINE(XMHALF4, XMLoadHalf4)

        case DXGI_FORMAT_R32G32_FLOAT:
            LOAD_SCANLINE2(XMFLOAT2, XMLoadFloat2, g_XMIdentityR3)

        case DXGI_FORMAT_R11G11B10_FLOAT:
            LOAD_SCANLINE3(XMFLOAT3PK, XMLoadFloat3PK, g_XMIdentityR3)

        case DXGI_FORMAT_R16G16_FLOAT:
            LOAD_SCANLINE2(XMHALF2, XMLoadHalf2, g_XMIdentityR3)

        case DXGI_FORMAT_R32_FLOAT:
            if (size >= sizeof(float))
            {
                const float* __restrict sPtr = reinterpret_cast<const float*>(pSource);
                for (size_t icount = 0; icount < size; icount += sizeof(float))
                {
                    XMVECTOR v = XMLoadFloat(sPtr++);
                    if (dPtr >= ePtr) break;
                    *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1000);
                }
                return true;
            }
            return false;

        case DXGI_FORMAT_R16_FLOAT:
            if (size >= sizeof(HALF))
            {
                const HALF * __restrict sPtr = reinterpret_cast<const HALF*>(pSource);
                for (size_t icount = 0; icount < size; icount += sizeof(HALF))
                {
                    if (dPtr >= ePtr) break;
                    *(dPtr++) = XMVectorSet(XMConvertHalfToFloat(*sPtr++), 0.f, 0.f, 1.f);
                }
                return true;
            }
            return false;

        default:
            return false;
        }
    }
#pragma warning(pop)

} // namespace anonymous

//-------------------------------------------------------------------------------------
// Projects a function represented in a cube map into spherical harmonics.
//
// http://msdn.microsoft.com/en-us/library/windows/desktop/ff476300.aspx
//-------------------------------------------------------------------------------------
_Use_decl_annotations_
HRESULT DirectX::SHProjectCubeMap(
    ID3D11DeviceContext *context,
    size_t order,
    ID3D11Texture2D *cubeMap,
    float *resultR,
    float *resultG,
    float* resultB) noexcept
{
    if (!context || !cubeMap)
        return E_INVALIDARG;

    if (order < XM_SH_MINORDER || order > XM_SH_MAXORDER)
        return E_INVALIDARG;

    D3D11_TEXTURE2D_DESC desc;
    cubeMap->GetDesc(&desc);

    if ((desc.ArraySize != 6)
        || (desc.Width != desc.Height)
        || (desc.SampleDesc.Count > 1))
        return E_FAIL;

    switch (desc.Format)
    {
    case DXGI_FORMAT_R32G32B32A32_FLOAT:
    case DXGI_FORMAT_R32G32B32_FLOAT:
    case DXGI_FORMAT_R16G16B16A16_FLOAT:
    case DXGI_FORMAT_R32G32_FLOAT:
    case DXGI_FORMAT_R11G11B10_FLOAT:
    case DXGI_FORMAT_R16G16_FLOAT:
    case DXGI_FORMAT_R32_FLOAT:
    case DXGI_FORMAT_R16_FLOAT:
        // See _LoadScanline to support more pixel formats
        break;

    default:
        return E_FAIL;
    }

    //--- Create a staging resource copy (if needed) to be able to read data
    ID3D11Texture2D* texture = nullptr;

    ComPtr<ID3D11Texture2D> staging;
    if (!(desc.CPUAccessFlags & D3D11_CPU_ACCESS_READ))
    {
        D3D11_TEXTURE2D_DESC sdesc = desc;
        sdesc.BindFlags = 0;
        sdesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
        sdesc.Usage = D3D11_USAGE_STAGING;

        ComPtr<ID3D11Device> device;
        context->GetDevice(&device);

        HRESULT hr = device->CreateTexture2D(&sdesc, nullptr, &staging);
        if (FAILED(hr))
            return hr;

        context->CopyResource(staging.Get(), cubeMap);

        texture = staging.Get();
    }
    else
        texture = cubeMap;

    assert(texture != nullptr);

    //--- Setup for SH projection
    ScopedAlignedArrayXMVECTOR scanline(reinterpret_cast<XMVECTOR*>(_aligned_malloc(sizeof(XMVECTOR)*desc.Width, 16)));
    if (!scanline)
        return E_OUTOFMEMORY;

    assert(desc.Width > 0);
    float fSize = static_cast<float>(desc.Width);
    float fPicSize = 1.0f / fSize;

    // index from [0,W-1], f(0) maps to -1 + 1/W, f(W-1) maps to 1 - 1/w
    // linear function x*S +B, 1st constraint means B is (-1+1/W), plug into
    // second and solve for S: S = 2*(1-1/W)/(W-1). The old code that did 
    // this was incorrect - but only for computing the differential solid
    // angle, where the final value was 1.0 instead of 1-1/w...

    float fB = -1.0f + 1.0f / fSize;
    float fS = (desc.Width > 1) ? (2.0f*(1.0f - 1.0f / fSize) / (fSize - 1.0f)) : 0.f;

    // clear out accumulation variables
    float fWt = 0.0f;

    if (resultR)
        memset(resultR, 0, sizeof(float)*order*order);
    if (resultG)
        memset(resultG, 0, sizeof(float)*order*order);
    if (resultB)
        memset(resultB, 0, sizeof(float)*order*order);

    float shBuff[XM_SH_MAXORDER*XM_SH_MAXORDER] = {};
    float shBuffB[XM_SH_MAXORDER*XM_SH_MAXORDER] = {};

    //--- Process each face of the cubemap
    for (UINT face = 0; face < 6; ++face)
    {
        UINT dindex = D3D11CalcSubresource(0, face, desc.MipLevels);

        D3D11_MAPPED_SUBRESOURCE mapped;
        HRESULT hr = context->Map(texture, dindex, D3D11_MAP_READ, 0, &mapped);
        if (FAILED(hr))
            return hr;

        const uint8_t *pSrc = reinterpret_cast<const uint8_t*>(mapped.pData);
        for (UINT y = 0; y < desc.Height; ++y)
        {
            XMVECTOR* ptr = scanline.get();
            if (!_LoadScanline(ptr, desc.Width, pSrc, mapped.RowPitch, desc.Format))
            {
                context->Unmap(texture, dindex);
                return E_FAIL;
            }

            const float v = float(y) * fS + fB;

            XMVECTOR* pixel = ptr;
            for (UINT x = 0; x < desc.Width; ++x, ++pixel)
            {
                const float u = float(x) * fS + fB;

                float ix, iy, iz;
                switch (face)
                {
                case 0: // Positive X
                    iz = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize;
                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
                    ix = 1.0f;
                    break;

                case 1: // Negative X
                    iz = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
                    ix = -1;
                    break;

                case 2: // Positive Y
                    iz = -1.0f + (2.0f * float(y) + 1.0f) * fPicSize;
                    iy = 1.0f;
                    ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
                    break;

                case 3: // Negative Y
                    iz = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
                    iy = -1.0f;
                    ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
                    break;

                case 4: // Positive Z
                    iz = 1.0f;
                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
                    ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;
                    break;

                case 5: // Negative Z
                    iz = -1.0f;
                    iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;
                    ix = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize;
                    break;

                default:
                    ix = iy = iz = 0.f;
                    assert(false);
                    break;
                }

                XMVECTOR dir = XMVectorSet(ix, iy, iz, 0);
                dir = XMVector3Normalize(dir);

                const float fDiffSolid = 4.0f / ((1.0f + u * u + v * v)*sqrtf(1.0f + u * u + v * v));
                fWt += fDiffSolid;

                XMSHEvalDirection(shBuff, order, dir);

                XMFLOAT3A clr;
                XMStoreFloat3A(&clr, *pixel);

                if (resultR) XMSHAdd(resultR, order, resultR, XMSHScale(shBuffB, order, shBuff, clr.x*fDiffSolid));
                if (resultG) XMSHAdd(resultG, order, resultG, XMSHScale(shBuffB, order, shBuff, clr.y*fDiffSolid));
                if (resultB) XMSHAdd(resultB, order, resultB, XMSHScale(shBuffB, order, shBuff, clr.z*fDiffSolid));
            }

            pSrc += mapped.RowPitch;
        }

        context->Unmap(texture, dindex);
    }

    const float fNormProj = (4.0f*XM_PI) / fWt;

    if (resultR) XMSHScale(resultR, order, resultR, fNormProj);
    if (resultG) XMSHScale(resultG, order, resultG, fNormProj);
    if (resultB) XMSHScale(resultB, order, resultB, fNormProj);

    return S_OK;
}
Update to new SDK. 2021-01-24 00:40:09 +01:00			`//-------------------------------------------------------------------------------------`
			`// DirectXSHD3D11.cpp -- C++ Spherical Harmonics Math Library`
			`//`
			`// Copyright (c) Microsoft Corporation. All rights reserved.`
			`// Licensed under the MIT License.`
			`//`
			`// http://go.microsoft.com/fwlink/p/?LinkId=262885`
			`//-------------------------------------------------------------------------------------`

			`#pragma warning( disable : 4616 4619 4061 4265 4626 5039 )`
			`// C4616/C4619 #pragma warning warnings`
			`// C4061 numerator 'identifier' in switch of enum 'enumeration' is not explicitly handled by a case label`
			`// C4265 class has virtual functions, but destructor is not virtual`
			`// C4626 assignment operator was implicitly defined as deleted`
			`// C5039 pointer or reference to potentially throwing function passed to extern C function under - EHc`

			`#pragma warning(push)`
			`#pragma warning(disable: 4365)`
			`#include <d3d11_1.h>`
			`#pragma warning(pop)`

			`#include "DirectXSH.h"`

			`#include <DirectXPackedVector.h>`

			`#include <cassert>`
			`#include <memory>`
			`#include <malloc.h>`

			`#include <wrl/client.h>`

			`#ifdef __clang__`
			`#pragma clang diagnostic ignored "-Wcovered-switch-default"`
			`#pragma clang diagnostic ignored "-Wswitch-enum"`
			`#endif`

			`using namespace DirectX;`

			`using Microsoft::WRL::ComPtr;`

			`namespace`
			`{`
			`struct aligned_deleter { void operator()(void* p) { _aligned_free(p); } };`

			`using ScopedAlignedArrayXMVECTOR = std::unique_ptr<DirectX::XMVECTOR, aligned_deleter>;`

			`//-------------------------------------------------------------------------------------`
			`// This code is lifted from DirectXTex http://go.microsoft.com/fwlink/?LinkId=248926`
			`// If you need additional DXGI format support, see DirectXTexConvert.cpp`
			`//-------------------------------------------------------------------------------------`
			`#define LOAD_SCANLINE( type, func )\`
			`if ( size >= sizeof(type) )\`
			`{\`
			`const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\`
			`for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\`
			`{\`
			`if ( dPtr >= ePtr ) break;\`
			`*(dPtr++) = func( sPtr++ );\`
			`}\`
			`return true;\`
			`}\`
			`return false;`

			`#define LOAD_SCANLINE3( type, func, defvec )\`
			`if ( size >= sizeof(type) )\`
			`{\`
			`const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\`
			`for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\`
			`{\`
			`XMVECTOR v = func( sPtr++ );\`
			`if ( dPtr >= ePtr ) break;\`
			`*(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1110 );\`
			`}\`
			`return true;\`
			`}\`
			`return false;`

			`#define LOAD_SCANLINE2( type, func, defvec )\`
			`if ( size >= sizeof(type) )\`
			`{\`
			`const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\`
			`for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\`
			`{\`
			`XMVECTOR v = func( sPtr++ );\`
			`if ( dPtr >= ePtr ) break;\`
			`*(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1100 );\`
			`}\`
			`return true;\`
			`}\`
			`return false;`

			`#pragma warning(push)`
			`#pragma warning(disable : 6101)`
			`_Success_(return)`
			`bool _LoadScanline(`
			`_Out_writes_(count) DirectX::XMVECTOR* pDestination,`
			`size_t count,`
			`_In_reads_bytes_(size) LPCVOID pSource,`
			`size_t size,`
			`DXGI_FORMAT format)`
			`{`
			`assert(pDestination && count > 0 && ((reinterpret_cast<uintptr_t>(pDestination) & 0xF) == 0));`
			`assert(pSource && size > 0);`

			`using namespace DirectX::PackedVector;`

			`XMVECTOR* __restrict dPtr = pDestination;`
			`if (!dPtr)`
			`return false;`

			`const XMVECTOR* ePtr = pDestination + count;`

			`switch (format)`
			`{`
			`case DXGI_FORMAT_R32G32B32A32_FLOAT:`
			`{`
			`size_t msize = (size > (sizeof(XMVECTOR)count)) ? (sizeof(XMVECTOR)count) : size;`
			`memcpy_s(dPtr, sizeof(XMVECTOR)*count, pSource, msize);`
			`}`
			`return true;`

			`case DXGI_FORMAT_R32G32B32_FLOAT:`
			`LOAD_SCANLINE3(XMFLOAT3, XMLoadFloat3, g_XMIdentityR3)`

			`case DXGI_FORMAT_R16G16B16A16_FLOAT:`
			`LOAD_SCANLINE(XMHALF4, XMLoadHalf4)`

			`case DXGI_FORMAT_R32G32_FLOAT:`
			`LOAD_SCANLINE2(XMFLOAT2, XMLoadFloat2, g_XMIdentityR3)`

			`case DXGI_FORMAT_R11G11B10_FLOAT:`
			`LOAD_SCANLINE3(XMFLOAT3PK, XMLoadFloat3PK, g_XMIdentityR3)`

			`case DXGI_FORMAT_R16G16_FLOAT:`
			`LOAD_SCANLINE2(XMHALF2, XMLoadHalf2, g_XMIdentityR3)`

			`case DXGI_FORMAT_R32_FLOAT:`
			`if (size >= sizeof(float))`
			`{`
			`const float* __restrict sPtr = reinterpret_cast<const float*>(pSource);`
			`for (size_t icount = 0; icount < size; icount += sizeof(float))`
			`{`
			`XMVECTOR v = XMLoadFloat(sPtr++);`
			`if (dPtr >= ePtr) break;`
			`*(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1000);`
			`}`
			`return true;`
			`}`
			`return false;`

			`case DXGI_FORMAT_R16_FLOAT:`
			`if (size >= sizeof(HALF))`
			`{`
			`const HALF * __restrict sPtr = reinterpret_cast<const HALF*>(pSource);`
			`for (size_t icount = 0; icount < size; icount += sizeof(HALF))`
			`{`
			`if (dPtr >= ePtr) break;`
			`(dPtr++) = XMVectorSet(XMConvertHalfToFloat(sPtr++), 0.f, 0.f, 1.f);`
			`}`
			`return true;`
			`}`
			`return false;`

			`default:`
			`return false;`
			`}`
			`}`
			`#pragma warning(pop)`

			`} // namespace anonymous`

			`//-------------------------------------------------------------------------------------`
			`// Projects a function represented in a cube map into spherical harmonics.`
			`//`
			`// http://msdn.microsoft.com/en-us/library/windows/desktop/ff476300.aspx`
			`//-------------------------------------------------------------------------------------`
			`_Use_decl_annotations_`
			`HRESULT DirectX::SHProjectCubeMap(`
			`ID3D11DeviceContext *context,`
			`size_t order,`
			`ID3D11Texture2D *cubeMap,`
			`float *resultR,`
			`float *resultG,`
			`float* resultB) noexcept`
			`{`
			`if (!context \|\| !cubeMap)`
			`return E_INVALIDARG;`

			`if (order < XM_SH_MINORDER \|\| order > XM_SH_MAXORDER)`
			`return E_INVALIDARG;`

			`D3D11_TEXTURE2D_DESC desc;`
			`cubeMap->GetDesc(&desc);`

			`if ((desc.ArraySize != 6)`
			`\|\| (desc.Width != desc.Height)`
			`\|\| (desc.SampleDesc.Count > 1))`
			`return E_FAIL;`

			`switch (desc.Format)`
			`{`
			`case DXGI_FORMAT_R32G32B32A32_FLOAT:`
			`case DXGI_FORMAT_R32G32B32_FLOAT:`
			`case DXGI_FORMAT_R16G16B16A16_FLOAT:`
			`case DXGI_FORMAT_R32G32_FLOAT:`
			`case DXGI_FORMAT_R11G11B10_FLOAT:`
			`case DXGI_FORMAT_R16G16_FLOAT:`
			`case DXGI_FORMAT_R32_FLOAT:`
			`case DXGI_FORMAT_R16_FLOAT:`
			`// See _LoadScanline to support more pixel formats`
			`break;`

			`default:`
			`return E_FAIL;`
			`}`

			`//--- Create a staging resource copy (if needed) to be able to read data`
			`ID3D11Texture2D* texture = nullptr;`

			`ComPtr<ID3D11Texture2D> staging;`
			`if (!(desc.CPUAccessFlags & D3D11_CPU_ACCESS_READ))`
			`{`
			`D3D11_TEXTURE2D_DESC sdesc = desc;`
			`sdesc.BindFlags = 0;`
			`sdesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;`
			`sdesc.Usage = D3D11_USAGE_STAGING;`

			`ComPtr<ID3D11Device> device;`
			`context->GetDevice(&device);`

			`HRESULT hr = device->CreateTexture2D(&sdesc, nullptr, &staging);`
			`if (FAILED(hr))`
			`return hr;`

			`context->CopyResource(staging.Get(), cubeMap);`

			`texture = staging.Get();`
			`}`
			`else`
			`texture = cubeMap;`

			`assert(texture != nullptr);`

			`//--- Setup for SH projection`
			`ScopedAlignedArrayXMVECTOR scanline(reinterpret_cast<XMVECTOR>(_aligned_malloc(sizeof(XMVECTOR)desc.Width, 16)));`
			`if (!scanline)`
			`return E_OUTOFMEMORY;`

			`assert(desc.Width > 0);`
			`float fSize = static_cast<float>(desc.Width);`
			`float fPicSize = 1.0f / fSize;`

			`// index from [0,W-1], f(0) maps to -1 + 1/W, f(W-1) maps to 1 - 1/w`
			`// linear function x*S +B, 1st constraint means B is (-1+1/W), plug into`
			`// second and solve for S: S = 2*(1-1/W)/(W-1). The old code that did`
			`// this was incorrect - but only for computing the differential solid`
			`// angle, where the final value was 1.0 instead of 1-1/w...`

			`float fB = -1.0f + 1.0f / fSize;`
			`float fS = (desc.Width > 1) ? (2.0f*(1.0f - 1.0f / fSize) / (fSize - 1.0f)) : 0.f;`

			`// clear out accumulation variables`
			`float fWt = 0.0f;`

			`if (resultR)`
			`memset(resultR, 0, sizeof(float)orderorder);`
			`if (resultG)`
			`memset(resultG, 0, sizeof(float)orderorder);`
			`if (resultB)`
			`memset(resultB, 0, sizeof(float)orderorder);`

			`float shBuff[XM_SH_MAXORDER*XM_SH_MAXORDER] = {};`
			`float shBuffB[XM_SH_MAXORDER*XM_SH_MAXORDER] = {};`

			`//--- Process each face of the cubemap`
			`for (UINT face = 0; face < 6; ++face)`
			`{`
			`UINT dindex = D3D11CalcSubresource(0, face, desc.MipLevels);`

			`D3D11_MAPPED_SUBRESOURCE mapped;`
			`HRESULT hr = context->Map(texture, dindex, D3D11_MAP_READ, 0, &mapped);`
			`if (FAILED(hr))`
			`return hr;`

			`const uint8_t pSrc = reinterpret_cast<const uint8_t>(mapped.pData);`
			`for (UINT y = 0; y < desc.Height; ++y)`
			`{`
			`XMVECTOR* ptr = scanline.get();`
			`if (!_LoadScanline(ptr, desc.Width, pSrc, mapped.RowPitch, desc.Format))`
			`{`
			`context->Unmap(texture, dindex);`
			`return E_FAIL;`
			`}`

			`const float v = float(y) * fS + fB;`

			`XMVECTOR* pixel = ptr;`
			`for (UINT x = 0; x < desc.Width; ++x, ++pixel)`
			`{`
			`const float u = float(x) * fS + fB;`

			`float ix, iy, iz;`
			`switch (face)`
			`{`
			`case 0: // Positive X`
			`iz = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize;`
			`iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;`
			`ix = 1.0f;`
			`break;`

			`case 1: // Negative X`
			`iz = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;`
			`iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;`
			`ix = -1;`
			`break;`

			`case 2: // Positive Y`
			`iz = -1.0f + (2.0f * float(y) + 1.0f) * fPicSize;`
			`iy = 1.0f;`
			`ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;`
			`break;`

			`case 3: // Negative Y`
			`iz = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;`
			`iy = -1.0f;`
			`ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;`
			`break;`

			`case 4: // Positive Z`
			`iz = 1.0f;`
			`iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;`
			`ix = -1.0f + (2.0f * float(x) + 1.0f) * fPicSize;`
			`break;`

			`case 5: // Negative Z`
			`iz = -1.0f;`
			`iy = 1.0f - (2.0f * float(y) + 1.0f) * fPicSize;`
			`ix = 1.0f - (2.0f * float(x) + 1.0f) * fPicSize;`
			`break;`

			`default:`
			`ix = iy = iz = 0.f;`
			`assert(false);`
			`break;`
			`}`

			`XMVECTOR dir = XMVectorSet(ix, iy, iz, 0);`
			`dir = XMVector3Normalize(dir);`

			`const float fDiffSolid = 4.0f / ((1.0f + u * u + v * v)sqrtf(1.0f + u u + v * v));`
			`fWt += fDiffSolid;`

			`XMSHEvalDirection(shBuff, order, dir);`

			`XMFLOAT3A clr;`
			`XMStoreFloat3A(&clr, *pixel);`

			`if (resultR) XMSHAdd(resultR, order, resultR, XMSHScale(shBuffB, order, shBuff, clr.x*fDiffSolid));`
			`if (resultG) XMSHAdd(resultG, order, resultG, XMSHScale(shBuffB, order, shBuff, clr.y*fDiffSolid));`
			`if (resultB) XMSHAdd(resultB, order, resultB, XMSHScale(shBuffB, order, shBuff, clr.z*fDiffSolid));`
			`}`

			`pSrc += mapped.RowPitch;`
			`}`

			`context->Unmap(texture, dindex);`
			`}`

			`const float fNormProj = (4.0f*XM_PI) / fWt;`

			`if (resultR) XMSHScale(resultR, order, resultR, fNormProj);`
			`if (resultG) XMSHScale(resultG, order, resultG, fNormProj);`
			`if (resultB) XMSHScale(resultB, order, resultB, fNormProj);`

			`return S_OK;`
			`}`